diff --git a/package/CHANGELOG b/package/CHANGELOG index e5e506d82a1..ea0c0f0f312 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -56,6 +56,9 @@ Enhancements * Added converter between Cartesian and Bond-Angle-Torsion coordinates (PR #2668) * Added Hydrogen Bond Lifetime via existing autocorrelation features (PR #2791) * Added Hydrogen Bond Lifetime keyword "between" (PR #2791) + * Added lib.picklable_file_io module for pickling file handlers. (PR #2723) + * Added pickle function to `Universe` and all Readers (without transformation) + (PR #2723) * Dead code removed from the TPR parser and increased test coverage (PR #2840) * TPR parser exposes the elements topology attribute (PR #2858, see Issue #2553) @@ -77,6 +80,7 @@ Changes * Removes deprecated ProgressMeter (Issue #2739) * Removes deprecated MDAnalysis.units.N_Avogadro (PR #2737) * Dropped Python 2 support + * Set Python 3.6 as the minimum supported version (Issue #2541) * Changes the minimal NumPy version to 1.16.0 (Issue #2827, PR #2831) * Sets the minimal RDKit version for CI to 2020.03.1 (Issue #2827, PR #2831) * Removes deprecated waterdynamics.HydrogenBondLifetimes (PR #2842) diff --git a/package/MDAnalysis/coordinates/DLPoly.py b/package/MDAnalysis/coordinates/DLPoly.py index b4e447aabfe..9e499a9b58a 100644 --- a/package/MDAnalysis/coordinates/DLPoly.py +++ b/package/MDAnalysis/coordinates/DLPoly.py @@ -32,6 +32,7 @@ from . import base from . 
import core +from ..lib import util _DLPOLY_UNITS = {'length': 'Angstrom', 'velocity': 'Angstrom/ps', 'time': 'ps'} @@ -149,7 +150,7 @@ def __init__(self, filename, **kwargs): super(HistoryReader, self).__init__(filename, **kwargs) # "private" file handle - self._file = open(self.filename, 'r') + self._file = util.anyopen(self.filename, 'r') self.title = self._file.readline().strip() self._levcfg, self._imcon, self.n_atoms = np.int64(self._file.readline().split()[:3]) self._has_vels = True if self._levcfg > 0 else False diff --git a/package/MDAnalysis/coordinates/GSD.py b/package/MDAnalysis/coordinates/GSD.py index 5d10ef7348c..c2bdbe7a523 100644 --- a/package/MDAnalysis/coordinates/GSD.py +++ b/package/MDAnalysis/coordinates/GSD.py @@ -44,13 +44,20 @@ .. autoclass:: GSDReader :inherited-members: +.. autoclass:: GSDPicklable + :members: + +.. autofunction:: gsd_pickle_open + """ import numpy as np -import os +import gsd +import gsd.fl import gsd.hoomd from . import base + class GSDReader(base.ReaderBase): """Reader for the GSD format. @@ -69,6 +76,10 @@ def __init__(self, filename, **kwargs): .. versionadded:: 0.17.0 + .. 
versionchanged:: 2.0.0 + Now use a picklable :class:`gsd.hoomd.HOOMDTrajectory`-- + :class:`GSDPicklable` + """ super(GSDReader, self).__init__(filename, **kwargs) self.filename = filename @@ -77,10 +88,10 @@ def __init__(self, filename, **kwargs): self.ts = self._Timestep(self.n_atoms, **self._ts_kwargs) self._read_next_timestep() - def open_trajectory(self) : + def open_trajectory(self): """opens the trajectory file using gsd.hoomd module""" self._frame = -1 - self._file = gsd.hoomd.open(self.filename,mode='rb') + self._file = gsd_pickle_open(self.filename, mode='rb') def close(self): """close reader""" @@ -97,7 +108,7 @@ def _reopen(self): self.open_trajectory() def _read_frame(self, frame): - try : + try: myframe = self._file[frame] except IndexError: raise IOError from None @@ -111,20 +122,145 @@ def _read_frame(self, frame): # set frame box dimensions self.ts.dimensions = myframe.configuration.box - for i in range(3,6) : - self.ts.dimensions[i] = np.arccos(self.ts.dimensions[i]) * 180.0 / np.pi + self.ts.dimensions[3:] = np.rad2deg(np.arccos(self.ts.dimensions[3:])) # set particle positions frame_positions = myframe.particles.position n_atoms_now = frame_positions.shape[0] - if n_atoms_now != self.n_atoms : + if n_atoms_now != self.n_atoms: raise ValueError("Frame %d has %d atoms but the initial frame has %d" " atoms. MDAnalysis in unable to deal with variable" " topology!"%(frame, n_atoms_now, self.n_atoms)) - else : + else: self.ts.positions = frame_positions return self.ts - def _read_next_timestep(self) : + def _read_next_timestep(self): """read next frame in trajectory""" return self._read_frame(self._frame + 1) + + +class GSDPicklable(gsd.hoomd.HOOMDTrajectory): + """Hoomd GSD file object (read-only) that can be pickled. + + This class provides a file-like object (as by :func:`gsd.hoomd.open`, + namely :class:`gsd.hoomd.HOOMDTrajectory`) that, unlike file objects, + can be pickled. Only read mode is supported. 
+ + When the file is pickled, filename and mode of :class:`gsd.fl.GSDFile` in + the file are saved. On unpickling, the file is opened by filename. + This means that for a successful unpickle, the original file still has to + be accessible with its filename. + + Note + ---- + Open hoomd GSD files with `gsd_pickle_open`. + After pickling, the current frame is reset. `universe.trajectory[i]` has + to be used to return to its original frame. + + Parameters + ---------- + file: :class:`gsd.fl.GSDFile` + File to access. + + Example + ------- + :: + + gsdfileobj = gsd.fl.open(name=filename, + mode='rb', + application='gsd.hoomd '+gsd.__version__, + schema='hoomd', + schema_version=[1, 3]) + file = GSDPicklable(gsdfileobj) + file_pickled = pickle.loads(pickle.dumps(file)) + + See Also + --------- + :func:`MDAnalysis.lib.picklable_file_io.FileIOPicklable` + :func:`MDAnalysis.lib.picklable_file_io.BufferIOPicklable` + :func:`MDAnalysis.lib.picklable_file_io.TextIOPicklable` + :func:`MDAnalysis.lib.picklable_file_io.GzipPicklable` + :func:`MDAnalysis.lib.picklable_file_io.BZ2Picklable` + + + .. versionadded:: 2.0.0 + """ + def __getstate__(self): + return self.file.name, self.file.mode + + def __setstate__(self, args): + gsd_version = gsd.__version__ + schema_version = [1, 4] if gsd_version >= '1.9.0' else [1, 3] + gsdfileobj = gsd.fl.open(name=args[0], + mode=args[1], + application='gsd.hoomd ' + gsd_version, + schema='hoomd', + schema_version=schema_version) + self.__init__(gsdfileobj) + + +def gsd_pickle_open(name, mode='rb'): + """Open hoomd schema GSD file with pickle function implemented. + + This function returns a GSDPicklable object. It can be used as a + context manager, and replace the built-in :func:`gsd.hoomd.open` function + in read mode that only returns an unpicklable file object. + + Schema version will depend on the version of gsd module. + + Note + ---- + Can be only used with read mode. 
+ + Parameters + ---------- + name : str + a filename given a text or byte string. + mode: {'r', 'rb'} (optional) + 'r', 'rb': open for reading; + + Returns + ------- + stream-like object: GSDPicklable + + Raises + ------ + ValueError + if `mode` is not one of the allowed read modes + + Examples + ------- + open as context manager:: + + with gsd_pickle_open('filename') as f: + line = f.readline() + + open as function:: + + f = gsd_pickle_open('filename') + line = f.readline() + f.close() + + See Also + -------- + :func:`MDAnalysis.lib.util.anyopen` + :func:`MDAnalysis.lib.picklable_file_io.pickle_open` + :func:`MDAnalysis.lib.picklable_file_io.bz2_pickle_open` + :func:`MDAnalysis.lib.picklable_file_io.gzip_pickle_open` + :func:`gsd.hoomd.open` + + + .. versionadded:: 2.0.0 + """ + gsd_version = gsd.__version__ + schema_version = [1, 4] if gsd_version >= '1.9.0' else [1, 3] + if mode not in {'r', 'rb'}: + raise ValueError("Only read mode ('r', 'rb') " + "files can be pickled.") + gsdfileobj = gsd.fl.open(name=name, + mode=mode, + application='gsd.hoomd ' + gsd_version, + schema='hoomd', + schema_version=schema_version) + return GSDPicklable(gsdfileobj) diff --git a/package/MDAnalysis/coordinates/TRJ.py b/package/MDAnalysis/coordinates/TRJ.py index 04c1a463f4c..09810736cb1 100644 --- a/package/MDAnalysis/coordinates/TRJ.py +++ b/package/MDAnalysis/coordinates/TRJ.py @@ -85,6 +85,8 @@ .. autoclass:: NCDFWriter :members: +.. autoclass:: NCDFPicklable + :members: .. _ascii-trajectories: @@ -158,7 +160,6 @@ import MDAnalysis from . import base from ..lib import util - logger = logging.getLogger("MDAnalysis.coordinates.AMBER") @@ -450,6 +451,9 @@ class NCDFReader(base.ReaderBase): .. versionchanged:: 1.0.0 Support for reading `degrees` units for `cell_angles` has now been removed (Issue #2327) + .. versionchanged:: 2.0.0 + Now use a picklable :class:`scipy.io.netcdf.netcdf_file`-- + :class:`NCDFPicklable`. 
""" @@ -469,8 +473,8 @@ def __init__(self, filename, n_atoms=None, mmap=None, **kwargs): super(NCDFReader, self).__init__(filename, **kwargs) - self.trjfile = scipy.io.netcdf.netcdf_file(self.filename, - mmap=self._mmap) + self.trjfile = NCDFPicklable(self.filename, + mmap=self._mmap) # AMBER NetCDF files should always have a convention try: @@ -1075,3 +1079,56 @@ def close(self): if self.trjfile is not None: self.trjfile.close() self.trjfile = None + + +class NCDFPicklable(scipy.io.netcdf.netcdf_file): + """NetCDF file object (read-only) that can be pickled. + + This class provides a file-like object (as returned by + :class:`scipy.io.netcdf.netcdf_file`) that, + unlike standard Python file objects, + can be pickled. Only read mode is supported. + + When the file is pickled, filename and mmap of the open file handle in + the file are saved. On unpickling, the file is opened by filename, + and the mmap file is loaded. + This means that for a successful unpickle, the original file still has to + be accessible with its filename. + + Parameters + ---------- + filename : str or file-like + a filename given a text or byte string. + mmap : None or bool, optional + Whether to mmap `filename` when reading. True when `filename` + is a file name, False when `filename` is a file-like object. + + Example + ------- + :: + + f = NCDFPicklable(NCDF) + print(f.variables['coordinates'].data) + f.close() + + can also be used as context manager:: + + with NCDFPicklable(NCDF) as f: + print(f.variables['coordinates'].data) + + See Also + --------- + :class:`MDAnalysis.lib.picklable_file_io.FileIOPicklable` + :class:`MDAnalysis.lib.picklable_file_io.BufferIOPicklable` + :class:`MDAnalysis.lib.picklable_file_io.TextIOPicklable` + :class:`MDAnalysis.lib.picklable_file_io.GzipPicklable` + :class:`MDAnalysis.lib.picklable_file_io.BZ2Picklable` + + + .. 
versionadded:: 2.0.0 + """ + def __getstate__(self): + return self.filename, self.use_mmap + + def __setstate__(self, args): + self.__init__(args[0], mmap=args[1]) diff --git a/package/MDAnalysis/coordinates/base.py b/package/MDAnalysis/coordinates/base.py index 01700d05361..30cf1e89f03 100644 --- a/package/MDAnalysis/coordinates/base.py +++ b/package/MDAnalysis/coordinates/base.py @@ -226,6 +226,7 @@ class Timestep(object): create a timestep object with space for n_atoms + .. versionchanged:: 0.11.0 Added :meth:`from_timestep` and :meth:`from_coordinates` constructor methods. @@ -233,6 +234,9 @@ class Timestep(object): :attr:`n_atoms` now a read only property. :attr:`frame` now 0-based instead of 1-based. Attributes `status` and `step` removed. + .. versionchanged:: 2.0.0 + Timestep now can be (un)pickled. Weakref for Reader + will be dropped. """ order = 'F' @@ -300,7 +304,6 @@ def __init__(self, n_atoms, **kwargs): # set up aux namespace for adding auxiliary data self.aux = Namespace() - @classmethod def from_timestep(cls, other, **kwargs): """Create a copy of another Timestep, in the format of this Timestep @@ -381,6 +384,22 @@ def from_coordinates(cls, return ts + def __getstate__(self): + # The `dt` property is lazy loaded. + # We need to load it once from the `_reader` (if exists) + # attached to this timestep to get the dt value. + # This will help to (un)pickle a `Timestep` without pickling `_reader` + # and retain its dt value. 
+ self.dt + + state = self.__dict__.copy() + state.pop('_reader', None) + + return state + + def __setstate__(self, state): + self.__dict__.update(state) + def _init_unitcell(self): """Create custom datastructure for :attr:`_unitcell`.""" # override for other Timesteps @@ -442,7 +461,7 @@ def __getitem__(self, atoms): return self._pos[atoms] else: raise TypeError - + def __getattr__(self, attr): # special-case timestep info if attr in ('velocities', 'forces', 'positions'): @@ -1400,6 +1419,9 @@ class ProtoReader(IOBase, metaclass=_Readermeta): .. versionchanged:: 0.11.0 Frames now 0-based instead of 1-based + .. versionchanged:: 2.0.0 + Now supports (un)pickle. Upon unpickling, + the current timestep is retained by reconstruction. """ #: The appropriate Timestep class, e.g. @@ -2060,6 +2082,9 @@ def _apply_transformations(self, ts): return ts + def __setstate__(self, state): + self.__dict__ = state + self[self.ts.frame] class ReaderBase(ProtoReader): diff --git a/package/MDAnalysis/coordinates/chain.py b/package/MDAnalysis/coordinates/chain.py index d76f34a1140..9454397e794 100644 --- a/package/MDAnalysis/coordinates/chain.py +++ b/package/MDAnalysis/coordinates/chain.py @@ -211,6 +211,9 @@ class ChainReader(base.ProtoReader): added ``continuous`` trajectory option .. versionchanged:: 0.19.0 limit output of __repr__ + .. versionchanged:: 2.0.0 + Now ChainReader can be (un)pickled. Upon unpickling, + current timestep is retained. """ format = 'CHAIN' @@ -414,6 +417,25 @@ def _get_local_frame(self, k): f = k - self._start_frames[i] return i, f + def __getstate__(self): + state = self.__dict__.copy() + # save ts temporarily otherwise it will be changed during rewinding. + state['ts'] = self.ts.__deepcopy__() + + # the ts.frame of each reader is set to the chained frame index during + # iteration, thus we need to rewind the readers that have been used. 
+ # PR #2723 + for reader in state['readers'][:self.__active_reader_index + 1]: + reader.rewind() + + # retrieve the current ts + self.ts = state['ts'] + return state + + def __setstate__(self, state): + self.__dict__.update(state) + self.ts.frame = self.__current_frame + # methods that can change with the current reader def convert_time_from_native(self, t): return self.active_reader.convert_time_from_native(t) diff --git a/package/MDAnalysis/coordinates/chemfiles.py b/package/MDAnalysis/coordinates/chemfiles.py index 9d01f7d6595..8538c8fc27c 100644 --- a/package/MDAnalysis/coordinates/chemfiles.py +++ b/package/MDAnalysis/coordinates/chemfiles.py @@ -37,8 +37,8 @@ .. autoclass:: ChemfilesWriter +.. autoclass:: ChemfilesPicklable """ -import numpy as np from distutils.version import LooseVersion import warnings @@ -48,6 +48,14 @@ import chemfiles except ImportError: HAS_CHEMFILES = False + + # Allow building documentation even if chemfiles is not installed + import imp + + class MockTrajectory: + pass + chemfiles = imp.new_module("chemfiles") + chemfiles.Trajectory = MockTrajectory else: HAS_CHEMFILES = True @@ -132,7 +140,7 @@ def _open(self): if isinstance(self.filename, chemfiles.Trajectory): self._file = self.filename else: - self._file = chemfiles.Trajectory(self.filename, 'r', self._format) + self._file = ChemfilesPicklable(self.filename, 'r', self._format) def close(self): """close reader""" @@ -381,3 +389,71 @@ def _topology_to_chemfiles(self, obj, n_atoms): topology.add_bond(bond.atoms[0].ix, bond.atoms[1].ix) return topology + + +class ChemfilesPicklable(chemfiles.Trajectory): + """Chemfiles file object (read-only) that can be pickled. + + This class provides a file-like object (as returned by + :class:`chemfiles.Trajectory`) that, + unlike standard Python file objects, + can be pickled. Only read mode is supported. + + When the file is pickled, path, mode, and format of the file handle + are saved. 
On unpickling, the file is opened by path with mode, + and saved format. + This means that for a successful unpickle, the original file still has + to be accessible with its filename. + + Note + ---- + Can only be used with reading ('r') mode. + Upon pickling, the current frame is reset. `universe.trajectory[i]` has + to be used to return to its original frame. + + Parameters + ---------- + filename : str + a filename given a text or byte string. + mode : 'r' , optional + only 'r' can be used for pickling. + format : '', optional + guessed from the file extension if empty. + + Example + ------- + :: + + f = ChemfilesPicklable(XYZ, 'r', '') + print(f.read()) + f.close() + + can also be used as context manager:: + + with ChemfilesPicklable(XYZ) as f: + print(f.read()) + + See Also + --------- + :class:`MDAnalysis.lib.picklable_file_io.FileIOPicklable` + :class:`MDAnalysis.lib.picklable_file_io.BufferIOPicklable` + :class:`MDAnalysis.lib.picklable_file_io.TextIOPicklable` + :class:`MDAnalysis.lib.picklable_file_io.GzipPicklable` + :class:`MDAnalysis.lib.picklable_file_io.BZ2Picklable` + + + .. versionadded:: 2.0.0 + """ + def __init__(self, path, mode="r", format=""): + if mode != 'r': + raise ValueError("Only read mode ('r') " + "files can be pickled.") + super().__init__(path=path, + mode=mode, + format=format) + + def __getstate__(self): + return self.path, self._Trajectory__mode, self._Trajectory__format + + def __setstate__(self, args): + self.__init__(*args) diff --git a/package/MDAnalysis/core/universe.py b/package/MDAnalysis/core/universe.py index 23e216b938a..390acb8e65e 100644 --- a/package/MDAnalysis/core/universe.py +++ b/package/MDAnalysis/core/universe.py @@ -311,9 +311,15 @@ class Universe(object): bonds, angles, dihedrals principal ConnectivityGroups for each connectivity type + .. versionchanged:: 1.0.0 Universe() now raises an error. Use Universe(None) or :func:`Universe.empty()` instead. Removed instant selectors. + + .. 
versionchanged:: 2.0.0 + Universe now can be (un)pickled. + ``topology``, ``trajectory`` and ``anchor_name`` are preserved + upon unpickle. """ # Py3 TODO # def __init__(self, topology=None, *coordinates, all_coordinates=False, @@ -701,7 +707,7 @@ def anchor_name(self): return self._anchor_uuid except AttributeError: # store this so we can later recall it if needed - self._anchor_uuid = uuid.uuid4() + self._anchor_uuid = str(uuid.uuid4()) return self._anchor_uuid @anchor_name.setter @@ -738,10 +744,18 @@ def __repr__(self): n_atoms=len(self.atoms)) def __getstate__(self): - raise NotImplementedError + # Universe's two "legs" of topology and traj both serialise themselves + # the only other state held in Universe is anchor name? + return self.anchor_name, self._topology, self._trajectory + + def __setstate__(self, args): + self._anchor_name = args[0] + self.make_anchor() + + self._topology = args[1] + _generate_from_topology(self) - def __setstate__(self, state): - raise NotImplementedError + self._trajectory = args[2] # Properties @property diff --git a/package/MDAnalysis/lib/__init__.py b/package/MDAnalysis/lib/__init__.py index 108eb53d840..2ba03b03274 100644 --- a/package/MDAnalysis/lib/__init__.py +++ b/package/MDAnalysis/lib/__init__.py @@ -39,3 +39,6 @@ from . import formats from . import pkdtree from . 
import nsgrid +from .picklable_file_io import (FileIOPicklable, + BufferIOPicklable, + TextIOPicklable) diff --git a/package/MDAnalysis/lib/formats/libdcd.pyx b/package/MDAnalysis/lib/formats/libdcd.pyx index 53719bc8bc2..2229b02f9bd 100644 --- a/package/MDAnalysis/lib/formats/libdcd.pyx +++ b/package/MDAnalysis/lib/formats/libdcd.pyx @@ -261,8 +261,8 @@ cdef class DCDFile: return current_frame = state[1] - self.seek(current_frame) - + self.seek(current_frame - 1) + self.current_frame = current_frame def tell(self): """ diff --git a/package/MDAnalysis/lib/formats/libmdaxdr.pyx b/package/MDAnalysis/lib/formats/libmdaxdr.pyx index 7af90347d6d..54d64166a7c 100644 --- a/package/MDAnalysis/lib/formats/libmdaxdr.pyx +++ b/package/MDAnalysis/lib/formats/libmdaxdr.pyx @@ -306,7 +306,8 @@ cdef class _XDRFile: # where was I current_frame = state[1] - self.seek(current_frame) + self.seek(current_frame - 1) + self.current_frame = current_frame def seek(self, frame): """Seek to Frame. diff --git a/package/MDAnalysis/lib/picklable_file_io.py b/package/MDAnalysis/lib/picklable_file_io.py new file mode 100644 index 00000000000..dd5bef44c14 --- /dev/null +++ b/package/MDAnalysis/lib/picklable_file_io.py @@ -0,0 +1,554 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 +# +# MDAnalysis --- https://www.mdanalysis.org +# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, +# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. +# MDAnalysis: A Python package for the rapid analysis of molecular dynamics +# simulations. In S. Benthall and S. 
Rostrup editors, Proceedings of the 15th +# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. +# doi: 10.25080/majora-629e541a-00e +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 +# +""" +Picklable read-only I/O classes --- :mod:`MDAnalysis.lib.picklable_file_io` +=========================================================================== + +Provides an interface for pickling read-only IO file objects. +These classes are used for further pickling :class:`MDAnalysis.core.universe` +in an object composition approach. + +.. autoclass:: FileIOPicklable + :members: + +.. autoclass:: BufferIOPicklable + :members: + +.. autoclass:: TextIOPicklable + :members: + +.. autoclass:: BZ2Picklable + :members: + +.. autoclass:: GzipPicklable + :members: + +.. autofunction:: pickle_open + +.. autofunction:: bz2_pickle_open + +.. autofunction:: gzip_pickle_open + + +.. versionadded:: 2.0.0 +""" +import io +import os + +import bz2 +import gzip + + +class FileIOPicklable(io.FileIO): + """File object (read-only) that can be pickled. + + This class provides a file-like object (as returned by :func:`open`, + namely :class:`io.FileIO`) that, unlike standard Python file objects, + can be pickled. Only read mode is supported. + + When the file is pickled, filename and position of the open file handle in + the file are saved. On unpickling, the file is opened by filename, + and the file is seeked to the saved position. + This means that for a successful unpickle, the original file still has to + be accessible with its filename. + + Note + ---- + This class only supports reading files in binary mode. If you need to open + a file in text mode, use the :func:`pickle_open`. 
+ + Parameters + ---------- + name : str + either a text or byte string giving the name (and the path + if the file isn't in the current working directory) of the file to + be opened. + mode : str + only reading ('r') mode works. It exists to be consistent + with a wider API. + + Example + ------- + :: + + >>> file = FileIOPicklable(PDB) + >>> file.readline() + >>> file_pickled = pickle.loads(pickle.dumps(file)) + >>> print(file.tell(), file_pickled.tell()) + 55 55 + + See Also + --------- + TextIOPicklable + BufferIOPicklable + + + .. versionadded:: 2.0.0 + """ + def __init__(self, name, mode='r'): + self._mode = mode + super().__init__(name, mode) + + def __getstate__(self): + if self._mode != 'r': + raise RuntimeError("Can only pickle files that were opened " + "in read mode, not {}".format(self._mode)) + return self.name, self.tell() + + def __setstate__(self, args): + name = args[0] + super().__init__(name, mode='r') + self.seek(args[1]) + + +class BufferIOPicklable(io.BufferedReader): + """A picklable buffer object for read-only FileIO object. + + This class provides a buffered :class:`io.BufferedReader` + that can be pickled. + Note that this only works in read mode. + + Parameters + ---------- + raw : FileIO object + + Example + ------- + :: + + file = FileIOPicklable('filename') + buffer_wrapped = BufferIOPicklable(file) + + See Also + --------- + FileIOPicklable + TextIOPicklable + + + .. versionadded:: 2.0.0 + """ + def __init__(self, raw): + super().__init__(raw) + self.raw_class = raw.__class__ + + def __getstate__(self): + return self.raw_class, self.name, self.tell() + + def __setstate__(self, args): + raw_class = args[0] + name = args[1] + raw = raw_class(name) + super().__init__(raw) + self.seek(args[2]) + + +class TextIOPicklable(io.TextIOWrapper): + """Character and line based picklable file-like object. + + This class provides a file-like :class:`io.TextIOWrapper` object that can + be pickled. Note that this only works in read mode. 
+ + Note + ---- + After pickling, the current position is reset. `universe.trajectory[i]` has + to be used to return to its original frame. + + + Parameters + ---------- + raw : FileIO object + + Example + ------- + :: + + file = FileIOPicklable('filename') + text_wrapped = TextIOPicklable(file) + + See Also + --------- + FileIOPicklable + BufferIOPicklable + + + .. versionadded:: 2.0.0 + """ + def __init__(self, raw): + super().__init__(raw) + self.raw_class = raw.__class__ + + def __getstate__(self): + try: + name = self.name + except AttributeError: + # This is kind of ugly--BZ2File does not save its name. + name = self.buffer._fp.name + return self.raw_class, name + + def __setstate__(self, args): + raw_class = args[0] + name = args[1] + # raw_class is used for further expansion of this functionality to + # Gzip files, which also requires a text wrapper. + raw = raw_class(name) + super().__init__(raw) + + +class BZ2Picklable(bz2.BZ2File): + """File object (read-only) for bzip2 (de)compression that can be pickled. + + This class provides a file-like object (as returned by :func:`bz2.open`, + namely :class:`bz2.BZ2File`) that, unlike standard Python file objects, + can be pickled. Only read mode is supported. + + When the file is pickled, filename and position of the open file handle in + the file are saved. On unpickling, the file is opened by filename, + and the file is seeked to the saved position. + This means that for a successful unpickle, the original file still has to + be accessible with its filename. + + Note + ---- + This class only supports reading files in binary mode. If you need to open + a compressed file in text mode, use :func:`bz2_pickle_open`. + + Parameters + ---------- + name : str + either a text or byte string giving the name (and the path + if the file isn't in the current working directory) of the file to + be opened. + mode : str + can only be 'r', 'rb' to make pickle work. 
+ + Example + ------- + :: + + >>> file = BZ2Picklable(XYZ_bz2) + >>> file.readline() + >>> file_pickled = pickle.loads(pickle.dumps(file)) + >>> print(file.tell(), file_pickled.tell()) + 5 5 + + See Also + --------- + FileIOPicklable + BufferIOPicklable + TextIOPicklable + GzipPicklable + + + .. versionadded:: 2.0.0 + """ + def __init__(self, name, mode='rb'): + self._bz_mode = mode + super().__init__(name, mode) + + def __getstate__(self): + if not self._bz_mode.startswith('r'): + raise RuntimeError("Can only pickle files that were opened " + "in read mode, not {}".format(self._bz_mode)) + return self._fp.name, self.tell() + + def __setstate__(self, args): + super().__init__(args[0]) + self.seek(args[1]) + + +class GzipPicklable(gzip.GzipFile): + """Gzip file object (read-only) that can be pickled. + + This class provides a file-like object (as returned by :func:`gzip.open`, + namely :class:`gzip.GzipFile`) that, unlike standard Python file objects, + can be pickled. Only read mode is supported. + + When the file is pickled, filename and position of the open file handle in + the file are saved. On unpickling, the file is opened by filename, + and the file is seeked to the saved position. + This means that for a successful unpickle, the original file still has to + be accessible with its filename. + + Note + ---- + This class only supports reading files in binary mode. If you need to open + a compressed file in text mode, use :func:`gzip_pickle_open`. + + Parameters + ---------- + name : str + either a text or byte string giving the name (and the path + if the file isn't in the current working directory) of the file to + be opened. + mode : str + can only be 'r', 'rb' to make pickle work. 
+ + Example + ------- + :: + + >>> file = GzipPicklable(MMTF_gz) + >>> file.readline() + >>> file_pickled = pickle.loads(pickle.dumps(file)) + >>> print(file.tell(), file_pickled.tell()) + 1218 1218 + + See Also + --------- + FileIOPicklable + BufferIOPicklable + TextIOPicklable + BZ2Picklable + + + .. versionadded:: 2.0.0 + """ + def __init__(self, name, mode='rb'): + self._gz_mode = mode + super().__init__(name, mode) + + def __getstate__(self): + if not self._gz_mode.startswith('r'): + raise RuntimeError("Can only pickle files that were opened " + "in read mode, not {}".format(self._gz_mode)) + return self.name, self.tell() + + def __setstate__(self, args): + super().__init__(args[0]) + self.seek(args[1]) + + +def pickle_open(name, mode='rt'): + """Open file and return a stream with pickle function implemented. + + This function returns a FileIOPicklable object wrapped in a + BufferIOPicklable class when given the "rb" reading mode, + or a FileIOPicklable object wrapped in a TextIOPicklable class with the "r" + or "rt" reading mode. It can be used as a context manager, and replace the + built-in :func:`open` function in read mode that only returns an + unpicklable file object. + In order to serialize a :class:`MDAnalysis.core.Universe`, this function + can be used to open trajectory/topology files. This object composition is + more flexible and easier than class inheritance to implement pickling + for new readers. + + Note + ---- + Can be only used with read mode. + + Parameters + ---------- + name : str + either a text or byte string giving the name (and the path + if the file isn't in the current working directory) of the file to + be opened. 
+ mode: {'r', 'rt', 'rb'} (optional) + 'r': open for reading in text mode; + 'rt': read in text mode (default); + 'rb': read in binary mode; + + Returns + ------- + stream-like object: BufferIOPicklable or TextIOPicklable + when mode is 'r' or 'rt', returns TextIOPicklable; + when mode is 'rb', returns BufferIOPicklable + + Raises + ------ + ValueError + if `mode` is not one of the allowed read modes + + Examples + ------- + open as context manager:: + + with pickle_open('filename') as f: + line = f.readline() + + open as function:: + + f = pickle_open('filename') + line = f.readline() + f.close() + + See Also + -------- + :func:`MDAnalysis.lib.util.anyopen` + :func:`io.open` + + + .. versionadded:: 2.0.0 + """ + if mode not in {'r', 'rt', 'rb'}: + raise ValueError("Only read mode ('r', 'rt', 'rb') " + "files can be pickled.") + name = os.fspath(name) + raw = FileIOPicklable(name) + if mode == 'rb': + return BufferIOPicklable(raw) + elif mode in {'r', 'rt'}: + return TextIOPicklable(raw) + + +def bz2_pickle_open(name, mode='rb'): + """Open a bzip2-compressed file in binary or text mode + with pickle function implemented. + + This function returns a BZ2Picklable object when given the "rb" or "r" + reading mode, or a BZ2Picklable object wrapped in a TextIOPicklable class + with the "rt" reading mode. + It can be used as a context manager, and replace the built-in + :func:`bz2.open` function in read mode that only returns an + unpicklable file object. + + Note + ---- + Can be only used with read mode. + + Parameters + ---------- + name : str + either a text or byte string giving the name (and the path + if the file isn't in the current working directory) of the file to + be opened. 
+ mode: {'r', 'rt', 'rb'} (optional) + 'r': open for reading in binary mode; + 'rt': read in text mode; + 'rb': read in binary mode; (default) + + Returns + ------- + stream-like object: BZ2Picklable or TextIOPicklable + when mode is 'rt', returns TextIOPicklable; + when mode is 'r' or 'rb', returns BZ2Picklable + + Raises + ------ + ValueError + if `mode` is not one of the allowed read modes + + Examples + ------- + open as context manager:: + + with bz2_pickle_open('filename') as f: + line = f.readline() + + open as function:: + + f = bz2_pickle_open('filename') + line = f.readline() + f.close() + + See Also + -------- + :func:`io.open` + :func:`bz2.open` + :func:`MDAnalysis.lib.util.anyopen` + :func:`MDAnalysis.lib.picklable_file_io.pickle_open` + :func:`MDAnalysis.lib.picklable_file_io.gzip_pickle_open` + + + .. versionadded:: 2.0.0 + """ + if mode not in {'r', 'rt', 'rb'}: + raise ValueError("Only read mode ('r', 'rt', 'rb') " + "files can be pickled.") + bz_mode = mode.replace("t", "") + binary_file = BZ2Picklable(name, bz_mode) + if "t" in mode: + return TextIOPicklable(binary_file) + else: + return binary_file + + +def gzip_pickle_open(name, mode='rb'): + """Open a gzip-compressed file in binary or text mode + with pickle function implemented. + + This function returns a GzipPicklable object when given the "rb" or "r" + reading mode, or a GzipPicklable object wrapped in a TextIOPicklable class + with the "rt" reading mode. + It can be used as a context manager, and replace the built-in + :func:`gzip.open` function in read mode that only returns an + unpicklable file object. + + Note + ---- + Can be only used with read mode. + + Parameters + ---------- + name : str + either a text or byte string giving the name (and the path + if the file isn't in the current working directory) of the file to + be opened. 
+ mode: {'r', 'rt', 'rb'} (optional) + 'r': open for reading in binary mode; + 'rt': read in text mode; + 'rb': read in binary mode; (default) + + Returns + ------- + stream-like object: GzipPicklable or TextIOPicklable + when mode is 'rt', returns TextIOPicklable; + when mode is 'r' or 'rb', returns GzipPicklable + + Raises + ------ + ValueError + if `mode` is not one of the allowed read modes + + Examples + ------- + open as context manager:: + + with gzip_pickle_open('filename') as f: + line = f.readline() + + open as function:: + + f = gzip_pickle_open('filename') + line = f.readline() + f.close() + + See Also + -------- + :func:`io.open` + :func:`gzip.open` + :func:`MDAnalysis.lib.util.anyopen` + :func:`MDAnalysis.lib.picklable_file_io.pickle_open` + :func:`MDAnalysis.lib.picklable_file_io.bz2_pickle_open` + + + .. versionadded:: 2.0.0 + """ + if mode not in {'r', 'rt', 'rb'}: + raise ValueError("Only read mode ('r', 'rt', 'rb') " + "files can be pickled.") + gz_mode = mode.replace("t", "") + binary_file = GzipPicklable(name, gz_mode) + if "t" in mode: + return TextIOPicklable(binary_file) + else: + return binary_file diff --git a/package/MDAnalysis/lib/util.py b/package/MDAnalysis/lib/util.py index 804eae553f7..1e2a893556f 100644 --- a/package/MDAnalysis/lib/util.py +++ b/package/MDAnalysis/lib/util.py @@ -209,6 +209,8 @@ from numpy.testing import assert_equal import inspect +from .picklable_file_io import pickle_open, bz2_pickle_open, gzip_pickle_open + from ..exceptions import StreamWarning, DuplicateWarning try: from ._cutil import unique_int_1d @@ -338,8 +340,19 @@ def anyopen(datasource, mode='rt', reset=True): Only returns the ``stream`` and tries to set ``stream.name = filename`` instead of the previous behavior to return a tuple ``(stream, filename)``. + .. versionchanged:: 2.0.0 + New read handlers support pickle functionality + if `datasource` is a filename. + They return a custom picklable file stream in + :class:`MDAnalysis.lib.picklable_file_io`. 
+ """ - handlers = {'bz2': bz2.open, 'gz': gzip.open, '': open} + read_handlers = {'bz2': bz2_pickle_open, + 'gz': gzip_pickle_open, + '': pickle_open} + write_handlers = {'bz2': bz2.open, + 'gz': gzip.open, + '': open} if mode.startswith('r'): if isstream(datasource): @@ -362,7 +375,7 @@ def anyopen(datasource, mode='rt', reset=True): stream = None filename = datasource for ext in ('bz2', 'gz', ''): # file == '' should be last - openfunc = handlers[ext] + openfunc = read_handlers[ext] stream = _get_stream(datasource, openfunc, mode=mode) if stream is not None: break @@ -383,7 +396,7 @@ def anyopen(datasource, mode='rt', reset=True): ext = ext[1:] if not ext in ('bz2', 'gz'): ext = '' # anything else but bz2 or gz is just a normal file - openfunc = handlers[ext] + openfunc = write_handlers[ext] stream = openfunc(datasource, mode=mode) if stream is None: raise IOError(errno.EIO, "Cannot open file or stream in mode={mode!r}.".format(**vars()), repr(filename)) diff --git a/package/doc/sphinx/source/documentation_pages/coordinates/pickle_readers.rst b/package/doc/sphinx/source/documentation_pages/coordinates/pickle_readers.rst new file mode 100644 index 00000000000..0866590297f --- /dev/null +++ b/package/doc/sphinx/source/documentation_pages/coordinates/pickle_readers.rst @@ -0,0 +1,101 @@ +.. Contains the formatted docstrings for the serialization of universe located +.. mainly in 'MDAnalysis/lib/picklable_file_io.py' +.. _serialization: + +********************************************************* +Serialization of Coordinate Readers +********************************************************* + +To achieve a working implementation of parallelism, this document illustrates +the basic idea of how different coordinate readers are being serialized in MDAnalysis, +and what developers should do to serialize a new reader. + +To make sure every Trajectory reader can be successfully +serialized, we implement picklable I/O classes (see :ref:`implemented-fileio`).
+When the file is pickled, filename and other necessary attributes of the open +file handle are saved. On unpickling, the file is opened by filename. +This means that for a successful unpickle, the original file still has to +be accessible with its filename. To retain the current frame of the trajectory, +:func:`_read_frame(previous frame)` will be called during unpickling. + +.. _how_to_serialize_a_new_reader: + +How to serialize a new reader +----------------------------- + +File Access +^^^^^^^^^^^ +If the new reader uses :func:`util.anyopen()` +(e.g. :class:`MDAnalysis.coordinates.PDB.PDBReader`), +the reading handler can be pickled without modification. +If the new reader uses I/O classes from another package +(e.g. :class:`MDAnalysis.coordinates.GSD.GSDReader`), +and cannot be pickled natively, create a new picklable class inherited from +the file class in that package +(e.g. :class:`MDAnalysis.coordinates.GSD.GSDPicklable`), +adding :func:`__getstate__`, +:func:`__setstate__` functions (or :func:`__reduce__` if needed. Consult the +pickle `documentation <https://docs.python.org/3/library/pickle.html>`_ of Python) +to allow file handler serialization. + +To seek or not to seek +^^^^^^^^^^^^^^^^^^^^^^ +Some I/O classes support :func:`seek` and :func:`tell` functions to allow the file +to be pickled with an offset. It is normally not needed for MDAnalysis with +random access. But if an error occurs during testing, find a way to make the offset work. +Maybe this I/O class supports frame indexing? Maybe the file handler inside this I/O +class supports an offset? + +For example, in :class:`MDAnalysis.coordinates.TRZ.TRZReader`, +:func:`_read_frame` is implemented by :func:`_seek` ing the file into +its previous frame and then calling :func:`_read_next_timestep`, so the offset of the file is crucial +for such machinery to work.
+ +Miscellaneous +^^^^^^^^^^^^^ +If pickle still fails due to some unpicklable attributes, try to find a way +to pickle those, or write custom :func:`__getstate__` and :func:`__setstate__` +methods for the reader. + +If the new reader is written in Cython, read :class:`lib.formats.libmdaxdr` and +:class:`lib.formats.libdcd` files as references. + +.. _test_pickle: + +Tests +----- +_SingleFrameReader Test +^^^^^^^^^^^^^^^^^^^^^^^ +If the new reader is a single-frame reader, the basic test should normally be +inherited from :class:`_SingleFrameReader`, where the picklability is tested. + +BaseReaderTest and MultiframeReaderTest +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +If the test for the new reader uses :class:`BaseReaderTest` or +:class:`MultiframeReaderTest`, whether the current timestep information is +saved (the former), whether its relative position is maintained, +i.e. next() reads the right next timestep, and whether its last timestep +can be pickled, are already tested. + +File handler Test +^^^^^^^^^^^^^^^^^ +If the new reader accesses the file with :func:`util.anyopen`, add necessary +tests inside ``parallelism/test_multiprocessing.py`` for the reader. + +If the new reader accesses the file with a new picklable I/O class, +add necessary tests inside ``utils/test_pickleio.py`` for the I/O class +and inside ``parallelism/test_multiprocessing.py`` for the reader. + +..
_implemented-fileio: + +Currently implemented picklable IO Formats +------------------------------------------ + +* :class:`MDAnalysis.lib.picklable_file_io.FileIOPicklable` +* :class:`MDAnalysis.lib.picklable_file_io.BufferIOPicklable` +* :class:`MDAnalysis.lib.picklable_file_io.TextIOPicklable` +* :class:`MDAnalysis.lib.picklable_file_io.BZ2Picklable` +* :class:`MDAnalysis.lib.picklable_file_io.GzipPicklable` +* :class:`MDAnalysis.coordinates.GSD.GSDPicklable` +* :class:`MDAnalysis.coordinates.TRJ.NCDFPicklable` +* :class:`MDAnalysis.coordinates.chemfiles.ChemfilesPicklable` diff --git a/package/doc/sphinx/source/documentation_pages/coordinates_modules.rst b/package/doc/sphinx/source/documentation_pages/coordinates_modules.rst index c0e7c3f3467..c4f73198ac1 100644 --- a/package/doc/sphinx/source/documentation_pages/coordinates_modules.rst +++ b/package/doc/sphinx/source/documentation_pages/coordinates_modules.rst @@ -57,5 +57,11 @@ functionality should first read the :ref:`Trajectory API`. coordinates/base coordinates/core + coordinates/pickle_readers coordinates/chain coordinates/XDR + +In particular, all trajectory readers have to be +:ref:`serializable <serialization>` and they should pass all tests +available in the ``MDAnalysisTests.coordinates.base.MultiframeReaderTest`` +or ``MDAnalysisTests.coordinates.base.BaseReaderTest``. diff --git a/package/doc/sphinx/source/documentation_pages/lib/picklable_file_io.rst b/package/doc/sphinx/source/documentation_pages/lib/picklable_file_io.rst new file mode 100644 index 00000000000..8df008afdde --- /dev/null +++ b/package/doc/sphinx/source/documentation_pages/lib/picklable_file_io.rst @@ -0,0 +1 @@ +..
automodule:: MDAnalysis.lib.picklable_file_io diff --git a/package/doc/sphinx/source/documentation_pages/lib_modules.rst b/package/doc/sphinx/source/documentation_pages/lib_modules.rst index 29ba1a05e8a..2021efbb325 100644 --- a/package/doc/sphinx/source/documentation_pages/lib_modules.rst +++ b/package/doc/sphinx/source/documentation_pages/lib_modules.rst @@ -60,6 +60,7 @@ List of modules ./lib/qcprot ./lib/util ./lib/correlations + ./lib/picklable_file_io Low level file formats ---------------------- @@ -75,4 +76,4 @@ Python-based projects. :maxdepth: 1 ./lib/formats/libmdaxdr - ./lib/formats/libdcd \ No newline at end of file + ./lib/formats/libdcd diff --git a/testsuite/MDAnalysisTests/coordinates/base.py b/testsuite/MDAnalysisTests/coordinates/base.py index d874a3d24b6..32ebe734c22 100644 --- a/testsuite/MDAnalysisTests/coordinates/base.py +++ b/testsuite/MDAnalysisTests/coordinates/base.py @@ -21,6 +21,8 @@ # J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 # import itertools +import pickle + import numpy as np import pytest from unittest import TestCase @@ -118,6 +120,13 @@ def test_last_slice(self): frames = [ts.frame for ts in trj_iter] assert_equal(frames, np.arange(self.universe.trajectory.n_frames)) + def test_pickle_singleframe_reader(self): + reader = self.universe.trajectory + reader_p = pickle.loads(pickle.dumps(reader)) + assert_equal(len(reader), len(reader_p)) + assert_equal(reader.ts, reader_p.ts, + "Single-frame timestep is changed after pickling") + class BaseReference(object): def __init__(self): @@ -417,12 +426,18 @@ def test_transformations_copy(self,ref,transformed): ideal_coords = ref.iter_ts(i).positions + v1 + v2 assert_array_almost_equal(ts.positions, ideal_coords, decimal = ref.prec) - def test_add_another_transformations_raises_ValueError(self, transformed): # After defining the transformations, the workflow cannot be changed with pytest.raises(ValueError): transformed.add_transformations(translate([2,2,2])) + def 
test_pickle_reader(self, reader): + reader_p = pickle.loads(pickle.dumps(reader)) + assert_equal(len(reader), len(reader_p)) + assert_equal(reader.ts, reader_p.ts, + "Timestep is changed after pickling") + + class MultiframeReaderTest(BaseReaderTest): def test_last_frame(self, ref, reader): ts = reader[-1] @@ -488,6 +503,23 @@ def test_iter_as_aux_lowf(self, ref, reader): ref.iter_ts(ref.aux_lowf_frames_with_steps[i]), decimal=ref.prec) + # To make sure we not only save the current timestep information, + # but also maintain its relative position. + def test_pickle_next_ts_reader(self, reader): + reader_p = pickle.loads(pickle.dumps(reader)) + assert_equal(next(reader), next(reader_p), + "Next timestep is changed after pickling") + + # To make sure pickle works for last frame. + def test_pickle_last_ts_reader(self, reader): + # move current ts to last frame. + reader[-1] + reader_p = pickle.loads(pickle.dumps(reader)) + assert_equal(len(reader), len(reader_p), + "Last timestep is changed after pickling") + assert_equal(reader.ts, reader_p.ts, + "Last timestep is changed after pickling") + class BaseWriterTest(object): @staticmethod diff --git a/testsuite/MDAnalysisTests/core/test_universe.py b/testsuite/MDAnalysisTests/core/test_universe.py index 2fc377684c5..050b14d9004 100644 --- a/testsuite/MDAnalysisTests/core/test_universe.py +++ b/testsuite/MDAnalysisTests/core/test_universe.py @@ -341,10 +341,12 @@ def test_load_multiple_args(self): assert_equal(len(u.atoms), 3341, "Loading universe failed somehow") assert_equal(u.trajectory.n_frames, 2 * ref.trajectory.n_frames) - def test_pickle_raises_NotImplementedError(self): + def test_pickle(self): u = mda.Universe(PSF, DCD) - with pytest.raises(NotImplementedError): - pickle.dumps(u, protocol = pickle.HIGHEST_PROTOCOL) + s = pickle.dumps(u, protocol=pickle.HIGHEST_PROTOCOL) + new_u = pickle.loads(s) + assert_equal(u.atoms.names, new_u.atoms.names) + @pytest.mark.parametrize('dtype', (int, np.float32, np.float64)) def 
test_set_dimensions(self, dtype): diff --git a/testsuite/MDAnalysisTests/data/example_longer.gsd b/testsuite/MDAnalysisTests/data/example_longer.gsd new file mode 100644 index 00000000000..cccad7cbcc8 Binary files /dev/null and b/testsuite/MDAnalysisTests/data/example_longer.gsd differ diff --git a/testsuite/MDAnalysisTests/datafiles.py b/testsuite/MDAnalysisTests/datafiles.py index 90a2e1b8e24..ccca4c24a90 100644 --- a/testsuite/MDAnalysisTests/datafiles.py +++ b/testsuite/MDAnalysisTests/datafiles.py @@ -168,7 +168,7 @@ "legacy_DCD_ADK_coords", # frames 5 and 29 read in for adk_dims.dcd using legacy DCD reader "legacy_DCD_NAMD_coords", # frame 0 read in for SiN_tric_namd.dcd using legacy DCD reader "legacy_DCD_c36_coords", # frames 1 and 4 read in for tip125_tric_C36.dcd using legacy DCD reader - "GSD", "GSD_bonds", + "GSD", "GSD_bonds", "GSD_long", "GRO_MEMPROT", "XTC_MEMPROT", # YiiP transporter in POPE:POPG lipids with Na+, Cl-, Zn2+ dummy model without water "DihedralArray", "DihedralsArray", # time series of single dihedral "RamaArray", "GLYRamaArray", # time series of phi/psi angles @@ -510,6 +510,7 @@ GSD = resource_filename(__name__, 'data/example.gsd') GSD_bonds = resource_filename(__name__, 'data/example_bonds.gsd') +GSD_long = resource_filename(__name__, 'data/example_longer.gsd') DihedralArray = resource_filename(__name__, 'data/adk_oplsaa_dihedral.npy') DihedralsArray = resource_filename(__name__, 'data/adk_oplsaa_dihedral_list.npy') diff --git a/testsuite/MDAnalysisTests/parallelism/test_multiprocessing.py b/testsuite/MDAnalysisTests/parallelism/test_multiprocessing.py new file mode 100644 index 00000000000..0832e907756 --- /dev/null +++ b/testsuite/MDAnalysisTests/parallelism/test_multiprocessing.py @@ -0,0 +1,179 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 fileencoding=utf-8 +# +# MDAnalysis --- https://www.mdanalysis.org +# Copyright (c) 2006-2017 The 
MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, +# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. +# MDAnalysis: A Python package for the rapid analysis of molecular dynamics +# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th +# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. +# doi: 10.25080/majora-629e541a-00e +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 +# +import multiprocessing + +import numpy as np +import pytest +import pickle +from numpy.testing import assert_equal + +import MDAnalysis as mda +from MDAnalysis.coordinates.core import get_reader_for + +from MDAnalysisTests.datafiles import ( + CRD, + PSF, DCD, + DMS, + DLP_CONFIG, + DLP_HISTORY, + FHIAIMS, + INPCRD, + GMS_ASYMOPT, + GMS_SYMOPT, + GRO, + GSD, + GSD_long, + LAMMPSdata_mini, + LAMMPSDUMP, + mol2_molecules, + MMTF, + NCDF, + PDB, PDB_small, PDB_multiframe, + PDBQT_input, + PQR, + TRR, + TRJ, + TRZ, + TXYZ, + XTC, + XPDB_small, + XYZ_mini, XYZ, XYZ_bz2, +) + + +@pytest.fixture(params=[ + (PSF, DCD), + (GRO, XTC), + (PDB_multiframe,), + (XYZ,), + (XYZ_bz2,), # .bz2 + (GMS_SYMOPT,), # .gms + (GMS_ASYMOPT,), # .gz + (GSD_long,), + (NCDF,), + (np.arange(150).reshape(5, 10, 3).astype(np.float64),), + (GRO, [GRO, GRO, GRO, GRO, GRO]), + (PDB, [PDB, PDB, PDB, PDB, PDB]), + (GRO, [XTC, XTC]), +]) +def u(request): + if len(request.param) == 1: + f = request.param[0] + return mda.Universe(f) + else: + top, trj = request.param + return mda.Universe(top, trj) + + +# Define target functions here +# inside test 
functions doesn't work +def cog(u, ag, frame_id): + u.trajectory[frame_id] + + return ag.center_of_geometry() + + +def test_multiprocess_COG(u): + ag = u.atoms[2:5] + + ref = np.array([cog(u, ag, i) + for i in range(3)]) + + p = multiprocessing.Pool(2) + res = np.array([p.apply(cog, args=(u, ag, i)) + for i in range(3)]) + p.close() + assert_equal(ref, res) + + +def getnames(u, ix): + # Check topology stuff works + return u.atoms[ix].name + + +def test_universe_unpickle_in_new_process(): + u = mda.Universe(GRO, XTC) + ref = [getnames(u, i) + for i in range(3)] + + p = multiprocessing.Pool(2) + res = [p.apply(getnames, args=(u, i)) + for i in range(3)] + p.close() + + assert_equal(ref, res) + + +@pytest.fixture(params=[ + # formatname, filename + ('CRD', CRD, dict()), + ('DATA', LAMMPSdata_mini, dict(n_atoms=1)), + ('DCD', DCD, dict()), + ('DMS', DMS, dict()), + ('CONFIG', DLP_CONFIG, dict()), + ('FHIAIMS', FHIAIMS, dict()), + ('HISTORY', DLP_HISTORY, dict()), + ('INPCRD', INPCRD, dict()), + ('LAMMPSDUMP', LAMMPSDUMP, dict()), + ('GMS', GMS_ASYMOPT, dict()), + ('GRO', GRO, dict()), + ('GSD', GSD, dict()), + ('MMTF', MMTF, dict()), + ('MOL2', mol2_molecules, dict()), + ('PDB', PDB_small, dict()), + ('PQR', PQR, dict()), + ('PDBQT', PDBQT_input, dict()), + ('TRR', TRR, dict()), + ('TRZ', TRZ, dict(n_atoms=8184)), + ('TRJ', TRJ, dict(n_atoms=252)), + ('XTC', XTC, dict()), + ('XPDB', XPDB_small, dict()), + ('XYZ', XYZ_mini, dict()), + ('NCDF', NCDF, dict()), + ('TXYZ', TXYZ, dict()), + ('memory', np.arange(60).reshape(2, 10, 3).astype(np.float64), dict()), + ('CHAIN', [GRO, GRO, GRO], dict()), + ('CHAIN', [PDB, PDB, PDB], dict()), + ('CHAIN', [XTC, XTC, XTC], dict()), +]) +def ref_reader(request): + fmt_name, filename, extras = request.param + r = get_reader_for(filename, format=fmt_name)(filename, **extras) + try: + yield r + finally: + # make sure file handle is closed afterwards + r.close() + + +def test_readers_pickle(ref_reader): + ps = pickle.dumps(ref_reader) + 
reanimated = pickle.loads(ps) + assert len(ref_reader) == len(reanimated) + try: + ref_reader[2] + ref_reader[0] + except IndexError: + # single frame files + pass + assert_equal(reanimated.ts, ref_reader.ts) diff --git a/testsuite/MDAnalysisTests/utils/test_pickleio.py b/testsuite/MDAnalysisTests/utils/test_pickleio.py new file mode 100644 index 00000000000..b005f478c12 --- /dev/null +++ b/testsuite/MDAnalysisTests/utils/test_pickleio.py @@ -0,0 +1,202 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 fileencoding=utf-8 +# +# MDAnalysis --- https://www.mdanalysis.org +# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, +# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. +# MDAnalysis: A Python package for the rapid analysis of molecular dynamics +# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th +# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. +# doi: 10.25080/majora-629e541a-00e +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 
32 (2011), 2319--2327, doi:10.1002/jcc.21787 +# +import pickle + +import pytest +from numpy.testing import assert_equal + +from MDAnalysis.lib.util import anyopen +from MDAnalysis.lib.picklable_file_io import ( + BufferIOPicklable, + FileIOPicklable, + TextIOPicklable, + BZ2Picklable, + GzipPicklable, + pickle_open, + bz2_pickle_open, + gzip_pickle_open, +) +from MDAnalysis.coordinates.GSD import ( + GSDPicklable, + gsd_pickle_open +) +from MDAnalysis.coordinates.TRJ import ( + NCDFPicklable, +) +from MDAnalysis.coordinates.chemfiles import ( + check_chemfiles_version +) +if check_chemfiles_version(): + from MDAnalysis.coordinates.chemfiles import ( + ChemfilesPicklable + ) + +from MDAnalysis.tests.datafiles import ( + PDB, + XYZ, + XYZ_bz2, + MMTF_gz, + GMS_ASYMOPT, + GSD, + NCDF +) + + +@pytest.fixture(params=[ + # filename mode + (PDB, 'r'), + (PDB, 'rt'), + (XYZ_bz2, 'rt'), + (GMS_ASYMOPT, 'rt') +]) +def f_text(request): + filename, mode = request.param + return anyopen(filename, mode) + + +def test_get_right_open_handler_text(f_text): + assert_equal(f_text.__class__, TextIOPicklable) + + +def test_iopickle_text(f_text): + f_text_pickled = pickle.loads(pickle.dumps(f_text)) + assert_equal(f_text.readlines(), f_text_pickled.readlines()) + + +def test_offset_text_to_0(f_text): + f_text.readline() + f_text_pickled = pickle.loads(pickle.dumps(f_text)) + assert_equal(f_text_pickled.tell(), 0) + + +@pytest.fixture(params=[ + # filename mode ref_class + (PDB, 'rb', BufferIOPicklable), + (XYZ_bz2, 'rb', BZ2Picklable), + (MMTF_gz, 'rb', GzipPicklable) +]) +def f_byte(request): + filename, mode, ref_reader_class = request.param + return anyopen(filename, mode), ref_reader_class + + +def test_get_right_open_handler_byte(f_byte): + assert_equal(f_byte[0].__class__, f_byte[1]) + + +def test_iopickle_byte(f_byte): + file = f_byte[0] + f_byte_pickled = pickle.loads(pickle.dumps(file)) + assert_equal(file.readlines(), f_byte_pickled.readlines()) + + +def 
test_offset_byte_to_tell(f_byte): + file = f_byte[0] + file.readline() + f_byte_pickled = pickle.loads(pickle.dumps(file)) + assert_equal(f_byte_pickled.tell(), file.tell()) + + +def test_context_manager_pickle(): + with pickle_open(PDB) as file: + file_pickled = pickle.loads(pickle.dumps(file)) + assert_equal(file.readlines(), file_pickled.readlines()) + + +def test_fileio_pickle(): + raw_io = FileIOPicklable(PDB) + raw_io_pickled = pickle.loads(pickle.dumps(raw_io)) + assert_equal(raw_io.readlines(), raw_io_pickled.readlines()) + + +@pytest.fixture(params=[ + # filename mode open_func open_class + ('test.pdb', 'w', pickle_open, FileIOPicklable), + ('test.pdb', 'x', pickle_open, FileIOPicklable), + ('test.pdb', 'a', pickle_open, FileIOPicklable), + ('test.bz2', 'w', bz2_pickle_open, BZ2Picklable), + ('test.gz', 'w', gzip_pickle_open, GzipPicklable), +]) +def unpicklable_f(request): + filename, mode, open_func, open_class = request.param + return filename, mode, open_func, open_class + + +def test_unpicklable_open_mode(unpicklable_f, tmpdir): + filename, mode, open_func, open_class = unpicklable_f + with pytest.raises(ValueError, match=r"Only read mode"): + open_func(tmpdir.mkdir("pickle").join(filename), mode) + + +def test_pickle_with_write_mode(unpicklable_f, tmpdir): + filename, mode, open_func, open_class = unpicklable_f + f_open_by_class = open_class(tmpdir.mkdir("pickle").join(filename), mode) + with pytest.raises(RuntimeError, match=r"Can only pickle"): + f_pickled = pickle.loads(pickle.dumps(f_open_by_class)) + + +def test_GSD_pickle(): + gsd_io = gsd_pickle_open(GSD, mode='rb') + gsd_io_pickled = pickle.loads(pickle.dumps(gsd_io)) + assert_equal(gsd_io.read_frame(0).particles.position, + gsd_io_pickled.read_frame(0).particles.position) + + +def test_GSD_with_write_mode(tmpdir): + with pytest.raises(ValueError, match=r"Only read mode"): + gsd_io = gsd_pickle_open(tmpdir.mkdir("gsd").join('t.gsd'), + mode='w') + + +def test_NCDF_pickle(): + ncdf_io = 
NCDFPicklable(NCDF, mmap=None) + ncdf_io_pickled = pickle.loads(pickle.dumps(ncdf_io)) + assert_equal(ncdf_io.variables['coordinates'][0], + ncdf_io_pickled.variables['coordinates'][0]) + + +def test_NCDF_mmap_pickle(): + ncdf_io = NCDFPicklable(NCDF, mmap=False) + ncdf_io_pickled = pickle.loads(pickle.dumps(ncdf_io)) + assert_equal(ncdf_io_pickled.use_mmap, False) + + +@pytest.mark.skipif(not check_chemfiles_version(), + reason="Wrong version of chemfiles") +def test_Chemfiles_pickle(): + chemfiles_io = ChemfilesPicklable(XYZ) + chemfiles_io_pickled = pickle.loads(pickle.dumps(chemfiles_io)) + # frame has to be first saved to get the right position value. + # As opposed to `chemfiles_io.read().positions) + frame = chemfiles_io.read() + frame_pickled = chemfiles_io_pickled.read() + assert_equal(frame.positions[:], + frame_pickled.positions[:]) + + +@pytest.mark.skipif(not check_chemfiles_version(), + reason="Wrong version of chemfiles") +def test_Chemfiles_with_write_mode(tmpdir): + with pytest.raises(ValueError, match=r"Only read mode"): + chemfiles_io = ChemfilesPicklable(tmpdir.mkdir("xyz").join('t.xyz'), + mode='w')