From d5090ace589edd1317a280fd52e7613b8bbe03a0 Mon Sep 17 00:00:00 2001 From: Wouter Boomsma Date: Fri, 9 Sep 2016 21:40:38 +0200 Subject: [PATCH 1/9] Added MemoryReader and tests. --- package/MDAnalysis/analysis/align.py | 44 +-- package/MDAnalysis/coordinates/DCD.py | 15 +- package/MDAnalysis/coordinates/__init__.py | 1 + package/MDAnalysis/coordinates/base.py | 4 +- package/MDAnalysis/coordinates/memory.py | 252 ++++++++++++++++++ package/MDAnalysis/core/AtomGroup.py | 49 ++++ package/MDAnalysis/core/Selection.py | 6 + .../coordinates/test_memory.py | 149 +++++++++++ testsuite/MDAnalysisTests/test_atomgroup.py | 59 ++++ 9 files changed, 556 insertions(+), 23 deletions(-) create mode 100644 package/MDAnalysis/coordinates/memory.py create mode 100644 testsuite/MDAnalysisTests/coordinates/test_memory.py diff --git a/package/MDAnalysis/analysis/align.py b/package/MDAnalysis/analysis/align.py index 9672da0c32d..d2712f2cb75 100644 --- a/package/MDAnalysis/analysis/align.py +++ b/package/MDAnalysis/analysis/align.py @@ -600,6 +600,7 @@ def rms_fit_trj( strict=False, force=True, quiet=False, + in_memory=False, **kwargs): """RMS-fit trajectory to a reference structure using a selection. @@ -652,7 +653,10 @@ def rms_fit_trj( - ``True``: suppress progress and logging for levels INFO and below. - ``False``: show all status messages and do not change the the logging level (default) - + *in_memory* + Default: ``False`` + - ``True``: Switch to an in-memory trajectory so that alignment can + be done in-place. *kwargs* All other keyword arguments are passed on the trajectory @@ -660,7 +664,7 @@ def rms_fit_trj( trajectories on the fly (e.g. change the output format by changing the extension of *filename* and setting different parameters as described for the corresponding writer). - :Returns: *filename* (either provided or auto-generated) + :Returns: *filename* (either provided or auto-generated), or None if in_memory=True .. _ClustalW: http://www.clustal.org/ .. _STAMP: http://www.compbio.dundee.ac.uk/manuals/stamp.4.2/ @@ -682,19 +686,24 @@ def rms_fit_trj( logging.disable(logging.WARN) kwargs.setdefault('remarks', 'RMS fitted trajectory to reference') - if filename is None: - path, fn = os.path.split(frames.filename) - filename = os.path.join(path, prefix + fn) - _Writer = frames.Writer + writer = None + if in_memory: + traj.transfer_to_memory() + frames = traj.trajectory + filename = None + logger.info("Moved trajectory to in-memory representation") else: - _Writer = frames.OtherWriter - if os.path.exists(filename) and not force: - logger.warn( - "{0} already exists and will NOT be overwritten; use force=True if you want this".format(filename)) - return filename - writer = _Writer(filename, **kwargs) - del _Writer - + if filename is None: + path, fn = os.path.split(frames.filename) + filename = os.path.join(path, prefix + fn) + _Writer = frames.Writer + else: + _Writer = frames.OtherWriter + if os.path.exists(filename) and not force: + logger.warn("{0} already exists and will NOT be overwritten; use force=True if you want this".format(filename)) + return filename + writer = _Writer(filename, **kwargs) + del _Writer select = rms.process_selection(select) ref_atoms = reference.select_atoms(*select['reference']) traj_atoms = traj.select_atoms(*select['mobile']) @@ -756,10 +765,11 @@ def rms_fit_trj( ts.positions[:] = ts.positions * R ts.positions += ref_com - writer.write(traj.atoms) # write whole input trajectory system - percentage.echo(ts.frame) + if writer is not None: + writer.write(traj.atoms) # write whole input trajectory system + percentage.echo(ts.frame) logger.info("Wrote %d RMS-fitted coordinate frames to file %r", - frames.n_frames, filename) + frames.n_frames, filename) if rmsdfile is not None: np.savetxt(rmsdfile, rmsd) diff --git a/package/MDAnalysis/coordinates/DCD.py b/package/MDAnalysis/coordinates/DCD.py index e121a098295..12d6c8eb296 100644 --- a/package/MDAnalysis/coordinates/DCD.py +++ b/package/MDAnalysis/coordinates/DCD.py @@ -500,7 +500,7 @@ def _read_frame(self, frame): ts.frame = frame return ts - def timeseries(self, asel, start=None, stop=None, step=None, skip=None, + def timeseries(self, asel=None, start=None, stop=None, step=None, skip=None, format='afc'): """Return a subset of coordinate data for an AtomGroup @@ -527,12 +527,17 @@ def timeseries(self, asel, start=None, stop=None, step=None, skip=None, category=DeprecationWarning) start, stop, step = self.check_slice_indices(start, stop, step) - if len(asel) == 0: - raise NoDataError("Timeseries requires at least one atom to analyze") + + if asel is not None: + if len(asel) == 0: + raise NoDataError("Timeseries requires at least one atom to analyze") + atom_numbers = list(asel.indices) + else: + atom_numbers = range(self.n_atoms) + if len(format) != 3 and format not in ['afc', 'acf', 'caf', 'cfa', 'fac', 'fca']: raise ValueError("Invalid timeseries format") - atom_numbers = list(asel.indices) - # Check if the atom numbers can be grouped for efficiency, then we can read partial buffers + # Check if the atom numbers can be grouped for efficiency, then we can read partial buffers # from trajectory file instead of an entire timestep # XXX needs to be implemented return self._read_timeseries(atom_numbers, start, stop, step, format) diff --git a/package/MDAnalysis/coordinates/__init__.py b/package/MDAnalysis/coordinates/__init__.py index 28805c4140b..300525775fa 100644 --- a/package/MDAnalysis/coordinates/__init__.py +++ b/package/MDAnalysis/coordinates/__init__.py @@ -709,6 +709,7 @@ from . import TRZ from . import XTC from . import XYZ +from . import memory try: from . import DCD diff --git a/package/MDAnalysis/coordinates/base.py b/package/MDAnalysis/coordinates/base.py index e6ace6f7b92..f9afc7fa174 100644 --- a/package/MDAnalysis/coordinates/base.py +++ b/package/MDAnalysis/coordinates/base.py @@ -121,6 +121,7 @@ from six.moves import range import six +import logging as log import itertools import os.path import warnings @@ -1401,6 +1402,7 @@ def next_as_aux(self, auxname): -------- :meth:`iter_as_aux` """ + aux = self._check_for_aux(auxname) ts = self.ts # catch up auxiliary if it starts earlier than trajectory @@ -1416,7 +1418,7 @@ def next_as_aux(self, auxname): while self.frame != next_frame or getattr(self, '_frame', 0) == -1: # iterate trajectory until frame is reached ts = self.next() - return ts + return ts def iter_as_aux(self, auxname): """Iterate through timesteps for which there is at least one assigned diff --git a/package/MDAnalysis/coordinates/memory.py b/package/MDAnalysis/coordinates/memory.py new file mode 100644 index 00000000000..311c87be944 --- /dev/null +++ b/package/MDAnalysis/coordinates/memory.py @@ -0,0 +1,252 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 +# +# MDAnalysis --- http://www.MDAnalysis.org +# Copyright (c) 2006-2015 Naveen Michaud-Agrawal, Elizabeth J. Denning, Oliver Beckstein +# and contributors (see AUTHORS for the full list) +# +# Released under the GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 +# +""" +Reading trajectories from memory --- :mod:`MDAnalysis.coordinates.memory` +========================================================================== + +:Author: Wouter Boomsma +:Year: 2016 +:Copyright: GNU Public License v2 +:Maintainer: Wouter Boomsma , wouterboomsma on github + + +.. versionadded:: 0.15.0 + +The module contains a trajectory reader that operates on an array +in memory, rather than reading from file. This makes it possible to +use operate on raw coordinate using existing MDAnalysis tools. In +addition, it allows the user to make changes to the coordinates in +a trajectory (e.g. through AtomGroup.set_positions) without having +to write the entire state to file. + + +Examples +-------- + +Constructing a Reader from a numpy array +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A simple example where a new universe is created from the +array extracted from a DCD timeseries + + from MDAnalysis import Universe + from MDAnalysisTests.datafiles import DCD, PDB_small + from MDAnalysis.coordinates.array import ArrayReader + + universe = Universe(PDB_small, DCD) + coordinates = universe.trajectory.timeseries(universe.atoms) + + universe2 = Universe(PDB_small, coordinates, + format=MemoryReader) + +This two step process can also be done in one go: + + universe = Universe(PDB_small, DCD, in_memory=True) + +""" +import logging +import errno +import numpy as np + +from . import base + + +class MemoryReader(base.ProtoReader): + """ + A trajectory reader interface to a numpy array of the coordinates. + For compatibility with the timeseries interface, support is provided for + specifying the order of columns through the format option. + + """ + + format = 'memory' + + class MemoryTimestep(base.Timestep): + """ + Overrides the positions property in base.Timestep to + use avoid duplication of the array. + """ + + @property + def positions(self): + return base.Timestep.positions.fget(self) + + @positions.setter + def positions(self, new): + self.has_positions = True + # Use reference to original rather than a copy + self._pos = new + + _Timestep = MemoryTimestep + + def __init__(self, coordinate_array, format='afc', + dimensions = None, dt=1, **kwargs): + """ + + Parameters + --------- + coordinate_array : :class:`~numpy.ndarray object + The underlying array of coordinates + format : str, optional + the order/shape of the return data array, corresponding + to (a)tom, (f)rame, (c)oordinates all six combinations + of 'a', 'f', 'c' are allowed ie "fac" - return array + where the shape is (frame, number of atoms, + coordinates) + dimensions: (*A*, *B*, *C*, *alpha*, *beta*, *gamma*), optional + unitcell dimensions (*A*, *B*, *C*, *alpha*, *beta*, *gamma*) + + lengths *A*, *B*, *C* are in the MDAnalysis length unit (Å), and + angles are in degrees. + dt: float, optional + The time difference between frames (ps). If :attr:`time` + is set, then `dt` will be ignored. + """ + + super(MemoryReader, self).__init__() + + self.set_array(np.asarray(coordinate_array), format) + self.n_frames = self.coordinate_array.shape[self.format.find('f')] + self.n_atoms = self.coordinate_array.shape[self.format.find('a')] + + provided_n_atoms = kwargs.pop("n_atoms", None) + if (provided_n_atoms is not None and + provided_n_atoms != self.n_atoms): + raise ValueError("The provided value for n_atoms does not match" + "the shape of the coordinate array") + + self.ts = self._Timestep(self.n_atoms, **kwargs) + self.ts.dt = dt + if dimensions is not None: + self.ts.dimensions = dimensions + self.ts.frame = -1 + self.ts.time = -1 + self._read_next_timestep() + + def set_array(self, coordinate_array, format='afc'): + """ + Set underlying array in desired column format. + + Parameters + --------- + coordinate_array : :class:`~numpy.ndarray object + The underlying array of coordinates + format + The order/shape of the return data array, corresponding + to (a)tom, (f)rame, (c)oordinates all six combinations + of 'a', 'f', 'c' are allowed ie "fac" - return array + where the shape is (frame, number of atoms, + coordinates) + """ + self.coordinate_array = coordinate_array + self.format = format + + def get_array(self): + """ + Return underlying array. + """ + return self.coordinate_array + + def _reopen(self): + """Reset iteration to first frame""" + self.ts.frame = -1 + self.ts.time = -1 + + def timeseries(self, asel=None, start=0, stop=-1, step=1, format='afc'): + """Return a subset of coordinate data for an AtomGroup in desired + column format. If no selection is given, it will return a view of + the underlying array, while a copy is returned otherwise. + + Parameters + --------- + asel : :class:`~MDAnalysis.core.AtomGroup.AtomGroup` object + Atom selection + start, stop, skip : int + range of trajectory to access, start and stop are inclusive + format : str + the order/shape of the return data array, corresponding + to (a)tom, (f)rame, (c)oordinates all six combinations + of 'a', 'f', 'c' are allowed ie "fac" - return array + where the shape is (frame, number of atoms, + coordinates) + """ + + array = self.get_array() + if format == self.format: + pass + elif format[0] == self.format[0]: + array = np.swapaxes(array, 1, 2) + elif format[1] == self.format[1]: + array = np.swapaxes(array, 0, 2) + elif format[2] == self.format[2]: + array = np.swapaxes(array, 0, 1) + elif self.format[1] == format[0]: + array = np.swapaxes(array, 1, 0) + array = np.swapaxes(array, 1, 2) + elif self.format[2] == format[0]: + array = np.swapaxes(array, 2, 0) + array = np.swapaxes(array, 1, 2) + + a_index = format.find('a') + f_index = format.find('f') + stop_index = stop+1 + if stop_index == 0: + stop_index = None + basic_slice = ([slice(None)] * f_index + + [slice(start, stop_index, step)] + + [slice(None)] * (2-f_index)) + + # Return a view if either: + # 1) asel is None + # 2) asel corresponds to the selection of all atoms. + array = array[basic_slice] + if (asel is None or asel is asel.universe.atoms): + return array + else: + # If selection is specified, return a copy + return array.take(asel.indices, a_index) + + def _read_next_timestep(self, ts=None): + """copy next frame into timestep""" + + if self.ts.frame >= self.n_frames-1: + raise IOError(errno.EIO, 'trying to go over trajectory limit') + if ts is None: + ts = self.ts + ts.frame += 1 + f_index = self.format.find('f') + basic_slice = ([slice(None)]*(f_index) + + [self.ts.frame] + + [slice(None)]*(2-f_index)) + ts.positions = self.coordinate_array[basic_slice] + + ts.time = self.ts.frame*self.dt + return ts + + def _read_frame(self, i): + """read frame i""" + # Frame number is incremented to zero by _read_next_timestep() + self.ts.frame = i - 1 + return self._read_next_timestep() + + def __repr__(self): + """String representation""" + return ("<{cls} with {nframes} frames of {natoms} atoms>" + "".format( + cls=self.__class__.__name__, + nframes=self.n_frames, + natoms=self.n_atoms + )) diff --git a/package/MDAnalysis/core/AtomGroup.py b/package/MDAnalysis/core/AtomGroup.py index ca4ee119620..d6a8b200f87 100644 --- a/package/MDAnalysis/core/AtomGroup.py +++ b/package/MDAnalysis/core/AtomGroup.py @@ -4232,6 +4232,11 @@ def __init__(self, *args, **kwargs): Universe with matching *anchor_name* is found. *is_anchor* will be ignored in this case but will still be honored when unpickling :class:`MDAnalysis.core.AtomGroup.AtomGroup` instances pickled with *anchor_name*==``None``. [``None``] + *in_memory* + After reading in the trajectory, transfer it to an in-memory + representations, which allow for manipulation of coordinates. + *in_memory_frame_interval + Only read every nth frame into in-memory representation. This class tries to do the right thing: @@ -4897,6 +4902,10 @@ def load_new(self, filename, **kwargs): fname=filename, trj_n_atoms=self.trajectory.n_atoms)) + # Optionally switch to an in-memory representation of the trajectory + if kwargs.get("in_memory"): + self.transfer_to_memory(kwargs.get("in_memory_frame_interval", 1)) + return filename, self.trajectory.format def select_atoms(self, sel, *othersel, **selgroups): @@ -5028,6 +5037,46 @@ def _matches_unpickling(self, anchor_name, n_atoms, fname, trajname): else: return False + def transfer_to_memory(self, frame_interval=1): + """Replace the current trajectory reader object with one of type + :class:`MDAnalysis.coordinates.memory.MemoryReader` to support in-place + editing of coordinates. + + :Arguments: + *frame_interval* + Read in every nth frame. + + .. versionadded:: 0.15.0 + """ + + from ..coordinates.memory import MemoryReader + + if self.trajectory.format != "array": + + # Try to extract coordinates using Timeseries object + # This is significantly faster, but only implemented for certain + # trajectory file formats + try: + coordinates = self.trajectory.timeseries( + self.atoms, format='afc', step=frame_interval) + + # if the Timeseries extraction fails, + # fall back to a slower approach + except AttributeError: + coordinates = \ + np.array([ts.positions for ts in + self.trajectory[frame_interval-1::frame_interval]]) + coordinates = coordinates.swapaxes(0, 1) + + # Overwrite trajectory in universe with an MemoryReader + # object, to provide fast access and allow coordinates + # to be manipulated + self.trajectory = \ + MemoryReader(coordinates, + dimensions=self.trajectory.ts.dimensions, + dt=self.trajectory.ts.dt) + + @deprecate(message=_SIXTEEN_DEPRECATION) def as_Universe(*args, **kwargs): diff --git a/package/MDAnalysis/core/Selection.py b/package/MDAnalysis/core/Selection.py index d76a15b01c9..fa5933f9d3d 100644 --- a/package/MDAnalysis/core/Selection.py +++ b/package/MDAnalysis/core/Selection.py @@ -178,6 +178,12 @@ def __init__(self, parser, tokens): pass def apply(self, group): + # Check whether group is identical to the one stored + # in the corresponding universe, in which case this + # is returned directly. This works since the Universe.atoms + # are unique by construction. + if group.universe and group is group.universe.atoms: + return group return unique(group[:]) diff --git a/testsuite/MDAnalysisTests/coordinates/test_memory.py b/testsuite/MDAnalysisTests/coordinates/test_memory.py new file mode 100644 index 00000000000..f4fffb46386 --- /dev/null +++ b/testsuite/MDAnalysisTests/coordinates/test_memory.py @@ -0,0 +1,149 @@ +import numpy as np +import logging +from numpy.testing import raises + +import MDAnalysis as mda +from MDAnalysisTests.datafiles import DCD, PSF +from MDAnalysisTests.coordinates.base import (BaseReference, + BaseReaderTest) +from MDAnalysis.coordinates.memory import MemoryReader +from numpy.testing import assert_equal + + +class MemoryReference(BaseReference): + def __init__(self): + super(MemoryReference, self).__init__() + + self.topology = PSF + self.trajectory = DCD + self.universe = mda.Universe(PSF, DCD) + + self.n_atoms = self.universe.trajectory.n_atoms + self.n_frames = self.universe.trajectory.n_frames + + self.dt = self.universe.trajectory.ts.dt + self.dimensions = self.universe.trajectory.ts.dimensions + self.totaltime = self.universe.trajectory.totaltime + self.volume = self.universe.trajectory.ts.volume + + self.first_frame = MemoryReader.MemoryTimestep(self.n_atoms) + self.first_frame.positions = np.array(self.universe.trajectory[0]) + self.first_frame.frame = 0 + self.first_frame.time = self.first_frame.frame*self.dt + + self.second_frame = MemoryReader.MemoryTimestep(self.n_atoms) + self.second_frame.positions = np.array(self.universe.trajectory[1]) + self.second_frame.frame = 1 + self.second_frame.time = self.second_frame.frame*self.dt + + self.last_frame = MemoryReader.MemoryTimestep(self.n_atoms) + self.last_frame.positions = \ + np.array(self.universe.trajectory[self.n_frames - 1]) + self.last_frame.frame = self.n_frames - 1 + self.last_frame.time = self.last_frame.frame*self.dt + + self.jump_to_frame = self.first_frame.copy() + self.jump_to_frame.positions = np.array(self.universe.trajectory[3]) + self.jump_to_frame.frame = 3 + self.jump_to_frame.time = self.jump_to_frame.frame*self.dt + + def reader(self, trajectory): + return mda.Universe(self.topology, + trajectory, in_memory=True).trajectory + + def iter_ts(self, i): + ts = self.universe.trajectory[i] + return ts + + + +class TestMemoryReader(BaseReaderTest): + def __init__(self): + reference = MemoryReference() + super(TestMemoryReader, self).__init__(reference) + + def test_iteration(self): + frames = 0 + for i, frame in enumerate(self.reader): + frames += 1 + assert_equal(frames, self.ref.n_frames) + + def test_extract_array_afc(self): + assert_equal(self.reader.timeseries(format='afc').shape, (3341, 98, 3)) + + def test_extract_array_fac(self): + assert_equal(self.reader.timeseries(format='fac').shape, (98, 3341, 3)) + + def test_extract_array_cfa(self): + assert_equal(self.reader.timeseries(format='cfa').shape, (3, 98, 3341)) + + def test_extract_array_acf(self): + assert_equal(self.reader.timeseries(format='acf').shape, (3341, 3, 98)) + + def test_extract_array_fca(self): + assert_equal(self.reader.timeseries(format='fca').shape, (98, 3, 3341)) + + def test_extract_array_caf(self): + assert_equal(self.reader.timeseries(format='caf').shape, (3, 3341, 98)) + + def test_timeseries_skip1(self): + assert_equal(self.reader.timeseries(self.ref.universe.atoms).shape, + (3341, 98, 3)) + + def test_timeseries_skip10(self): + # Check that timeseries skip works similar to numpy slicing + array1 = self.reader.timeseries(step=10) + array2 = self.reader.timeseries()[:,::10,:] + assert_equal(array1, array2) + + def test_timeseries_view(self): + # timeseries() is expected to provide a view of the underlying array + assert_equal(self.reader.timeseries().base is self.reader.get_array(), + True) + + def test_timeseries_subarray_view(self): + # timeseries() is expected to provide a view of the underlying array + # also in the case where we slice the array using the start, stop and + # step options. + assert_equal( + self.reader.timeseries(start=5, + stop=15, + step=2, + format='fac').base is self.reader.get_array(), + True) + + def test_timeseries_view_from_universe_atoms(self): + # timeseries() is expected to provide a view of the underlying array + # also in the special case when asel=universe.atoms. + selection = self.ref.universe.atoms + assert_equal(self.reader.timeseries( + asel=selection).base is self.reader.get_array(), + True) + + def test_timeseries_view_from_select_all(self): + # timeseries() is expected to provide a view of the underlying array + # also in the special case when using "all" in selections. + selection = self.ref.universe.select_atoms("all") + assert_equal(self.reader.timeseries( + asel=selection).base is self.reader.get_array(), + True) + + def test_timeseries_noview(self): + # timeseries() is expected NOT to provide a view of the underlying array + # for any other selection than "all". + selection = self.ref.universe.select_atoms("name CA") + assert_equal(self.reader.timeseries( + asel=selection).base is self.reader.get_array(), + False) + + def test_repr(self): + str_rep = str(self.reader) + expected = "" + assert_equal(str_rep, expected) + + def test_get_writer_1(self): + pass + + def test_get_writer_2(self): + pass + diff --git a/testsuite/MDAnalysisTests/test_atomgroup.py b/testsuite/MDAnalysisTests/test_atomgroup.py index b434769d900..b33097f584d 100644 --- a/testsuite/MDAnalysisTests/test_atomgroup.py +++ b/testsuite/MDAnalysisTests/test_atomgroup.py @@ -2026,6 +2026,65 @@ def test_custom_both(self): topology_format=MDAnalysis.topology.PSFParser.PSFParser) assert_equal(len(u.atoms), 8184) +class TestInMemoryUniverse(TestCase): + + @staticmethod + @dec.skipif(parser_not_found('DCD'), + 'DCD parser not available. Are you using python 3?') + def test_reader_w_timeseries(): + universe = MDAnalysis.Universe(PSF, DCD, in_memory=True) + assert_equal(universe.trajectory.timeseries(universe.atoms).shape, + (3341, 98, 3), + err_msg="Unexpected shape of trajectory timeseries") + + @staticmethod + def test_reader_wo_timeseries(): + universe = MDAnalysis.Universe(GRO, TRR, in_memory=True) + assert_equal(universe.trajectory.timeseries(universe.atoms).shape, + (47681, 10, 3), + err_msg="Unexpected shape of trajectory timeseries") + + @staticmethod + @dec.skipif(parser_not_found('DCD'), + 'DCD parser not available. Are you using python 3?') + def test_reader_w_timeseries_frame_interval(): + universe = MDAnalysis.Universe(PSF, DCD, in_memory=True, + in_memory_frame_interval=10) + assert_equal(universe.trajectory.timeseries(universe.atoms).shape, + (3341, 10, 3), + err_msg="Unexpected shape of trajectory timeseries") + + @staticmethod + def test_reader_wo_timeseries_frame_interval(): + universe = MDAnalysis.Universe(GRO, TRR, in_memory=True, + in_memory_frame_interval=3) + assert_equal(universe.trajectory.timeseries(universe.atoms).shape, + (47681, 3, 3), + err_msg="Unexpected shape of trajectory timeseries") + + @staticmethod + @dec.skipif(parser_not_found('DCD'), + 'DCD parser not available. Are you using python 3?') + def test_existing_universe(): + universe = MDAnalysis.Universe(PDB_small, DCD) + universe.transfer_to_memory() + assert_equal(universe.trajectory.timeseries(universe.atoms).shape, + (3341, 98, 3), + err_msg="Unexpected shape of trajectory timeseries") + + + @staticmethod + @dec.skipif(parser_not_found('DCD'), + 'DCD parser not available. Are you using python 3?') + def test_frame_interval_convention(): + universe1 = MDAnalysis.Universe(PSF, DCD) + array1 = universe1.trajectory.timeseries(skip=10) + universe2 = MDAnalysis.Universe(PSF, DCD, in_memory=True, + in_memory_frame_interval=10) + array2 = universe2.trajectory.timeseries() + assert_equal(array1, array2, + err_msg="Unexpected differences between arrays.") + class TestWrap(TestCase): @dec.skipif(parser_not_found('TRZ'), From 5710477765a5c1bee77c7d946a7dbda4b0bd5e90 Mon Sep 17 00:00:00 2001 From: Wouter Boomsma Date: Sat, 10 Sep 2016 13:08:43 +0200 Subject: [PATCH 2/9] Added documentation, and removed unused import --- package/MDAnalysis/coordinates/DCD.py | 2 ++ package/MDAnalysis/coordinates/base.py | 1 - package/MDAnalysis/coordinates/memory.py | 5 ++++- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/package/MDAnalysis/coordinates/DCD.py b/package/MDAnalysis/coordinates/DCD.py index 12d6c8eb296..50c834515d8 100644 --- a/package/MDAnalysis/coordinates/DCD.py +++ b/package/MDAnalysis/coordinates/DCD.py @@ -507,6 +507,8 @@ def timeseries(self, asel=None, start=None, stop=None, step=None, skip=None, :Arguments: *asel* :class:`~MDAnalysis.core.AtomGroup.AtomGroup` object + Defaults to None, in which case the full set of coordinate data + is returned. *start, stop, step* A range of the trajectory to access, with start being inclusive and stop being exclusive. diff --git a/package/MDAnalysis/coordinates/base.py b/package/MDAnalysis/coordinates/base.py index f9afc7fa174..0cafa01a81f 100644 --- a/package/MDAnalysis/coordinates/base.py +++ b/package/MDAnalysis/coordinates/base.py @@ -121,7 +121,6 @@ from six.moves import range import six -import logging as log import itertools import os.path import warnings diff --git a/package/MDAnalysis/coordinates/memory.py b/package/MDAnalysis/coordinates/memory.py index 311c87be944..e6d23e58b06 100644 --- a/package/MDAnalysis/coordinates/memory.py +++ b/package/MDAnalysis/coordinates/memory.py @@ -173,7 +173,10 @@ def timeseries(self, asel=None, start=0, stop=-1, step=1, format='afc'): Parameters --------- asel : :class:`~MDAnalysis.core.AtomGroup.AtomGroup` object - Atom selection + Atom selection. Defaults to None, in which case the full set of + coordinate data is returned. Note that in this case, a view + of the underlying numpy array is returned, while a copy of the + data is returned whenever asel is different from None. start, stop, skip : int range of trajectory to access, start and stop are inclusive format : str From 2637a20e360978038287f9b52b336a9836c53833 Mon Sep 17 00:00:00 2001 From: Wouter Boomsma Date: Sun, 11 Sep 2016 22:01:04 +0200 Subject: [PATCH 3/9] Minor bugfixes and documentation updates related to MemoryReader. --- package/MDAnalysis/coordinates/memory.py | 66 ++++++++++--------- package/MDAnalysis/core/AtomGroup.py | 4 +- .../coordinates/test_memory.py | 10 ++- testsuite/MDAnalysisTests/test_atomgroup.py | 4 +- 4 files changed, 42 insertions(+), 42 deletions(-) diff --git a/package/MDAnalysis/coordinates/memory.py b/package/MDAnalysis/coordinates/memory.py index e6d23e58b06..57f9798f247 100644 --- a/package/MDAnalysis/coordinates/memory.py +++ b/package/MDAnalysis/coordinates/memory.py @@ -20,10 +20,10 @@ :Author: Wouter Boomsma :Year: 2016 :Copyright: GNU Public License v2 -:Maintainer: Wouter Boomsma , wouterboomsma on github +:Maintainer: Wouter Boomsma , wouterboomsma on github -.. versionadded:: 0.15.0 +.. versionadded:: 0.16.0 The module contains a trajectory reader that operates on an array in memory, rather than reading from file. This makes it possible to @@ -44,7 +44,7 @@ from MDAnalysis import Universe from MDAnalysisTests.datafiles import DCD, PDB_small - from MDAnalysis.coordinates.array import ArrayReader + from MDAnalysis.coordinates.memory import MemoryReader universe = Universe(PDB_small, DCD) coordinates = universe.trajectory.timeseries(universe.atoms) @@ -64,6 +64,23 @@ from . import base +class Timestep(base.Timestep): + """ + Overrides the positions property in base.Timestep to + use avoid duplication of the array. + """ + + @property + def positions(self): + return base.Timestep.positions.fget(self) + + @positions.setter + def positions(self, new): + self.has_positions = True + # Use reference to original rather than a copy + self._pos = new + + class MemoryReader(base.ProtoReader): """ A trajectory reader interface to a numpy array of the coordinates. @@ -73,24 +90,7 @@ class MemoryReader(base.ProtoReader): """ format = 'memory' - - class MemoryTimestep(base.Timestep): - """ - Overrides the positions property in base.Timestep to - use avoid duplication of the array. - """ - - @property - def positions(self): - return base.Timestep.positions.fget(self) - - @positions.setter - def positions(self, new): - self.has_positions = True - # Use reference to original rather than a copy - self._pos = new - - _Timestep = MemoryTimestep + _Timestep = Timestep def __init__(self, coordinate_array, format='afc', dimensions = None, dt=1, **kwargs): @@ -108,7 +108,6 @@ def __init__(self, coordinate_array, format='afc', coordinates) dimensions: (*A*, *B*, *C*, *alpha*, *beta*, *gamma*), optional unitcell dimensions (*A*, *B*, *C*, *alpha*, *beta*, *gamma*) - lengths *A*, *B*, *C* are in the MDAnalysis length unit (Å), and angles are in degrees. dt: float, optional @@ -118,9 +117,12 @@ def __init__(self, coordinate_array, format='afc', super(MemoryReader, self).__init__() + self.stored_format = format self.set_array(np.asarray(coordinate_array), format) - self.n_frames = self.coordinate_array.shape[self.format.find('f')] - self.n_atoms = self.coordinate_array.shape[self.format.find('a')] + self.n_frames = \ + self.coordinate_array.shape[self.stored_format.find('f')] + self.n_atoms = \ + self.coordinate_array.shape[self.stored_format.find('a')] provided_n_atoms = kwargs.pop("n_atoms", None) if (provided_n_atoms is not None and @@ -152,7 +154,7 @@ def set_array(self, coordinate_array, format='afc'): coordinates) """ self.coordinate_array = coordinate_array - self.format = format + self.stored_format = format def get_array(self): """ @@ -188,18 +190,18 @@ def timeseries(self, asel=None, start=0, stop=-1, step=1, format='afc'): """ array = self.get_array() - if format == self.format: + if format == self.stored_format: pass - elif format[0] == self.format[0]: + elif format[0] == self.stored_format[0]: array = np.swapaxes(array, 1, 2) - elif format[1] == self.format[1]: + elif format[1] == self.stored_format[1]: array = np.swapaxes(array, 0, 2) - elif format[2] == self.format[2]: + elif format[2] == self.stored_format[2]: array = np.swapaxes(array, 0, 1) - elif self.format[1] == format[0]: + elif format[0] == self.stored_format[1]: array = np.swapaxes(array, 1, 0) array = np.swapaxes(array, 1, 2) - elif self.format[2] == format[0]: + elif format[0] == self.stored_format[2]: array = np.swapaxes(array, 2, 0) array = np.swapaxes(array, 1, 2) @@ -230,7 +232,7 @@ def _read_next_timestep(self, ts=None): if ts is None: ts = self.ts ts.frame += 1 - f_index = self.format.find('f') + f_index = self.stored_format.find('f') basic_slice = ([slice(None)]*(f_index) + [self.ts.frame] + [slice(None)]*(2-f_index)) diff --git a/package/MDAnalysis/core/AtomGroup.py b/package/MDAnalysis/core/AtomGroup.py index d6a8b200f87..0e95c3d6e66 100644 --- a/package/MDAnalysis/core/AtomGroup.py +++ b/package/MDAnalysis/core/AtomGroup.py @@ -5051,7 +5051,7 @@ def transfer_to_memory(self, frame_interval=1): from ..coordinates.memory import MemoryReader - if self.trajectory.format != "array": + if self.trajectory.format != "memory": # Try to extract coordinates using Timeseries object # This is significantly faster, but only implemented for certain @@ -5065,7 +5065,7 @@ def transfer_to_memory(self, frame_interval=1): except AttributeError: coordinates = \ np.array([ts.positions for ts in - self.trajectory[frame_interval-1::frame_interval]]) + self.trajectory[::frame_interval]]) coordinates = coordinates.swapaxes(0, 1) # Overwrite trajectory in universe with an MemoryReader diff --git a/testsuite/MDAnalysisTests/coordinates/test_memory.py b/testsuite/MDAnalysisTests/coordinates/test_memory.py index f4fffb46386..3e431296816 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_memory.py +++ b/testsuite/MDAnalysisTests/coordinates/test_memory.py @@ -1,12 +1,10 @@ import numpy as np -import logging -from numpy.testing import raises import MDAnalysis as mda from MDAnalysisTests.datafiles import DCD, PSF from MDAnalysisTests.coordinates.base import (BaseReference, BaseReaderTest) -from MDAnalysis.coordinates.memory import MemoryReader +from MDAnalysis.coordinates.memory import Timestep from numpy.testing import assert_equal @@ -26,17 +24,17 @@ def __init__(self): self.totaltime = self.universe.trajectory.totaltime self.volume = self.universe.trajectory.ts.volume - self.first_frame = MemoryReader.MemoryTimestep(self.n_atoms) + self.first_frame = Timestep(self.n_atoms) self.first_frame.positions = np.array(self.universe.trajectory[0]) self.first_frame.frame = 0 self.first_frame.time = self.first_frame.frame*self.dt - self.second_frame = MemoryReader.MemoryTimestep(self.n_atoms) + self.second_frame = Timestep(self.n_atoms) self.second_frame.positions = np.array(self.universe.trajectory[1]) self.second_frame.frame = 1 self.second_frame.time = self.second_frame.frame*self.dt - self.last_frame = MemoryReader.MemoryTimestep(self.n_atoms) + self.last_frame = Timestep(self.n_atoms) self.last_frame.positions = \ np.array(self.universe.trajectory[self.n_frames - 1]) self.last_frame.frame = self.n_frames - 1 diff --git a/testsuite/MDAnalysisTests/test_atomgroup.py b/testsuite/MDAnalysisTests/test_atomgroup.py index b33097f584d..a83c52e5048 100644 --- a/testsuite/MDAnalysisTests/test_atomgroup.py +++ b/testsuite/MDAnalysisTests/test_atomgroup.py @@ -2026,6 +2026,7 @@ def test_custom_both(self): topology_format=MDAnalysis.topology.PSFParser.PSFParser) assert_equal(len(u.atoms), 8184) + class TestInMemoryUniverse(TestCase): @staticmethod @@ -2059,7 +2060,7 @@ def test_reader_wo_timeseries_frame_interval(): universe = MDAnalysis.Universe(GRO, TRR, in_memory=True, in_memory_frame_interval=3) assert_equal(universe.trajectory.timeseries(universe.atoms).shape, - (47681, 3, 3), + (47681, 4, 3), err_msg="Unexpected shape of trajectory timeseries") @staticmethod @@ -2072,7 +2073,6 @@ def test_existing_universe(): (3341, 98, 3), err_msg="Unexpected shape of trajectory timeseries") - @staticmethod @dec.skipif(parser_not_found('DCD'), 'DCD parser not available. Are you using python 3?') From 2b30138675ebcc43a1ea76844c3c5221850ceea0 Mon Sep 17 00:00:00 2001 From: Wouter Boomsma Date: Mon, 12 Sep 2016 13:05:22 +0200 Subject: [PATCH 4/9] Changed reader format string: memory->MEMORY --- package/MDAnalysis/coordinates/memory.py | 2 +- package/MDAnalysis/core/AtomGroup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/package/MDAnalysis/coordinates/memory.py b/package/MDAnalysis/coordinates/memory.py index 57f9798f247..e5282230492 100644 --- a/package/MDAnalysis/coordinates/memory.py +++ b/package/MDAnalysis/coordinates/memory.py @@ -89,7 +89,7 @@ class MemoryReader(base.ProtoReader): """ - format = 'memory' + format = 'MEMORY' _Timestep = Timestep def __init__(self, coordinate_array, format='afc', diff --git a/package/MDAnalysis/core/AtomGroup.py b/package/MDAnalysis/core/AtomGroup.py index 0e95c3d6e66..943f96193a7 100644 --- a/package/MDAnalysis/core/AtomGroup.py +++ b/package/MDAnalysis/core/AtomGroup.py @@ -5051,7 +5051,7 @@ def transfer_to_memory(self, frame_interval=1): from ..coordinates.memory import MemoryReader - if self.trajectory.format != "memory": + if self.trajectory.format != MemoryReader.format: # Try to extract coordinates using Timeseries object # This is significantly faster, but only implemented for certain From b503176dd434d240a36efe7f1aaae788a5361af3 Mon Sep 17 00:00:00 2001 From: Wouter Boomsma Date: Mon, 12 Sep 2016 13:07:51 +0200 Subject: [PATCH 5/9] Removed check for existence of group.universe --- package/MDAnalysis/core/Selection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package/MDAnalysis/core/Selection.py b/package/MDAnalysis/core/Selection.py index fa5933f9d3d..66c32ac4515 100644 --- a/package/MDAnalysis/core/Selection.py +++ b/package/MDAnalysis/core/Selection.py @@ -182,7 +182,7 @@ def apply(self, group): # in the corresponding universe, in which case this # is returned directly. This works since the Universe.atoms # are unique by construction. - if group.universe and group is group.universe.atoms: + if group is group.universe.atoms: return group return unique(group[:]) From f4e3b65ebc0094e07ad473d7bb4bd09b0e27ba34 Mon Sep 17 00:00:00 2001 From: Wouter Boomsma Date: Mon, 12 Sep 2016 13:23:07 +0200 Subject: [PATCH 6/9] Use isinstance instead of checking against format class variable. --- package/MDAnalysis/core/AtomGroup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package/MDAnalysis/core/AtomGroup.py b/package/MDAnalysis/core/AtomGroup.py index 943f96193a7..2870de5ac2d 100644 --- a/package/MDAnalysis/core/AtomGroup.py +++ b/package/MDAnalysis/core/AtomGroup.py @@ -5051,7 +5051,7 @@ def transfer_to_memory(self, frame_interval=1): from ..coordinates.memory import MemoryReader - if self.trajectory.format != MemoryReader.format: + if not isinstance(self.trajectory, MemoryReader): # Try to extract coordinates using Timeseries object # This is significantly faster, but only implemented for certain From 7a999e4da7caaea7edbafa57342bfa6ee19cf09e Mon Sep 17 00:00:00 2001 From: Wouter Boomsma Date: Mon, 12 Sep 2016 14:42:15 +0200 Subject: [PATCH 7/9] Inreased travis process timeout to 400s --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 7266c389052..47da5a58939 100644 --- a/.travis.yml +++ b/.travis.yml @@ -63,7 +63,7 @@ install: # command to run tests script: - - ./testsuite/MDAnalysisTests/mda_nosetests --with-coverage --cover-package MDAnalysis --processes=2 --process-timeout=300 --with-memleak + - ./testsuite/MDAnalysisTests/mda_nosetests --with-coverage --cover-package MDAnalysis --processes=2 --process-timeout=400 --with-memleak - | test ${TRAVIS_PULL_REQUEST} == "false" && \ test ${TRAVIS_BRANCH} == ${GH_DOC_BRANCH} && \ From 4201b420d967bb61208be657bbcef1f6cf266621 Mon Sep 17 00:00:00 2001 From: Wouter Boomsma Date: Mon, 12 Sep 2016 21:14:09 +0200 Subject: [PATCH 8/9] Bugfix in MemoryReader: now forces numpy array copy when using in_memory option of Universe constructor --- package/MDAnalysis/core/AtomGroup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package/MDAnalysis/core/AtomGroup.py b/package/MDAnalysis/core/AtomGroup.py index 2870de5ac2d..6116aea7056 100644 --- a/package/MDAnalysis/core/AtomGroup.py +++ b/package/MDAnalysis/core/AtomGroup.py @@ -5064,7 +5064,7 @@ def transfer_to_memory(self, frame_interval=1): # fall back to a slower approach except AttributeError: coordinates = \ - np.array([ts.positions for ts in + np.array([np.copy(ts.positions[:]) for ts in self.trajectory[::frame_interval]]) coordinates = coordinates.swapaxes(0, 1) From 1c7fe3436d9c0bfd557498d76e6e1ac8d4886f6a Mon Sep 17 00:00:00 2001 From: Wouter Boomsma Date: Thu, 15 Sep 2016 10:00:21 +0200 Subject: [PATCH 9/9] Extra documentation and minor bugfix --- package/MDAnalysis/analysis/align.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/package/MDAnalysis/analysis/align.py b/package/MDAnalysis/analysis/align.py index d2712f2cb75..eb966c8eed9 100644 --- a/package/MDAnalysis/analysis/align.py +++ b/package/MDAnalysis/analysis/align.py @@ -183,6 +183,7 @@ import MDAnalysis.lib.qcprot as qcp from MDAnalysis.exceptions import SelectionError, SelectionWarning import MDAnalysis.analysis.rms as rms +from MDAnalysis.coordinates.memory import MemoryReader # remove after rms_fit_trj deprecation over from MDAnalysis.lib.log import ProgressMeter @@ -656,7 +657,8 @@ def rms_fit_trj( *in_memory* Default: ``False`` - ``True``: Switch to an in-memory trajectory so that alignment can - be done in-place. + be done in-place, which can improve performance substantially in + some cases. *kwargs* All other keyword arguments are passed on the trajectory @@ -687,7 +689,7 @@ def rms_fit_trj( kwargs.setdefault('remarks', 'RMS fitted trajectory to reference') writer = None - if in_memory: + if in_memory or isinstance(traj.trajectory, MemoryReader): traj.transfer_to_memory() frames = traj.trajectory filename = None