diff --git a/package/CHANGELOG b/package/CHANGELOG index c1f0127c1b4..77a2288672e 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -116,6 +116,9 @@ Enhancements * Added weights_groupselections option in RMSD for mapping custom weights to groupselections (PR #2610, Issue #2429) * PersistenceLength.plot() now create new axes if current axes not provided (Issue #2590) + * Added a correlations module to provide functionality for analysis modules to + calculate the discrete autocorrelation function in a standardised way. Added + the capability to allow intermittent behaviour (PR #2256) Changes * Deprecated :class:`ProgressMeter` and replaced it with :class:`ProgressBar` using diff --git a/package/MDAnalysis/analysis/hbonds/hbond_analysis.py b/package/MDAnalysis/analysis/hbonds/hbond_analysis.py index d158267de0f..8b89425c961 100644 --- a/package/MDAnalysis/analysis/hbonds/hbond_analysis.py +++ b/package/MDAnalysis/analysis/hbonds/hbond_analysis.py @@ -334,6 +334,7 @@ class HydrogenBondAnalysis_OtherFF(HydrogenBondAnalysis): from MDAnalysis.lib.log import ProgressBar from MDAnalysis.lib.NeighborSearch import AtomNeighborSearch from MDAnalysis.lib import distances +from MDAnalysis.lib.correlations import autocorrelation, correct_intermittency logger = logging.getLogger('MDAnalysis.analysis.hbonds') @@ -382,6 +383,9 @@ class HydrogenBondAnalysis(base.AnalysisBase): DEFAULT_DONORS/ACCEPTORS is now embedded in a dict to switch between default values for different force fields. + .. versionchanged:: 0.21.0 + Added autocorrelation (MDAnalysis.lib.correlations.py) for calculating hydrogen bond lifetime + .. versionchanged:: 1.0.0 ``save_table()`` method has been removed. You can use ``np.save()`` or ``cPickle.dump()`` on :attr:`HydrogenBondAnalysis.table` instead. @@ -1409,3 +1413,56 @@ def _make_dict(donors, hydrogens): h2donor.update(_make_dict(s1d, s1h)) return h2donor + + + def autocorrelation(self, tau_max=20, window_step=1, intermittency=0): + """ + Computes and returns the autocorrelation (HydrogenBondLifetimes ) on the computed hydrogen bonds. + + Parameters + ---------- + window_step : int, optional + Jump every `step`-th frame. This is compatible but independant of the taus used. + Note that `step` and `tau_max` work consistently with intermittency. + tau_max : int, optional + Survival probability is calculated for the range 1 <= `tau` <= `tau_max` + intermittency : int, optional + The maximum number of consecutive frames for which a bond can disappear but be counted as present if it + returns at the next frame. An intermittency of `0` is equivalent to a continuous autocorrelation, which does + not allow for the hydrogen bond disappearance. For example, for `intermittency=2`, any given hbond may + disappear for up to two consecutive frames yet be treated as being present at all frames. + The default is continuous (0). + + Returns + ------- + tau_timeseries : list + tau from 1 to `tau_max`. Saved in the field tau_timeseries. + timeseries : list + autcorrelation value for each value of `tau`. Saved in the field timeseries. + timeseries_data: list + raw datapoints from which the average is taken (timeseries). + Time dependancy and distribution can be extracted. + """ + + if self._timeseries is None: + logging.error("Autocorrelation analysis of hydrogen bonds cannot be done before the hydrogen bonds are found") + logging.error("Autocorrelation: Please use the .run() before calling this function") + return + + if self.step != 1: + logging.warning("Autocorrelation function should be carried out on consecutive frames. ") + logging.warning("Autocorrelation: if you would like to allow bonds to break and reform, please use 'intermittency'") + + # Extract the hydrogen bonds IDs only in the format [set(superset(x1,x2), superset(x3,x4)), ..] + hydrogen_bonds = self.timeseries + found_hydrogen_bonds = [{frozenset(bond[0:2]) for bond in frame} for frame in hydrogen_bonds] + + intermittent_hbonds = correct_intermittency(found_hydrogen_bonds, intermittency=intermittency) + tau_timeseries, timeseries, timeseries_data = autocorrelation(intermittent_hbonds, tau_max, + window_step=window_step) + self.acf_tau_timeseries = tau_timeseries + self.acf_timeseries = timeseries + # user can investigate the distribution and sample size + self.acf_timeseries_data = timeseries_data + + return self diff --git a/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py b/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py index c40f1d16537..1d064e5c34e 100644 --- a/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py +++ b/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py @@ -301,6 +301,11 @@ def __init__(self, universe, nruns=1, # number of times to iterate through the trajectory nsamples=50, # number of different points to sample in a run pbc=True): + + #warnings.warn("This class is deprecated, use analysis.hbonds.HydrogenBondAnalysis " + # "which has .autocorrelation function", + # category=DeprecationWarning) + self.u = universe # check that slicing is possible try: diff --git a/package/MDAnalysis/analysis/utils/__init__.py b/package/MDAnalysis/analysis/utils/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/package/MDAnalysis/analysis/waterdynamics.py b/package/MDAnalysis/analysis/waterdynamics.py index 0fa1e9b5033..70d42a26a93 100644 --- a/package/MDAnalysis/analysis/waterdynamics.py +++ b/package/MDAnalysis/analysis/waterdynamics.py @@ -297,10 +297,6 @@ for tau, sp in zip(tau_timeseries, sp_timeseries): print("{time} {sp}".format(time=tau, sp=sp)) - # SP is not calculated at tau=0, but if you would like to plot SP=1 at tau=0: - tau_timeseries.insert(0, 0) - sp_timeseries.insert(1, 0) - # plot plt.xlabel('Time') plt.ylabel('SP') @@ -449,12 +445,15 @@ """ from __future__ import print_function, division, absolute_import +from MDAnalysis.lib.correlations import autocorrelation, correct_intermittency +import MDAnalysis.analysis.hbonds +from six.moves import range, zip_longest +import logging import warnings +import numpy as np -from six.moves import range, zip_longest -import numpy as np -import MDAnalysis.analysis.hbonds +logger = logging.getLogger('MDAnalysis.analysis.waterdynamics') from MDAnalysis.lib.log import ProgressBar @@ -507,7 +506,9 @@ class HydrogenBondLifetimes(object): may have failed in some cases. """ - def __init__(self, universe, selection1, selection2, t0, tf, dtmax): + + def __init__(self, universe, selection1, selection2, t0, tf, dtmax, + nproc=1): self.universe = universe self.selection1 = selection1 self.selection2 = selection2 @@ -1171,7 +1172,8 @@ class SurvivalProbability(object): .. versionadded:: 0.11.0 - + .. versionchanged:: 0.21.0 + Using the MDAnalysis.lib.correlations.py to carry out the intermittency and autocorrelation calculations .. versionchanged:: 1.0.0 Changed `selection` keyword to `select` """ @@ -1195,62 +1197,6 @@ def __init__(self, universe, select, t0=None, tf=None, dtmax=None, verbose=False self.tau_max = dtmax warnings.warn("dtmax is deprecated, use run(tau_max=dtmax) instead", category=DeprecationWarning) - def print(self, verbose, *args): - if self.verbose: - print(args) - elif verbose: - print(args) - - def _correct_intermittency(self, intermittency, selected_ids): - """ - Pre-process Consecutive Intermittency with a single pass over the data. - If an atom is absent for a number of frames equal or smaller - than the `intermittency`, then correct the data and remove the absence. - ie 7,A,A,7 with intermittency=2 will be replaced by 7,7,7,7, where A=absence - - Parameters - ---------- - intermittency : int - the max gap allowed and to be corrected - selected_ids: list of ids - modifies the selecteded IDs in place by adding atoms which left for <= `intermittency` - """ - self.print('Correcting the selected IDs for intermittancy (gaps). ') - if intermittency == 0: - return - - for i, ids in enumerate(selected_ids): - # initially update each frame as seen 0 ago (now) - seen_frames_ago = {i: 0 for i in ids} - for j in range(1, intermittency + 2): - for atomid in seen_frames_ago.keys(): - # no more frames - if i + j >= len(selected_ids): - continue - - # if the atom is absent now - if not atomid in selected_ids[i + j]: - # increase its absence counter - seen_frames_ago[atomid] += 1 - continue - - # the atom is found - if seen_frames_ago[atomid] == 0: - # the atom was present in the last frame - continue - - # it was absent more times than allowed - if seen_frames_ago[atomid] > intermittency: - continue - - # the atom was absent but returned (within <= intermittency) - # add it to the frames where it was absent. - # Introduce the corrections. - for k in range(seen_frames_ago[atomid], 0, -1): - selected_ids[i + j - k].add(atomid) - - seen_frames_ago[atomid] = 0 - def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermittency=0, verbose=False): """ @@ -1286,6 +1232,9 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermitte tau from 1 to `tau_max`. Saved in the field tau_timeseries. sp_timeseries : list survival probability for each value of `tau`. Saved in the field sp_timeseries. + sp_timeseries_data: list + raw datapoints from which the average is taken (sp_timeseries). + Time dependancy and distribution can be extracted. """ # backward compatibility (and priority) @@ -1306,9 +1255,13 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermitte raise ValueError("Too few frames selected for given tau_max.") # preload the frames (atom IDs) to a list of sets - self.selected_ids = [] + self._selected_ids = [] + + # fixme - to parallise: the section should be rewritten so that this loop only creates a list of indices, + # on which the parallel _single_frame can be applied. - # skip frames that will not be used + # skip frames that will not be used in order to improve performance + # because AtomGroup.select_atoms is the most expensive part of this calculation # Example: step 5 and tau 2: LLLSS LLLSS, ... where L = Load, and S = Skip # Intermittency means that we have to load the extra frames to know if the atom is actually missing. # Say step=5 and tau=1, intermittency=0: LLSSS LLSSS @@ -1323,7 +1276,7 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermitte frame_no = start while frame_no < stop: # we have already added 1 to stop, therefore < if num_frames_to_skip != 0 and frame_loaded_counter == frames_per_window: - self.print(verbose, "Skipping the next %d frames:" % num_frames_to_skip) + logger.info("Skipping the next %d frames:", num_frames_to_skip) frame_no += num_frames_to_skip frame_loaded_counter = 0 # Correct the number of frames to be loaded after the first window (which starts at t=0, and @@ -1334,46 +1287,31 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermitte # update the frame number self.universe.trajectory[frame_no] - self.print(verbose, "Loading frame:", self.universe.trajectory.ts) + logger.info("Loading frame: %d", self.universe.trajectory.frame) atoms = self.universe.select_atoms(self.selection) # SP of residues or of atoms ids = atoms.residues.resids if residues else atoms.ids - self.selected_ids.append(set(ids)) + self._selected_ids.append(set(ids)) frame_no += 1 frame_loaded_counter += 1 - # correct the dataset for gaps (intermittency) - self._correct_intermittency(intermittency, self.selected_ids) - - # calculate Survival Probability - tau_timeseries = np.arange(1, tau_max + 1) - sp_timeseries_data = [[] for _ in range(tau_max)] - # adjust for the frames that were not loaded (step>tau_max + 1), # and for extra frames that were loaded (intermittency) window_jump = step - num_frames_to_skip - for t in range(0, len(self.selected_ids), window_jump): - Nt = len(self.selected_ids[t]) - - if Nt == 0: - self.print(verbose, - "At frame {} the selection did not find any molecule. Moving on to the next frame".format(t)) - continue - - for tau in tau_timeseries: - if t + tau >= len(self.selected_ids): - break + self._intermittent_selected_ids = correct_intermittency(self._selected_ids, intermittency=intermittency) + tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(self._intermittent_selected_ids, + tau_max, window_jump) - # continuous: IDs that survive from t to t + tau and at every frame in between - Ntau = len(set.intersection(*self.selected_ids[t:t + tau + 1])) - sp_timeseries_data[tau - 1].append(Ntau / float(Nt)) + # warn the user if the NaN are found + if all(np.isnan(sp_timeseries[1:])): + logger.warning('NaN Error: Most likely data was not found. Check your atom selections. ') # user can investigate the distribution and sample size self.sp_timeseries_data = sp_timeseries_data self.tau_timeseries = tau_timeseries - self.sp_timeseries = [np.mean(sp) for sp in sp_timeseries_data] + self.sp_timeseries = sp_timeseries return self diff --git a/package/MDAnalysis/lib/correlations.py b/package/MDAnalysis/lib/correlations.py new file mode 100644 index 00000000000..483b5fcf97e --- /dev/null +++ b/package/MDAnalysis/lib/correlations.py @@ -0,0 +1,259 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 +# +# MDAnalysis --- https://www.mdanalysis.org +# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, +# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. +# MDAnalysis: A Python package for the rapid analysis of molecular dynamics +# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th +# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. +# doi: 10.25080/majora-629e541a-00e +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 +# + +"""Correlations utilities --- :mod:`MDAnalysis.lib.correlations` +================================================================================= + + +:Authors: Paul Smith & Mateusz Bieniek +:Year: 2020 +:Copyright: GNU Public License v2 + +.. versionadded:: 0.21.0 + +This module is primarily for internal use by other analysis modules. It +provides functionality for calculating the time autocorrelation function +of a binary variable (i.e one that is either true or false at each +frame for a given atom/molecule/set of molecules). This module includes +functions for calculating both the time continuous autocorrelation and +the intermittent autocorrelation. The function :func:`autocorrelation` +calculates the continuous autocorrelation only. The data may be +pre-processed using the function :func:`intermittency` in order to +acount for intermittency before passing the results to +:func:`autocorrelation`. + +This module is inspired by seemingly disparate analyses that rely on the same +underlying calculation, including the survival probability of water around +proteins [Araya-Secchi2014]_, hydrogen bond lifetimes [Gowers2015]_, [Araya-Secchi2014]_, +and the rate of cholesterol flip-flop in lipid bilayers [Gu2019]_. + +.. seeAlso:: + + Analysis tools that make use of modules: + + * :class:`MDAnalysis.analysis.waterdynamics.SurvivalProbability` + Calculates the continuous or intermittent survival probability + of an atom group in a region of interest. + + * :class:`MDAnalysis.analysis.hbonds.hbond_analysis` + Calculates the continuous or intermittent hydrogen bond + lifetime. + +.. rubric:: References + +.. [Gu2019] Gu, R.-X.; Baoukina, S.; Tieleman, D. P. (2019) + Cholesterol Flip-Flop in Heterogeneous Membranes. + J. Chem. Theory Comput. 15 (3), 2064–2070. + https://doi.org/10.1021/acs.jctc.8b00933. + +""" + +from __future__ import absolute_import, division +import numpy as np +from copy import deepcopy + + +def autocorrelation(list_of_sets, tau_max, window_step=1): + r"""Implementation of a discrete autocorrelation function. + + The autocorrelation of a property :math:`x` from a time :math:`t=t_0` to :math:`t=t_0 + \tau` + is given by: + + .. math:: + C(\tau) = \langle \frac{ x(t_0)x(t_0 +\tau) }{ x(t_0)x(t_0) } \rangle + + where :math:`x` may represent any property of a particle, such as velocity or + potential energy. + + This function is an implementation of a special case of the time + autocorrelation function in which the property under consideration can + be encoded with indicator variables, :math:`0` and :math:`1`, to represent the binary + state of said property. This special case is often referred to as the + survival probability (:math:`S(\tau)`). As an example, in calculating the survival + probability of water molecules within 5 Å of a protein, each water + molecule will either be within this cutoff range (:math:`1`) or not (:math:`0`). The + total number of water molecules within the cutoff at time :math:`t_0` will be + given by :math:`N(t_0)`. Other cases include the Hydrogen Bond Lifetime as + well as the translocation rate of cholesterol across a bilayer. + + The survival probability of a property of a set of particles is + given by: + + .. math:: + S(\tau) = \langle \frac{ N(t_0, t_0 + \tau )} { N(t_0) }\rangle + + where :math:`N(t0)` is the number of particles at time :math:`t_0` for which the feature + is observed, and :math:`N(t0, t_0 + \tau)` is the number of particles for which + this feature is present at every frame from :math:`t_0` to :math:`N(t0, t_0 + \tau)`. + The angular brackets represent an average over all time origins, :math:`t_0`. + + See [Araya-Secchi2014]_ for a description survival probability. + + Parameters + ---------- + list_of_sets : list + List of sets. Each set corresponds to data from a single frame. Each element in a set + may be, for example, an atom id or a tuple of atoms ids. In the case of calculating the + survival probability of water around a protein, these atom ids in a given set will be + those of the atoms which are within a cutoff distance of the protein at a given frame. + tau_max : int + The last tau (lag time, inclusive) for which to calculate the autocorrelation. e.g if tau_max = 20, + the survival probability will be calculated over 20 frames. + window_step : int, optional + The step size for t0 to perform autocorrelation. Ideally, window_step will be larger than + tau_max to ensure independence of each window for which the calculation is performed. + Default is 1. + + Returns + -------- + tau_timeseries : list of int + the values of tau for which the autocorrelation was calculated + timeseries : list of int + the autocorelation values for each of the tau values + timeseries_data : list of list of int + the raw data from which the autocorrelation is computed, i.e :math:`S(\tau)` at each window. + This allows the time dependant evolution of :math:`S(\tau)` to be investigated. + + .. versionadded:: 0.19.2 + """ + + # check types + if (type(list_of_sets) != list and len(list_of_sets) != 0) or type(list_of_sets[0]) != set: + raise TypeError("list_of_sets must be a one-dimensional list of sets") # pragma: no cover + + # Check dimensions of parameters + if len(list_of_sets) < tau_max: + raise ValueError("tau_max cannot be greater than the length of list_of_sets") # pragma: no cover + + tau_timeseries = list(range(1, tau_max + 1)) + timeseries_data = [[] for _ in range(tau_max)] + + # calculate autocorrelation + for t in range(0, len(list_of_sets), window_step): + Nt = len(list_of_sets[t]) + + if Nt == 0: + continue + + for tau in tau_timeseries: + if t + tau >= len(list_of_sets): + break + + # continuous: IDs that survive from t to t + tau and at every frame in between + Ntau = len(set.intersection(*list_of_sets[t:t + tau + 1])) + timeseries_data[tau - 1].append(Ntau / float(Nt)) + + timeseries = [np.mean(x) for x in timeseries_data] + + # at time 0 the value has to be one + tau_timeseries.insert(0, 0) + timeseries.insert(0, 1) + + return tau_timeseries, timeseries, timeseries_data + + +def correct_intermittency(list_of_sets, intermittency): + r"""Preprocess data to allow intermittent behaviour prior to calling :func:`autocorrelation`. + + Survival probabilty may be calculated either with a strict continuous requirement or + a less strict intermittency. If calculating the survival probability water around a + protein for example, in the former case the water must be within a cutoff distance + of the protein at every frame from :math:`t_0` to :math:`t_0 + \tau` in order for it to be considered + present at :math:`t_0 + \tau`. In the intermittent case, the water molecule is allowed to + leave the region of interest for up to a specified consecutive number of frames whilst still + being considered present at :math:`t_0 + \tau`. + + This function pre-processes data, such as the atom ids of water molecules within a cutoff + distance of a protein at each frame, in order to allow for intermittent behaviour, with a + single pass over the data. + + For example, if an atom is absent for a number of frames equal or smaller than the parameter + :attr:`intermittency`, then this absence will be removed and thus the atom is considered to have + not left. + e.g 7,A,A,7 with `intermittency=2` will be replaced by 7,7,7,7, where A=absence. + + The returned data can be used as input to the function :func:`autocorrelation` in order + to calculate the survival probability with a given intermittency. + + See [Gowers2015]_ for a description of + intermittency in the calculation of hydrogen bond lifetimes. + + # TODO - is intermittency consitent with list of sets of sets? (hydrogen bonds) + + Parameters + ---------- + list_of_sets: list + In the simple case of e.g survival probability, a list of sets of atom ids present at each frame, where a + single set contains atom ids at a given frame, e.g [{0, 1}, {0}, {0}, {0, 1}] + intermittency : int + The maximum gap allowed. The default `intermittency=0` means that if the datapoint is missing at any frame, no + changes are made to the data. With the value of `intermittency=2`, all datapoints missing for up to two + consecutive frames will be instead be considered present. + + Returns + ------- + list_of_sets: list + returns a new list with the IDs with added IDs which disappeared for <= :attr:`intermittency`. + e.g If [{0, 1}, {0}, {0}, {0, 1}] is a list of sets of atom ids present at each frame and `intermittency=2`, + both atoms will be considered present throughout and thus the returned list of sets will be + [{0, 1}, {0, 1}, {0, 1}, {0, 1}]. + + """ + if intermittency == 0: + return list_of_sets + + list_of_sets = deepcopy(list_of_sets) + + for i, ids in enumerate(list_of_sets): + # initially update each frame as seen 0 ago (now) + seen_frames_ago = {i: 0 for i in ids} + for j in range(1, intermittency + 2): + for atomid in seen_frames_ago.keys(): + # no more frames + if i + j >= len(list_of_sets): + continue + + # if the atom is absent now + if not atomid in list_of_sets[i + j]: + # increase its absence counter + seen_frames_ago[atomid] += 1 + continue + + # the atom is found + if seen_frames_ago[atomid] == 0: + # the atom was present in the last frame + continue + + # it was absent more times than allowed + if seen_frames_ago[atomid] > intermittency: + continue + + # the atom was absent but returned (within <= intermittency_value) + # add it to the frames where it was absent. + # Introduce the corrections. + for k in range(seen_frames_ago[atomid], 0, -1): + list_of_sets[i + j - k].add(atomid) + + seen_frames_ago[atomid] = 0 + return list_of_sets + diff --git a/package/doc/sphinx/source/documentation_pages/lib/correlations.rst b/package/doc/sphinx/source/documentation_pages/lib/correlations.rst new file mode 100644 index 00000000000..e68b0eba864 --- /dev/null +++ b/package/doc/sphinx/source/documentation_pages/lib/correlations.rst @@ -0,0 +1,9 @@ +.. automodule:: MDAnalysis.lib.correlations + + Autocorrelation Function + ------------------------ + .. autofunction:: MDAnalysis.lib.correlations.autocorrelation + + Intermittency Function + ---------------------- + .. autofunction:: MDAnalysis.lib.correlations.correct_intermittency \ No newline at end of file diff --git a/package/doc/sphinx/source/documentation_pages/lib_modules.rst b/package/doc/sphinx/source/documentation_pages/lib_modules.rst index 0394d3fa28e..29ba1a05e8a 100644 --- a/package/doc/sphinx/source/documentation_pages/lib_modules.rst +++ b/package/doc/sphinx/source/documentation_pages/lib_modules.rst @@ -59,6 +59,7 @@ List of modules ./lib/transformations ./lib/qcprot ./lib/util + ./lib/correlations Low level file formats ---------------------- @@ -74,4 +75,4 @@ Python-based projects. :maxdepth: 1 ./lib/formats/libmdaxdr - ./lib/formats/libdcd + ./lib/formats/libdcd \ No newline at end of file diff --git a/testsuite/MDAnalysisTests/analysis/test_hbonds.py b/testsuite/MDAnalysisTests/analysis/test_hbonds.py index 388200b5b02..93979aa44ea 100644 --- a/testsuite/MDAnalysisTests/analysis/test_hbonds.py +++ b/testsuite/MDAnalysisTests/analysis/test_hbonds.py @@ -400,6 +400,39 @@ def test_table_atoms(self, h, normalized_timeseries, reference_table): assert_array_equal(h.table.field(name), ref, err_msg="resname for {0} do not match (Issue #801)") +class TestHydrogenBondAnalysisAutocorrelation(object): + @staticmethod + @pytest.fixture(scope='class') + def universe(): + return MDAnalysis.Universe(waterPSF, waterDCD) + + @staticmethod + @pytest.fixture(scope='class') + def selection(): + return "byres name OH2" + + def test_HydrogenBondLifetimes(self, universe, selection): + hbonds = MDAnalysis.analysis.hbonds.HydrogenBondAnalysis(universe, + selection, + selection, + distance=3.5, + angle=120.0) + hbonds.run(start=0, stop=4) + hbonds.autocorrelation(tau_max=3) + assert_almost_equal(hbonds.acf_timeseries[3], 0.75) + + def test_HydrogenBondLifetimes_growing_continuous(self, universe, selection): + hbonds = MDAnalysis.analysis.hbonds.HydrogenBondAnalysis(universe, + selection, + selection, + distance=3.5, + angle=120.0) + hbonds.run(start=0, stop=9) + hbonds.autocorrelation(tau_max=5) + assert all([frame >= framePlus1 for frame, framePlus1 in zip(hbonds.acf_timeseries, hbonds.acf_timeseries[1:])]) + + + class TestHydrogenBondAnalysisTIP3PHeavyPBC(object): @staticmethod @pytest.fixture(scope='class') diff --git a/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py b/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py index e6dc29b0f35..3d2325e1a21 100644 --- a/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py +++ b/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py @@ -23,9 +23,9 @@ from __future__ import print_function, absolute_import import MDAnalysis from MDAnalysis.analysis import waterdynamics +from MDAnalysis.lib.correlations import autocorrelation, correct_intermittency from MDAnalysisTests.datafiles import waterPSF, waterDCD -from MDAnalysisTests.datafiles import PDB, XTC import pytest import numpy as np @@ -86,13 +86,157 @@ def test_MeanSquareDisplacement_zeroMolecules(universe): assert_almost_equal(msd_zero.timeseries[1], 0.0) +def test_SurvivalProbability_intermittency1and2(universe): + """ + Intermittency of 2 means that we still count an atom if it is not present for up to 2 consecutive frames, + but then returns at the following step. + """ + with patch.object(universe, 'select_atoms') as select_atoms_mock: + ids = [(9, 8), (), (8,), (9,), (8,), (), (9, 8), (), (8,), (9, 8)] + select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop()) # atom IDs fed set by set + sp = waterdynamics.SurvivalProbability(universe, "") + sp.run(tau_max=3, stop=9, verbose=True, intermittency=2) + assert all(x == {9, 8} for x in sp._intermittent_selected_ids) + assert_almost_equal(sp.sp_timeseries, [1, 1, 1, 1]) + + +def test_SurvivalProbability_intermittency2lacking(universe): + """ + If an atom is not present for more than 2 consecutive frames, + it is considered to have left the region. + """ + with patch.object(universe, 'select_atoms') as select_atoms_mock: + ids = [(9,), (), (), (), (9,), (), (), (), (9,)] + select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop()) # atom IDs fed set by set + sp = waterdynamics.SurvivalProbability(universe, "") + sp.run(tau_max=3, stop=8, verbose=True, intermittency=2) + assert_almost_equal(sp.sp_timeseries, [1, 0, 0, 0]) + + +def test_SurvivalProbability_intermittency1_step5_noSkipping(universe): + """ + Step leads to skipping frames if (tau_max + 1) + (intermittency * 2) < step. + No frames should be skipped. + """ + with patch.object(universe, 'select_atoms') as select_atoms_mock: + ids = [(2, 3), (3,), (2, 3), (3,), (2,), (3,), (2, 3), (3,), (2, 3), (2, 3)] + select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop()) # atom IDs fed set by set + sp = waterdynamics.SurvivalProbability(universe, "") + sp.run(tau_max=2, stop=9, verbose=True, intermittency=1, step=5) + assert all((x == {2, 3} for x in sp._intermittent_selected_ids)) + assert_almost_equal(sp.sp_timeseries, [1, 1, 1]) + + +def test_SurvivalProbability_intermittency1_step5_Skipping(universe): + """ + Step leads to skipping frames if (tau_max + 1) * (intermittency * 2) < step. + In this case one frame will be skipped per window. + """ + with patch.object(universe, 'select_atoms') as select_atoms_mock: + ids = [(1,), (), (1,), (), (1,), (), (1,), (), (1,), (1,)] + beforepopsing = len(ids) - 2 + select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop()) # atom IDs fed set by set + sp = waterdynamics.SurvivalProbability(universe, "") + sp.run(tau_max=1, stop=9, verbose=True, intermittency=1, step=5) + assert all((x == {1} for x in sp._intermittent_selected_ids)) + assert len(sp._selected_ids) == beforepopsing + assert_almost_equal(sp.sp_timeseries, [1, 1]) + + +def test_intermittency_none(): + # No changes asked - returns the same data + input_ids = [{1}, {1}, {1}, set(), set(), {1}, {1}, {1}, set(), set(), {1}, set(), set(), {1}] + corrected = correct_intermittency(input_ids, intermittency=0) + assert all(x == y for x,y in zip(input_ids, corrected)) + + +def test_intermittency_1and2(): + # The maximum gap in the dataset is 2, so the IDs are always present after correction + input_ids = [{9, 8}, set(), {8, }, {9, }, {8, }, set(), {9, 8}, set(), {8, }, {9, 8, }] + corrected = correct_intermittency(input_ids, intermittency=2) + assert all((x == {9, 8} for x in corrected)) + + +def test_intermittency_2tooShort(): + #The IDs are abscent for too long/ + input_ids = [{9,}, {}, {}, {}, {9,}, {}, {}, {}, {9,}] + corrected = correct_intermittency(input_ids, intermittency=2) + assert all(x == y for x, y in zip(input_ids, corrected)) + + +def test_intermittency_setsOfSets(): + # Verificaiton for the case of hydrogen bonds (sets of sets) + input_ids = [{frozenset({1,2}), frozenset({3, 4})},set(), set(), + {frozenset({1, 2}), frozenset({3, 4})}, set(), set(), + {frozenset({1, 2}), frozenset({3, 4})}, set(), set(), + {frozenset({1, 2}), frozenset({3, 4})}] + corrected = correct_intermittency(input_ids, intermittency=2) + assert all((x == {frozenset({1, 2}), frozenset({3, 4})} for x in corrected)) + + +def test_autocorrelation_alwaysPresent(): + input = [{1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}] + tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input, tau_max=3) + assert all(np.equal(sp_timeseries, 1)) + + +def test_autocorrelation_definedTaus(): + input_ids = [{9, 8, 7}, {8, 7, 6}, {7, 6, 5}, {6, 5, 4}, {5, 4, 3}, {4, 3, 2}, {3, 2, 1}] + tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=3) + assert_almost_equal(sp_timeseries, [1, 2/3., 1/3., 0]) + + +def test_autocorrelation_intermittency1_windowJump_intermittencyAll(): + """ + Step leads to skipping frames if (tau_max + 1) + (intermittency * 2) < step. + No frames should be skipped so intermittency should be applied to all. + """ + input_ids = [{2, 3}, {3,}, {2, 3}, {3,}, {2,}, {3,}, {2, 3}, {3,}, {2, 3}, {2, 3}] + corrected = correct_intermittency(input_ids, intermittency=1) + tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(corrected, tau_max=2, + window_step=5) + assert all((x == {2, 3} for x in corrected)) + assert_almost_equal(sp_timeseries, [1, 1, 1]) + + +def test_autocorrelation_windowBigJump(): + #The empty sets are ignored (no intermittency) + input_ids = [{1}, {1}, {1}, set(), set(), {1}, {1}, {1}, set(), set(), {1}, {1}, {1}] + tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=2, window_step=5) + assert_almost_equal(sp_timeseries, [1, 1, 1]) + + +def test_autocorrelation_windowBigJump_absence(): + # In the last frame the molecules are absent + input_ids = [{1}, {1}, {1}, set(), set(), {1}, {1}, {1}, set(), set(), {1}, set(), set()] + tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=2, window_step=5) + assert_almost_equal(sp_timeseries, [1, 2/3., 2/3.]) + + +def test_autocorrelation_intermittency1_many(): + input_ids = [{1}, set(), {1}, set(), {1}, set(), {1}, set(), {1}, set(), {1}, set(), {1}, set(), {1}] + corrected = correct_intermittency(input_ids, intermittency=1) + tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(corrected, tau_max=14, + window_step=5) + assert_almost_equal(sp_timeseries, [1] * 15) + + +def test_autocorrelation_intermittency2_windowBigJump(): + # The intermittency corrects the last frame + input_ids = [{1}, {1}, {1}, set(), set(), {1}, {1}, {1}, set(), set(), {1}, set(), set(), {1}] + corrected = correct_intermittency(input_ids, intermittency=2) + tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(corrected, tau_max=2, + window_step=5) + assert_almost_equal(sp_timeseries, [1, 1, 1]) + + def test_SurvivalProbability_t0tf(universe): with patch.object(universe, 'select_atoms') as select_atoms_mock: ids = [(0, ), (0, ), (7, 6, 5), (6, 5, 4), (5, 4, 3), (4, 3, 2), (3, 2, 1), (0, )] select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop(2)) # atom IDs fed set by set sp = waterdynamics.SurvivalProbability(universe, "") sp.run(tau_max=3, start=2, stop=6) - assert_almost_equal(sp.sp_timeseries, [2 / 3.0, 1 / 3.0, 0]) + assert_almost_equal(sp.sp_timeseries, [1, 2 / 3.0, 1 / 3.0, 0]) def test_SurvivalProbability_definedTaus(universe): @@ -101,7 +245,7 @@ def test_SurvivalProbability_definedTaus(universe): select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop()) # atom IDs fed set by set sp = waterdynamics.SurvivalProbability(universe, "") sp.run(tau_max=3, start=0, stop=6, verbose=True) - assert_almost_equal(sp.sp_timeseries, [2 / 3.0, 1 / 3.0, 0]) + assert_almost_equal(sp.sp_timeseries, [1, 2 / 3.0, 1 / 3.0, 0]) def test_SurvivalProbability_zeroMolecules(universe): @@ -109,7 +253,7 @@ def test_SurvivalProbability_zeroMolecules(universe): with patch.object(universe, 'select_atoms', return_value=Mock(ids=[])) as select_atoms_mock: sp = waterdynamics.SurvivalProbability(universe, "") sp.run(tau_max=3, start=3, stop=6, verbose=True) - assert all(np.isnan(sp.sp_timeseries)) + assert all(np.isnan(sp.sp_timeseries[1:])) def test_SurvivalProbability_alwaysPresent(universe): @@ -125,7 +269,7 @@ def test_SurvivalProbability_stepLargerThanDtmax(universe): with patch.object(universe, 'select_atoms', return_value=Mock(ids=(1,))) as select_atoms_mock: sp = waterdynamics.SurvivalProbability(universe, "") sp.run(tau_max=2, step=5, stop=9, verbose=True) - assert_equal(sp.sp_timeseries, [1, 1]) + assert_equal(sp.sp_timeseries, [1, 1, 1]) # with tau_max=2 for all the frames we only read 6 of them # this is because the frames which are not used are skipped, and therefore 'select_atoms' assert universe.trajectory.n_frames > 6 @@ -138,58 +282,3 @@ def test_SurvivalProbability_stepEqualDtMax(universe): sp.run(tau_max=4, step=5, stop=9, verbose=True) # all frames from 0, with 9 inclusive assert_equal(select_atoms_mock.call_count, 10) - - -def test_SurvivalProbability_intermittency1and2(universe): - """ - Intermittency of 2 means that we still count an atom if it is not present for up to 2 consecutive frames, - but then returns at the following step. - """ - with patch.object(universe, 'select_atoms') as select_atoms_mock: - ids = [(9, 8), (), (8,), (9,), (8,), (), (9,8), (), (8,), (9,8,)] - select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop()) # atom IDs fed set by set - sp = waterdynamics.SurvivalProbability(universe, "") - sp.run(tau_max=3, stop=9, verbose=True, intermittency=2) - assert all((x == set([9, 8]) for x in sp.selected_ids)) - assert_almost_equal(sp.sp_timeseries, [1, 1, 1]) - - -def test_SurvivalProbability_intermittency2lacking(universe): - """ - If an atom is not present for more than 2 consecutive frames, it is considered to have left the region. - """ - with patch.object(universe, 'select_atoms') as select_atoms_mock: - ids = [(9,), (), (), (), (9,), (), (), (), (9,)] - select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop()) # atom IDs fed set by set - sp = waterdynamics.SurvivalProbability(universe, "") - sp.run(tau_max=3, stop=8, verbose=True, intermittency=2) - assert_almost_equal(sp.sp_timeseries, [0, 0, 0]) - - -def test_SurvivalProbability_intermittency1_step5_noSkipping(universe): - """ - Step leads to skipping frames if (tau_max + 1) + (intermittency * 2) < step. - No frames should be skipped. - """ - with patch.object(universe, 'select_atoms') as select_atoms_mock: - ids = [(2, 3), (3,), (2, 3), (3,), (2,), (3,), (2, 3), (3,), (2, 3), (2, 3)] - select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop()) # atom IDs fed set by set - sp = waterdynamics.SurvivalProbability(universe, "") - sp.run(tau_max=2, stop=9, verbose=True, intermittency=1, step=5) - assert all((x == set([2, 3]) for x in sp.selected_ids)) - assert_almost_equal(sp.sp_timeseries, [1, 1]) - -def test_SurvivalProbability_intermittency1_step5_Skipping(universe): - """ - Step leads to skipping frames if (tau_max + 1) * (intermittency * 2) < step. - In this case one frame will be skipped per window. - """ - with patch.object(universe, 'select_atoms') as select_atoms_mock: - ids = [(1,), (), (1,), (), (1,), (), (1,), (), (1,), (1,)] - beforepopsing = len(ids) - 2 - select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop()) # atom IDs fed set by set - sp = waterdynamics.SurvivalProbability(universe, "") - sp.run(tau_max=1, stop=9, verbose=True, intermittency=1, step=5) - assert all((x == set([1]) for x in sp.selected_ids)) - assert len(sp.selected_ids) == beforepopsing - assert_almost_equal(sp.sp_timeseries, [1]) \ No newline at end of file