From 8bc9d9f8aaf479b865e8b80f4b6f7be252e48894 Mon Sep 17 00:00:00 2001 From: Mateusz Bieniek Date: Wed, 10 Apr 2019 14:23:56 +0100 Subject: [PATCH 01/40] Autocorrelation extracted from SP. --- package/MDAnalysis/analysis/utils/__init__.py | 0 .../analysis/utils/autocorrelation.py | 105 ++++++++++++++++++ package/MDAnalysis/analysis/waterdynamics.py | 27 +---- 3 files changed, 109 insertions(+), 23 deletions(-) create mode 100644 package/MDAnalysis/analysis/utils/__init__.py create mode 100644 package/MDAnalysis/analysis/utils/autocorrelation.py diff --git a/package/MDAnalysis/analysis/utils/__init__.py b/package/MDAnalysis/analysis/utils/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/package/MDAnalysis/analysis/utils/autocorrelation.py b/package/MDAnalysis/analysis/utils/autocorrelation.py new file mode 100644 index 00000000000..867eee2fa39 --- /dev/null +++ b/package/MDAnalysis/analysis/utils/autocorrelation.py @@ -0,0 +1,105 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 +# +# MDAnalysis --- https://www.mdanalysis.org +# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, +# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. +# MDAnalysis: A Python package for the rapid analysis of molecular dynamics +# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th +# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. +# doi: 10.25080/majora-629e541a-00e +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 +# + +""" +A flexible implementation of an autocorrelation function. +""" + +import numpy as np + +# TODO add documentation +def autocorrelation(list_of_sets, tau_max, window_jump=1, intermittency=0): + # FIXME - make sure that it is not just a list of numbers + + # correct the dataset for gaps (intermittency) + _correct_intermittency(intermittency, list_of_sets) + + # calculate Survival Probability + tau_timeseries = np.arange(1, tau_max + 1) + sp_timeseries_data = [[] for _ in range(tau_max)] + + for t in range(0, len(list_of_sets), window_jump): + Nt = len(list_of_sets[t]) + + if Nt == 0: + continue + + for tau in tau_timeseries: + if t + tau >= len(list_of_sets): + break + + # continuous: IDs that survive from t to t + tau and at every frame in between + Ntau = len(set.intersection(*list_of_sets[t:t + tau + 1])) + sp_timeseries_data[tau - 1].append(Ntau / float(Nt)) + + sp_timeseries = [np.mean(sp) for sp in sp_timeseries_data] + return tau_timeseries, sp_timeseries, sp_timeseries_data + + +def _correct_intermittency(intermittency, id_list, verbose=False): + """ + Pre-process Consecutive Intermittency with a single pass over the data. + If an atom is absent for a number of frames equal or smaller + than the parameter intermittency, then correct the data and remove the absence. + ie 7,A,A,7 with intermittency=2 will be replaced by 7,7,7,7, where A=absence + + :param intermittency: the max gap allowed and to be corrected + :param id_list: modifies the selecteded IDs in place by adding atoms which left for <= :param intermittency + """ + if intermittency == 0: + return + + if verbose: + print('Correcting the selected IDs for intermittancy (gaps). ') + + for i, ids in enumerate(id_list): + # initially update each frame as seen 0 ago (now) + seen_frames_ago = {i: 0 for i in ids} + for j in range(1, intermittency + 2): + for atomid in seen_frames_ago.keys(): + # no more frames + if i + j >= len(id_list): + continue + + # if the atom is absent now + if not atomid in id_list[i + j]: + # increase its absence counter + seen_frames_ago[atomid] += 1 + continue + + # the atom is found + if seen_frames_ago[atomid] == 0: + # the atom was present in the last frame + continue + + # it was absent more times than allowed + if seen_frames_ago[atomid] > intermittency: + continue + + # the atom was absent but returned (within <= intermittency) + # add it to the frames where it was absent. + # Introduce the corrections. + for k in range(seen_frames_ago[atomid], 0, -1): + id_list[i + j - k].add(atomid) + + seen_frames_ago[atomid] = 0 \ No newline at end of file diff --git a/package/MDAnalysis/analysis/waterdynamics.py b/package/MDAnalysis/analysis/waterdynamics.py index a0f3b8cd379..553f5903cf7 100644 --- a/package/MDAnalysis/analysis/waterdynamics.py +++ b/package/MDAnalysis/analysis/waterdynamics.py @@ -458,6 +458,7 @@ import MDAnalysis.analysis.hbonds from MDAnalysis.lib.log import ProgressMeter +from .utils.autocorrelation import autocorrelation class HydrogenBondLifetimes(object): @@ -1408,36 +1409,16 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermitte frame_no += 1 frame_loaded_counter += 1 - # correct the dataset for gaps (intermittency) - self._correct_intermittency(intermittency, self.selected_ids) - - # calculate Survival Probability - tau_timeseries = np.arange(1, tau_max + 1) - sp_timeseries_data = [[] for _ in range(tau_max)] - # adjust for the frames that were not loaded (step>tau_max + 1), # and for extra frames that were loaded (intermittency) window_jump = step - num_frames_to_skip - for t in range(0, len(self.selected_ids), window_jump): - Nt = len(self.selected_ids[t]) - - if Nt == 0: - self.print(verbose, - "At frame {} the selection did not find any molecule. Moving on to the next frame".format(t)) - continue - - for tau in tau_timeseries: - if t + tau >= len(self.selected_ids): - break - - # continuous: IDs that survive from t to t + tau and at every frame in between - Ntau = len(set.intersection(*self.selected_ids[t:t + tau + 1])) - sp_timeseries_data[tau - 1].append(Ntau / float(Nt)) + tau_timeseries, sp_timeseries, sp_timeseries_data = \ + autocorrelation(self.selected_ids, tau_max, window_jump,intermittency) # user can investigate the distribution and sample size self.sp_timeseries_data = sp_timeseries_data self.tau_timeseries = tau_timeseries - self.sp_timeseries = [np.mean(sp) for sp in sp_timeseries_data] + self.sp_timeseries = sp_timeseries return self From 29a30278fdfcbea48e75bfd1df12341469fca02d Mon Sep 17 00:00:00 2001 From: Mateusz Bieniek Date: Sun, 14 Apr 2019 13:55:47 +0100 Subject: [PATCH 02/40] Exhaustive testing for autocorrelation. It will now return 1 at tau=0. Now the function can be embedded into HydrogenBondLifetime. --- .../analysis/utils/autocorrelation.py | 15 ++- package/MDAnalysis/analysis/waterdynamics.py | 18 ++-- .../analysis/test_waterdynamics.py | 98 ++++++++++++++++--- 3 files changed, 111 insertions(+), 20 deletions(-) diff --git a/package/MDAnalysis/analysis/utils/autocorrelation.py b/package/MDAnalysis/analysis/utils/autocorrelation.py index 867eee2fa39..6f162e15222 100644 --- a/package/MDAnalysis/analysis/utils/autocorrelation.py +++ b/package/MDAnalysis/analysis/utils/autocorrelation.py @@ -29,13 +29,21 @@ # TODO add documentation def autocorrelation(list_of_sets, tau_max, window_jump=1, intermittency=0): + """ + + :param list_of_sets: Modifies in place! + :param tau_max: + :param window_jump: + :param intermittency: + :return: + """ # FIXME - make sure that it is not just a list of numbers # correct the dataset for gaps (intermittency) _correct_intermittency(intermittency, list_of_sets) # calculate Survival Probability - tau_timeseries = np.arange(1, tau_max + 1) + tau_timeseries = list(range(1, tau_max + 1)) sp_timeseries_data = [[] for _ in range(tau_max)] for t in range(0, len(list_of_sets), window_jump): @@ -53,6 +61,11 @@ def autocorrelation(list_of_sets, tau_max, window_jump=1, intermittency=0): sp_timeseries_data[tau - 1].append(Ntau / float(Nt)) sp_timeseries = [np.mean(sp) for sp in sp_timeseries_data] + + # at time 0 the value has to be one + tau_timeseries.insert(0, 0) + sp_timeseries.insert(0, 1) + return tau_timeseries, sp_timeseries, sp_timeseries_data diff --git a/package/MDAnalysis/analysis/waterdynamics.py b/package/MDAnalysis/analysis/waterdynamics.py index 553f5903cf7..d5143fa989d 100644 --- a/package/MDAnalysis/analysis/waterdynamics.py +++ b/package/MDAnalysis/analysis/waterdynamics.py @@ -297,10 +297,6 @@ for tau, sp in zip(tau_timeseries, sp_timeseries): print("{time} {sp}".format(time=tau, sp=sp)) - # SP is not calculated at tau=0, but if you would like to plot SP=1 at tau=0: - tau_timeseries.insert(0, 0) - sp_timeseries.insert(1, 0) - # plot plt.xlabel('Time') plt.ylabel('SP') @@ -464,19 +460,19 @@ class HydrogenBondLifetimes(object): r"""Hydrogen bond lifetime analysis - This is a autocorrelation function that gives the "Hydrogen Bond Lifetimes" + This is an autocorrelation function that gives the "Hydrogen Bond Lifetimes" (HBL) proposed by D.C. Rapaport [Rapaport1983]_. From this function we can obtain the continuous and intermittent behavior of hydrogen bonds in time. A fast decay in these parameters indicate a fast change in HBs connectivity. A slow decay indicate very stables hydrogen bonds, like in ice. The HBL is also know as "Hydrogen Bond Population Relaxation" - (HBPR). In the continuos case we have: + (HBPR). In the continuous case we have: .. math:: C_{HB}^c(\tau) = \frac{\sum_{ij}h_{ij}(t_0)h'_{ij}(t_0+\tau)}{\sum_{ij}h_{ij}(t_0)} where :math:`h'_{ij}(t_0+\tau)=1` if there is a H-bond between a pair - :math:`ij` during time interval :math:`t_0+\tau` (continuos) and + :math:`ij` during time interval :math:`t_0+\tau` (continuous) and :math:`h'_{ij}(t_0+\tau)=0` otherwise. In the intermittent case we have: .. math:: @@ -1373,6 +1369,9 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermitte # preload the frames (atom IDs) to a list of sets self.selected_ids = [] + # fixme - to parallise: the section should be rewritten so that this loop only creates a list of indices, + # on which the parallel _single_frame can be applied. + # skip frames that will not be used # Example: step 5 and tau 2: LLLSS LLLSS, ... where L = Load, and S = Skip # Intermittency means that we have to load the extra frames to know if the atom is actually missing. @@ -1416,6 +1415,11 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermitte tau_timeseries, sp_timeseries, sp_timeseries_data = \ autocorrelation(self.selected_ids, tau_max, window_jump,intermittency) + # warn the user if the NaN are found + if all(np.isnan(sp_timeseries[1:])): + # fixme - warn the user that the dataset. Use a standardised warning. + print('NaN Error: Most likely data was not found. Check your atom selections. ') + # user can investigate the distribution and sample size self.sp_timeseries_data = sp_timeseries_data diff --git a/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py b/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py index e6dc29b0f35..04e09c48611 100644 --- a/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py +++ b/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py @@ -23,9 +23,9 @@ from __future__ import print_function, absolute_import import MDAnalysis from MDAnalysis.analysis import waterdynamics +from MDAnalysis.analysis.utils.autocorrelation import autocorrelation from MDAnalysisTests.datafiles import waterPSF, waterDCD -from MDAnalysisTests.datafiles import PDB, XTC import pytest import numpy as np @@ -92,7 +92,7 @@ def test_SurvivalProbability_t0tf(universe): select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop(2)) # atom IDs fed set by set sp = waterdynamics.SurvivalProbability(universe, "") sp.run(tau_max=3, start=2, stop=6) - assert_almost_equal(sp.sp_timeseries, [2 / 3.0, 1 / 3.0, 0]) + assert_almost_equal(sp.sp_timeseries, [1, 2 / 3.0, 1 / 3.0, 0]) def test_SurvivalProbability_definedTaus(universe): @@ -101,7 +101,7 @@ def test_SurvivalProbability_definedTaus(universe): select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop()) # atom IDs fed set by set sp = waterdynamics.SurvivalProbability(universe, "") sp.run(tau_max=3, start=0, stop=6, verbose=True) - assert_almost_equal(sp.sp_timeseries, [2 / 3.0, 1 / 3.0, 0]) + assert_almost_equal(sp.sp_timeseries, [1, 2 / 3.0, 1 / 3.0, 0]) def test_SurvivalProbability_zeroMolecules(universe): @@ -109,7 +109,7 @@ def test_SurvivalProbability_zeroMolecules(universe): with patch.object(universe, 'select_atoms', return_value=Mock(ids=[])) as select_atoms_mock: sp = waterdynamics.SurvivalProbability(universe, "") sp.run(tau_max=3, start=3, stop=6, verbose=True) - assert all(np.isnan(sp.sp_timeseries)) + assert all(np.isnan(sp.sp_timeseries[1:])) def test_SurvivalProbability_alwaysPresent(universe): @@ -125,7 +125,7 @@ def test_SurvivalProbability_stepLargerThanDtmax(universe): with patch.object(universe, 'select_atoms', return_value=Mock(ids=(1,))) as select_atoms_mock: sp = waterdynamics.SurvivalProbability(universe, "") sp.run(tau_max=2, step=5, stop=9, verbose=True) - assert_equal(sp.sp_timeseries, [1, 1]) + assert_equal(sp.sp_timeseries, [1, 1, 1]) # with tau_max=2 for all the frames we only read 6 of them # this is because the frames which are not used are skipped, and therefore 'select_atoms' assert universe.trajectory.n_frames > 6 @@ -150,8 +150,8 @@ def test_SurvivalProbability_intermittency1and2(universe): select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop()) # atom IDs fed set by set sp = waterdynamics.SurvivalProbability(universe, "") sp.run(tau_max=3, stop=9, verbose=True, intermittency=2) - assert all((x == set([9, 8]) for x in sp.selected_ids)) - assert_almost_equal(sp.sp_timeseries, [1, 1, 1]) + assert all((x == {9, 8} for x in sp.selected_ids)) + assert_almost_equal(sp.sp_timeseries, [1, 1, 1, 1]) def test_SurvivalProbability_intermittency2lacking(universe): @@ -163,7 +163,7 @@ def test_SurvivalProbability_intermittency2lacking(universe): select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop()) # atom IDs fed set by set sp = waterdynamics.SurvivalProbability(universe, "") sp.run(tau_max=3, stop=8, verbose=True, intermittency=2) - assert_almost_equal(sp.sp_timeseries, [0, 0, 0]) + assert_almost_equal(sp.sp_timeseries, [1, 0, 0, 0]) def test_SurvivalProbability_intermittency1_step5_noSkipping(universe): @@ -176,8 +176,9 @@ def test_SurvivalProbability_intermittency1_step5_noSkipping(universe): select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop()) # atom IDs fed set by set sp = waterdynamics.SurvivalProbability(universe, "") sp.run(tau_max=2, stop=9, verbose=True, intermittency=1, step=5) - assert all((x == set([2, 3]) for x in sp.selected_ids)) - assert_almost_equal(sp.sp_timeseries, [1, 1]) + assert all((x == {2, 3} for x in sp.selected_ids)) + assert_almost_equal(sp.sp_timeseries, [1, 1, 1]) + def test_SurvivalProbability_intermittency1_step5_Skipping(universe): """ @@ -190,6 +191,79 @@ def test_SurvivalProbability_intermittency1_step5_Skipping(universe): select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop()) # atom IDs fed set by set sp = waterdynamics.SurvivalProbability(universe, "") sp.run(tau_max=1, stop=9, verbose=True, intermittency=1, step=5) - assert all((x == set([1]) for x in sp.selected_ids)) + assert all((x == {1} for x in sp.selected_ids)) assert len(sp.selected_ids) == beforepopsing - assert_almost_equal(sp.sp_timeseries, [1]) \ No newline at end of file + assert_almost_equal(sp.sp_timeseries, [1, 1]) + + +def test_autocorrelation_alwaysPresent(): + input = [{1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}] + tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input, tau_max=3) + assert all(np.equal(sp_timeseries, 1)) + + +def test_autocorrelation_definedTaus(): + input_ids = [{9, 8, 7}, {8, 7, 6}, {7, 6, 5}, {6, 5, 4}, {5, 4, 3}, {4, 3, 2}, {3, 2, 1}] + tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=3) + assert_almost_equal(sp_timeseries, [1, 2/3., 1/3., 0]) + + +def test_autocorrelation_intermittency1and2(): + """ + Intermittency of 2 means that we still count an atom if it is not present for up to 2 consecutive frames, + but then returns at the following step. + """ + input_ids = [{9, 8}, set(), {8,}, {9,}, {8,}, set(), {9, 8}, set(), {8,}, {9, 8,}] + tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=3, intermittency=2) + # input_ids are modified in place, check if the gaps have been removed correctly + assert all((x == {9, 8} for x in input_ids)) + assert_almost_equal(sp_timeseries, [1, 1, 1, 1]) + + +def test_autocorrelation_intermittency2lacking(): + """ + If an atom is not present for more than 2 consecutive frames, it is considered to have left the region. + """ + input_ids = [{9,}, {}, {}, {}, {9,}, {}, {}, {}, {9,}] + tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=3, intermittency=2) + assert_almost_equal(sp_timeseries, [1, 0, 0, 0]) + + +def test_autocorrelation_intermittency1_windowJump_intermittencyAll(): + """ + Step leads to skipping frames if (tau_max + 1) + (intermittency * 2) < step. + No frames should be skipped so intermittency should be applied to all. + """ + input_ids = [{2, 3}, {3,}, {2, 3}, {3,}, {2,}, {3,}, {2, 3}, {3,}, {2, 3}, {2, 3}] + tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=2, + intermittency=1, window_jump=5) + assert all((x == {2, 3} for x in input_ids)) + assert_almost_equal(sp_timeseries, [1, 1, 1]) + + +def test_autocorrelation_intermittency0_windowBigJump(): + """ + The empty sets are ignored (no interrttency) + """ + input_ids = [{1}, {1}, {1}, set(), set(), {1}, {1}, {1}, set(), set(), {1}, {1}, {1}] + tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=2, window_jump=5) + assert_almost_equal(sp_timeseries, [1, 1, 1]) + + +def test_autocorrelation_intermittency0_windowBigJump_absence(): + """ + In the last frame the molecules are absent + """ + input_ids = [{1}, {1}, {1}, set(), set(), {1}, {1}, {1}, set(), set(), {1}, set(), set()] + tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=2, window_jump=5) + assert_almost_equal(sp_timeseries, [1, 2/3., 2/3.]) + + +def test_autocorrelation_intermittency2_windowBigJump(): + """ + The intermittency corrects the last frame + """ + input_ids = [{1}, {1}, {1}, set(), set(), {1}, {1}, {1}, set(), set(), {1}, set(), set(), {1}] + tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=2, + intermittency=2, window_jump=5) + assert_almost_equal(sp_timeseries, [1, 1, 1]) From b9d5996e6938abf6f0c066690b009d9ce3bd4ecc Mon Sep 17 00:00:00 2001 From: Mateusz Bieniek Date: Sun, 14 Apr 2019 16:42:32 +0100 Subject: [PATCH 03/40] A more general documentation in the autocorrelation. --- .../analysis/utils/autocorrelation.py | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/package/MDAnalysis/analysis/utils/autocorrelation.py b/package/MDAnalysis/analysis/utils/autocorrelation.py index 6f162e15222..dc4a186f040 100644 --- a/package/MDAnalysis/analysis/utils/autocorrelation.py +++ b/package/MDAnalysis/analysis/utils/autocorrelation.py @@ -21,15 +21,14 @@ # J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 # -""" -A flexible implementation of an autocorrelation function. -""" - import numpy as np + # TODO add documentation def autocorrelation(list_of_sets, tau_max, window_jump=1, intermittency=0): """ + The descrete implementation of the autocorrelation function. + fixme - change to the numpy documentation :param list_of_sets: Modifies in place! :param tau_max: @@ -37,15 +36,16 @@ def autocorrelation(list_of_sets, tau_max, window_jump=1, intermittency=0): :param intermittency: :return: """ - # FIXME - make sure that it is not just a list of numbers + # fixme - add checks if this is a list of sets + # fixme - check dimensions # correct the dataset for gaps (intermittency) _correct_intermittency(intermittency, list_of_sets) - # calculate Survival Probability tau_timeseries = list(range(1, tau_max + 1)) - sp_timeseries_data = [[] for _ in range(tau_max)] + timeseries_data = [[] for _ in range(tau_max)] + # calculate autocorrelation for t in range(0, len(list_of_sets), window_jump): Nt = len(list_of_sets[t]) @@ -58,15 +58,15 @@ def autocorrelation(list_of_sets, tau_max, window_jump=1, intermittency=0): # continuous: IDs that survive from t to t + tau and at every frame in between Ntau = len(set.intersection(*list_of_sets[t:t + tau + 1])) - sp_timeseries_data[tau - 1].append(Ntau / float(Nt)) + timeseries_data[tau - 1].append(Ntau / float(Nt)) - sp_timeseries = [np.mean(sp) for sp in sp_timeseries_data] + timeseries = [np.mean(x) for x in timeseries_data] # at time 0 the value has to be one tau_timeseries.insert(0, 0) - sp_timeseries.insert(0, 1) + timeseries.insert(0, 1) - return tau_timeseries, sp_timeseries, sp_timeseries_data + return tau_timeseries, timeseries, timeseries_data def _correct_intermittency(intermittency, id_list, verbose=False): From f4e3570abb21ba055465c101bd5b48dfff761427 Mon Sep 17 00:00:00 2001 From: Mateusz Bieniek Date: Sun, 14 Apr 2019 16:43:03 +0100 Subject: [PATCH 04/40] Updated test cases to illuminate the bug in #2247. --- .../MDAnalysisTests/analysis/test_waterdynamics.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py b/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py index 04e09c48611..1317c77afa2 100644 --- a/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py +++ b/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py @@ -45,8 +45,17 @@ def universe(): def test_HydrogenBondLifetimes(universe): hbl = waterdynamics.HydrogenBondLifetimes( universe, SELECTION1, SELECTION1, 0, 5, 3) + hbl.run(stop=5) + assert_almost_equal(hbl.timeseries[2], 0.75) + + +def test_HydrogenBondLifetimes_growing_continuous(universe): + # The autocorrelation cannot grow + hbl = waterdynamics.HydrogenBondLifetimes( + universe, SELECTION1, SELECTION1, t0=0, tf=9, dtmax=5) hbl.run() - assert_almost_equal(hbl.timeseries[2][1], 0.75, 5) + # Index 0 in frameX[0] refers to the continuous, 1 to intermittent + assert all([frameX >= frameXplus1 for frameX, frameXplus1 in zip(hbl.timeseries, hbl.timeseries[1:])]) def test_WaterOrientationalRelaxation(universe): From d3a6a8a9eb8a9c18adf4aff16730d7681df646bc Mon Sep 17 00:00:00 2001 From: Mateusz Bieniek Date: Sun, 14 Apr 2019 16:43:55 +0100 Subject: [PATCH 05/40] First switch to using autocorrelation that passes the tests. --- package/MDAnalysis/analysis/waterdynamics.py | 213 +++---------------- 1 file changed, 27 insertions(+), 186 deletions(-) diff --git a/package/MDAnalysis/analysis/waterdynamics.py b/package/MDAnalysis/analysis/waterdynamics.py index d5143fa989d..ad32d75074f 100644 --- a/package/MDAnalysis/analysis/waterdynamics.py +++ b/package/MDAnalysis/analysis/waterdynamics.py @@ -464,8 +464,8 @@ class HydrogenBondLifetimes(object): (HBL) proposed by D.C. Rapaport [Rapaport1983]_. From this function we can obtain the continuous and intermittent behavior of hydrogen bonds in time. A fast decay in these parameters indicate a fast change in HBs - connectivity. A slow decay indicate very stables hydrogen bonds, like in - ice. The HBL is also know as "Hydrogen Bond Population Relaxation" + connectivity. A slow decay indicate stable hydrogen bonds (e.g. ice). + The HBL is also know as "Hydrogen Bond Population Relaxation" (HBPR). In the continuous case we have: .. math:: @@ -507,6 +507,7 @@ class HydrogenBondLifetimes(object): def __init__(self, universe, selection1, selection2, t0, tf, dtmax, nproc=1): + # fixme - use the same interface you used in SP self.universe = universe self.selection1 = selection1 self.selection2 = selection2 @@ -516,196 +517,33 @@ def __init__(self, universe, selection1, selection2, t0, tf, dtmax, self.nproc = nproc self.timeseries = None - def _getC_i(self, HBP, t0, t): - """ - This function give the intermitent Hydrogen Bond Lifetime - C_i = / between t0 and t - """ - C_i = 0 - for i in range(len(HBP[t0])): - for j in range(len(HBP[t])): - if (HBP[t0][i][0] == HBP[t][j][0] and HBP[t0][i][1] == HBP[t][j][1]): - C_i += 1 - break - if len(HBP[t0]) == 0: - return 0.0 - else: - return float(C_i) / len(HBP[t0]) - def _getC_c(self, HBP, t0, t): - """ - This function give the continous Hydrogen Bond Lifetime - C_c = / between t0 and t - """ - C_c = 0 - dt = 1 - begt0 = t0 - HBP_cp = HBP - HBP_t0 = HBP[t0] - newHBP = [] - if t0 == t: - return 1.0 - while t0 + dt <= t: - for i in range(len(HBP_t0)): - for j in range(len(HBP_cp[t0 + dt])): - if (HBP_t0[i][0] == HBP_cp[t0 + dt][j][0] and - HBP_t0[i][1] == HBP_cp[t0 + dt][j][1]): - newHBP.append(HBP_t0[i]) - break - C_c = len(newHBP) - t0 += dt - HBP_t0 = newHBP - newHBP = [] - if len(HBP[begt0]) == 0: - return 0 - else: - return C_c / float(len(HBP[begt0])) + def run(self, **kwargs): + # Get all hydrogen bonds + hydrogen_bonds = MDAnalysis.analysis.hbonds.HydrogenBondAnalysis(self.universe, + self.selection1, + self.selection2, + distance=3.5, + angle=120.0) + # fixme - use it only for the time requested + # fixme - this currently ignores the constructor parameters t0 tf etc + hydrogen_bonds.run(**kwargs) - def _intervC_c(self, HBP, t0, tf, dt): - """ - This function gets all the data for the h(t0)h(t0+dt)', where - t0 = 1,2,3,...,tf. This function give us one point of the final plot - HBL vs t - """ - a = 0 - count = 0 - for i in range(len(HBP)): - if (t0 + dt <= tf): - if t0 == t0 + dt: - b = self._getC_c(HBP, t0, t0) - break - b = self._getC_c(HBP, t0, t0 + dt) - t0 += dt - a += b - count += 1 - if count == 0: - return 1.0 - return a / count - - def _intervC_i(self, HBP, t0, tf, dt): - """ - This function gets all the data for the h(t0)h(t0+dt), where - t0 = 1,2,3,...,tf. This function give us a point of the final plot - HBL vs t - """ - a = 0 - count = 0 - for i in range(len(HBP)): - if (t0 + dt <= tf): - b = self._getC_i(HBP, t0, t0 + dt) - t0 += dt - a += b - count += 1 - return a / count - - def _finalGraphGetC_i(self, HBP, t0, tf, maxdt): - """ - This function gets the final data of the C_i graph. - """ - output = [] - for dt in range(maxdt): - a = self._intervC_i(HBP, t0, tf, dt) - output.append(a) - return output + # Extract the hydrogen bonds IDs only in the format [set(superset(x1,x2), superset(x3,x4)), set(), set()] + hb_ids = [{frozenset(bond[0:2]) for bond in frame} for frame in hydrogen_bonds.timeseries] - def _finalGraphGetC_c(self, HBP, t0, tf, maxdt): - """ - This function gets the final data of the C_c graph. - """ - output = [] - for dt in range(maxdt): - a = self._intervC_c(HBP, t0, tf, dt) - output.append(a) - return output + # Calculate the + tau_timeseries, timeseries, timeseries_data = autocorrelation(hb_ids, self.dtmax) - def _getGraphics(self, HBP, t0, tf, maxdt): - """ - Function that join all the results into a plot. - """ - a = [] - cont = self._finalGraphGetC_c(HBP, t0, tf, maxdt) - inte = self._finalGraphGetC_i(HBP, t0, tf, maxdt) - for i in range(len(cont)): - fix = [cont[i], inte[i]] - a.append(fix) - return a + # fixme - document + self.hydrogen_bonds = hydrogen_bonds.timeseries - def _HBA(self, ts, conn, universe, selAtom1, selAtom2, - verbose=False): - """ - Main function for calculate C_i and C_c in parallel. - """ - finalGetResidue1 = selAtom1 - finalGetResidue2 = selAtom2 - frame = ts.frame - h = MDAnalysis.analysis.hbonds.HydrogenBondAnalysis(universe, - finalGetResidue1, - finalGetResidue2, - distance=3.5, - angle=120.0, - start=frame - 1, - stop=frame) - while True: - try: - h.run(verbose=verbose) - break - except: - print("error") - print("trying again") - sys.stdout.flush() - sys.stdout.flush() - conn.send(h.timeseries[0]) - conn.close() + # user can investigate the distribution and sample size + self.sp_timeseries_data = timeseries_data + self.tau_timeseries = tau_timeseries + self.timeseries = timeseries + return self - def run(self, **kwargs): - """Analyze trajectory and produce timeseries""" - h_list = [] - i = 0 - if (self.nproc > 1): - while i < len(self.universe.trajectory): - jobs = [] - k = i - for j in range(self.nproc): - # start - print("ts=", i + 1) - if i >= len(self.universe.trajectory): - break - conn_parent, conn_child = multiprocessing.Pipe(False) - while True: - try: - # new thread - jobs.append( - (multiprocessing.Process( - target=self._HBA, - args=(self.universe.trajectory[i], - conn_child, self.universe, - self.selection1, self.selection2,)), - conn_parent)) - break - except: - print("error in jobs.append") - jobs[j][0].start() - i = i + 1 - - for j in range(self.nproc): - if k >= len(self.universe.trajectory): - break - rec01 = jobs[j][1] - received = rec01.recv() - h_list.append(received) - jobs[j][0].join() - k += 1 - self.timeseries = self._getGraphics( - h_list, 0, self.tf - 1, self.dtmax) - else: - h_list = MDAnalysis.analysis.hbonds.HydrogenBondAnalysis(self.universe, - self.selection1, - self.selection2, - distance=3.5, - angle=120.0) - h_list.run(**kwargs) - self.timeseries = self._getGraphics( - h_list.timeseries, self.t0, self.tf, self.dtmax) class WaterOrientationalRelaxation(object): @@ -1347,6 +1185,9 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermitte tau from 1 to `tau_max`. Saved in the field tau_timeseries. sp_timeseries : list survival probability for each value of `tau`. Saved in the field sp_timeseries. + sp_timeseries_data: list + raw datapoints from which the average is taken (sp_timeseries). + Time dependancy and distribution can be extracted. """ # backward compatibility (and priority) From 236fad64b8a902744dd425988f698703e7eed841 Mon Sep 17 00:00:00 2001 From: Mateusz Bieniek Date: Mon, 15 Apr 2019 23:28:11 +0100 Subject: [PATCH 06/40] The documentation updated with the code. The HydrogenBondAnalysis should ideally be extracted from the HydrogenBondLifetime and fed into it in the constructor. --- .../analysis/utils/autocorrelation.py | 59 +++++-- package/MDAnalysis/analysis/waterdynamics.py | 154 +++++++++--------- 2 files changed, 124 insertions(+), 89 deletions(-) diff --git a/package/MDAnalysis/analysis/utils/autocorrelation.py b/package/MDAnalysis/analysis/utils/autocorrelation.py index dc4a186f040..50891765009 100644 --- a/package/MDAnalysis/analysis/utils/autocorrelation.py +++ b/package/MDAnalysis/analysis/utils/autocorrelation.py @@ -24,23 +24,48 @@ import numpy as np -# TODO add documentation def autocorrelation(list_of_sets, tau_max, window_jump=1, intermittency=0): - """ - The descrete implementation of the autocorrelation function. - fixme - change to the numpy documentation - - :param list_of_sets: Modifies in place! - :param tau_max: - :param window_jump: - :param intermittency: - :return: + r"""The descrete implementation of the autocorrelation function. + + Here is a random equation that shows something. + + .. math:: + C_{HB}^c(\tau) = \frac{\sum_{ij}h_{ij}(t_0)h'_{ij}(t_0+\tau)}{\sum_{ij}h_{ij}(t_0)} + + + + fixme - list_of_sets: Modifies in place! + Parameters + ---------- + list_of_sets : list + List of sets, + tau_max : int + The last tau (inclusive) for which to carry out autocorrelation. + window_jump : int, optional + The step for the t0 to perform autocorrelation (without the overlap). Default is 1. + intermittency : int, optional + Helps with the graps in the data. If we want to remove some of the fluctuations and focus on the patterns, + intermittency removes the gaps. The default intermittency=0 which means that if the datapoint is missing at any + frame, it is not counted. With the value of 2 the datapoint can be missing for 2 consecutive frames but it will + be counted as present. + + Returns + -------- + tau_timeseries : list of int + the tau for which the autocorrelation was calculated + timeseries : list of int + the autocorelation values for each of the tau values + timeseries_data : list of list of int + the raw data from which the autocorrelation is computed. The time dependant evolution can be investigated. + + .. versionadded:: 0.19.2 """ # fixme - add checks if this is a list of sets # fixme - check dimensions + # fixme - check parameters? # correct the dataset for gaps (intermittency) - _correct_intermittency(intermittency, list_of_sets) + correct_intermittency(intermittency, list_of_sets) tau_timeseries = list(range(1, tau_max + 1)) timeseries_data = [[] for _ in range(tau_max)] @@ -69,15 +94,21 @@ def autocorrelation(list_of_sets, tau_max, window_jump=1, intermittency=0): return tau_timeseries, timeseries, timeseries_data -def _correct_intermittency(intermittency, id_list, verbose=False): +def correct_intermittency(intermittency, id_list, verbose=False): """ Pre-process Consecutive Intermittency with a single pass over the data. If an atom is absent for a number of frames equal or smaller than the parameter intermittency, then correct the data and remove the absence. ie 7,A,A,7 with intermittency=2 will be replaced by 7,7,7,7, where A=absence - :param intermittency: the max gap allowed and to be corrected - :param id_list: modifies the selecteded IDs in place by adding atoms which left for <= :param intermittency + Parameters + ---------- + intermittency: int + the max gap allowed and to be corrected + id_list: fixme + modifies the selecteded IDs in place by adding atoms which left for <= :param intermittency + verbose: Boolean, optional + print """ if intermittency == 0: return diff --git a/package/MDAnalysis/analysis/waterdynamics.py b/package/MDAnalysis/analysis/waterdynamics.py index ad32d75074f..2635bacbbb3 100644 --- a/package/MDAnalysis/analysis/waterdynamics.py +++ b/package/MDAnalysis/analysis/waterdynamics.py @@ -450,15 +450,16 @@ from six.moves import range, zip_longest import numpy as np -import multiprocessing import MDAnalysis.analysis.hbonds from MDAnalysis.lib.log import ProgressMeter from .utils.autocorrelation import autocorrelation + class HydrogenBondLifetimes(object): r"""Hydrogen bond lifetime analysis + fixme - update the definition of intermittency (refer the user to autocorrelation) This is an autocorrelation function that gives the "Hydrogen Bond Lifetimes" (HBL) proposed by D.C. Rapaport [Rapaport1983]_. From this function we can @@ -492,48 +493,100 @@ class HydrogenBondLifetimes(object): It could be any selection available in MDAnalysis, not just water. selection2 : str Selection string to analize its HBL against selection1 - t0 : int - frame where analysis begins - tf : int - frame where analysis ends - dtmax : int - Maximum dt size, `dtmax` < `tf` or it will crash. - nproc : int - Number of processors to use, by default is 1. - + verbose : Boolean, optional + When True, prints progress and comments to the console. .. versionadded:: 0.11.0 """ - def __init__(self, universe, selection1, selection2, t0, tf, dtmax, - nproc=1): - # fixme - use the same interface you used in SP + def __init__(self, universe, selection1, selection2, t0=None, tf=None, dtmax=None, verbose=False): self.universe = universe self.selection1 = selection1 self.selection2 = selection2 - self.t0 = t0 - self.tf = tf - 1 - self.dtmax = dtmax - self.nproc = nproc - self.timeseries = None + self.verbose = verbose + # backward compatibility + self.start = self.stop = self.tau_max = None + if t0 is not None: + self.start = t0 + warnings.warn("t0 is deprecated, use run(start) instead", category=DeprecationWarning) + + if tf is not None: + self.stop = tf + warnings.warn("tf is deprecated, use run(stop) instead", category=DeprecationWarning) + + if dtmax is not None: + self.tau_max = dtmax + warnings.warn("dtmax is deprecated, use run(tau_max) instead", category=DeprecationWarning) + + + def run(self, tau_max=20, start=0, stop=None, step=1, intermittency=0, verbose=False, **kwargs): + """ + Computes and returns the Survival Probability (SP) timeseries + + Parameters + ---------- + start : int, optional + Zero-based index of the first frame to be analysed + stop : int, optional + Zero-based index of the last frame to be analysed (inclusive) + step : int, optional + Jump every `step`-th frame. This is compatible but independant of the taus used. + Note that `step` and `tau_max` work consistently with intermittency. + tau_max : int, optional + Survival probability is calculated for the range 1 <= `tau` <= `tau_max` + intermittency : int, optional + The maximum number of consecutive frames for which a bond can disappear but be counted as present if it + returns at the next frame. An intermittency of `0` is equivalent to a continuous autocorrelation, which does + not allow for the hydrogen bond disappearance. For example, for `intermittency=2`, any given hbond may + disappear for up to two consecutive frames yet be treated as being present at all frames. + The default is continuous (0). + verbose : Boolean, optional + Print the progress to the console + + Returns + ------- + tau_timeseries : list + tau from 1 to `tau_max`. Saved in the field tau_timeseries. + timeseries : list + survival probability for each value of `tau`. Saved in the field sp_timeseries. + timeseries_data: list + raw datapoints from which the average is taken (sp_timeseries). + Time dependancy and distribution can be extracted. + """ + + # backward compatibility (and priority) + start = self.start if self.start is not None else start + stop = self.stop if self.stop is not None else stop + tau_max = self.tau_max if self.tau_max is not None else tau_max + + # sanity checks + if stop is not None and stop >= len(self.universe.trajectory): + raise ValueError("\"stop\" must be smaller than the number of frames in the trajectory.") + + if stop is None: + stop = len(self.universe.trajectory) + else: + stop = stop + 1 + + if tau_max > (stop - start): + raise ValueError("Too few frames selected for given tau_max.") - def run(self, **kwargs): # Get all hydrogen bonds + # fixme - we should not have the frozen definition of the hydrogen bonds + # fixme - this will be upated with the new HydrogenBond interface hydrogen_bonds = MDAnalysis.analysis.hbonds.HydrogenBondAnalysis(self.universe, self.selection1, self.selection2, distance=3.5, angle=120.0) - # fixme - use it only for the time requested - # fixme - this currently ignores the constructor parameters t0 tf etc - hydrogen_bonds.run(**kwargs) + # Compute the minimal number of hydrogen bonds + hydrogen_bonds.run(start=start, stop=stop, step=step, verbose=verbose, **kwargs) # Extract the hydrogen bonds IDs only in the format [set(superset(x1,x2), superset(x3,x4)), set(), set()] - hb_ids = [{frozenset(bond[0:2]) for bond in frame} for frame in hydrogen_bonds.timeseries] + found_hydrogen_bonds = [{frozenset(bond[0:2]) for bond in frame} for frame in hydrogen_bonds.timeseries] - # Calculate the - tau_timeseries, timeseries, timeseries_data = autocorrelation(hb_ids, self.dtmax) + tau_timeseries, timeseries, timeseries_data = autocorrelation(found_hydrogen_bonds, self.dtmax) # fixme - document self.hydrogen_bonds = hydrogen_bonds.timeseries @@ -1094,62 +1147,13 @@ def __init__(self, universe, selection, t0=None, tf=None, dtmax=None, verbose=Fa self.tau_max = dtmax warnings.warn("dtmax is deprecated, use run(tau_max=dtmax) instead", category=DeprecationWarning) + def print(self, verbose, *args): if self.verbose: print(args) elif verbose: print(args) - def _correct_intermittency(self, intermittency, selected_ids): - """ - Pre-process Consecutive Intermittency with a single pass over the data. - If an atom is absent for a number of frames equal or smaller - than the `intermittency`, then correct the data and remove the absence. - ie 7,A,A,7 with intermittency=2 will be replaced by 7,7,7,7, where A=absence - - Parameters - ---------- - intermittency : int - the max gap allowed and to be corrected - selected_ids: list of ids - modifies the selecteded IDs in place by adding atoms which left for <= `intermittency` - """ - self.print('Correcting the selected IDs for intermittancy (gaps). ') - if intermittency == 0: - return - - for i, ids in enumerate(selected_ids): - # initially update each frame as seen 0 ago (now) - seen_frames_ago = {i: 0 for i in ids} - for j in range(1, intermittency + 2): - for atomid in seen_frames_ago.keys(): - # no more frames - if i + j >= len(selected_ids): - continue - - # if the atom is absent now - if not atomid in selected_ids[i + j]: - # increase its absence counter - seen_frames_ago[atomid] += 1 - continue - - # the atom is found - if seen_frames_ago[atomid] == 0: - # the atom was present in the last frame - continue - - # it was absent more times than allowed - if seen_frames_ago[atomid] > intermittency: - continue - - # the atom was absent but returned (within <= intermittency) - # add it to the frames where it was absent. - # Introduce the corrections. - for k in range(seen_frames_ago[atomid], 0, -1): - selected_ids[i + j - k].add(atomid) - - seen_frames_ago[atomid] = 0 - def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermittency=0, verbose=False): """ From b3acf590d7ef319fba60e6eb596fe729a302142f Mon Sep 17 00:00:00 2001 From: Mateusz Bieniek Date: Thu, 18 Apr 2019 13:20:34 +0100 Subject: [PATCH 07/40] Correction for the test to make it consistent with the previous author's test. Our end index is inclusive (5th frame, giving 6 frames). But the other author's analysis was done on 0-4 frames (5 frames). --- package/MDAnalysis/analysis/waterdynamics.py | 24 +++++++------------ .../analysis/test_waterdynamics.py | 5 ++-- 2 files changed, 12 insertions(+), 17 deletions(-) diff --git a/package/MDAnalysis/analysis/waterdynamics.py b/package/MDAnalysis/analysis/waterdynamics.py index 2635bacbbb3..a77751f3618 100644 --- a/package/MDAnalysis/analysis/waterdynamics.py +++ b/package/MDAnalysis/analysis/waterdynamics.py @@ -445,16 +445,16 @@ """ from __future__ import print_function, division, absolute_import -import warnings - from six.moves import range, zip_longest - +import logging +import warnings import numpy as np import MDAnalysis.analysis.hbonds from MDAnalysis.lib.log import ProgressMeter from .utils.autocorrelation import autocorrelation +logger = logging.getLogger('MDAnalysis.analysis.waterdynamics') class HydrogenBondLifetimes(object): @@ -475,6 +475,7 @@ class HydrogenBondLifetimes(object): where :math:`h'_{ij}(t_0+\tau)=1` if there is a H-bond between a pair :math:`ij` during time interval :math:`t_0+\tau` (continuous) and :math:`h'_{ij}(t_0+\tau)=0` otherwise. In the intermittent case we have: + -fixme - update the intermittent definition .. math:: C_{HB}^i(\tau) = \frac{\sum_{ij}h_{ij}(t_0)h_{ij}(t_0+\tau)}{\sum_{ij}h_{ij}(t_0)} @@ -575,6 +576,7 @@ def run(self, tau_max=20, start=0, stop=None, step=1, intermittency=0, verbose=F # Get all hydrogen bonds # fixme - we should not have the frozen definition of the hydrogen bonds # fixme - this will be upated with the new HydrogenBond interface + # fixme - remove this from here and insert it into the example hydrogen_bonds = MDAnalysis.analysis.hbonds.HydrogenBondAnalysis(self.universe, self.selection1, self.selection2, @@ -586,7 +588,7 @@ def run(self, tau_max=20, start=0, stop=None, step=1, intermittency=0, verbose=F # Extract the hydrogen bonds IDs only in the format [set(superset(x1,x2), superset(x3,x4)), set(), set()] found_hydrogen_bonds = [{frozenset(bond[0:2]) for bond in frame} for frame in hydrogen_bonds.timeseries] - tau_timeseries, timeseries, timeseries_data = autocorrelation(found_hydrogen_bonds, self.dtmax) + tau_timeseries, timeseries, timeseries_data = autocorrelation(found_hydrogen_bonds, self.tau_max) # fixme - document self.hydrogen_bonds = hydrogen_bonds.timeseries @@ -1148,13 +1150,6 @@ def __init__(self, universe, selection, t0=None, tf=None, dtmax=None, verbose=Fa warnings.warn("dtmax is deprecated, use run(tau_max=dtmax) instead", category=DeprecationWarning) - def print(self, verbose, *args): - if self.verbose: - print(args) - elif verbose: - print(args) - - def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermittency=0, verbose=False): """ Computes and returns the Survival Probability (SP) timeseries @@ -1232,7 +1227,7 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermitte frame_no = start while frame_no < stop: # we have already added 1 to stop, therefore < if num_frames_to_skip != 0 and frame_loaded_counter == frames_per_window: - self.print(verbose, "Skipping the next %d frames:" % num_frames_to_skip) + logger.info("Skipping the next %d frames:" % num_frames_to_skip) frame_no += num_frames_to_skip frame_loaded_counter = 0 # Correct the number of frames to be loaded after the first window (which starts at t=0, and @@ -1243,7 +1238,7 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermitte # update the frame number self.universe.trajectory[frame_no] - self.print(verbose, "Loading frame:", self.universe.trajectory.ts) + logging.info("Loading frame:", self.universe.trajectory.ts) atoms = self.universe.select_atoms(self.selection) # SP of residues or of atoms @@ -1262,8 +1257,7 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermitte # warn the user if the NaN are found if all(np.isnan(sp_timeseries[1:])): - # fixme - warn the user that the dataset. Use a standardised warning. - print('NaN Error: Most likely data was not found. Check your atom selections. ') + logging.warning('NaN Error: Most likely data was not found. Check your atom selections. ') # user can investigate the distribution and sample size self.sp_timeseries_data = sp_timeseries_data diff --git a/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py b/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py index 1317c77afa2..978b82bc29b 100644 --- a/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py +++ b/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py @@ -44,9 +44,10 @@ def universe(): def test_HydrogenBondLifetimes(universe): hbl = waterdynamics.HydrogenBondLifetimes( - universe, SELECTION1, SELECTION1, 0, 5, 3) + universe, SELECTION1, SELECTION1, 0, 4, 3) hbl.run(stop=5) - assert_almost_equal(hbl.timeseries[2], 0.75) + assert_almost_equal(hbl.timeseries[3], 0.75) + def test_HydrogenBondLifetimes_growing_continuous(universe): From edb1f8ffb57243ce4c0afad094504a13ce9f1c8f Mon Sep 17 00:00:00 2001 From: Mateusz Bieniek Date: Thu, 18 Apr 2019 18:15:26 +0100 Subject: [PATCH 08/40] Extracting the autocorrelation and intermittency into their own functions with their own test cases. Removed the class HydrogenBondLifetimes and moved the autocorrelation functionality into the hbond_analysis class as a .autocorrelation method. This method should replace also hbond_autocorrel.py file. Test cases for the new method .autocorrelation are added. Added a test cases that, with our definition of intermittency, would fail in the case of the hbond_autocorrel.py. --- .../analysis/hbonds/hbond_analysis.py | 55 +++++ .../analysis/hbonds/hbond_autocorrel.py | 5 + .../analysis/utils/autocorrelation.py | 40 ++- package/MDAnalysis/analysis/waterdynamics.py | 138 ++--------- .../MDAnalysisTests/analysis/test_hbonds.py | 33 +++ .../analysis/test_waterdynamics.py | 232 +++++++++--------- 6 files changed, 251 insertions(+), 252 deletions(-) diff --git a/package/MDAnalysis/analysis/hbonds/hbond_analysis.py b/package/MDAnalysis/analysis/hbonds/hbond_analysis.py index b0aefe862f6..6ac363ff59c 100644 --- a/package/MDAnalysis/analysis/hbonds/hbond_analysis.py +++ b/package/MDAnalysis/analysis/hbonds/hbond_analysis.py @@ -333,6 +333,8 @@ class HydrogenBondAnalysis_OtherFF(HydrogenBondAnalysis): from MDAnalysis.lib import distances from MDAnalysis.lib.util import deprecate +from ..utils.autocorrelation import autocorrelation, correct_intermittency + logger = logging.getLogger('MDAnalysis.analysis.hbonds') @@ -1425,3 +1427,56 @@ def _make_dict(donors, hydrogens): h2donor.update(_make_dict(s1d, s1h)) return h2donor + + + def autocorrelation(self, tau_max=20, window_step=1, intermittency=0): + """ + Computes and returns the autocorrelation (HydrogenBondLifetimes ) on the computed hydrogen bonds. + + Parameters + ---------- + window_step : int, optional + Jump every `step`-th frame. This is compatible but independant of the taus used. + Note that `step` and `tau_max` work consistently with intermittency. + tau_max : int, optional + Survival probability is calculated for the range 1 <= `tau` <= `tau_max` + intermittency : int, optional + The maximum number of consecutive frames for which a bond can disappear but be counted as present if it + returns at the next frame. An intermittency of `0` is equivalent to a continuous autocorrelation, which does + not allow for the hydrogen bond disappearance. For example, for `intermittency=2`, any given hbond may + disappear for up to two consecutive frames yet be treated as being present at all frames. + The default is continuous (0). + + Returns + ------- + tau_timeseries : list + tau from 1 to `tau_max`. Saved in the field tau_timeseries. + timeseries : list + autcorrelation value for each value of `tau`. Saved in the field timeseries. + timeseries_data: list + raw datapoints from which the average is taken (timeseries). + Time dependancy and distribution can be extracted. + """ + + if self._timeseries is None: + logging.error("Autocorrelation analysis of hydrogen bonds cannot be done before the hydrogen bonds are found") + logging.error("Autocorrelation: Please use the .run() before calling this function") + return + + if self.step != 1: + logging.warning("Autocorrelation function should be carried out on consecutive frames. ") + logging.warning("Autocorrelation: if you would like to allow bonds to break and reform, please use 'intermittency'") + + # Extract the hydrogen bonds IDs only in the format [set(superset(x1,x2), superset(x3,x4)), ..] + hydrogen_bonds = self.timeseries + found_hydrogen_bonds = [{frozenset(bond[0:2]) for bond in frame} for frame in hydrogen_bonds] + + intermittent_hbonds = correct_intermittency(found_hydrogen_bonds, intermittency=intermittency) + tau_timeseries, timeseries, timeseries_data = autocorrelation(intermittent_hbonds, tau_max, + window_step=window_step) + self.acf_tau_timeseries = tau_timeseries + self.acf_timeseries = timeseries + # user can investigate the distribution and sample size + self.acf_timeseries_data = timeseries_data + + return self diff --git a/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py b/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py index 920ce718707..67f03a325c3 100644 --- a/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py +++ b/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py @@ -296,6 +296,11 @@ def __init__(self, universe, nruns=1, # number of times to iterate through the trajectory nsamples=50, # number of different points to sample in a run pbc=True): + + warnings.warn("This class is deprecated, use analysis.hbonds.HydrogenBondAnalysis " + "which has .autocorrelation function", + category=DeprecationWarning) + self.u = universe # check that slicing is possible try: diff --git a/package/MDAnalysis/analysis/utils/autocorrelation.py b/package/MDAnalysis/analysis/utils/autocorrelation.py index 50891765009..efb25c40623 100644 --- a/package/MDAnalysis/analysis/utils/autocorrelation.py +++ b/package/MDAnalysis/analysis/utils/autocorrelation.py @@ -22,9 +22,10 @@ # import numpy as np +from copy import deepcopy -def autocorrelation(list_of_sets, tau_max, window_jump=1, intermittency=0): +def autocorrelation(list_of_sets, tau_max, window_step=1): r"""The descrete implementation of the autocorrelation function. Here is a random equation that shows something. @@ -33,7 +34,6 @@ def autocorrelation(list_of_sets, tau_max, window_jump=1, intermittency=0): C_{HB}^c(\tau) = \frac{\sum_{ij}h_{ij}(t_0)h'_{ij}(t_0+\tau)}{\sum_{ij}h_{ij}(t_0)} - fixme - list_of_sets: Modifies in place! Parameters ---------- @@ -41,8 +41,9 @@ def autocorrelation(list_of_sets, tau_max, window_jump=1, intermittency=0): List of sets, tau_max : int The last tau (inclusive) for which to carry out autocorrelation. - window_jump : int, optional + window_step : int, optional The step for the t0 to perform autocorrelation (without the overlap). Default is 1. + fixme - move intermittency to the right place intermittency : int, optional Helps with the graps in the data. If we want to remove some of the fluctuations and focus on the patterns, intermittency removes the gaps. The default intermittency=0 which means that if the datapoint is missing at any @@ -64,14 +65,11 @@ def autocorrelation(list_of_sets, tau_max, window_jump=1, intermittency=0): # fixme - check dimensions # fixme - check parameters? - # correct the dataset for gaps (intermittency) - correct_intermittency(intermittency, list_of_sets) - tau_timeseries = list(range(1, tau_max + 1)) timeseries_data = [[] for _ in range(tau_max)] # calculate autocorrelation - for t in range(0, len(list_of_sets), window_jump): + for t in range(0, len(list_of_sets), window_step): Nt = len(list_of_sets[t]) if Nt == 0: @@ -93,8 +91,8 @@ def autocorrelation(list_of_sets, tau_max, window_jump=1, intermittency=0): return tau_timeseries, timeseries, timeseries_data - -def correct_intermittency(intermittency, id_list, verbose=False): +# fixme - is intermittency consitent with list of sets of sets? (hydrogen bonds) +def correct_intermittency(list_of_sets, intermittency): """ Pre-process Consecutive Intermittency with a single pass over the data. If an atom is absent for a number of frames equal or smaller @@ -103,30 +101,27 @@ def correct_intermittency(intermittency, id_list, verbose=False): Parameters ---------- + id_list: list of sets + returns a new list with the IDs with added IDs which disappeared for <= :param intermittency intermittency: int the max gap allowed and to be corrected - id_list: fixme - modifies the selecteded IDs in place by adding atoms which left for <= :param intermittency - verbose: Boolean, optional - print """ if intermittency == 0: - return + return list_of_sets - if verbose: - print('Correcting the selected IDs for intermittancy (gaps). ') + list_of_sets = deepcopy(list_of_sets) - for i, ids in enumerate(id_list): + for i, ids in enumerate(list_of_sets): # initially update each frame as seen 0 ago (now) seen_frames_ago = {i: 0 for i in ids} for j in range(1, intermittency + 2): for atomid in seen_frames_ago.keys(): # no more frames - if i + j >= len(id_list): + if i + j >= len(list_of_sets): continue # if the atom is absent now - if not atomid in id_list[i + j]: + if not atomid in list_of_sets[i + j]: # increase its absence counter seen_frames_ago[atomid] += 1 continue @@ -140,10 +135,11 @@ def correct_intermittency(intermittency, id_list, verbose=False): if seen_frames_ago[atomid] > intermittency: continue - # the atom was absent but returned (within <= intermittency) + # the atom was absent but returned (within <= intermittency_value) # add it to the frames where it was absent. # Introduce the corrections. for k in range(seen_frames_ago[atomid], 0, -1): - id_list[i + j - k].add(atomid) + list_of_sets[i + j - k].add(atomid) - seen_frames_ago[atomid] = 0 \ No newline at end of file + seen_frames_ago[atomid] = 0 + return list_of_sets \ No newline at end of file diff --git a/package/MDAnalysis/analysis/waterdynamics.py b/package/MDAnalysis/analysis/waterdynamics.py index a77751f3618..1f578f2a7ed 100644 --- a/package/MDAnalysis/analysis/waterdynamics.py +++ b/package/MDAnalysis/analysis/waterdynamics.py @@ -450,32 +450,30 @@ import warnings import numpy as np -import MDAnalysis.analysis.hbonds from MDAnalysis.lib.log import ProgressMeter -from .utils.autocorrelation import autocorrelation +from .utils.autocorrelation import autocorrelation, correct_intermittency logger = logging.getLogger('MDAnalysis.analysis.waterdynamics') class HydrogenBondLifetimes(object): r"""Hydrogen bond lifetime analysis - fixme - update the definition of intermittency (refer the user to autocorrelation) + fixme - deprecate it - This is an autocorrelation function that gives the "Hydrogen Bond Lifetimes" + This is a autocorrelation function that gives the "Hydrogen Bond Lifetimes" (HBL) proposed by D.C. Rapaport [Rapaport1983]_. From this function we can obtain the continuous and intermittent behavior of hydrogen bonds in time. A fast decay in these parameters indicate a fast change in HBs - connectivity. A slow decay indicate stable hydrogen bonds (e.g. ice). - The HBL is also know as "Hydrogen Bond Population Relaxation" - (HBPR). In the continuous case we have: + connectivity. A slow decay indicate very stables hydrogen bonds, like in + ice. The HBL is also know as "Hydrogen Bond Population Relaxation" + (HBPR). In the continuos case we have: .. math:: C_{HB}^c(\tau) = \frac{\sum_{ij}h_{ij}(t_0)h'_{ij}(t_0+\tau)}{\sum_{ij}h_{ij}(t_0)} where :math:`h'_{ij}(t_0+\tau)=1` if there is a H-bond between a pair - :math:`ij` during time interval :math:`t_0+\tau` (continuous) and + :math:`ij` during time interval :math:`t_0+\tau` (continuos) and :math:`h'_{ij}(t_0+\tau)=0` otherwise. In the intermittent case we have: - -fixme - update the intermittent definition .. math:: C_{HB}^i(\tau) = \frac{\sum_{ij}h_{ij}(t_0)h_{ij}(t_0+\tau)}{\sum_{ij}h_{ij}(t_0)} @@ -494,111 +492,24 @@ class HydrogenBondLifetimes(object): It could be any selection available in MDAnalysis, not just water. selection2 : str Selection string to analize its HBL against selection1 - verbose : Boolean, optional - When True, prints progress and comments to the console. + t0 : int + frame where analysis begins + tf : int + frame where analysis ends + dtmax : int + Maximum dt size, `dtmax` < `tf` or it will crash. + nproc : int + Number of processors to use, by default is 1. + .. versionadded:: 0.11.0 """ def __init__(self, universe, selection1, selection2, t0=None, tf=None, dtmax=None, verbose=False): - self.universe = universe - self.selection1 = selection1 - self.selection2 = selection2 - self.verbose = verbose - - # backward compatibility - self.start = self.stop = self.tau_max = None - if t0 is not None: - self.start = t0 - warnings.warn("t0 is deprecated, use run(start) instead", category=DeprecationWarning) - - if tf is not None: - self.stop = tf - warnings.warn("tf is deprecated, use run(stop) instead", category=DeprecationWarning) - - if dtmax is not None: - self.tau_max = dtmax - warnings.warn("dtmax is deprecated, use run(tau_max) instead", category=DeprecationWarning) - - - def run(self, tau_max=20, start=0, stop=None, step=1, intermittency=0, verbose=False, **kwargs): - """ - Computes and returns the Survival Probability (SP) timeseries - - Parameters - ---------- - start : int, optional - Zero-based index of the first frame to be analysed - stop : int, optional - Zero-based index of the last frame to be analysed (inclusive) - step : int, optional - Jump every `step`-th frame. This is compatible but independant of the taus used. - Note that `step` and `tau_max` work consistently with intermittency. - tau_max : int, optional - Survival probability is calculated for the range 1 <= `tau` <= `tau_max` - intermittency : int, optional - The maximum number of consecutive frames for which a bond can disappear but be counted as present if it - returns at the next frame. An intermittency of `0` is equivalent to a continuous autocorrelation, which does - not allow for the hydrogen bond disappearance. For example, for `intermittency=2`, any given hbond may - disappear for up to two consecutive frames yet be treated as being present at all frames. - The default is continuous (0). - verbose : Boolean, optional - Print the progress to the console - - Returns - ------- - tau_timeseries : list - tau from 1 to `tau_max`. Saved in the field tau_timeseries. - timeseries : list - survival probability for each value of `tau`. Saved in the field sp_timeseries. - timeseries_data: list - raw datapoints from which the average is taken (sp_timeseries). - Time dependancy and distribution can be extracted. - """ - - # backward compatibility (and priority) - start = self.start if self.start is not None else start - stop = self.stop if self.stop is not None else stop - tau_max = self.tau_max if self.tau_max is not None else tau_max - - # sanity checks - if stop is not None and stop >= len(self.universe.trajectory): - raise ValueError("\"stop\" must be smaller than the number of frames in the trajectory.") - - if stop is None: - stop = len(self.universe.trajectory) - else: - stop = stop + 1 - - if tau_max > (stop - start): - raise ValueError("Too few frames selected for given tau_max.") - - # Get all hydrogen bonds - # fixme - we should not have the frozen definition of the hydrogen bonds - # fixme - this will be upated with the new HydrogenBond interface - # fixme - remove this from here and insert it into the example - hydrogen_bonds = MDAnalysis.analysis.hbonds.HydrogenBondAnalysis(self.universe, - self.selection1, - self.selection2, - distance=3.5, - angle=120.0) - # Compute the minimal number of hydrogen bonds - hydrogen_bonds.run(start=start, stop=stop, step=step, verbose=verbose, **kwargs) - - # Extract the hydrogen bonds IDs only in the format [set(superset(x1,x2), superset(x3,x4)), set(), set()] - found_hydrogen_bonds = [{frozenset(bond[0:2]) for bond in frame} for frame in hydrogen_bonds.timeseries] - - tau_timeseries, timeseries, timeseries_data = autocorrelation(found_hydrogen_bonds, self.tau_max) - - # fixme - document - self.hydrogen_bonds = hydrogen_bonds.timeseries - - # user can investigate the distribution and sample size - self.sp_timeseries_data = timeseries_data - self.tau_timeseries = tau_timeseries - self.timeseries = timeseries - return self - + warnings.warn("This class is deprecated, use analysis.hbonds.HydrogenBondAnalysis " + "which has .autocorrelation function", + category=DeprecationWarning) + pass class WaterOrientationalRelaxation(object): @@ -1207,7 +1118,7 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermitte raise ValueError("Too few frames selected for given tau_max.") # preload the frames (atom IDs) to a list of sets - self.selected_ids = [] + self._selected_ids = [] # fixme - to parallise: the section should be rewritten so that this loop only creates a list of indices, # on which the parallel _single_frame can be applied. @@ -1243,7 +1154,7 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermitte # SP of residues or of atoms ids = atoms.residues.resids if residues else atoms.ids - self.selected_ids.append(set(ids)) + self._selected_ids.append(set(ids)) frame_no += 1 frame_loaded_counter += 1 @@ -1252,8 +1163,9 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermitte # and for extra frames that were loaded (intermittency) window_jump = step - num_frames_to_skip - tau_timeseries, sp_timeseries, sp_timeseries_data = \ - autocorrelation(self.selected_ids, tau_max, window_jump,intermittency) + self._intermittent_selected_ids = correct_intermittency(self._selected_ids, intermittency=intermittency) + tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(self._intermittent_selected_ids, + tau_max, window_jump) # warn the user if the NaN are found if all(np.isnan(sp_timeseries[1:])): diff --git a/testsuite/MDAnalysisTests/analysis/test_hbonds.py b/testsuite/MDAnalysisTests/analysis/test_hbonds.py index 14223178abe..7b0076ae1cd 100644 --- a/testsuite/MDAnalysisTests/analysis/test_hbonds.py +++ b/testsuite/MDAnalysisTests/analysis/test_hbonds.py @@ -397,3 +397,36 @@ def test_table_atoms(self, h, normalized_timeseries, reference_table): # https://github.com/MDAnalysis/mdanalysis/issues/801) for name, ref in reference_table.items(): assert_array_equal(h.table.field(name), ref, err_msg="resname for {0} do not match (Issue #801)") + + +class TestHydrogenBondAnalysisAutocorrelation(object): + @staticmethod + @pytest.fixture(scope='class') + def universe(): + return MDAnalysis.Universe(waterPSF, waterDCD) + + @staticmethod + @pytest.fixture(scope='class') + def selection(): + return "byres name OH2" + + def test_HydrogenBondLifetimes(self, universe, selection): + hbonds = MDAnalysis.analysis.hbonds.HydrogenBondAnalysis(universe, + selection, + selection, + distance=3.5, + angle=120.0) + hbonds.run(start=0, stop=4) + hbonds.autocorrelation(tau_max=3) + assert_almost_equal(hbonds.acf_timeseries[3], 0.75) + + def test_HydrogenBondLifetimes_growing_continuous(self, universe, selection): + hbonds = MDAnalysis.analysis.hbonds.HydrogenBondAnalysis(universe, + selection, + selection, + distance=3.5, + angle=120.0) + hbonds.run(start=0, stop=9) + hbonds.autocorrelation(tau_max=5) + assert all([frame >= framePlus1 for frame, framePlus1 in zip(hbonds.acf_timeseries, hbonds.acf_timeseries[1:])]) + diff --git a/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py b/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py index 978b82bc29b..e9a4440c7c5 100644 --- a/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py +++ b/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py @@ -23,7 +23,7 @@ from __future__ import print_function, absolute_import import MDAnalysis from MDAnalysis.analysis import waterdynamics -from MDAnalysis.analysis.utils.autocorrelation import autocorrelation +from MDAnalysis.analysis.utils.autocorrelation import autocorrelation, correct_intermittency from MDAnalysisTests.datafiles import waterPSF, waterDCD @@ -42,23 +42,6 @@ def universe(): return MDAnalysis.Universe(waterPSF, waterDCD) -def test_HydrogenBondLifetimes(universe): - hbl = waterdynamics.HydrogenBondLifetimes( - universe, SELECTION1, SELECTION1, 0, 4, 3) - hbl.run(stop=5) - assert_almost_equal(hbl.timeseries[3], 0.75) - - - -def test_HydrogenBondLifetimes_growing_continuous(universe): - # The autocorrelation cannot grow - hbl = waterdynamics.HydrogenBondLifetimes( - universe, SELECTION1, SELECTION1, t0=0, tf=9, dtmax=5) - hbl.run() - # Index 0 in frameX[0] refers to the continuous, 1 to intermittent - assert all([frameX >= frameXplus1 for frameX, frameXplus1 in zip(hbl.timeseries, hbl.timeseries[1:])]) - - def test_WaterOrientationalRelaxation(universe): wor = waterdynamics.WaterOrientationalRelaxation( universe, SELECTION1, 0, 5, 2) @@ -96,77 +79,24 @@ def test_MeanSquareDisplacement_zeroMolecules(universe): assert_almost_equal(msd_zero.timeseries[1], 0.0) -def test_SurvivalProbability_t0tf(universe): - with patch.object(universe, 'select_atoms') as select_atoms_mock: - ids = [(0, ), (0, ), (7, 6, 5), (6, 5, 4), (5, 4, 3), (4, 3, 2), (3, 2, 1), (0, )] - select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop(2)) # atom IDs fed set by set - sp = waterdynamics.SurvivalProbability(universe, "") - sp.run(tau_max=3, start=2, stop=6) - assert_almost_equal(sp.sp_timeseries, [1, 2 / 3.0, 1 / 3.0, 0]) - - -def test_SurvivalProbability_definedTaus(universe): - with patch.object(universe, 'select_atoms') as select_atoms_mock: - ids = [(9, 8, 7), (8, 7, 6), (7, 6, 5), (6, 5, 4), (5, 4, 3), (4, 3, 2), (3, 2, 1)] - select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop()) # atom IDs fed set by set - sp = waterdynamics.SurvivalProbability(universe, "") - sp.run(tau_max=3, start=0, stop=6, verbose=True) - assert_almost_equal(sp.sp_timeseries, [1, 2 / 3.0, 1 / 3.0, 0]) - - -def test_SurvivalProbability_zeroMolecules(universe): - # no atom IDs found - with patch.object(universe, 'select_atoms', return_value=Mock(ids=[])) as select_atoms_mock: - sp = waterdynamics.SurvivalProbability(universe, "") - sp.run(tau_max=3, start=3, stop=6, verbose=True) - assert all(np.isnan(sp.sp_timeseries[1:])) - - -def test_SurvivalProbability_alwaysPresent(universe): - # always the same atom IDs found, 7 and 8 - with patch.object(universe, 'select_atoms', return_value=Mock(ids=[7, 8])) as select_atoms_mock: - sp = waterdynamics.SurvivalProbability(universe, "") - sp.run(tau_max=3, start=0, stop=6, verbose=True) - assert all(np.equal(sp.sp_timeseries, 1)) - - -def test_SurvivalProbability_stepLargerThanDtmax(universe): - # Testing if the frames are skipped correctly - with patch.object(universe, 'select_atoms', return_value=Mock(ids=(1,))) as select_atoms_mock: - sp = waterdynamics.SurvivalProbability(universe, "") - sp.run(tau_max=2, step=5, stop=9, verbose=True) - assert_equal(sp.sp_timeseries, [1, 1, 1]) - # with tau_max=2 for all the frames we only read 6 of them - # this is because the frames which are not used are skipped, and therefore 'select_atoms' - assert universe.trajectory.n_frames > 6 - assert_equal(select_atoms_mock.call_count, 6) - - -def test_SurvivalProbability_stepEqualDtMax(universe): - with patch.object(universe, 'select_atoms', return_value=Mock(ids=(1,))) as select_atoms_mock: - sp = waterdynamics.SurvivalProbability(universe, "") - sp.run(tau_max=4, step=5, stop=9, verbose=True) - # all frames from 0, with 9 inclusive - assert_equal(select_atoms_mock.call_count, 10) - - def test_SurvivalProbability_intermittency1and2(universe): """ Intermittency of 2 means that we still count an atom if it is not present for up to 2 consecutive frames, but then returns at the following step. """ with patch.object(universe, 'select_atoms') as select_atoms_mock: - ids = [(9, 8), (), (8,), (9,), (8,), (), (9,8), (), (8,), (9,8,)] + ids = [(9, 8), (), (8,), (9,), (8,), (), (9, 8), (), (8,), (9, 8)] select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop()) # atom IDs fed set by set sp = waterdynamics.SurvivalProbability(universe, "") sp.run(tau_max=3, stop=9, verbose=True, intermittency=2) - assert all((x == {9, 8} for x in sp.selected_ids)) + assert all(x == {9, 8} for x in sp._intermittent_selected_ids) assert_almost_equal(sp.sp_timeseries, [1, 1, 1, 1]) def test_SurvivalProbability_intermittency2lacking(universe): """ - If an atom is not present for more than 2 consecutive frames, it is considered to have left the region. + If an atom is not present for more than 2 consecutive frames, + it is considered to have left the region. """ with patch.object(universe, 'select_atoms') as select_atoms_mock: ids = [(9,), (), (), (), (9,), (), (), (), (9,)] @@ -186,7 +116,7 @@ def test_SurvivalProbability_intermittency1_step5_noSkipping(universe): select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop()) # atom IDs fed set by set sp = waterdynamics.SurvivalProbability(universe, "") sp.run(tau_max=2, stop=9, verbose=True, intermittency=1, step=5) - assert all((x == {2, 3} for x in sp.selected_ids)) + assert all((x == {2, 3} for x in sp._intermittent_selected_ids)) assert_almost_equal(sp.sp_timeseries, [1, 1, 1]) @@ -201,11 +131,42 @@ def test_SurvivalProbability_intermittency1_step5_Skipping(universe): select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop()) # atom IDs fed set by set sp = waterdynamics.SurvivalProbability(universe, "") sp.run(tau_max=1, stop=9, verbose=True, intermittency=1, step=5) - assert all((x == {1} for x in sp.selected_ids)) - assert len(sp.selected_ids) == beforepopsing + assert all((x == {1} for x in sp._intermittent_selected_ids)) + assert len(sp._selected_ids) == beforepopsing assert_almost_equal(sp.sp_timeseries, [1, 1]) +def test_intermittency_none(): + # No changes asked - returns the same data + input_ids = [{1}, {1}, {1}, set(), set(), {1}, {1}, {1}, set(), set(), {1}, set(), set(), {1}] + corrected = correct_intermittency(input_ids, intermittency=0) + assert all(x == y for x,y in zip(input_ids, corrected)) + + +def test_intermittency_1and2(): + # The maximum gap in the dataset is 2, so the IDs are always present after correction + input_ids = [{9, 8}, set(), {8, }, {9, }, {8, }, set(), {9, 8}, set(), {8, }, {9, 8, }] + corrected = correct_intermittency(input_ids, intermittency=2) + assert all((x == {9, 8} for x in corrected)) + + +def test_intermittency_2tooShort(): + #The IDs are abscent for too long/ + input_ids = [{9,}, {}, {}, {}, {9,}, {}, {}, {}, {9,}] + corrected = correct_intermittency(input_ids, intermittency=2) + assert all(x == y for x, y in zip(input_ids, corrected)) + + +def test_intermittency_setsOfSets(): + # Verificaiton for the case of hydrogen bonds (sets of sets) + input_ids = [{frozenset({1,2}), frozenset({3, 4})},set(), set(), + {frozenset({1, 2}), frozenset({3, 4})}, set(), set(), + {frozenset({1, 2}), frozenset({3, 4})}, set(), set(), + {frozenset({1, 2}), frozenset({3, 4})}] + corrected = correct_intermittency(input_ids, intermittency=2) + assert all((x == {frozenset({1, 2}), frozenset({3, 4})} for x in corrected)) + + def test_autocorrelation_alwaysPresent(): input = [{1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}] tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input, tau_max=3) @@ -218,62 +179,99 @@ def test_autocorrelation_definedTaus(): assert_almost_equal(sp_timeseries, [1, 2/3., 1/3., 0]) -def test_autocorrelation_intermittency1and2(): - """ - Intermittency of 2 means that we still count an atom if it is not present for up to 2 consecutive frames, - but then returns at the following step. - """ - input_ids = [{9, 8}, set(), {8,}, {9,}, {8,}, set(), {9, 8}, set(), {8,}, {9, 8,}] - tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=3, intermittency=2) - # input_ids are modified in place, check if the gaps have been removed correctly - assert all((x == {9, 8} for x in input_ids)) - assert_almost_equal(sp_timeseries, [1, 1, 1, 1]) - - -def test_autocorrelation_intermittency2lacking(): - """ - If an atom is not present for more than 2 consecutive frames, it is considered to have left the region. - """ - input_ids = [{9,}, {}, {}, {}, {9,}, {}, {}, {}, {9,}] - tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=3, intermittency=2) - assert_almost_equal(sp_timeseries, [1, 0, 0, 0]) - - def test_autocorrelation_intermittency1_windowJump_intermittencyAll(): """ Step leads to skipping frames if (tau_max + 1) + (intermittency * 2) < step. No frames should be skipped so intermittency should be applied to all. """ input_ids = [{2, 3}, {3,}, {2, 3}, {3,}, {2,}, {3,}, {2, 3}, {3,}, {2, 3}, {2, 3}] - tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=2, - intermittency=1, window_jump=5) - assert all((x == {2, 3} for x in input_ids)) + corrected = correct_intermittency(input_ids, intermittency=1) + tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(corrected, tau_max=2, + window_step=5) + assert all((x == {2, 3} for x in corrected)) assert_almost_equal(sp_timeseries, [1, 1, 1]) -def test_autocorrelation_intermittency0_windowBigJump(): - """ - The empty sets are ignored (no interrttency) - """ +def test_autocorrelation_windowBigJump(): + #The empty sets are ignored (no intermittency) input_ids = [{1}, {1}, {1}, set(), set(), {1}, {1}, {1}, set(), set(), {1}, {1}, {1}] - tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=2, window_jump=5) + tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=2, window_step=5) assert_almost_equal(sp_timeseries, [1, 1, 1]) -def test_autocorrelation_intermittency0_windowBigJump_absence(): - """ - In the last frame the molecules are absent - """ +def test_autocorrelation_windowBigJump_absence(): + # In the last frame the molecules are absent input_ids = [{1}, {1}, {1}, set(), set(), {1}, {1}, {1}, set(), set(), {1}, set(), set()] - tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=2, window_jump=5) + tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=2, window_step=5) assert_almost_equal(sp_timeseries, [1, 2/3., 2/3.]) +def test_autocorrelation_intermittency1_many(): + input_ids = [{1}, set(), {1}, set(), {1}, set(), {1}, set(), {1}, set(), {1}, set(), {1}, set(), {1}] + corrected = correct_intermittency(input_ids, intermittency=1) + tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(corrected, tau_max=14, + window_step=5) + assert_almost_equal(sp_timeseries, [1] * 15) + + def test_autocorrelation_intermittency2_windowBigJump(): - """ - The intermittency corrects the last frame - """ + # The intermittency corrects the last frame input_ids = [{1}, {1}, {1}, set(), set(), {1}, {1}, {1}, set(), set(), {1}, set(), set(), {1}] - tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=2, - intermittency=2, window_jump=5) + corrected = correct_intermittency(input_ids, intermittency=2) + tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(corrected, tau_max=2, + window_step=5) assert_almost_equal(sp_timeseries, [1, 1, 1]) + + +def test_SurvivalProbability_t0tf(universe): + with patch.object(universe, 'select_atoms') as select_atoms_mock: + ids = [(0, ), (0, ), (7, 6, 5), (6, 5, 4), (5, 4, 3), (4, 3, 2), (3, 2, 1), (0, )] + select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop(2)) # atom IDs fed set by set + sp = waterdynamics.SurvivalProbability(universe, "") + sp.run(tau_max=3, start=2, stop=6) + assert_almost_equal(sp.sp_timeseries, [1, 2 / 3.0, 1 / 3.0, 0]) + + +def test_SurvivalProbability_definedTaus(universe): + with patch.object(universe, 'select_atoms') as select_atoms_mock: + ids = [(9, 8, 7), (8, 7, 6), (7, 6, 5), (6, 5, 4), (5, 4, 3), (4, 3, 2), (3, 2, 1)] + select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop()) # atom IDs fed set by set + sp = waterdynamics.SurvivalProbability(universe, "") + sp.run(tau_max=3, start=0, stop=6, verbose=True) + assert_almost_equal(sp.sp_timeseries, [1, 2 / 3.0, 1 / 3.0, 0]) + + +def test_SurvivalProbability_zeroMolecules(universe): + # no atom IDs found + with patch.object(universe, 'select_atoms', return_value=Mock(ids=[])) as select_atoms_mock: + sp = waterdynamics.SurvivalProbability(universe, "") + sp.run(tau_max=3, start=3, stop=6, verbose=True) + assert all(np.isnan(sp.sp_timeseries[1:])) + + +def test_SurvivalProbability_alwaysPresent(universe): + # always the same atom IDs found, 7 and 8 + with patch.object(universe, 'select_atoms', return_value=Mock(ids=[7, 8])) as select_atoms_mock: + sp = waterdynamics.SurvivalProbability(universe, "") + sp.run(tau_max=3, start=0, stop=6, verbose=True) + assert all(np.equal(sp.sp_timeseries, 1)) + + +def test_SurvivalProbability_stepLargerThanDtmax(universe): + # Testing if the frames are skipped correctly + with patch.object(universe, 'select_atoms', return_value=Mock(ids=(1,))) as select_atoms_mock: + sp = waterdynamics.SurvivalProbability(universe, "") + sp.run(tau_max=2, step=5, stop=9, verbose=True) + assert_equal(sp.sp_timeseries, [1, 1, 1]) + # with tau_max=2 for all the frames we only read 6 of them + # this is because the frames which are not used are skipped, and therefore 'select_atoms' + assert universe.trajectory.n_frames > 6 + assert_equal(select_atoms_mock.call_count, 6) + + +def test_SurvivalProbability_stepEqualDtMax(universe): + with patch.object(universe, 'select_atoms', return_value=Mock(ids=(1,))) as select_atoms_mock: + sp = waterdynamics.SurvivalProbability(universe, "") + sp.run(tau_max=4, step=5, stop=9, verbose=True) + # all frames from 0, with 9 inclusive + assert_equal(select_atoms_mock.call_count, 10) From 968059b97a8022594111786fa11c194d02f61087 Mon Sep 17 00:00:00 2001 From: Mateusz Bieniek Date: Wed, 10 Apr 2019 14:23:56 +0100 Subject: [PATCH 09/40] Autocorrelation extracted from SP. --- package/MDAnalysis/analysis/utils/__init__.py | 0 .../analysis/utils/autocorrelation.py | 105 ++++++++++++++++++ package/MDAnalysis/analysis/waterdynamics.py | 27 +---- 3 files changed, 109 insertions(+), 23 deletions(-) create mode 100644 package/MDAnalysis/analysis/utils/__init__.py create mode 100644 package/MDAnalysis/analysis/utils/autocorrelation.py diff --git a/package/MDAnalysis/analysis/utils/__init__.py b/package/MDAnalysis/analysis/utils/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/package/MDAnalysis/analysis/utils/autocorrelation.py b/package/MDAnalysis/analysis/utils/autocorrelation.py new file mode 100644 index 00000000000..867eee2fa39 --- /dev/null +++ b/package/MDAnalysis/analysis/utils/autocorrelation.py @@ -0,0 +1,105 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 +# +# MDAnalysis --- https://www.mdanalysis.org +# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, +# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. +# MDAnalysis: A Python package for the rapid analysis of molecular dynamics +# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th +# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. +# doi: 10.25080/majora-629e541a-00e +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 +# + +""" +A flexible implementation of an autocorrelation function. +""" + +import numpy as np + +# TODO add documentation +def autocorrelation(list_of_sets, tau_max, window_jump=1, intermittency=0): + # FIXME - make sure that it is not just a list of numbers + + # correct the dataset for gaps (intermittency) + _correct_intermittency(intermittency, list_of_sets) + + # calculate Survival Probability + tau_timeseries = np.arange(1, tau_max + 1) + sp_timeseries_data = [[] for _ in range(tau_max)] + + for t in range(0, len(list_of_sets), window_jump): + Nt = len(list_of_sets[t]) + + if Nt == 0: + continue + + for tau in tau_timeseries: + if t + tau >= len(list_of_sets): + break + + # continuous: IDs that survive from t to t + tau and at every frame in between + Ntau = len(set.intersection(*list_of_sets[t:t + tau + 1])) + sp_timeseries_data[tau - 1].append(Ntau / float(Nt)) + + sp_timeseries = [np.mean(sp) for sp in sp_timeseries_data] + return tau_timeseries, sp_timeseries, sp_timeseries_data + + +def _correct_intermittency(intermittency, id_list, verbose=False): + """ + Pre-process Consecutive Intermittency with a single pass over the data. + If an atom is absent for a number of frames equal or smaller + than the parameter intermittency, then correct the data and remove the absence. + ie 7,A,A,7 with intermittency=2 will be replaced by 7,7,7,7, where A=absence + + :param intermittency: the max gap allowed and to be corrected + :param id_list: modifies the selecteded IDs in place by adding atoms which left for <= :param intermittency + """ + if intermittency == 0: + return + + if verbose: + print('Correcting the selected IDs for intermittancy (gaps). ') + + for i, ids in enumerate(id_list): + # initially update each frame as seen 0 ago (now) + seen_frames_ago = {i: 0 for i in ids} + for j in range(1, intermittency + 2): + for atomid in seen_frames_ago.keys(): + # no more frames + if i + j >= len(id_list): + continue + + # if the atom is absent now + if not atomid in id_list[i + j]: + # increase its absence counter + seen_frames_ago[atomid] += 1 + continue + + # the atom is found + if seen_frames_ago[atomid] == 0: + # the atom was present in the last frame + continue + + # it was absent more times than allowed + if seen_frames_ago[atomid] > intermittency: + continue + + # the atom was absent but returned (within <= intermittency) + # add it to the frames where it was absent. + # Introduce the corrections. + for k in range(seen_frames_ago[atomid], 0, -1): + id_list[i + j - k].add(atomid) + + seen_frames_ago[atomid] = 0 \ No newline at end of file diff --git a/package/MDAnalysis/analysis/waterdynamics.py b/package/MDAnalysis/analysis/waterdynamics.py index a0f3b8cd379..553f5903cf7 100644 --- a/package/MDAnalysis/analysis/waterdynamics.py +++ b/package/MDAnalysis/analysis/waterdynamics.py @@ -458,6 +458,7 @@ import MDAnalysis.analysis.hbonds from MDAnalysis.lib.log import ProgressMeter +from .utils.autocorrelation import autocorrelation class HydrogenBondLifetimes(object): @@ -1408,36 +1409,16 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermitte frame_no += 1 frame_loaded_counter += 1 - # correct the dataset for gaps (intermittency) - self._correct_intermittency(intermittency, self.selected_ids) - - # calculate Survival Probability - tau_timeseries = np.arange(1, tau_max + 1) - sp_timeseries_data = [[] for _ in range(tau_max)] - # adjust for the frames that were not loaded (step>tau_max + 1), # and for extra frames that were loaded (intermittency) window_jump = step - num_frames_to_skip - for t in range(0, len(self.selected_ids), window_jump): - Nt = len(self.selected_ids[t]) - - if Nt == 0: - self.print(verbose, - "At frame {} the selection did not find any molecule. Moving on to the next frame".format(t)) - continue - - for tau in tau_timeseries: - if t + tau >= len(self.selected_ids): - break - - # continuous: IDs that survive from t to t + tau and at every frame in between - Ntau = len(set.intersection(*self.selected_ids[t:t + tau + 1])) - sp_timeseries_data[tau - 1].append(Ntau / float(Nt)) + tau_timeseries, sp_timeseries, sp_timeseries_data = \ + autocorrelation(self.selected_ids, tau_max, window_jump,intermittency) # user can investigate the distribution and sample size self.sp_timeseries_data = sp_timeseries_data self.tau_timeseries = tau_timeseries - self.sp_timeseries = [np.mean(sp) for sp in sp_timeseries_data] + self.sp_timeseries = sp_timeseries return self From c2b90297cce52d2657527d04d46e28e76195cf61 Mon Sep 17 00:00:00 2001 From: Mateusz Bieniek Date: Sun, 14 Apr 2019 13:55:47 +0100 Subject: [PATCH 10/40] Exhaustive testing for autocorrelation. It will now return 1 at tau=0. Now the function can be embedded into HydrogenBondLifetime. --- .../analysis/utils/autocorrelation.py | 15 ++- package/MDAnalysis/analysis/waterdynamics.py | 18 ++-- .../analysis/test_waterdynamics.py | 98 ++++++++++++++++--- 3 files changed, 111 insertions(+), 20 deletions(-) diff --git a/package/MDAnalysis/analysis/utils/autocorrelation.py b/package/MDAnalysis/analysis/utils/autocorrelation.py index 867eee2fa39..6f162e15222 100644 --- a/package/MDAnalysis/analysis/utils/autocorrelation.py +++ b/package/MDAnalysis/analysis/utils/autocorrelation.py @@ -29,13 +29,21 @@ # TODO add documentation def autocorrelation(list_of_sets, tau_max, window_jump=1, intermittency=0): + """ + + :param list_of_sets: Modifies in place! + :param tau_max: + :param window_jump: + :param intermittency: + :return: + """ # FIXME - make sure that it is not just a list of numbers # correct the dataset for gaps (intermittency) _correct_intermittency(intermittency, list_of_sets) # calculate Survival Probability - tau_timeseries = np.arange(1, tau_max + 1) + tau_timeseries = list(range(1, tau_max + 1)) sp_timeseries_data = [[] for _ in range(tau_max)] for t in range(0, len(list_of_sets), window_jump): @@ -53,6 +61,11 @@ def autocorrelation(list_of_sets, tau_max, window_jump=1, intermittency=0): sp_timeseries_data[tau - 1].append(Ntau / float(Nt)) sp_timeseries = [np.mean(sp) for sp in sp_timeseries_data] + + # at time 0 the value has to be one + tau_timeseries.insert(0, 0) + sp_timeseries.insert(0, 1) + return tau_timeseries, sp_timeseries, sp_timeseries_data diff --git a/package/MDAnalysis/analysis/waterdynamics.py b/package/MDAnalysis/analysis/waterdynamics.py index 553f5903cf7..d5143fa989d 100644 --- a/package/MDAnalysis/analysis/waterdynamics.py +++ b/package/MDAnalysis/analysis/waterdynamics.py @@ -297,10 +297,6 @@ for tau, sp in zip(tau_timeseries, sp_timeseries): print("{time} {sp}".format(time=tau, sp=sp)) - # SP is not calculated at tau=0, but if you would like to plot SP=1 at tau=0: - tau_timeseries.insert(0, 0) - sp_timeseries.insert(1, 0) - # plot plt.xlabel('Time') plt.ylabel('SP') @@ -464,19 +460,19 @@ class HydrogenBondLifetimes(object): r"""Hydrogen bond lifetime analysis - This is a autocorrelation function that gives the "Hydrogen Bond Lifetimes" + This is an autocorrelation function that gives the "Hydrogen Bond Lifetimes" (HBL) proposed by D.C. Rapaport [Rapaport1983]_. From this function we can obtain the continuous and intermittent behavior of hydrogen bonds in time. A fast decay in these parameters indicate a fast change in HBs connectivity. A slow decay indicate very stables hydrogen bonds, like in ice. The HBL is also know as "Hydrogen Bond Population Relaxation" - (HBPR). In the continuos case we have: + (HBPR). In the continuous case we have: .. math:: C_{HB}^c(\tau) = \frac{\sum_{ij}h_{ij}(t_0)h'_{ij}(t_0+\tau)}{\sum_{ij}h_{ij}(t_0)} where :math:`h'_{ij}(t_0+\tau)=1` if there is a H-bond between a pair - :math:`ij` during time interval :math:`t_0+\tau` (continuos) and + :math:`ij` during time interval :math:`t_0+\tau` (continuous) and :math:`h'_{ij}(t_0+\tau)=0` otherwise. In the intermittent case we have: .. math:: @@ -1373,6 +1369,9 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermitte # preload the frames (atom IDs) to a list of sets self.selected_ids = [] + # fixme - to parallise: the section should be rewritten so that this loop only creates a list of indices, + # on which the parallel _single_frame can be applied. + # skip frames that will not be used # Example: step 5 and tau 2: LLLSS LLLSS, ... where L = Load, and S = Skip # Intermittency means that we have to load the extra frames to know if the atom is actually missing. @@ -1416,6 +1415,11 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermitte tau_timeseries, sp_timeseries, sp_timeseries_data = \ autocorrelation(self.selected_ids, tau_max, window_jump,intermittency) + # warn the user if the NaN are found + if all(np.isnan(sp_timeseries[1:])): + # fixme - warn the user that the dataset. Use a standardised warning. + print('NaN Error: Most likely data was not found. Check your atom selections. ') + # user can investigate the distribution and sample size self.sp_timeseries_data = sp_timeseries_data diff --git a/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py b/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py index e6dc29b0f35..04e09c48611 100644 --- a/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py +++ b/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py @@ -23,9 +23,9 @@ from __future__ import print_function, absolute_import import MDAnalysis from MDAnalysis.analysis import waterdynamics +from MDAnalysis.analysis.utils.autocorrelation import autocorrelation from MDAnalysisTests.datafiles import waterPSF, waterDCD -from MDAnalysisTests.datafiles import PDB, XTC import pytest import numpy as np @@ -92,7 +92,7 @@ def test_SurvivalProbability_t0tf(universe): select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop(2)) # atom IDs fed set by set sp = waterdynamics.SurvivalProbability(universe, "") sp.run(tau_max=3, start=2, stop=6) - assert_almost_equal(sp.sp_timeseries, [2 / 3.0, 1 / 3.0, 0]) + assert_almost_equal(sp.sp_timeseries, [1, 2 / 3.0, 1 / 3.0, 0]) def test_SurvivalProbability_definedTaus(universe): @@ -101,7 +101,7 @@ def test_SurvivalProbability_definedTaus(universe): select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop()) # atom IDs fed set by set sp = waterdynamics.SurvivalProbability(universe, "") sp.run(tau_max=3, start=0, stop=6, verbose=True) - assert_almost_equal(sp.sp_timeseries, [2 / 3.0, 1 / 3.0, 0]) + assert_almost_equal(sp.sp_timeseries, [1, 2 / 3.0, 1 / 3.0, 0]) def test_SurvivalProbability_zeroMolecules(universe): @@ -109,7 +109,7 @@ def test_SurvivalProbability_zeroMolecules(universe): with patch.object(universe, 'select_atoms', return_value=Mock(ids=[])) as select_atoms_mock: sp = waterdynamics.SurvivalProbability(universe, "") sp.run(tau_max=3, start=3, stop=6, verbose=True) - assert all(np.isnan(sp.sp_timeseries)) + assert all(np.isnan(sp.sp_timeseries[1:])) def test_SurvivalProbability_alwaysPresent(universe): @@ -125,7 +125,7 @@ def test_SurvivalProbability_stepLargerThanDtmax(universe): with patch.object(universe, 'select_atoms', return_value=Mock(ids=(1,))) as select_atoms_mock: sp = waterdynamics.SurvivalProbability(universe, "") sp.run(tau_max=2, step=5, stop=9, verbose=True) - assert_equal(sp.sp_timeseries, [1, 1]) + assert_equal(sp.sp_timeseries, [1, 1, 1]) # with tau_max=2 for all the frames we only read 6 of them # this is because the frames which are not used are skipped, and therefore 'select_atoms' assert universe.trajectory.n_frames > 6 @@ -150,8 +150,8 @@ def test_SurvivalProbability_intermittency1and2(universe): select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop()) # atom IDs fed set by set sp = waterdynamics.SurvivalProbability(universe, "") sp.run(tau_max=3, stop=9, verbose=True, intermittency=2) - assert all((x == set([9, 8]) for x in sp.selected_ids)) - assert_almost_equal(sp.sp_timeseries, [1, 1, 1]) + assert all((x == {9, 8} for x in sp.selected_ids)) + assert_almost_equal(sp.sp_timeseries, [1, 1, 1, 1]) def test_SurvivalProbability_intermittency2lacking(universe): @@ -163,7 +163,7 @@ def test_SurvivalProbability_intermittency2lacking(universe): select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop()) # atom IDs fed set by set sp = waterdynamics.SurvivalProbability(universe, "") sp.run(tau_max=3, stop=8, verbose=True, intermittency=2) - assert_almost_equal(sp.sp_timeseries, [0, 0, 0]) + assert_almost_equal(sp.sp_timeseries, [1, 0, 0, 0]) def test_SurvivalProbability_intermittency1_step5_noSkipping(universe): @@ -176,8 +176,9 @@ def test_SurvivalProbability_intermittency1_step5_noSkipping(universe): select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop()) # atom IDs fed set by set sp = waterdynamics.SurvivalProbability(universe, "") sp.run(tau_max=2, stop=9, verbose=True, intermittency=1, step=5) - assert all((x == set([2, 3]) for x in sp.selected_ids)) - assert_almost_equal(sp.sp_timeseries, [1, 1]) + assert all((x == {2, 3} for x in sp.selected_ids)) + assert_almost_equal(sp.sp_timeseries, [1, 1, 1]) + def test_SurvivalProbability_intermittency1_step5_Skipping(universe): """ @@ -190,6 +191,79 @@ def test_SurvivalProbability_intermittency1_step5_Skipping(universe): select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop()) # atom IDs fed set by set sp = waterdynamics.SurvivalProbability(universe, "") sp.run(tau_max=1, stop=9, verbose=True, intermittency=1, step=5) - assert all((x == set([1]) for x in sp.selected_ids)) + assert all((x == {1} for x in sp.selected_ids)) assert len(sp.selected_ids) == beforepopsing - assert_almost_equal(sp.sp_timeseries, [1]) \ No newline at end of file + assert_almost_equal(sp.sp_timeseries, [1, 1]) + + +def test_autocorrelation_alwaysPresent(): + input = [{1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}] + tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input, tau_max=3) + assert all(np.equal(sp_timeseries, 1)) + + +def test_autocorrelation_definedTaus(): + input_ids = [{9, 8, 7}, {8, 7, 6}, {7, 6, 5}, {6, 5, 4}, {5, 4, 3}, {4, 3, 2}, {3, 2, 1}] + tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=3) + assert_almost_equal(sp_timeseries, [1, 2/3., 1/3., 0]) + + +def test_autocorrelation_intermittency1and2(): + """ + Intermittency of 2 means that we still count an atom if it is not present for up to 2 consecutive frames, + but then returns at the following step. + """ + input_ids = [{9, 8}, set(), {8,}, {9,}, {8,}, set(), {9, 8}, set(), {8,}, {9, 8,}] + tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=3, intermittency=2) + # input_ids are modified in place, check if the gaps have been removed correctly + assert all((x == {9, 8} for x in input_ids)) + assert_almost_equal(sp_timeseries, [1, 1, 1, 1]) + + +def test_autocorrelation_intermittency2lacking(): + """ + If an atom is not present for more than 2 consecutive frames, it is considered to have left the region. + """ + input_ids = [{9,}, {}, {}, {}, {9,}, {}, {}, {}, {9,}] + tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=3, intermittency=2) + assert_almost_equal(sp_timeseries, [1, 0, 0, 0]) + + +def test_autocorrelation_intermittency1_windowJump_intermittencyAll(): + """ + Step leads to skipping frames if (tau_max + 1) + (intermittency * 2) < step. + No frames should be skipped so intermittency should be applied to all. + """ + input_ids = [{2, 3}, {3,}, {2, 3}, {3,}, {2,}, {3,}, {2, 3}, {3,}, {2, 3}, {2, 3}] + tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=2, + intermittency=1, window_jump=5) + assert all((x == {2, 3} for x in input_ids)) + assert_almost_equal(sp_timeseries, [1, 1, 1]) + + +def test_autocorrelation_intermittency0_windowBigJump(): + """ + The empty sets are ignored (no interrttency) + """ + input_ids = [{1}, {1}, {1}, set(), set(), {1}, {1}, {1}, set(), set(), {1}, {1}, {1}] + tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=2, window_jump=5) + assert_almost_equal(sp_timeseries, [1, 1, 1]) + + +def test_autocorrelation_intermittency0_windowBigJump_absence(): + """ + In the last frame the molecules are absent + """ + input_ids = [{1}, {1}, {1}, set(), set(), {1}, {1}, {1}, set(), set(), {1}, set(), set()] + tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=2, window_jump=5) + assert_almost_equal(sp_timeseries, [1, 2/3., 2/3.]) + + +def test_autocorrelation_intermittency2_windowBigJump(): + """ + The intermittency corrects the last frame + """ + input_ids = [{1}, {1}, {1}, set(), set(), {1}, {1}, {1}, set(), set(), {1}, set(), set(), {1}] + tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=2, + intermittency=2, window_jump=5) + assert_almost_equal(sp_timeseries, [1, 1, 1]) From bba1474b7e5c4d4cc5624e47ce500482bff48018 Mon Sep 17 00:00:00 2001 From: Mateusz Bieniek Date: Sun, 14 Apr 2019 16:42:32 +0100 Subject: [PATCH 11/40] A more general documentation in the autocorrelation. --- .../analysis/utils/autocorrelation.py | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/package/MDAnalysis/analysis/utils/autocorrelation.py b/package/MDAnalysis/analysis/utils/autocorrelation.py index 6f162e15222..dc4a186f040 100644 --- a/package/MDAnalysis/analysis/utils/autocorrelation.py +++ b/package/MDAnalysis/analysis/utils/autocorrelation.py @@ -21,15 +21,14 @@ # J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 # -""" -A flexible implementation of an autocorrelation function. -""" - import numpy as np + # TODO add documentation def autocorrelation(list_of_sets, tau_max, window_jump=1, intermittency=0): """ + The descrete implementation of the autocorrelation function. + fixme - change to the numpy documentation :param list_of_sets: Modifies in place! :param tau_max: @@ -37,15 +36,16 @@ def autocorrelation(list_of_sets, tau_max, window_jump=1, intermittency=0): :param intermittency: :return: """ - # FIXME - make sure that it is not just a list of numbers + # fixme - add checks if this is a list of sets + # fixme - check dimensions # correct the dataset for gaps (intermittency) _correct_intermittency(intermittency, list_of_sets) - # calculate Survival Probability tau_timeseries = list(range(1, tau_max + 1)) - sp_timeseries_data = [[] for _ in range(tau_max)] + timeseries_data = [[] for _ in range(tau_max)] + # calculate autocorrelation for t in range(0, len(list_of_sets), window_jump): Nt = len(list_of_sets[t]) @@ -58,15 +58,15 @@ def autocorrelation(list_of_sets, tau_max, window_jump=1, intermittency=0): # continuous: IDs that survive from t to t + tau and at every frame in between Ntau = len(set.intersection(*list_of_sets[t:t + tau + 1])) - sp_timeseries_data[tau - 1].append(Ntau / float(Nt)) + timeseries_data[tau - 1].append(Ntau / float(Nt)) - sp_timeseries = [np.mean(sp) for sp in sp_timeseries_data] + timeseries = [np.mean(x) for x in timeseries_data] # at time 0 the value has to be one tau_timeseries.insert(0, 0) - sp_timeseries.insert(0, 1) + timeseries.insert(0, 1) - return tau_timeseries, sp_timeseries, sp_timeseries_data + return tau_timeseries, timeseries, timeseries_data def _correct_intermittency(intermittency, id_list, verbose=False): From 3056a7ee2ce4477c652eae2f7eaab693dc9e7b87 Mon Sep 17 00:00:00 2001 From: Mateusz Bieniek Date: Sun, 14 Apr 2019 16:43:03 +0100 Subject: [PATCH 12/40] Updated test cases to illuminate the bug in #2247. --- .../MDAnalysisTests/analysis/test_waterdynamics.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py b/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py index 04e09c48611..1317c77afa2 100644 --- a/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py +++ b/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py @@ -45,8 +45,17 @@ def universe(): def test_HydrogenBondLifetimes(universe): hbl = waterdynamics.HydrogenBondLifetimes( universe, SELECTION1, SELECTION1, 0, 5, 3) + hbl.run(stop=5) + assert_almost_equal(hbl.timeseries[2], 0.75) + + +def test_HydrogenBondLifetimes_growing_continuous(universe): + # The autocorrelation cannot grow + hbl = waterdynamics.HydrogenBondLifetimes( + universe, SELECTION1, SELECTION1, t0=0, tf=9, dtmax=5) hbl.run() - assert_almost_equal(hbl.timeseries[2][1], 0.75, 5) + # Index 0 in frameX[0] refers to the continuous, 1 to intermittent + assert all([frameX >= frameXplus1 for frameX, frameXplus1 in zip(hbl.timeseries, hbl.timeseries[1:])]) def test_WaterOrientationalRelaxation(universe): From 22d642162c186232a4018fd388f797d7c1368c09 Mon Sep 17 00:00:00 2001 From: Mateusz Bieniek Date: Sun, 14 Apr 2019 16:43:55 +0100 Subject: [PATCH 13/40] First switch to using autocorrelation that passes the tests. --- package/MDAnalysis/analysis/waterdynamics.py | 213 +++---------------- 1 file changed, 27 insertions(+), 186 deletions(-) diff --git a/package/MDAnalysis/analysis/waterdynamics.py b/package/MDAnalysis/analysis/waterdynamics.py index d5143fa989d..ad32d75074f 100644 --- a/package/MDAnalysis/analysis/waterdynamics.py +++ b/package/MDAnalysis/analysis/waterdynamics.py @@ -464,8 +464,8 @@ class HydrogenBondLifetimes(object): (HBL) proposed by D.C. Rapaport [Rapaport1983]_. From this function we can obtain the continuous and intermittent behavior of hydrogen bonds in time. A fast decay in these parameters indicate a fast change in HBs - connectivity. A slow decay indicate very stables hydrogen bonds, like in - ice. The HBL is also know as "Hydrogen Bond Population Relaxation" + connectivity. A slow decay indicate stable hydrogen bonds (e.g. ice). + The HBL is also know as "Hydrogen Bond Population Relaxation" (HBPR). In the continuous case we have: .. math:: @@ -507,6 +507,7 @@ class HydrogenBondLifetimes(object): def __init__(self, universe, selection1, selection2, t0, tf, dtmax, nproc=1): + # fixme - use the same interface you used in SP self.universe = universe self.selection1 = selection1 self.selection2 = selection2 @@ -516,196 +517,33 @@ def __init__(self, universe, selection1, selection2, t0, tf, dtmax, self.nproc = nproc self.timeseries = None - def _getC_i(self, HBP, t0, t): - """ - This function give the intermitent Hydrogen Bond Lifetime - C_i = / between t0 and t - """ - C_i = 0 - for i in range(len(HBP[t0])): - for j in range(len(HBP[t])): - if (HBP[t0][i][0] == HBP[t][j][0] and HBP[t0][i][1] == HBP[t][j][1]): - C_i += 1 - break - if len(HBP[t0]) == 0: - return 0.0 - else: - return float(C_i) / len(HBP[t0]) - def _getC_c(self, HBP, t0, t): - """ - This function give the continous Hydrogen Bond Lifetime - C_c = / between t0 and t - """ - C_c = 0 - dt = 1 - begt0 = t0 - HBP_cp = HBP - HBP_t0 = HBP[t0] - newHBP = [] - if t0 == t: - return 1.0 - while t0 + dt <= t: - for i in range(len(HBP_t0)): - for j in range(len(HBP_cp[t0 + dt])): - if (HBP_t0[i][0] == HBP_cp[t0 + dt][j][0] and - HBP_t0[i][1] == HBP_cp[t0 + dt][j][1]): - newHBP.append(HBP_t0[i]) - break - C_c = len(newHBP) - t0 += dt - HBP_t0 = newHBP - newHBP = [] - if len(HBP[begt0]) == 0: - return 0 - else: - return C_c / float(len(HBP[begt0])) + def run(self, **kwargs): + # Get all hydrogen bonds + hydrogen_bonds = MDAnalysis.analysis.hbonds.HydrogenBondAnalysis(self.universe, + self.selection1, + self.selection2, + distance=3.5, + angle=120.0) + # fixme - use it only for the time requested + # fixme - this currently ignores the constructor parameters t0 tf etc + hydrogen_bonds.run(**kwargs) - def _intervC_c(self, HBP, t0, tf, dt): - """ - This function gets all the data for the h(t0)h(t0+dt)', where - t0 = 1,2,3,...,tf. This function give us one point of the final plot - HBL vs t - """ - a = 0 - count = 0 - for i in range(len(HBP)): - if (t0 + dt <= tf): - if t0 == t0 + dt: - b = self._getC_c(HBP, t0, t0) - break - b = self._getC_c(HBP, t0, t0 + dt) - t0 += dt - a += b - count += 1 - if count == 0: - return 1.0 - return a / count - - def _intervC_i(self, HBP, t0, tf, dt): - """ - This function gets all the data for the h(t0)h(t0+dt), where - t0 = 1,2,3,...,tf. This function give us a point of the final plot - HBL vs t - """ - a = 0 - count = 0 - for i in range(len(HBP)): - if (t0 + dt <= tf): - b = self._getC_i(HBP, t0, t0 + dt) - t0 += dt - a += b - count += 1 - return a / count - - def _finalGraphGetC_i(self, HBP, t0, tf, maxdt): - """ - This function gets the final data of the C_i graph. - """ - output = [] - for dt in range(maxdt): - a = self._intervC_i(HBP, t0, tf, dt) - output.append(a) - return output + # Extract the hydrogen bonds IDs only in the format [set(superset(x1,x2), superset(x3,x4)), set(), set()] + hb_ids = [{frozenset(bond[0:2]) for bond in frame} for frame in hydrogen_bonds.timeseries] - def _finalGraphGetC_c(self, HBP, t0, tf, maxdt): - """ - This function gets the final data of the C_c graph. - """ - output = [] - for dt in range(maxdt): - a = self._intervC_c(HBP, t0, tf, dt) - output.append(a) - return output + # Calculate the + tau_timeseries, timeseries, timeseries_data = autocorrelation(hb_ids, self.dtmax) - def _getGraphics(self, HBP, t0, tf, maxdt): - """ - Function that join all the results into a plot. - """ - a = [] - cont = self._finalGraphGetC_c(HBP, t0, tf, maxdt) - inte = self._finalGraphGetC_i(HBP, t0, tf, maxdt) - for i in range(len(cont)): - fix = [cont[i], inte[i]] - a.append(fix) - return a + # fixme - document + self.hydrogen_bonds = hydrogen_bonds.timeseries - def _HBA(self, ts, conn, universe, selAtom1, selAtom2, - verbose=False): - """ - Main function for calculate C_i and C_c in parallel. - """ - finalGetResidue1 = selAtom1 - finalGetResidue2 = selAtom2 - frame = ts.frame - h = MDAnalysis.analysis.hbonds.HydrogenBondAnalysis(universe, - finalGetResidue1, - finalGetResidue2, - distance=3.5, - angle=120.0, - start=frame - 1, - stop=frame) - while True: - try: - h.run(verbose=verbose) - break - except: - print("error") - print("trying again") - sys.stdout.flush() - sys.stdout.flush() - conn.send(h.timeseries[0]) - conn.close() + # user can investigate the distribution and sample size + self.sp_timeseries_data = timeseries_data + self.tau_timeseries = tau_timeseries + self.timeseries = timeseries + return self - def run(self, **kwargs): - """Analyze trajectory and produce timeseries""" - h_list = [] - i = 0 - if (self.nproc > 1): - while i < len(self.universe.trajectory): - jobs = [] - k = i - for j in range(self.nproc): - # start - print("ts=", i + 1) - if i >= len(self.universe.trajectory): - break - conn_parent, conn_child = multiprocessing.Pipe(False) - while True: - try: - # new thread - jobs.append( - (multiprocessing.Process( - target=self._HBA, - args=(self.universe.trajectory[i], - conn_child, self.universe, - self.selection1, self.selection2,)), - conn_parent)) - break - except: - print("error in jobs.append") - jobs[j][0].start() - i = i + 1 - - for j in range(self.nproc): - if k >= len(self.universe.trajectory): - break - rec01 = jobs[j][1] - received = rec01.recv() - h_list.append(received) - jobs[j][0].join() - k += 1 - self.timeseries = self._getGraphics( - h_list, 0, self.tf - 1, self.dtmax) - else: - h_list = MDAnalysis.analysis.hbonds.HydrogenBondAnalysis(self.universe, - self.selection1, - self.selection2, - distance=3.5, - angle=120.0) - h_list.run(**kwargs) - self.timeseries = self._getGraphics( - h_list.timeseries, self.t0, self.tf, self.dtmax) class WaterOrientationalRelaxation(object): @@ -1347,6 +1185,9 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermitte tau from 1 to `tau_max`. Saved in the field tau_timeseries. sp_timeseries : list survival probability for each value of `tau`. Saved in the field sp_timeseries. + sp_timeseries_data: list + raw datapoints from which the average is taken (sp_timeseries). + Time dependancy and distribution can be extracted. """ # backward compatibility (and priority) From 9188f115ee0af2c8ef77fa055186651dbf334c30 Mon Sep 17 00:00:00 2001 From: Mateusz Bieniek Date: Mon, 15 Apr 2019 23:28:11 +0100 Subject: [PATCH 14/40] The documentation updated with the code. The HydrogenBondAnalysis should ideally be extracted from the HydrogenBondLifetime and fed into it in the constructor. --- .../analysis/utils/autocorrelation.py | 59 +++++-- package/MDAnalysis/analysis/waterdynamics.py | 154 +++++++++--------- 2 files changed, 124 insertions(+), 89 deletions(-) diff --git a/package/MDAnalysis/analysis/utils/autocorrelation.py b/package/MDAnalysis/analysis/utils/autocorrelation.py index dc4a186f040..50891765009 100644 --- a/package/MDAnalysis/analysis/utils/autocorrelation.py +++ b/package/MDAnalysis/analysis/utils/autocorrelation.py @@ -24,23 +24,48 @@ import numpy as np -# TODO add documentation def autocorrelation(list_of_sets, tau_max, window_jump=1, intermittency=0): - """ - The descrete implementation of the autocorrelation function. - fixme - change to the numpy documentation - - :param list_of_sets: Modifies in place! - :param tau_max: - :param window_jump: - :param intermittency: - :return: + r"""The descrete implementation of the autocorrelation function. + + Here is a random equation that shows something. + + .. math:: + C_{HB}^c(\tau) = \frac{\sum_{ij}h_{ij}(t_0)h'_{ij}(t_0+\tau)}{\sum_{ij}h_{ij}(t_0)} + + + + fixme - list_of_sets: Modifies in place! + Parameters + ---------- + list_of_sets : list + List of sets, + tau_max : int + The last tau (inclusive) for which to carry out autocorrelation. + window_jump : int, optional + The step for the t0 to perform autocorrelation (without the overlap). Default is 1. + intermittency : int, optional + Helps with the graps in the data. If we want to remove some of the fluctuations and focus on the patterns, + intermittency removes the gaps. The default intermittency=0 which means that if the datapoint is missing at any + frame, it is not counted. With the value of 2 the datapoint can be missing for 2 consecutive frames but it will + be counted as present. + + Returns + -------- + tau_timeseries : list of int + the tau for which the autocorrelation was calculated + timeseries : list of int + the autocorelation values for each of the tau values + timeseries_data : list of list of int + the raw data from which the autocorrelation is computed. The time dependant evolution can be investigated. + + .. versionadded:: 0.19.2 """ # fixme - add checks if this is a list of sets # fixme - check dimensions + # fixme - check parameters? # correct the dataset for gaps (intermittency) - _correct_intermittency(intermittency, list_of_sets) + correct_intermittency(intermittency, list_of_sets) tau_timeseries = list(range(1, tau_max + 1)) timeseries_data = [[] for _ in range(tau_max)] @@ -69,15 +94,21 @@ def autocorrelation(list_of_sets, tau_max, window_jump=1, intermittency=0): return tau_timeseries, timeseries, timeseries_data -def _correct_intermittency(intermittency, id_list, verbose=False): +def correct_intermittency(intermittency, id_list, verbose=False): """ Pre-process Consecutive Intermittency with a single pass over the data. If an atom is absent for a number of frames equal or smaller than the parameter intermittency, then correct the data and remove the absence. ie 7,A,A,7 with intermittency=2 will be replaced by 7,7,7,7, where A=absence - :param intermittency: the max gap allowed and to be corrected - :param id_list: modifies the selecteded IDs in place by adding atoms which left for <= :param intermittency + Parameters + ---------- + intermittency: int + the max gap allowed and to be corrected + id_list: fixme + modifies the selecteded IDs in place by adding atoms which left for <= :param intermittency + verbose: Boolean, optional + print """ if intermittency == 0: return diff --git a/package/MDAnalysis/analysis/waterdynamics.py b/package/MDAnalysis/analysis/waterdynamics.py index ad32d75074f..2635bacbbb3 100644 --- a/package/MDAnalysis/analysis/waterdynamics.py +++ b/package/MDAnalysis/analysis/waterdynamics.py @@ -450,15 +450,16 @@ from six.moves import range, zip_longest import numpy as np -import multiprocessing import MDAnalysis.analysis.hbonds from MDAnalysis.lib.log import ProgressMeter from .utils.autocorrelation import autocorrelation + class HydrogenBondLifetimes(object): r"""Hydrogen bond lifetime analysis + fixme - update the definition of intermittency (refer the user to autocorrelation) This is an autocorrelation function that gives the "Hydrogen Bond Lifetimes" (HBL) proposed by D.C. Rapaport [Rapaport1983]_. From this function we can @@ -492,48 +493,100 @@ class HydrogenBondLifetimes(object): It could be any selection available in MDAnalysis, not just water. selection2 : str Selection string to analize its HBL against selection1 - t0 : int - frame where analysis begins - tf : int - frame where analysis ends - dtmax : int - Maximum dt size, `dtmax` < `tf` or it will crash. - nproc : int - Number of processors to use, by default is 1. - + verbose : Boolean, optional + When True, prints progress and comments to the console. .. versionadded:: 0.11.0 """ - def __init__(self, universe, selection1, selection2, t0, tf, dtmax, - nproc=1): - # fixme - use the same interface you used in SP + def __init__(self, universe, selection1, selection2, t0=None, tf=None, dtmax=None, verbose=False): self.universe = universe self.selection1 = selection1 self.selection2 = selection2 - self.t0 = t0 - self.tf = tf - 1 - self.dtmax = dtmax - self.nproc = nproc - self.timeseries = None + self.verbose = verbose + # backward compatibility + self.start = self.stop = self.tau_max = None + if t0 is not None: + self.start = t0 + warnings.warn("t0 is deprecated, use run(start) instead", category=DeprecationWarning) + + if tf is not None: + self.stop = tf + warnings.warn("tf is deprecated, use run(stop) instead", category=DeprecationWarning) + + if dtmax is not None: + self.tau_max = dtmax + warnings.warn("dtmax is deprecated, use run(tau_max) instead", category=DeprecationWarning) + + + def run(self, tau_max=20, start=0, stop=None, step=1, intermittency=0, verbose=False, **kwargs): + """ + Computes and returns the Survival Probability (SP) timeseries + + Parameters + ---------- + start : int, optional + Zero-based index of the first frame to be analysed + stop : int, optional + Zero-based index of the last frame to be analysed (inclusive) + step : int, optional + Jump every `step`-th frame. This is compatible but independant of the taus used. + Note that `step` and `tau_max` work consistently with intermittency. + tau_max : int, optional + Survival probability is calculated for the range 1 <= `tau` <= `tau_max` + intermittency : int, optional + The maximum number of consecutive frames for which a bond can disappear but be counted as present if it + returns at the next frame. An intermittency of `0` is equivalent to a continuous autocorrelation, which does + not allow for the hydrogen bond disappearance. For example, for `intermittency=2`, any given hbond may + disappear for up to two consecutive frames yet be treated as being present at all frames. + The default is continuous (0). + verbose : Boolean, optional + Print the progress to the console + + Returns + ------- + tau_timeseries : list + tau from 1 to `tau_max`. Saved in the field tau_timeseries. + timeseries : list + survival probability for each value of `tau`. Saved in the field sp_timeseries. + timeseries_data: list + raw datapoints from which the average is taken (sp_timeseries). + Time dependancy and distribution can be extracted. + """ + + # backward compatibility (and priority) + start = self.start if self.start is not None else start + stop = self.stop if self.stop is not None else stop + tau_max = self.tau_max if self.tau_max is not None else tau_max + + # sanity checks + if stop is not None and stop >= len(self.universe.trajectory): + raise ValueError("\"stop\" must be smaller than the number of frames in the trajectory.") + + if stop is None: + stop = len(self.universe.trajectory) + else: + stop = stop + 1 + + if tau_max > (stop - start): + raise ValueError("Too few frames selected for given tau_max.") - def run(self, **kwargs): # Get all hydrogen bonds + # fixme - we should not have the frozen definition of the hydrogen bonds + # fixme - this will be upated with the new HydrogenBond interface hydrogen_bonds = MDAnalysis.analysis.hbonds.HydrogenBondAnalysis(self.universe, self.selection1, self.selection2, distance=3.5, angle=120.0) - # fixme - use it only for the time requested - # fixme - this currently ignores the constructor parameters t0 tf etc - hydrogen_bonds.run(**kwargs) + # Compute the minimal number of hydrogen bonds + hydrogen_bonds.run(start=start, stop=stop, step=step, verbose=verbose, **kwargs) # Extract the hydrogen bonds IDs only in the format [set(superset(x1,x2), superset(x3,x4)), set(), set()] - hb_ids = [{frozenset(bond[0:2]) for bond in frame} for frame in hydrogen_bonds.timeseries] + found_hydrogen_bonds = [{frozenset(bond[0:2]) for bond in frame} for frame in hydrogen_bonds.timeseries] - # Calculate the - tau_timeseries, timeseries, timeseries_data = autocorrelation(hb_ids, self.dtmax) + tau_timeseries, timeseries, timeseries_data = autocorrelation(found_hydrogen_bonds, self.dtmax) # fixme - document self.hydrogen_bonds = hydrogen_bonds.timeseries @@ -1094,62 +1147,13 @@ def __init__(self, universe, selection, t0=None, tf=None, dtmax=None, verbose=Fa self.tau_max = dtmax warnings.warn("dtmax is deprecated, use run(tau_max=dtmax) instead", category=DeprecationWarning) + def print(self, verbose, *args): if self.verbose: print(args) elif verbose: print(args) - def _correct_intermittency(self, intermittency, selected_ids): - """ - Pre-process Consecutive Intermittency with a single pass over the data. - If an atom is absent for a number of frames equal or smaller - than the `intermittency`, then correct the data and remove the absence. - ie 7,A,A,7 with intermittency=2 will be replaced by 7,7,7,7, where A=absence - - Parameters - ---------- - intermittency : int - the max gap allowed and to be corrected - selected_ids: list of ids - modifies the selecteded IDs in place by adding atoms which left for <= `intermittency` - """ - self.print('Correcting the selected IDs for intermittancy (gaps). ') - if intermittency == 0: - return - - for i, ids in enumerate(selected_ids): - # initially update each frame as seen 0 ago (now) - seen_frames_ago = {i: 0 for i in ids} - for j in range(1, intermittency + 2): - for atomid in seen_frames_ago.keys(): - # no more frames - if i + j >= len(selected_ids): - continue - - # if the atom is absent now - if not atomid in selected_ids[i + j]: - # increase its absence counter - seen_frames_ago[atomid] += 1 - continue - - # the atom is found - if seen_frames_ago[atomid] == 0: - # the atom was present in the last frame - continue - - # it was absent more times than allowed - if seen_frames_ago[atomid] > intermittency: - continue - - # the atom was absent but returned (within <= intermittency) - # add it to the frames where it was absent. - # Introduce the corrections. - for k in range(seen_frames_ago[atomid], 0, -1): - selected_ids[i + j - k].add(atomid) - - seen_frames_ago[atomid] = 0 - def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermittency=0, verbose=False): """ From 594183dfef0ec0c2a57b6d5fd64ad1f55ce9ea08 Mon Sep 17 00:00:00 2001 From: Mateusz Bieniek Date: Thu, 18 Apr 2019 13:20:34 +0100 Subject: [PATCH 15/40] Correction for the test to make it consistent with the previous author's test. Our end index is inclusive (5th frame, giving 6 frames). But the other author's analysis was done on 0-4 frames (5 frames). --- package/MDAnalysis/analysis/waterdynamics.py | 24 +++++++------------ .../analysis/test_waterdynamics.py | 5 ++-- 2 files changed, 12 insertions(+), 17 deletions(-) diff --git a/package/MDAnalysis/analysis/waterdynamics.py b/package/MDAnalysis/analysis/waterdynamics.py index 2635bacbbb3..a77751f3618 100644 --- a/package/MDAnalysis/analysis/waterdynamics.py +++ b/package/MDAnalysis/analysis/waterdynamics.py @@ -445,16 +445,16 @@ """ from __future__ import print_function, division, absolute_import -import warnings - from six.moves import range, zip_longest - +import logging +import warnings import numpy as np import MDAnalysis.analysis.hbonds from MDAnalysis.lib.log import ProgressMeter from .utils.autocorrelation import autocorrelation +logger = logging.getLogger('MDAnalysis.analysis.waterdynamics') class HydrogenBondLifetimes(object): @@ -475,6 +475,7 @@ class HydrogenBondLifetimes(object): where :math:`h'_{ij}(t_0+\tau)=1` if there is a H-bond between a pair :math:`ij` during time interval :math:`t_0+\tau` (continuous) and :math:`h'_{ij}(t_0+\tau)=0` otherwise. In the intermittent case we have: + -fixme - update the intermittent definition .. math:: C_{HB}^i(\tau) = \frac{\sum_{ij}h_{ij}(t_0)h_{ij}(t_0+\tau)}{\sum_{ij}h_{ij}(t_0)} @@ -575,6 +576,7 @@ def run(self, tau_max=20, start=0, stop=None, step=1, intermittency=0, verbose=F # Get all hydrogen bonds # fixme - we should not have the frozen definition of the hydrogen bonds # fixme - this will be upated with the new HydrogenBond interface + # fixme - remove this from here and insert it into the example hydrogen_bonds = MDAnalysis.analysis.hbonds.HydrogenBondAnalysis(self.universe, self.selection1, self.selection2, @@ -586,7 +588,7 @@ def run(self, tau_max=20, start=0, stop=None, step=1, intermittency=0, verbose=F # Extract the hydrogen bonds IDs only in the format [set(superset(x1,x2), superset(x3,x4)), set(), set()] found_hydrogen_bonds = [{frozenset(bond[0:2]) for bond in frame} for frame in hydrogen_bonds.timeseries] - tau_timeseries, timeseries, timeseries_data = autocorrelation(found_hydrogen_bonds, self.dtmax) + tau_timeseries, timeseries, timeseries_data = autocorrelation(found_hydrogen_bonds, self.tau_max) # fixme - document self.hydrogen_bonds = hydrogen_bonds.timeseries @@ -1148,13 +1150,6 @@ def __init__(self, universe, selection, t0=None, tf=None, dtmax=None, verbose=Fa warnings.warn("dtmax is deprecated, use run(tau_max=dtmax) instead", category=DeprecationWarning) - def print(self, verbose, *args): - if self.verbose: - print(args) - elif verbose: - print(args) - - def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermittency=0, verbose=False): """ Computes and returns the Survival Probability (SP) timeseries @@ -1232,7 +1227,7 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermitte frame_no = start while frame_no < stop: # we have already added 1 to stop, therefore < if num_frames_to_skip != 0 and frame_loaded_counter == frames_per_window: - self.print(verbose, "Skipping the next %d frames:" % num_frames_to_skip) + logger.info("Skipping the next %d frames:" % num_frames_to_skip) frame_no += num_frames_to_skip frame_loaded_counter = 0 # Correct the number of frames to be loaded after the first window (which starts at t=0, and @@ -1243,7 +1238,7 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermitte # update the frame number self.universe.trajectory[frame_no] - self.print(verbose, "Loading frame:", self.universe.trajectory.ts) + logging.info("Loading frame:", self.universe.trajectory.ts) atoms = self.universe.select_atoms(self.selection) # SP of residues or of atoms @@ -1262,8 +1257,7 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermitte # warn the user if the NaN are found if all(np.isnan(sp_timeseries[1:])): - # fixme - warn the user that the dataset. Use a standardised warning. - print('NaN Error: Most likely data was not found. Check your atom selections. ') + logging.warning('NaN Error: Most likely data was not found. Check your atom selections. ') # user can investigate the distribution and sample size self.sp_timeseries_data = sp_timeseries_data diff --git a/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py b/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py index 1317c77afa2..978b82bc29b 100644 --- a/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py +++ b/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py @@ -44,9 +44,10 @@ def universe(): def test_HydrogenBondLifetimes(universe): hbl = waterdynamics.HydrogenBondLifetimes( - universe, SELECTION1, SELECTION1, 0, 5, 3) + universe, SELECTION1, SELECTION1, 0, 4, 3) hbl.run(stop=5) - assert_almost_equal(hbl.timeseries[2], 0.75) + assert_almost_equal(hbl.timeseries[3], 0.75) + def test_HydrogenBondLifetimes_growing_continuous(universe): From c065f7e9122a84edbb39f1a7dd60a3eeae86d482 Mon Sep 17 00:00:00 2001 From: Mateusz Bieniek Date: Thu, 18 Apr 2019 18:15:26 +0100 Subject: [PATCH 16/40] Extracting the autocorrelation and intermittency into their own functions with their own test cases. Removed the class HydrogenBondLifetimes and moved the autocorrelation functionality into the hbond_analysis class as a .autocorrelation method. This method should replace also hbond_autocorrel.py file. Test cases for the new method .autocorrelation are added. Added a test cases that, with our definition of intermittency, would fail in the case of the hbond_autocorrel.py. --- .../analysis/hbonds/hbond_analysis.py | 55 +++++ .../analysis/hbonds/hbond_autocorrel.py | 5 + .../analysis/utils/autocorrelation.py | 40 ++- package/MDAnalysis/analysis/waterdynamics.py | 138 ++--------- .../MDAnalysisTests/analysis/test_hbonds.py | 33 +++ .../analysis/test_waterdynamics.py | 232 +++++++++--------- 6 files changed, 251 insertions(+), 252 deletions(-) diff --git a/package/MDAnalysis/analysis/hbonds/hbond_analysis.py b/package/MDAnalysis/analysis/hbonds/hbond_analysis.py index 99fb6287b2c..4240662ee34 100644 --- a/package/MDAnalysis/analysis/hbonds/hbond_analysis.py +++ b/package/MDAnalysis/analysis/hbonds/hbond_analysis.py @@ -336,6 +336,8 @@ class HydrogenBondAnalysis_OtherFF(HydrogenBondAnalysis): from MDAnalysis.lib import distances from MDAnalysis.lib.util import deprecate +from ..utils.autocorrelation import autocorrelation, correct_intermittency + logger = logging.getLogger('MDAnalysis.analysis.hbonds') @@ -1440,3 +1442,56 @@ def _make_dict(donors, hydrogens): h2donor.update(_make_dict(s1d, s1h)) return h2donor + + + def autocorrelation(self, tau_max=20, window_step=1, intermittency=0): + """ + Computes and returns the autocorrelation (HydrogenBondLifetimes ) on the computed hydrogen bonds. + + Parameters + ---------- + window_step : int, optional + Jump every `step`-th frame. This is compatible but independant of the taus used. + Note that `step` and `tau_max` work consistently with intermittency. + tau_max : int, optional + Survival probability is calculated for the range 1 <= `tau` <= `tau_max` + intermittency : int, optional + The maximum number of consecutive frames for which a bond can disappear but be counted as present if it + returns at the next frame. An intermittency of `0` is equivalent to a continuous autocorrelation, which does + not allow for the hydrogen bond disappearance. For example, for `intermittency=2`, any given hbond may + disappear for up to two consecutive frames yet be treated as being present at all frames. + The default is continuous (0). + + Returns + ------- + tau_timeseries : list + tau from 1 to `tau_max`. Saved in the field tau_timeseries. + timeseries : list + autcorrelation value for each value of `tau`. Saved in the field timeseries. + timeseries_data: list + raw datapoints from which the average is taken (timeseries). + Time dependancy and distribution can be extracted. + """ + + if self._timeseries is None: + logging.error("Autocorrelation analysis of hydrogen bonds cannot be done before the hydrogen bonds are found") + logging.error("Autocorrelation: Please use the .run() before calling this function") + return + + if self.step != 1: + logging.warning("Autocorrelation function should be carried out on consecutive frames. ") + logging.warning("Autocorrelation: if you would like to allow bonds to break and reform, please use 'intermittency'") + + # Extract the hydrogen bonds IDs only in the format [set(superset(x1,x2), superset(x3,x4)), ..] + hydrogen_bonds = self.timeseries + found_hydrogen_bonds = [{frozenset(bond[0:2]) for bond in frame} for frame in hydrogen_bonds] + + intermittent_hbonds = correct_intermittency(found_hydrogen_bonds, intermittency=intermittency) + tau_timeseries, timeseries, timeseries_data = autocorrelation(intermittent_hbonds, tau_max, + window_step=window_step) + self.acf_tau_timeseries = tau_timeseries + self.acf_timeseries = timeseries + # user can investigate the distribution and sample size + self.acf_timeseries_data = timeseries_data + + return self diff --git a/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py b/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py index ebb2933c317..93b332e6ec7 100644 --- a/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py +++ b/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py @@ -296,6 +296,11 @@ def __init__(self, universe, nruns=1, # number of times to iterate through the trajectory nsamples=50, # number of different points to sample in a run pbc=True): + + warnings.warn("This class is deprecated, use analysis.hbonds.HydrogenBondAnalysis " + "which has .autocorrelation function", + category=DeprecationWarning) + self.u = universe # check that slicing is possible try: diff --git a/package/MDAnalysis/analysis/utils/autocorrelation.py b/package/MDAnalysis/analysis/utils/autocorrelation.py index 50891765009..efb25c40623 100644 --- a/package/MDAnalysis/analysis/utils/autocorrelation.py +++ b/package/MDAnalysis/analysis/utils/autocorrelation.py @@ -22,9 +22,10 @@ # import numpy as np +from copy import deepcopy -def autocorrelation(list_of_sets, tau_max, window_jump=1, intermittency=0): +def autocorrelation(list_of_sets, tau_max, window_step=1): r"""The descrete implementation of the autocorrelation function. Here is a random equation that shows something. @@ -33,7 +34,6 @@ def autocorrelation(list_of_sets, tau_max, window_jump=1, intermittency=0): C_{HB}^c(\tau) = \frac{\sum_{ij}h_{ij}(t_0)h'_{ij}(t_0+\tau)}{\sum_{ij}h_{ij}(t_0)} - fixme - list_of_sets: Modifies in place! Parameters ---------- @@ -41,8 +41,9 @@ def autocorrelation(list_of_sets, tau_max, window_jump=1, intermittency=0): List of sets, tau_max : int The last tau (inclusive) for which to carry out autocorrelation. - window_jump : int, optional + window_step : int, optional The step for the t0 to perform autocorrelation (without the overlap). Default is 1. + fixme - move intermittency to the right place intermittency : int, optional Helps with the graps in the data. If we want to remove some of the fluctuations and focus on the patterns, intermittency removes the gaps. The default intermittency=0 which means that if the datapoint is missing at any @@ -64,14 +65,11 @@ def autocorrelation(list_of_sets, tau_max, window_jump=1, intermittency=0): # fixme - check dimensions # fixme - check parameters? - # correct the dataset for gaps (intermittency) - correct_intermittency(intermittency, list_of_sets) - tau_timeseries = list(range(1, tau_max + 1)) timeseries_data = [[] for _ in range(tau_max)] # calculate autocorrelation - for t in range(0, len(list_of_sets), window_jump): + for t in range(0, len(list_of_sets), window_step): Nt = len(list_of_sets[t]) if Nt == 0: @@ -93,8 +91,8 @@ def autocorrelation(list_of_sets, tau_max, window_jump=1, intermittency=0): return tau_timeseries, timeseries, timeseries_data - -def correct_intermittency(intermittency, id_list, verbose=False): +# fixme - is intermittency consitent with list of sets of sets? (hydrogen bonds) +def correct_intermittency(list_of_sets, intermittency): """ Pre-process Consecutive Intermittency with a single pass over the data. If an atom is absent for a number of frames equal or smaller @@ -103,30 +101,27 @@ def correct_intermittency(intermittency, id_list, verbose=False): Parameters ---------- + id_list: list of sets + returns a new list with the IDs with added IDs which disappeared for <= :param intermittency intermittency: int the max gap allowed and to be corrected - id_list: fixme - modifies the selecteded IDs in place by adding atoms which left for <= :param intermittency - verbose: Boolean, optional - print """ if intermittency == 0: - return + return list_of_sets - if verbose: - print('Correcting the selected IDs for intermittancy (gaps). ') + list_of_sets = deepcopy(list_of_sets) - for i, ids in enumerate(id_list): + for i, ids in enumerate(list_of_sets): # initially update each frame as seen 0 ago (now) seen_frames_ago = {i: 0 for i in ids} for j in range(1, intermittency + 2): for atomid in seen_frames_ago.keys(): # no more frames - if i + j >= len(id_list): + if i + j >= len(list_of_sets): continue # if the atom is absent now - if not atomid in id_list[i + j]: + if not atomid in list_of_sets[i + j]: # increase its absence counter seen_frames_ago[atomid] += 1 continue @@ -140,10 +135,11 @@ def correct_intermittency(intermittency, id_list, verbose=False): if seen_frames_ago[atomid] > intermittency: continue - # the atom was absent but returned (within <= intermittency) + # the atom was absent but returned (within <= intermittency_value) # add it to the frames where it was absent. # Introduce the corrections. for k in range(seen_frames_ago[atomid], 0, -1): - id_list[i + j - k].add(atomid) + list_of_sets[i + j - k].add(atomid) - seen_frames_ago[atomid] = 0 \ No newline at end of file + seen_frames_ago[atomid] = 0 + return list_of_sets \ No newline at end of file diff --git a/package/MDAnalysis/analysis/waterdynamics.py b/package/MDAnalysis/analysis/waterdynamics.py index a77751f3618..1f578f2a7ed 100644 --- a/package/MDAnalysis/analysis/waterdynamics.py +++ b/package/MDAnalysis/analysis/waterdynamics.py @@ -450,32 +450,30 @@ import warnings import numpy as np -import MDAnalysis.analysis.hbonds from MDAnalysis.lib.log import ProgressMeter -from .utils.autocorrelation import autocorrelation +from .utils.autocorrelation import autocorrelation, correct_intermittency logger = logging.getLogger('MDAnalysis.analysis.waterdynamics') class HydrogenBondLifetimes(object): r"""Hydrogen bond lifetime analysis - fixme - update the definition of intermittency (refer the user to autocorrelation) + fixme - deprecate it - This is an autocorrelation function that gives the "Hydrogen Bond Lifetimes" + This is a autocorrelation function that gives the "Hydrogen Bond Lifetimes" (HBL) proposed by D.C. Rapaport [Rapaport1983]_. From this function we can obtain the continuous and intermittent behavior of hydrogen bonds in time. A fast decay in these parameters indicate a fast change in HBs - connectivity. A slow decay indicate stable hydrogen bonds (e.g. ice). - The HBL is also know as "Hydrogen Bond Population Relaxation" - (HBPR). In the continuous case we have: + connectivity. A slow decay indicate very stables hydrogen bonds, like in + ice. The HBL is also know as "Hydrogen Bond Population Relaxation" + (HBPR). In the continuos case we have: .. math:: C_{HB}^c(\tau) = \frac{\sum_{ij}h_{ij}(t_0)h'_{ij}(t_0+\tau)}{\sum_{ij}h_{ij}(t_0)} where :math:`h'_{ij}(t_0+\tau)=1` if there is a H-bond between a pair - :math:`ij` during time interval :math:`t_0+\tau` (continuous) and + :math:`ij` during time interval :math:`t_0+\tau` (continuos) and :math:`h'_{ij}(t_0+\tau)=0` otherwise. In the intermittent case we have: - -fixme - update the intermittent definition .. math:: C_{HB}^i(\tau) = \frac{\sum_{ij}h_{ij}(t_0)h_{ij}(t_0+\tau)}{\sum_{ij}h_{ij}(t_0)} @@ -494,111 +492,24 @@ class HydrogenBondLifetimes(object): It could be any selection available in MDAnalysis, not just water. selection2 : str Selection string to analize its HBL against selection1 - verbose : Boolean, optional - When True, prints progress and comments to the console. + t0 : int + frame where analysis begins + tf : int + frame where analysis ends + dtmax : int + Maximum dt size, `dtmax` < `tf` or it will crash. + nproc : int + Number of processors to use, by default is 1. + .. versionadded:: 0.11.0 """ def __init__(self, universe, selection1, selection2, t0=None, tf=None, dtmax=None, verbose=False): - self.universe = universe - self.selection1 = selection1 - self.selection2 = selection2 - self.verbose = verbose - - # backward compatibility - self.start = self.stop = self.tau_max = None - if t0 is not None: - self.start = t0 - warnings.warn("t0 is deprecated, use run(start) instead", category=DeprecationWarning) - - if tf is not None: - self.stop = tf - warnings.warn("tf is deprecated, use run(stop) instead", category=DeprecationWarning) - - if dtmax is not None: - self.tau_max = dtmax - warnings.warn("dtmax is deprecated, use run(tau_max) instead", category=DeprecationWarning) - - - def run(self, tau_max=20, start=0, stop=None, step=1, intermittency=0, verbose=False, **kwargs): - """ - Computes and returns the Survival Probability (SP) timeseries - - Parameters - ---------- - start : int, optional - Zero-based index of the first frame to be analysed - stop : int, optional - Zero-based index of the last frame to be analysed (inclusive) - step : int, optional - Jump every `step`-th frame. This is compatible but independant of the taus used. - Note that `step` and `tau_max` work consistently with intermittency. - tau_max : int, optional - Survival probability is calculated for the range 1 <= `tau` <= `tau_max` - intermittency : int, optional - The maximum number of consecutive frames for which a bond can disappear but be counted as present if it - returns at the next frame. An intermittency of `0` is equivalent to a continuous autocorrelation, which does - not allow for the hydrogen bond disappearance. For example, for `intermittency=2`, any given hbond may - disappear for up to two consecutive frames yet be treated as being present at all frames. - The default is continuous (0). - verbose : Boolean, optional - Print the progress to the console - - Returns - ------- - tau_timeseries : list - tau from 1 to `tau_max`. Saved in the field tau_timeseries. - timeseries : list - survival probability for each value of `tau`. Saved in the field sp_timeseries. - timeseries_data: list - raw datapoints from which the average is taken (sp_timeseries). - Time dependancy and distribution can be extracted. - """ - - # backward compatibility (and priority) - start = self.start if self.start is not None else start - stop = self.stop if self.stop is not None else stop - tau_max = self.tau_max if self.tau_max is not None else tau_max - - # sanity checks - if stop is not None and stop >= len(self.universe.trajectory): - raise ValueError("\"stop\" must be smaller than the number of frames in the trajectory.") - - if stop is None: - stop = len(self.universe.trajectory) - else: - stop = stop + 1 - - if tau_max > (stop - start): - raise ValueError("Too few frames selected for given tau_max.") - - # Get all hydrogen bonds - # fixme - we should not have the frozen definition of the hydrogen bonds - # fixme - this will be upated with the new HydrogenBond interface - # fixme - remove this from here and insert it into the example - hydrogen_bonds = MDAnalysis.analysis.hbonds.HydrogenBondAnalysis(self.universe, - self.selection1, - self.selection2, - distance=3.5, - angle=120.0) - # Compute the minimal number of hydrogen bonds - hydrogen_bonds.run(start=start, stop=stop, step=step, verbose=verbose, **kwargs) - - # Extract the hydrogen bonds IDs only in the format [set(superset(x1,x2), superset(x3,x4)), set(), set()] - found_hydrogen_bonds = [{frozenset(bond[0:2]) for bond in frame} for frame in hydrogen_bonds.timeseries] - - tau_timeseries, timeseries, timeseries_data = autocorrelation(found_hydrogen_bonds, self.tau_max) - - # fixme - document - self.hydrogen_bonds = hydrogen_bonds.timeseries - - # user can investigate the distribution and sample size - self.sp_timeseries_data = timeseries_data - self.tau_timeseries = tau_timeseries - self.timeseries = timeseries - return self - + warnings.warn("This class is deprecated, use analysis.hbonds.HydrogenBondAnalysis " + "which has .autocorrelation function", + category=DeprecationWarning) + pass class WaterOrientationalRelaxation(object): @@ -1207,7 +1118,7 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermitte raise ValueError("Too few frames selected for given tau_max.") # preload the frames (atom IDs) to a list of sets - self.selected_ids = [] + self._selected_ids = [] # fixme - to parallise: the section should be rewritten so that this loop only creates a list of indices, # on which the parallel _single_frame can be applied. @@ -1243,7 +1154,7 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermitte # SP of residues or of atoms ids = atoms.residues.resids if residues else atoms.ids - self.selected_ids.append(set(ids)) + self._selected_ids.append(set(ids)) frame_no += 1 frame_loaded_counter += 1 @@ -1252,8 +1163,9 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermitte # and for extra frames that were loaded (intermittency) window_jump = step - num_frames_to_skip - tau_timeseries, sp_timeseries, sp_timeseries_data = \ - autocorrelation(self.selected_ids, tau_max, window_jump,intermittency) + self._intermittent_selected_ids = correct_intermittency(self._selected_ids, intermittency=intermittency) + tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(self._intermittent_selected_ids, + tau_max, window_jump) # warn the user if the NaN are found if all(np.isnan(sp_timeseries[1:])): diff --git a/testsuite/MDAnalysisTests/analysis/test_hbonds.py b/testsuite/MDAnalysisTests/analysis/test_hbonds.py index b10cce2624e..cb00248b087 100644 --- a/testsuite/MDAnalysisTests/analysis/test_hbonds.py +++ b/testsuite/MDAnalysisTests/analysis/test_hbonds.py @@ -399,6 +399,39 @@ def test_table_atoms(self, h, normalized_timeseries, reference_table): assert_array_equal(h.table.field(name), ref, err_msg="resname for {0} do not match (Issue #801)") +class TestHydrogenBondAnalysisAutocorrelation(object): + @staticmethod + @pytest.fixture(scope='class') + def universe(): + return MDAnalysis.Universe(waterPSF, waterDCD) + + @staticmethod + @pytest.fixture(scope='class') + def selection(): + return "byres name OH2" + + def test_HydrogenBondLifetimes(self, universe, selection): + hbonds = MDAnalysis.analysis.hbonds.HydrogenBondAnalysis(universe, + selection, + selection, + distance=3.5, + angle=120.0) + hbonds.run(start=0, stop=4) + hbonds.autocorrelation(tau_max=3) + assert_almost_equal(hbonds.acf_timeseries[3], 0.75) + + def test_HydrogenBondLifetimes_growing_continuous(self, universe, selection): + hbonds = MDAnalysis.analysis.hbonds.HydrogenBondAnalysis(universe, + selection, + selection, + distance=3.5, + angle=120.0) + hbonds.run(start=0, stop=9) + hbonds.autocorrelation(tau_max=5) + assert all([frame >= framePlus1 for frame, framePlus1 in zip(hbonds.acf_timeseries, hbonds.acf_timeseries[1:])]) + + + class TestHydrogenBondAnalysisTIP3PHeavyPBC(object): @staticmethod @pytest.fixture(scope='class') diff --git a/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py b/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py index 978b82bc29b..e9a4440c7c5 100644 --- a/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py +++ b/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py @@ -23,7 +23,7 @@ from __future__ import print_function, absolute_import import MDAnalysis from MDAnalysis.analysis import waterdynamics -from MDAnalysis.analysis.utils.autocorrelation import autocorrelation +from MDAnalysis.analysis.utils.autocorrelation import autocorrelation, correct_intermittency from MDAnalysisTests.datafiles import waterPSF, waterDCD @@ -42,23 +42,6 @@ def universe(): return MDAnalysis.Universe(waterPSF, waterDCD) -def test_HydrogenBondLifetimes(universe): - hbl = waterdynamics.HydrogenBondLifetimes( - universe, SELECTION1, SELECTION1, 0, 4, 3) - hbl.run(stop=5) - assert_almost_equal(hbl.timeseries[3], 0.75) - - - -def test_HydrogenBondLifetimes_growing_continuous(universe): - # The autocorrelation cannot grow - hbl = waterdynamics.HydrogenBondLifetimes( - universe, SELECTION1, SELECTION1, t0=0, tf=9, dtmax=5) - hbl.run() - # Index 0 in frameX[0] refers to the continuous, 1 to intermittent - assert all([frameX >= frameXplus1 for frameX, frameXplus1 in zip(hbl.timeseries, hbl.timeseries[1:])]) - - def test_WaterOrientationalRelaxation(universe): wor = waterdynamics.WaterOrientationalRelaxation( universe, SELECTION1, 0, 5, 2) @@ -96,77 +79,24 @@ def test_MeanSquareDisplacement_zeroMolecules(universe): assert_almost_equal(msd_zero.timeseries[1], 0.0) -def test_SurvivalProbability_t0tf(universe): - with patch.object(universe, 'select_atoms') as select_atoms_mock: - ids = [(0, ), (0, ), (7, 6, 5), (6, 5, 4), (5, 4, 3), (4, 3, 2), (3, 2, 1), (0, )] - select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop(2)) # atom IDs fed set by set - sp = waterdynamics.SurvivalProbability(universe, "") - sp.run(tau_max=3, start=2, stop=6) - assert_almost_equal(sp.sp_timeseries, [1, 2 / 3.0, 1 / 3.0, 0]) - - -def test_SurvivalProbability_definedTaus(universe): - with patch.object(universe, 'select_atoms') as select_atoms_mock: - ids = [(9, 8, 7), (8, 7, 6), (7, 6, 5), (6, 5, 4), (5, 4, 3), (4, 3, 2), (3, 2, 1)] - select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop()) # atom IDs fed set by set - sp = waterdynamics.SurvivalProbability(universe, "") - sp.run(tau_max=3, start=0, stop=6, verbose=True) - assert_almost_equal(sp.sp_timeseries, [1, 2 / 3.0, 1 / 3.0, 0]) - - -def test_SurvivalProbability_zeroMolecules(universe): - # no atom IDs found - with patch.object(universe, 'select_atoms', return_value=Mock(ids=[])) as select_atoms_mock: - sp = waterdynamics.SurvivalProbability(universe, "") - sp.run(tau_max=3, start=3, stop=6, verbose=True) - assert all(np.isnan(sp.sp_timeseries[1:])) - - -def test_SurvivalProbability_alwaysPresent(universe): - # always the same atom IDs found, 7 and 8 - with patch.object(universe, 'select_atoms', return_value=Mock(ids=[7, 8])) as select_atoms_mock: - sp = waterdynamics.SurvivalProbability(universe, "") - sp.run(tau_max=3, start=0, stop=6, verbose=True) - assert all(np.equal(sp.sp_timeseries, 1)) - - -def test_SurvivalProbability_stepLargerThanDtmax(universe): - # Testing if the frames are skipped correctly - with patch.object(universe, 'select_atoms', return_value=Mock(ids=(1,))) as select_atoms_mock: - sp = waterdynamics.SurvivalProbability(universe, "") - sp.run(tau_max=2, step=5, stop=9, verbose=True) - assert_equal(sp.sp_timeseries, [1, 1, 1]) - # with tau_max=2 for all the frames we only read 6 of them - # this is because the frames which are not used are skipped, and therefore 'select_atoms' - assert universe.trajectory.n_frames > 6 - assert_equal(select_atoms_mock.call_count, 6) - - -def test_SurvivalProbability_stepEqualDtMax(universe): - with patch.object(universe, 'select_atoms', return_value=Mock(ids=(1,))) as select_atoms_mock: - sp = waterdynamics.SurvivalProbability(universe, "") - sp.run(tau_max=4, step=5, stop=9, verbose=True) - # all frames from 0, with 9 inclusive - assert_equal(select_atoms_mock.call_count, 10) - - def test_SurvivalProbability_intermittency1and2(universe): """ Intermittency of 2 means that we still count an atom if it is not present for up to 2 consecutive frames, but then returns at the following step. """ with patch.object(universe, 'select_atoms') as select_atoms_mock: - ids = [(9, 8), (), (8,), (9,), (8,), (), (9,8), (), (8,), (9,8,)] + ids = [(9, 8), (), (8,), (9,), (8,), (), (9, 8), (), (8,), (9, 8)] select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop()) # atom IDs fed set by set sp = waterdynamics.SurvivalProbability(universe, "") sp.run(tau_max=3, stop=9, verbose=True, intermittency=2) - assert all((x == {9, 8} for x in sp.selected_ids)) + assert all(x == {9, 8} for x in sp._intermittent_selected_ids) assert_almost_equal(sp.sp_timeseries, [1, 1, 1, 1]) def test_SurvivalProbability_intermittency2lacking(universe): """ - If an atom is not present for more than 2 consecutive frames, it is considered to have left the region. + If an atom is not present for more than 2 consecutive frames, + it is considered to have left the region. """ with patch.object(universe, 'select_atoms') as select_atoms_mock: ids = [(9,), (), (), (), (9,), (), (), (), (9,)] @@ -186,7 +116,7 @@ def test_SurvivalProbability_intermittency1_step5_noSkipping(universe): select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop()) # atom IDs fed set by set sp = waterdynamics.SurvivalProbability(universe, "") sp.run(tau_max=2, stop=9, verbose=True, intermittency=1, step=5) - assert all((x == {2, 3} for x in sp.selected_ids)) + assert all((x == {2, 3} for x in sp._intermittent_selected_ids)) assert_almost_equal(sp.sp_timeseries, [1, 1, 1]) @@ -201,11 +131,42 @@ def test_SurvivalProbability_intermittency1_step5_Skipping(universe): select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop()) # atom IDs fed set by set sp = waterdynamics.SurvivalProbability(universe, "") sp.run(tau_max=1, stop=9, verbose=True, intermittency=1, step=5) - assert all((x == {1} for x in sp.selected_ids)) - assert len(sp.selected_ids) == beforepopsing + assert all((x == {1} for x in sp._intermittent_selected_ids)) + assert len(sp._selected_ids) == beforepopsing assert_almost_equal(sp.sp_timeseries, [1, 1]) +def test_intermittency_none(): + # No changes asked - returns the same data + input_ids = [{1}, {1}, {1}, set(), set(), {1}, {1}, {1}, set(), set(), {1}, set(), set(), {1}] + corrected = correct_intermittency(input_ids, intermittency=0) + assert all(x == y for x,y in zip(input_ids, corrected)) + + +def test_intermittency_1and2(): + # The maximum gap in the dataset is 2, so the IDs are always present after correction + input_ids = [{9, 8}, set(), {8, }, {9, }, {8, }, set(), {9, 8}, set(), {8, }, {9, 8, }] + corrected = correct_intermittency(input_ids, intermittency=2) + assert all((x == {9, 8} for x in corrected)) + + +def test_intermittency_2tooShort(): + #The IDs are abscent for too long/ + input_ids = [{9,}, {}, {}, {}, {9,}, {}, {}, {}, {9,}] + corrected = correct_intermittency(input_ids, intermittency=2) + assert all(x == y for x, y in zip(input_ids, corrected)) + + +def test_intermittency_setsOfSets(): + # Verificaiton for the case of hydrogen bonds (sets of sets) + input_ids = [{frozenset({1,2}), frozenset({3, 4})},set(), set(), + {frozenset({1, 2}), frozenset({3, 4})}, set(), set(), + {frozenset({1, 2}), frozenset({3, 4})}, set(), set(), + {frozenset({1, 2}), frozenset({3, 4})}] + corrected = correct_intermittency(input_ids, intermittency=2) + assert all((x == {frozenset({1, 2}), frozenset({3, 4})} for x in corrected)) + + def test_autocorrelation_alwaysPresent(): input = [{1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}] tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input, tau_max=3) @@ -218,62 +179,99 @@ def test_autocorrelation_definedTaus(): assert_almost_equal(sp_timeseries, [1, 2/3., 1/3., 0]) -def test_autocorrelation_intermittency1and2(): - """ - Intermittency of 2 means that we still count an atom if it is not present for up to 2 consecutive frames, - but then returns at the following step. - """ - input_ids = [{9, 8}, set(), {8,}, {9,}, {8,}, set(), {9, 8}, set(), {8,}, {9, 8,}] - tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=3, intermittency=2) - # input_ids are modified in place, check if the gaps have been removed correctly - assert all((x == {9, 8} for x in input_ids)) - assert_almost_equal(sp_timeseries, [1, 1, 1, 1]) - - -def test_autocorrelation_intermittency2lacking(): - """ - If an atom is not present for more than 2 consecutive frames, it is considered to have left the region. - """ - input_ids = [{9,}, {}, {}, {}, {9,}, {}, {}, {}, {9,}] - tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=3, intermittency=2) - assert_almost_equal(sp_timeseries, [1, 0, 0, 0]) - - def test_autocorrelation_intermittency1_windowJump_intermittencyAll(): """ Step leads to skipping frames if (tau_max + 1) + (intermittency * 2) < step. No frames should be skipped so intermittency should be applied to all. """ input_ids = [{2, 3}, {3,}, {2, 3}, {3,}, {2,}, {3,}, {2, 3}, {3,}, {2, 3}, {2, 3}] - tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=2, - intermittency=1, window_jump=5) - assert all((x == {2, 3} for x in input_ids)) + corrected = correct_intermittency(input_ids, intermittency=1) + tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(corrected, tau_max=2, + window_step=5) + assert all((x == {2, 3} for x in corrected)) assert_almost_equal(sp_timeseries, [1, 1, 1]) -def test_autocorrelation_intermittency0_windowBigJump(): - """ - The empty sets are ignored (no interrttency) - """ +def test_autocorrelation_windowBigJump(): + #The empty sets are ignored (no intermittency) input_ids = [{1}, {1}, {1}, set(), set(), {1}, {1}, {1}, set(), set(), {1}, {1}, {1}] - tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=2, window_jump=5) + tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=2, window_step=5) assert_almost_equal(sp_timeseries, [1, 1, 1]) -def test_autocorrelation_intermittency0_windowBigJump_absence(): - """ - In the last frame the molecules are absent - """ +def test_autocorrelation_windowBigJump_absence(): + # In the last frame the molecules are absent input_ids = [{1}, {1}, {1}, set(), set(), {1}, {1}, {1}, set(), set(), {1}, set(), set()] - tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=2, window_jump=5) + tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=2, window_step=5) assert_almost_equal(sp_timeseries, [1, 2/3., 2/3.]) +def test_autocorrelation_intermittency1_many(): + input_ids = [{1}, set(), {1}, set(), {1}, set(), {1}, set(), {1}, set(), {1}, set(), {1}, set(), {1}] + corrected = correct_intermittency(input_ids, intermittency=1) + tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(corrected, tau_max=14, + window_step=5) + assert_almost_equal(sp_timeseries, [1] * 15) + + def test_autocorrelation_intermittency2_windowBigJump(): - """ - The intermittency corrects the last frame - """ + # The intermittency corrects the last frame input_ids = [{1}, {1}, {1}, set(), set(), {1}, {1}, {1}, set(), set(), {1}, set(), set(), {1}] - tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=2, - intermittency=2, window_jump=5) + corrected = correct_intermittency(input_ids, intermittency=2) + tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(corrected, tau_max=2, + window_step=5) assert_almost_equal(sp_timeseries, [1, 1, 1]) + + +def test_SurvivalProbability_t0tf(universe): + with patch.object(universe, 'select_atoms') as select_atoms_mock: + ids = [(0, ), (0, ), (7, 6, 5), (6, 5, 4), (5, 4, 3), (4, 3, 2), (3, 2, 1), (0, )] + select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop(2)) # atom IDs fed set by set + sp = waterdynamics.SurvivalProbability(universe, "") + sp.run(tau_max=3, start=2, stop=6) + assert_almost_equal(sp.sp_timeseries, [1, 2 / 3.0, 1 / 3.0, 0]) + + +def test_SurvivalProbability_definedTaus(universe): + with patch.object(universe, 'select_atoms') as select_atoms_mock: + ids = [(9, 8, 7), (8, 7, 6), (7, 6, 5), (6, 5, 4), (5, 4, 3), (4, 3, 2), (3, 2, 1)] + select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop()) # atom IDs fed set by set + sp = waterdynamics.SurvivalProbability(universe, "") + sp.run(tau_max=3, start=0, stop=6, verbose=True) + assert_almost_equal(sp.sp_timeseries, [1, 2 / 3.0, 1 / 3.0, 0]) + + +def test_SurvivalProbability_zeroMolecules(universe): + # no atom IDs found + with patch.object(universe, 'select_atoms', return_value=Mock(ids=[])) as select_atoms_mock: + sp = waterdynamics.SurvivalProbability(universe, "") + sp.run(tau_max=3, start=3, stop=6, verbose=True) + assert all(np.isnan(sp.sp_timeseries[1:])) + + +def test_SurvivalProbability_alwaysPresent(universe): + # always the same atom IDs found, 7 and 8 + with patch.object(universe, 'select_atoms', return_value=Mock(ids=[7, 8])) as select_atoms_mock: + sp = waterdynamics.SurvivalProbability(universe, "") + sp.run(tau_max=3, start=0, stop=6, verbose=True) + assert all(np.equal(sp.sp_timeseries, 1)) + + +def test_SurvivalProbability_stepLargerThanDtmax(universe): + # Testing if the frames are skipped correctly + with patch.object(universe, 'select_atoms', return_value=Mock(ids=(1,))) as select_atoms_mock: + sp = waterdynamics.SurvivalProbability(universe, "") + sp.run(tau_max=2, step=5, stop=9, verbose=True) + assert_equal(sp.sp_timeseries, [1, 1, 1]) + # with tau_max=2 for all the frames we only read 6 of them + # this is because the frames which are not used are skipped, and therefore 'select_atoms' + assert universe.trajectory.n_frames > 6 + assert_equal(select_atoms_mock.call_count, 6) + + +def test_SurvivalProbability_stepEqualDtMax(universe): + with patch.object(universe, 'select_atoms', return_value=Mock(ids=(1,))) as select_atoms_mock: + sp = waterdynamics.SurvivalProbability(universe, "") + sp.run(tau_max=4, step=5, stop=9, verbose=True) + # all frames from 0, with 9 inclusive + assert_equal(select_atoms_mock.call_count, 10) From 7514377698b43787760512b55cec99c520ca4d3f Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Mon, 29 Apr 2019 13:08:31 +0100 Subject: [PATCH 17/40] Updated logging. --- package/MDAnalysis/analysis/waterdynamics.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/package/MDAnalysis/analysis/waterdynamics.py b/package/MDAnalysis/analysis/waterdynamics.py index 1f578f2a7ed..06476dc4346 100644 --- a/package/MDAnalysis/analysis/waterdynamics.py +++ b/package/MDAnalysis/analysis/waterdynamics.py @@ -509,7 +509,6 @@ def __init__(self, universe, selection1, selection2, t0=None, tf=None, dtmax=Non warnings.warn("This class is deprecated, use analysis.hbonds.HydrogenBondAnalysis " "which has .autocorrelation function", category=DeprecationWarning) - pass class WaterOrientationalRelaxation(object): @@ -1138,7 +1137,7 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermitte frame_no = start while frame_no < stop: # we have already added 1 to stop, therefore < if num_frames_to_skip != 0 and frame_loaded_counter == frames_per_window: - logger.info("Skipping the next %d frames:" % num_frames_to_skip) + logger.info("Skipping the next %d frames:", num_frames_to_skip) frame_no += num_frames_to_skip frame_loaded_counter = 0 # Correct the number of frames to be loaded after the first window (which starts at t=0, and @@ -1149,7 +1148,7 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermitte # update the frame number self.universe.trajectory[frame_no] - logging.info("Loading frame:", self.universe.trajectory.ts) + logger.info("Loading frame: %d", self.universe.trajectory.frame) atoms = self.universe.select_atoms(self.selection) # SP of residues or of atoms @@ -1169,7 +1168,7 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermitte # warn the user if the NaN are found if all(np.isnan(sp_timeseries[1:])): - logging.warning('NaN Error: Most likely data was not found. Check your atom selections. ') + logger.warning('NaN Error: Most likely data was not found. Check your atom selections. ') # user can investigate the distribution and sample size self.sp_timeseries_data = sp_timeseries_data From f04eb690db02bc458aac05674a798aea83bdaa11 Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Mon, 29 Apr 2019 13:10:17 +0100 Subject: [PATCH 18/40] Added __future__ imports. --- package/MDAnalysis/analysis/utils/autocorrelation.py | 1 + 1 file changed, 1 insertion(+) diff --git a/package/MDAnalysis/analysis/utils/autocorrelation.py b/package/MDAnalysis/analysis/utils/autocorrelation.py index efb25c40623..5349eebf6b9 100644 --- a/package/MDAnalysis/analysis/utils/autocorrelation.py +++ b/package/MDAnalysis/analysis/utils/autocorrelation.py @@ -21,6 +21,7 @@ # J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 # +from __future__ import absolute_import, division import numpy as np from copy import deepcopy From 2f0d37dff412a2b79a9bf8c106d9fcd309d79a34 Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Sun, 1 Mar 2020 12:39:44 +0000 Subject: [PATCH 19/40] Remove deprecation warning for HydrogenBondLifetime. This function will be removed in a future PR once a replacement that uses the new hydrogen bond class is in place. --- package/MDAnalysis/analysis/waterdynamics.py | 208 ++++++++++++++++++- 1 file changed, 203 insertions(+), 5 deletions(-) diff --git a/package/MDAnalysis/analysis/waterdynamics.py b/package/MDAnalysis/analysis/waterdynamics.py index 06476dc4346..ac5beaa7ef2 100644 --- a/package/MDAnalysis/analysis/waterdynamics.py +++ b/package/MDAnalysis/analysis/waterdynamics.py @@ -458,7 +458,6 @@ class HydrogenBondLifetimes(object): r"""Hydrogen bond lifetime analysis - fixme - deprecate it This is a autocorrelation function that gives the "Hydrogen Bond Lifetimes" (HBL) proposed by D.C. Rapaport [Rapaport1983]_. From this function we can @@ -505,10 +504,209 @@ class HydrogenBondLifetimes(object): .. versionadded:: 0.11.0 """ - def __init__(self, universe, selection1, selection2, t0=None, tf=None, dtmax=None, verbose=False): - warnings.warn("This class is deprecated, use analysis.hbonds.HydrogenBondAnalysis " - "which has .autocorrelation function", - category=DeprecationWarning) + def __init__(self, universe, selection1, selection2, t0, tf, dtmax, + nproc=1): + #warnings.warn("This class is deprecated; use analysis.hydrogen_bonds.hbond_analysis.HydrogenBondAnalysis ", + # category=DeprecationWarning) + self.universe = universe + self.selection1 = selection1 + self.selection2 = selection2 + self.t0 = t0 + self.tf = tf - 1 + self.dtmax = dtmax + self.nproc = nproc + self.timeseries = None + + def _getC_i(self, HBP, t0, t): + """ + This function give the intermitent Hydrogen Bond Lifetime + C_i = / between t0 and t + """ + C_i = 0 + for i in range(len(HBP[t0])): + for j in range(len(HBP[t])): + if (HBP[t0][i][0] == HBP[t][j][0] and HBP[t0][i][1] == HBP[t][j][1]): + C_i += 1 + break + if len(HBP[t0]) == 0: + return 0.0 + else: + return float(C_i) / len(HBP[t0]) + + def _getC_c(self, HBP, t0, t): + """ + This function give the continous Hydrogen Bond Lifetime + C_c = / between t0 and t + """ + C_c = 0 + dt = 1 + begt0 = t0 + HBP_cp = HBP + HBP_t0 = HBP[t0] + newHBP = [] + if t0 == t: + return 1.0 + while t0 + dt <= t: + for i in range(len(HBP_t0)): + for j in range(len(HBP_cp[t0 + dt])): + if (HBP_t0[i][0] == HBP_cp[t0 + dt][j][0] and + HBP_t0[i][1] == HBP_cp[t0 + dt][j][1]): + newHBP.append(HBP_t0[i]) + break + C_c = len(newHBP) + t0 += dt + HBP_t0 = newHBP + newHBP = [] + if len(HBP[begt0]) == 0: + return 0 + else: + return C_c / float(len(HBP[begt0])) + + def _intervC_c(self, HBP, t0, tf, dt): + """ + This function gets all the data for the h(t0)h(t0+dt)', where + t0 = 1,2,3,...,tf. This function give us one point of the final plot + HBL vs t + """ + a = 0 + count = 0 + for i in range(len(HBP)): + if (t0 + dt <= tf): + if t0 == t0 + dt: + b = self._getC_c(HBP, t0, t0) + break + b = self._getC_c(HBP, t0, t0 + dt) + t0 += dt + a += b + count += 1 + if count == 0: + return 1.0 + return a / count + + def _intervC_i(self, HBP, t0, tf, dt): + """ + This function gets all the data for the h(t0)h(t0+dt), where + t0 = 1,2,3,...,tf. This function give us a point of the final plot + HBL vs t + """ + a = 0 + count = 0 + for i in range(len(HBP)): + if (t0 + dt <= tf): + b = self._getC_i(HBP, t0, t0 + dt) + t0 += dt + a += b + count += 1 + return a / count + + def _finalGraphGetC_i(self, HBP, t0, tf, maxdt): + """ + This function gets the final data of the C_i graph. + """ + output = [] + for dt in range(maxdt): + a = self._intervC_i(HBP, t0, tf, dt) + output.append(a) + return output + + def _finalGraphGetC_c(self, HBP, t0, tf, maxdt): + """ + This function gets the final data of the C_c graph. + """ + output = [] + for dt in range(maxdt): + a = self._intervC_c(HBP, t0, tf, dt) + output.append(a) + return output + + def _getGraphics(self, HBP, t0, tf, maxdt): + """ + Function that join all the results into a plot. + """ + a = [] + cont = self._finalGraphGetC_c(HBP, t0, tf, maxdt) + inte = self._finalGraphGetC_i(HBP, t0, tf, maxdt) + for i in range(len(cont)): + fix = [cont[i], inte[i]] + a.append(fix) + return a + + def _HBA(self, ts, conn, universe, selAtom1, selAtom2, + verbose=False): + """ + Main function for calculate C_i and C_c in parallel. + """ + finalGetResidue1 = selAtom1 + finalGetResidue2 = selAtom2 + frame = ts.frame + h = MDAnalysis.analysis.hbonds.HydrogenBondAnalysis(universe, + finalGetResidue1, + finalGetResidue2, + distance=3.5, + angle=120.0, + start=frame - 1, + stop=frame) + while True: + try: + h.run(verbose=verbose) + break + except: + print("error") + print("trying again") + sys.stdout.flush() + sys.stdout.flush() + conn.send(h.timeseries[0]) + conn.close() + + def run(self, **kwargs): + """Analyze trajectory and produce timeseries""" + h_list = [] + i = 0 + if (self.nproc > 1): + while i < len(self.universe.trajectory): + jobs = [] + k = i + for j in range(self.nproc): + # start + print("ts=", i + 1) + if i >= len(self.universe.trajectory): + break + conn_parent, conn_child = multiprocessing.Pipe(False) + while True: + try: + # new thread + jobs.append( + (multiprocessing.Process( + target=self._HBA, + args=(self.universe.trajectory[i], + conn_child, self.universe, + self.selection1, self.selection2,)), + conn_parent)) + break + except: + print("error in jobs.append") + jobs[j][0].start() + i = i + 1 + + for j in range(self.nproc): + if k >= len(self.universe.trajectory): + break + rec01 = jobs[j][1] + received = rec01.recv() + h_list.append(received) + jobs[j][0].join() + k += 1 + self.timeseries = self._getGraphics( + h_list, 0, self.tf - 1, self.dtmax) + else: + h_list = MDAnalysis.analysis.hbonds.HydrogenBondAnalysis(self.universe, + self.selection1, + self.selection2, + distance=3.5, + angle=120.0) + h_list.run(**kwargs) + self.timeseries = self._getGraphics( + h_list.timeseries, self.t0, self.tf, self.dtmax) class WaterOrientationalRelaxation(object): From efa8998d02be0fd0d032e4ebdb8da26982941470 Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Sun, 1 Mar 2020 12:59:40 +0000 Subject: [PATCH 20/40] Updated docs. --- .../analysis/utils/autocorrelation.py | 45 ++++++++++--------- 1 file changed, 25 insertions(+), 20 deletions(-) diff --git a/package/MDAnalysis/analysis/utils/autocorrelation.py b/package/MDAnalysis/analysis/utils/autocorrelation.py index 5349eebf6b9..f4e841972a9 100644 --- a/package/MDAnalysis/analysis/utils/autocorrelation.py +++ b/package/MDAnalysis/analysis/utils/autocorrelation.py @@ -29,13 +29,9 @@ def autocorrelation(list_of_sets, tau_max, window_step=1): r"""The descrete implementation of the autocorrelation function. - Here is a random equation that shows something. - .. math:: C_{HB}^c(\tau) = \frac{\sum_{ij}h_{ij}(t_0)h'_{ij}(t_0+\tau)}{\sum_{ij}h_{ij}(t_0)} - - fixme - list_of_sets: Modifies in place! Parameters ---------- list_of_sets : list @@ -44,12 +40,6 @@ def autocorrelation(list_of_sets, tau_max, window_step=1): The last tau (inclusive) for which to carry out autocorrelation. window_step : int, optional The step for the t0 to perform autocorrelation (without the overlap). Default is 1. - fixme - move intermittency to the right place - intermittency : int, optional - Helps with the graps in the data. If we want to remove some of the fluctuations and focus on the patterns, - intermittency removes the gaps. The default intermittency=0 which means that if the datapoint is missing at any - frame, it is not counted. With the value of 2 the datapoint can be missing for 2 consecutive frames but it will - be counted as present. Returns -------- @@ -92,20 +82,35 @@ def autocorrelation(list_of_sets, tau_max, window_step=1): return tau_timeseries, timeseries, timeseries_data -# fixme - is intermittency consitent with list of sets of sets? (hydrogen bonds) + def correct_intermittency(list_of_sets, intermittency): """ - Pre-process Consecutive Intermittency with a single pass over the data. - If an atom is absent for a number of frames equal or smaller - than the parameter intermittency, then correct the data and remove the absence. - ie 7,A,A,7 with intermittency=2 will be replaced by 7,7,7,7, where A=absence + Process consecutive intermittency with a single pass over the data. The returned data can be used as input to + the function `autocorrelation` in order to calculate the discrete autocorrelation function with a given + intermittency. + + If an atom is absent for a number of frames equal or smaller than the parameter `intermittency`, then correct + the data and remove the absence. e.g 7,A,A,7 with `intermittency=2` will be replaced by 7,7,7,7, where A=absence. + + # TODO - is intermittency consitent with list of sets of sets? (hydrogen bonds) Parameters ---------- - id_list: list of sets - returns a new list with the IDs with added IDs which disappeared for <= :param intermittency - intermittency: int - the max gap allowed and to be corrected + list_of_sets: list + In the simple case of e.g survival probability, a list of sets of atom ids present at each frame, where a + single set contains atom ids at a given frame, e.g [{0, 1}, {0}, {0}, {0, 1}] + intermittency : int + The maximum gap allowed. The default intermittency=0 means that if the datapoint is missing at any frame, no + changes are made to the data. With the value of `intermittency=2`, all datapoints missing for up to two + consecutive frames will be instead be considered present. + + Returns + ------- + returns a new list with the IDs with added IDs which disappeared for <= :param intermittency. + e.g If [{0, 1}, {0}, {0}, {0, 1}] is a list of sets of atom ids present at each frame and `intermittency=2`, + both atoms will be considered present throughout and thus the returned list of sets will be + [{0, 1}, {0, 1}, {0, 1}, {0, 1}]. + """ if intermittency == 0: return list_of_sets @@ -143,4 +148,4 @@ def correct_intermittency(list_of_sets, intermittency): list_of_sets[i + j - k].add(atomid) seen_frames_ago[atomid] = 0 - return list_of_sets \ No newline at end of file + return list_of_sets From abe62cb603c5587ccb031702b7c9316ca32193f9 Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Sun, 1 Mar 2020 13:09:45 +0000 Subject: [PATCH 21/40] Added checks for parameters types and dimensions. --- package/MDAnalysis/analysis/utils/autocorrelation.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/package/MDAnalysis/analysis/utils/autocorrelation.py b/package/MDAnalysis/analysis/utils/autocorrelation.py index f4e841972a9..f28e64bd8cb 100644 --- a/package/MDAnalysis/analysis/utils/autocorrelation.py +++ b/package/MDAnalysis/analysis/utils/autocorrelation.py @@ -52,9 +52,14 @@ def autocorrelation(list_of_sets, tau_max, window_step=1): .. versionadded:: 0.19.2 """ - # fixme - add checks if this is a list of sets - # fixme - check dimensions - # fixme - check parameters? + + # Check types + assert type(list_of_sets) == list, "list_of_sets must be a one-dimensional list of sets" + assert type(list_of_sets[0]) == set, "list_of_sets must be a one-dimensional list of sets" + + # Check dimensions of parameters + assert len(list_of_sets) > tau_max > window_step, "window_step cannot be greater than tau_max, which cannot" \ + "be greater than the length of list_of_sets" tau_timeseries = list(range(1, tau_max + 1)) timeseries_data = [[] for _ in range(tau_max)] From bef47c21117d5c2779045658d49b2751c7d06911 Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Sun, 1 Mar 2020 13:15:18 +0000 Subject: [PATCH 22/40] Fixed imports. --- package/MDAnalysis/analysis/waterdynamics.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/package/MDAnalysis/analysis/waterdynamics.py b/package/MDAnalysis/analysis/waterdynamics.py index ac5beaa7ef2..d74ecfe310d 100644 --- a/package/MDAnalysis/analysis/waterdynamics.py +++ b/package/MDAnalysis/analysis/waterdynamics.py @@ -449,7 +449,9 @@ import logging import warnings import numpy as np +import multiprocessing +import MDAnalysis.analysis.hbonds from MDAnalysis.lib.log import ProgressMeter from .utils.autocorrelation import autocorrelation, correct_intermittency From 60b2b8a16038c023a8215860644b8dd29249c1f1 Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Sun, 1 Mar 2020 13:26:01 +0000 Subject: [PATCH 23/40] Removed deprecation warning. The hydrogen bond lifetime calculation will be replaced in a future PR>. Co-authored-by: pjsmith Co-authored-by: bieniekmateusz --- package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py b/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py index 93b332e6ec7..3f98bba8f34 100644 --- a/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py +++ b/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py @@ -297,9 +297,9 @@ def __init__(self, universe, nsamples=50, # number of different points to sample in a run pbc=True): - warnings.warn("This class is deprecated, use analysis.hbonds.HydrogenBondAnalysis " - "which has .autocorrelation function", - category=DeprecationWarning) + #warnings.warn("This class is deprecated, use analysis.hbonds.HydrogenBondAnalysis " + # "which has .autocorrelation function", + # category=DeprecationWarning) self.u = universe # check that slicing is possible From 725c2fc5c8f9602894ae0bea46120c40a27a3c4c Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Mon, 2 Mar 2020 10:55:42 +0000 Subject: [PATCH 24/40] Removed incorrect Assertion Error. Co-authored-by: pjsmith Co-authored-by: bieniekmateusz --- package/MDAnalysis/analysis/utils/autocorrelation.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/package/MDAnalysis/analysis/utils/autocorrelation.py b/package/MDAnalysis/analysis/utils/autocorrelation.py index f28e64bd8cb..3b174a76b32 100644 --- a/package/MDAnalysis/analysis/utils/autocorrelation.py +++ b/package/MDAnalysis/analysis/utils/autocorrelation.py @@ -58,8 +58,7 @@ def autocorrelation(list_of_sets, tau_max, window_step=1): assert type(list_of_sets[0]) == set, "list_of_sets must be a one-dimensional list of sets" # Check dimensions of parameters - assert len(list_of_sets) > tau_max > window_step, "window_step cannot be greater than tau_max, which cannot" \ - "be greater than the length of list_of_sets" + assert len(list_of_sets) > tau_max, "tau_max cannot be greater than the length of list_of_sets" tau_timeseries = list(range(1, tau_max + 1)) timeseries_data = [[] for _ in range(tau_max)] From 221034449bf4fca486c47390b0ba75bccffb30fd Mon Sep 17 00:00:00 2001 From: bieniekmat Date: Mon, 6 Apr 2020 19:26:30 +0100 Subject: [PATCH 25/40] Adding the test case back to the hydrogenBondLifetime. --- testsuite/MDAnalysisTests/analysis/test_waterdynamics.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py b/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py index e9a4440c7c5..e4c39889537 100644 --- a/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py +++ b/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py @@ -42,6 +42,13 @@ def universe(): return MDAnalysis.Universe(waterPSF, waterDCD) +def test_HydrogenBondLifetimes(universe): + hbl = waterdynamics.HydrogenBondLifetimes( + universe, SELECTION1, SELECTION1, 0, 5, 3) + hbl.run() + assert_almost_equal(hbl.timeseries[2][1], 0.75, 5) + + def test_WaterOrientationalRelaxation(universe): wor = waterdynamics.WaterOrientationalRelaxation( universe, SELECTION1, 0, 5, 2) From 0fa3c2bff4d95de8fcaf3e697bfd51b31a3cde7d Mon Sep 17 00:00:00 2001 From: bieniekmat Date: Mon, 6 Apr 2020 19:58:56 +0100 Subject: [PATCH 26/40] Inserted back the accidentally removed PR #2617 --- package/MDAnalysis/analysis/waterdynamics.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/package/MDAnalysis/analysis/waterdynamics.py b/package/MDAnalysis/analysis/waterdynamics.py index 772e50bf8d0..8df5492fc51 100644 --- a/package/MDAnalysis/analysis/waterdynamics.py +++ b/package/MDAnalysis/analysis/waterdynamics.py @@ -811,11 +811,9 @@ def _sameMolecTandDT(self, selection, t0d, tf): def _selection_serial(self, universe, selection_str): selection = [] - pm = ProgressMeter(universe.trajectory.n_frames, - interval=10, verbose=True) - for ts in universe.trajectory: + for ts in ProgressBar(universe.trajectory, verbose=True, + total=universe.trajectory.n_frames): selection.append(universe.select_atoms(selection_str)) - pm.echo(ts.frame) return selection @staticmethod @@ -986,11 +984,9 @@ def run(self, **kwargs): def _selection_serial(self, universe, selection_str): selection = [] - pm = ProgressMeter(universe.trajectory.n_frames, - interval=10, verbose=True) - for ts in universe.trajectory: + for ts in ProgressBar(universe.trajectory, verbose=True, + total=universe.trajectory.n_frames): selection.append(universe.select_atoms(selection_str)) - pm.echo(ts.frame) return selection @@ -1128,11 +1124,9 @@ def _sameMolecTandDT(self, selection, t0d, tf): def _selection_serial(self, universe, selection_str): selection = [] - pm = ProgressMeter(universe.trajectory.n_frames, - interval=10, verbose=True) - for ts in universe.trajectory: + for ts in ProgressBar(universe.trajectory, verbose=True, + total=universe.trajectory.n_frames): selection.append(universe.select_atoms(selection_str)) - pm.echo(ts.frame) return selection def run(self, **kwargs): From 1c12463536dec09abba7ae0760c4460fe453f9a3 Mon Sep 17 00:00:00 2001 From: Mateusz Bieniek Date: Mon, 6 Apr 2020 20:00:58 +0100 Subject: [PATCH 27/40] Update package/MDAnalysis/analysis/utils/autocorrelation.py Co-Authored-By: Oliver Beckstein --- package/MDAnalysis/analysis/utils/autocorrelation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package/MDAnalysis/analysis/utils/autocorrelation.py b/package/MDAnalysis/analysis/utils/autocorrelation.py index 3b174a76b32..60b0750cd4b 100644 --- a/package/MDAnalysis/analysis/utils/autocorrelation.py +++ b/package/MDAnalysis/analysis/utils/autocorrelation.py @@ -27,7 +27,7 @@ def autocorrelation(list_of_sets, tau_max, window_step=1): - r"""The descrete implementation of the autocorrelation function. + r"""The discrete implementation of the autocorrelation function. .. math:: C_{HB}^c(\tau) = \frac{\sum_{ij}h_{ij}(t_0)h'_{ij}(t_0+\tau)}{\sum_{ij}h_{ij}(t_0)} From bdd967c36922714e1f517a4c9100fa97e61cc349 Mon Sep 17 00:00:00 2001 From: Mateusz Bieniek Date: Mon, 6 Apr 2020 20:01:38 +0100 Subject: [PATCH 28/40] Update package/MDAnalysis/analysis/utils/autocorrelation.py Co-Authored-By: Oliver Beckstein --- package/MDAnalysis/analysis/utils/autocorrelation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package/MDAnalysis/analysis/utils/autocorrelation.py b/package/MDAnalysis/analysis/utils/autocorrelation.py index 60b0750cd4b..c7e43e6fcb3 100644 --- a/package/MDAnalysis/analysis/utils/autocorrelation.py +++ b/package/MDAnalysis/analysis/utils/autocorrelation.py @@ -35,7 +35,7 @@ def autocorrelation(list_of_sets, tau_max, window_step=1): Parameters ---------- list_of_sets : list - List of sets, + List of sets tau_max : int The last tau (inclusive) for which to carry out autocorrelation. window_step : int, optional From 30cf75a339303f94ad69db5522780649af06e028 Mon Sep 17 00:00:00 2001 From: bieniekmat Date: Mon, 6 Apr 2020 20:11:34 +0100 Subject: [PATCH 29/40] Inserted back the accidentally removed PR #2617 Correction for PR #2256 --- package/MDAnalysis/analysis/hbonds/hbond_analysis.py | 2 -- package/MDAnalysis/analysis/waterdynamics.py | 1 - 2 files changed, 3 deletions(-) diff --git a/package/MDAnalysis/analysis/hbonds/hbond_analysis.py b/package/MDAnalysis/analysis/hbonds/hbond_analysis.py index 6421506cb00..374d65a9d32 100644 --- a/package/MDAnalysis/analysis/hbonds/hbond_analysis.py +++ b/package/MDAnalysis/analysis/hbonds/hbond_analysis.py @@ -337,8 +337,6 @@ class HydrogenBondAnalysis_OtherFF(HydrogenBondAnalysis): from ..utils.autocorrelation import autocorrelation, correct_intermittency -from ..utils.autocorrelation import autocorrelation, correct_intermittency - logger = logging.getLogger('MDAnalysis.analysis.hbonds') diff --git a/package/MDAnalysis/analysis/waterdynamics.py b/package/MDAnalysis/analysis/waterdynamics.py index 8df5492fc51..b030a219c21 100644 --- a/package/MDAnalysis/analysis/waterdynamics.py +++ b/package/MDAnalysis/analysis/waterdynamics.py @@ -450,7 +450,6 @@ import warnings import numpy as np import MDAnalysis.analysis.hbonds -from MDAnalysis.lib.log import ProgressMeter from .utils.autocorrelation import autocorrelation, correct_intermittency logger = logging.getLogger('MDAnalysis.analysis.waterdynamics') From 67a965d0b0c531b7c275b160acba12eee9d3c6b2 Mon Sep 17 00:00:00 2001 From: bieniekmat Date: Sat, 11 Apr 2020 18:46:47 +0100 Subject: [PATCH 30/40] Fixed and expanded the documentation for the autocorrelation. PR #2256 co-authored-by: p-j-smith --- .../analysis/utils/autocorrelation.py | 61 +++++++++++++++---- .../documentation_pages/analysis/utils.rst | 1 + .../documentation_pages/analysis_modules.rst | 8 +++ 3 files changed, 59 insertions(+), 11 deletions(-) create mode 100644 package/doc/sphinx/source/documentation_pages/analysis/utils.rst diff --git a/package/MDAnalysis/analysis/utils/autocorrelation.py b/package/MDAnalysis/analysis/utils/autocorrelation.py index c7e43e6fcb3..820a66f5ea0 100644 --- a/package/MDAnalysis/analysis/utils/autocorrelation.py +++ b/package/MDAnalysis/analysis/utils/autocorrelation.py @@ -27,28 +27,62 @@ def autocorrelation(list_of_sets, tau_max, window_step=1): - r"""The discrete implementation of the autocorrelation function. + r"""Implementation of a discrete autocorrelation function. + The autocorrelation of a property $x$ from a time $t=t_0$ to $t=t_0 + \tau$ + is given by: .. math:: - C_{HB}^c(\tau) = \frac{\sum_{ij}h_{ij}(t_0)h'_{ij}(t_0+\tau)}{\sum_{ij}h_{ij}(t_0)} + C(\tau) = \langle \frac{ x(t_0)x(t_0 +\tau) }{ x(t_0)x(t_0) } \rangle + + where $x$ may represent any property of a particle, such as velocity or + potential energy. + + The survival probability, $S(\tau)$, is a special case of the time + autocorrelation function in which the property under consideration can + be encoded with indicator variables, $0$ and $1$, to represent the binary + state of said property. For instance, in calculating the survival probability + of water molecules within $5 \rm \AA$, each water molecule will either be + within this cutoff range ($1$) or not ($0$). The total number of water + molecules within the cutoff at time $t_0$ will be given by $N(t_0)$. + + The survival probability of a property of a set of particles is + given by: + + .. math:: + S(\tau) = \langle \frac{ N(t_0, t_0 + \tau )} { N(t_0) }\rangle + + where $N(t0)$ is the number of particles at time $t_0$ for which the feature + is observed, and $N(t0, t_0 + \tau)$ is the number of particles for which + this feature is present at every frame from $t_0$ to $N(t0, t_0 + \tau)$. + The angular brackets represent an average over all time origins, $t_0$. + + See Araya-Secchi et al., 2014, (https://doi.org/10.1016/j.bpj.2014.05.037) + for a description survival probability. Parameters ---------- list_of_sets : list - List of sets + List of sets. Each set corresponds to data from a single frame. Each element in a set + may be, for example, an atom id or a tuple of atoms ids. In the case of calculating the + survival probability of water around a protein, these atom ids in a given set will be + those of the atoms which are within a cutoff distance of the protein at a given frame. tau_max : int - The last tau (inclusive) for which to carry out autocorrelation. + The last tau (inclusive) for which to calculate the autocorrelation. e.g if tau_max = 20, + the survival probability will be calculated over 20 frames. window_step : int, optional - The step for the t0 to perform autocorrelation (without the overlap). Default is 1. + The step size for t0 to perform autocorrelation. Ideally, window_step will be larger than + tau_max to ensure independence of each window for which the calculation is performed. + Default is 1. Returns -------- tau_timeseries : list of int - the tau for which the autocorrelation was calculated + the values of tau for which the autocorrelation was calculated timeseries : list of int the autocorelation values for each of the tau values timeseries_data : list of list of int - the raw data from which the autocorrelation is computed. The time dependant evolution can be investigated. + the raw data from which the autocorrelation is computed, i.e $S(\tau)$ at each window. + This allows the time dependant evolution of $S(\tau)$ to be investigated. .. versionadded:: 0.19.2 """ @@ -88,13 +122,18 @@ def autocorrelation(list_of_sets, tau_max, window_step=1): def correct_intermittency(list_of_sets, intermittency): - """ + """Preprocess data to allow intermittent behaviour prior to calling `autocorrelation`. + Process consecutive intermittency with a single pass over the data. The returned data can be used as input to - the function `autocorrelation` in order to calculate the discrete autocorrelation function with a given + the function `autocorrelation` in order to calculate the survival probability with a given intermittency. - If an atom is absent for a number of frames equal or smaller than the parameter `intermittency`, then correct - the data and remove the absence. e.g 7,A,A,7 with `intermittency=2` will be replaced by 7,7,7,7, where A=absence. + For example, if an atom is absent for a number of frames equal or smaller than the parameter `intermittency`, + then correct the data and remove the absence. + e.g 7,A,A,7 with `intermittency=2` will be replaced by 7,7,7,7, where A=absence. + + See Gowers and Carbonne, 2015, (DOI:10.1063.1.4922445) for a description of + intermittency in the calculation of hydrogen bond lifetimes. # TODO - is intermittency consitent with list of sets of sets? (hydrogen bonds) diff --git a/package/doc/sphinx/source/documentation_pages/analysis/utils.rst b/package/doc/sphinx/source/documentation_pages/analysis/utils.rst new file mode 100644 index 00000000000..048e8216099 --- /dev/null +++ b/package/doc/sphinx/source/documentation_pages/analysis/utils.rst @@ -0,0 +1 @@ +.. automodule:: MDAnalysis.analysis.utils \ No newline at end of file diff --git a/package/doc/sphinx/source/documentation_pages/analysis_modules.rst b/package/doc/sphinx/source/documentation_pages/analysis_modules.rst index 19081f1b5dd..6c19528a3e8 100644 --- a/package/doc/sphinx/source/documentation_pages/analysis_modules.rst +++ b/package/doc/sphinx/source/documentation_pages/analysis_modules.rst @@ -147,3 +147,11 @@ Data :maxdepth: 1 analysis/data + +Utils +===== + +.. toctree:: + :maxdepth: 1 + + analysis/utils From fa8df30c8591fa7233bea13875c5440b75435112 Mon Sep 17 00:00:00 2001 From: bieniekmat Date: Sun, 12 Apr 2020 17:29:13 +0100 Subject: [PATCH 31/40] Introducing the requested corrections PR #2256 co-authored-by: p-j-smith --- package/MDAnalysis/analysis/utils/autocorrelation.py | 11 ++++++----- package/MDAnalysis/analysis/waterdynamics.py | 2 -- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/package/MDAnalysis/analysis/utils/autocorrelation.py b/package/MDAnalysis/analysis/utils/autocorrelation.py index 820a66f5ea0..b7cb4284651 100644 --- a/package/MDAnalysis/analysis/utils/autocorrelation.py +++ b/package/MDAnalysis/analysis/utils/autocorrelation.py @@ -67,7 +67,7 @@ def autocorrelation(list_of_sets, tau_max, window_step=1): survival probability of water around a protein, these atom ids in a given set will be those of the atoms which are within a cutoff distance of the protein at a given frame. tau_max : int - The last tau (inclusive) for which to calculate the autocorrelation. e.g if tau_max = 20, + The last tau (lag time, inclusive) for which to calculate the autocorrelation. e.g if tau_max = 20, the survival probability will be calculated over 20 frames. window_step : int, optional The step size for t0 to perform autocorrelation. Ideally, window_step will be larger than @@ -87,12 +87,13 @@ def autocorrelation(list_of_sets, tau_max, window_step=1): .. versionadded:: 0.19.2 """ - # Check types - assert type(list_of_sets) == list, "list_of_sets must be a one-dimensional list of sets" - assert type(list_of_sets[0]) == set, "list_of_sets must be a one-dimensional list of sets" + # check types + if (type(list_of_sets) != list and len(list_of_sets) != 0) or type(list_of_sets[0]) != set: + raise TypeError("list_of_sets must be a one-dimensional list of sets") # Check dimensions of parameters - assert len(list_of_sets) > tau_max, "tau_max cannot be greater than the length of list_of_sets" + if len(list_of_sets) < tau_max: + raise ValueError("tau_max cannot be greater than the length of list_of_sets") tau_timeseries = list(range(1, tau_max + 1)) timeseries_data = [[] for _ in range(tau_max)] diff --git a/package/MDAnalysis/analysis/waterdynamics.py b/package/MDAnalysis/analysis/waterdynamics.py index b030a219c21..c7fb43a5685 100644 --- a/package/MDAnalysis/analysis/waterdynamics.py +++ b/package/MDAnalysis/analysis/waterdynamics.py @@ -508,8 +508,6 @@ class HydrogenBondLifetimes(object): def __init__(self, universe, selection1, selection2, t0, tf, dtmax, nproc=1): - #warnings.warn("This class is deprecated; use analysis.hydrogen_bonds.hbond_analysis.HydrogenBondAnalysis ", - # category=DeprecationWarning) self.universe = universe self.selection1 = selection1 self.selection2 = selection2 From 45ff277ad7668308c52b507a320b915652eb7b07 Mon Sep 17 00:00:00 2001 From: bieniekmat Date: Sun, 12 Apr 2020 17:40:29 +0100 Subject: [PATCH 32/40] Exceptions marked as not covered #2256 co-authored-by: p-j-smith --- package/MDAnalysis/analysis/utils/autocorrelation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/package/MDAnalysis/analysis/utils/autocorrelation.py b/package/MDAnalysis/analysis/utils/autocorrelation.py index b7cb4284651..0a8eb9dc1b2 100644 --- a/package/MDAnalysis/analysis/utils/autocorrelation.py +++ b/package/MDAnalysis/analysis/utils/autocorrelation.py @@ -89,11 +89,11 @@ def autocorrelation(list_of_sets, tau_max, window_step=1): # check types if (type(list_of_sets) != list and len(list_of_sets) != 0) or type(list_of_sets[0]) != set: - raise TypeError("list_of_sets must be a one-dimensional list of sets") + raise TypeError("list_of_sets must be a one-dimensional list of sets") # pragma: no cover # Check dimensions of parameters if len(list_of_sets) < tau_max: - raise ValueError("tau_max cannot be greater than the length of list_of_sets") + raise ValueError("tau_max cannot be greater than the length of list_of_sets") # pragma: no cover tau_timeseries = list(range(1, tau_max + 1)) timeseries_data = [[] for _ in range(tau_max)] From e262cd3464fc517087abfcab7a0d86a4e62d8e93 Mon Sep 17 00:00:00 2001 From: bieniekmat Date: Sun, 12 Apr 2020 17:50:22 +0100 Subject: [PATCH 33/40] Further clarifications #2256 co-authored-by: p-j-smith --- .../analysis/utils/autocorrelation.py | 23 ++++++++++++++----- 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/package/MDAnalysis/analysis/utils/autocorrelation.py b/package/MDAnalysis/analysis/utils/autocorrelation.py index 0a8eb9dc1b2..041e24fcc2d 100644 --- a/package/MDAnalysis/analysis/utils/autocorrelation.py +++ b/package/MDAnalysis/analysis/utils/autocorrelation.py @@ -125,14 +125,25 @@ def autocorrelation(list_of_sets, tau_max, window_step=1): def correct_intermittency(list_of_sets, intermittency): """Preprocess data to allow intermittent behaviour prior to calling `autocorrelation`. - Process consecutive intermittency with a single pass over the data. The returned data can be used as input to - the function `autocorrelation` in order to calculate the survival probability with a given - intermittency. - - For example, if an atom is absent for a number of frames equal or smaller than the parameter `intermittency`, - then correct the data and remove the absence. + Survival probabilty may be calculated either with a strict continuous requirement or + a less strict intermittency. If calculating the survival probability water around a + protein for example, in the former case the water must be within a cutoff distance + of the protein a every frame from $t_0$ to $t_0 + \tau$ in order for it to be considered + present at $t_0 + \tau$. In the intermittent case, the water molecule is allowed to + leave the region of interest for up to a specified consecutive number of frames. + + This function pre-processes data, such as the atom ids of water molecules within a cutoff + distance of a protein at each frame, in order to allow for intermittent behaviour, with a + single pass over the data. + + For example, if an atom is absent for a number of frames equal or smaller than the parameter + `intermittency`, then this absence will be removed and thus the atom is considered to have + not left. e.g 7,A,A,7 with `intermittency=2` will be replaced by 7,7,7,7, where A=absence. + The returned data can be used as input to the function `autocorrelation` in order to calculate + the survival probability with a given intermittency. + See Gowers and Carbonne, 2015, (DOI:10.1063.1.4922445) for a description of intermittency in the calculation of hydrogen bond lifetimes. From a798d1e9d56d9acd85242c9c907f089f16272155 Mon Sep 17 00:00:00 2001 From: bieniekmat Date: Mon, 13 Apr 2020 15:38:48 +0100 Subject: [PATCH 34/40] Complete documentation for PR #2256 co-authored-by: p-j-smith --- .../analysis/utils/autocorrelation.py | 89 ++++++++++++++----- .../analysis/autocorrelation.rst | 9 ++ .../documentation_pages/analysis/utils.rst | 1 - .../documentation_pages/analysis_modules.rst | 4 +- 4 files changed, 77 insertions(+), 26 deletions(-) create mode 100644 package/doc/sphinx/source/documentation_pages/analysis/autocorrelation.rst delete mode 100644 package/doc/sphinx/source/documentation_pages/analysis/utils.rst diff --git a/package/MDAnalysis/analysis/utils/autocorrelation.py b/package/MDAnalysis/analysis/utils/autocorrelation.py index 041e24fcc2d..c8b751bfad8 100644 --- a/package/MDAnalysis/analysis/utils/autocorrelation.py +++ b/package/MDAnalysis/analysis/utils/autocorrelation.py @@ -21,6 +21,43 @@ # J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 # +"""Autocorrelation utilities --- :mod:`MDAnalysis.analysis.utils.autocorrelation` +================================================================================= + + +:Authors: Paul Smith & Mateusz Bieniek +:Year: 2020 +:Copyright: GNU Public License v2 + +.. versionadded:: 0.21.0 + +This module is primarily for internal use by other analysis modules. It +provides functionality for calculating the time autocorrelation function +of a binary variable (i.e one that is either true or false at each +frame for a given atom/molecule/set of molecules). This module includes +functions for calculating both the time continuous autocorrelation and +the intermittent autocorrelation. The function :func:`autocorrelation` +calculates the continuous autocorrelation only. The data may be +pre-processed using the function :func:`intermittency` in order to +acount for intermittency before passing the results to +:func:`autocorrelation`. + +See Gowers and Carbonne, 2015, (DOI:10.1063.1.4922445) for a further +discussion on the time continuous and intermittent autocorrelation of +hydrogen bond lifetimes. + +**Analysis tools that make use of modules** + +* :class:`MDAnalysis.analysis.waterdynamics.SurvivalProbability` + Calculates the continuous or intermittent survival probability + of an atom group in a region of interest. + +* :class:`MDAnalysis.analysis.hbonds.hbond_analysis` + Calculates the continuous of intermittent hydrogen bond + lifetime. + +""" + from __future__ import absolute_import, division import numpy as np from copy import deepcopy @@ -29,21 +66,25 @@ def autocorrelation(list_of_sets, tau_max, window_step=1): r"""Implementation of a discrete autocorrelation function. - The autocorrelation of a property $x$ from a time $t=t_0$ to $t=t_0 + \tau$ + The autocorrelation of a property :math:`x` from a time :math:`t=t_0` to :math:`t=t_0 + \tau` is given by: + .. math:: C(\tau) = \langle \frac{ x(t_0)x(t_0 +\tau) }{ x(t_0)x(t_0) } \rangle - where $x$ may represent any property of a particle, such as velocity or + where :math:`x` may represent any property of a particle, such as velocity or potential energy. - The survival probability, $S(\tau)$, is a special case of the time + This function is an implementation of a special case of the time autocorrelation function in which the property under consideration can - be encoded with indicator variables, $0$ and $1$, to represent the binary - state of said property. For instance, in calculating the survival probability - of water molecules within $5 \rm \AA$, each water molecule will either be - within this cutoff range ($1$) or not ($0$). The total number of water - molecules within the cutoff at time $t_0$ will be given by $N(t_0)$. + be encoded with indicator variables, :math:`0` and :math:`1`, to represent the binary + state of said property. This special case is often referred to as the + survival probability (:math:`S(\tau)`). As an example, in calculating the survival + probability of water molecules within :math:`5 \unicode{x212B}` of a protein, each water + molecule will either be within this cutoff range (:math:`1`) or not (:math:`0`). The + total number of water molecules within the cutoff at time :math:`t_0` will be + given by :math:`N(t_0)`. Other cases include the Hydrogen Bond Lifetime as + well as the translocation rate of cholesterol across a bilayer. The survival probability of a property of a set of particles is given by: @@ -51,10 +92,10 @@ def autocorrelation(list_of_sets, tau_max, window_step=1): .. math:: S(\tau) = \langle \frac{ N(t_0, t_0 + \tau )} { N(t_0) }\rangle - where $N(t0)$ is the number of particles at time $t_0$ for which the feature - is observed, and $N(t0, t_0 + \tau)$ is the number of particles for which - this feature is present at every frame from $t_0$ to $N(t0, t_0 + \tau)$. - The angular brackets represent an average over all time origins, $t_0$. + where :math:`N(t0)` is the number of particles at time :math:`t_0` for which the feature + is observed, and :math:`N(t0, t_0 + \tau)` is the number of particles for which + this feature is present at every frame from :math:`t_0` to :math:`N(t0, t_0 + \tau)`. + The angular brackets represent an average over all time origins, :math:`t_0`. See Araya-Secchi et al., 2014, (https://doi.org/10.1016/j.bpj.2014.05.037) for a description survival probability. @@ -81,8 +122,8 @@ def autocorrelation(list_of_sets, tau_max, window_step=1): timeseries : list of int the autocorelation values for each of the tau values timeseries_data : list of list of int - the raw data from which the autocorrelation is computed, i.e $S(\tau)$ at each window. - This allows the time dependant evolution of $S(\tau)$ to be investigated. + the raw data from which the autocorrelation is computed, i.e :math:`S(\tau)` at each window. + This allows the time dependant evolution of :math:`S(\tau)` to be investigated. .. versionadded:: 0.19.2 """ @@ -123,26 +164,27 @@ def autocorrelation(list_of_sets, tau_max, window_step=1): def correct_intermittency(list_of_sets, intermittency): - """Preprocess data to allow intermittent behaviour prior to calling `autocorrelation`. + r"""Preprocess data to allow intermittent behaviour prior to calling :func:`autocorrelation`. Survival probabilty may be calculated either with a strict continuous requirement or a less strict intermittency. If calculating the survival probability water around a protein for example, in the former case the water must be within a cutoff distance - of the protein a every frame from $t_0$ to $t_0 + \tau$ in order for it to be considered - present at $t_0 + \tau$. In the intermittent case, the water molecule is allowed to - leave the region of interest for up to a specified consecutive number of frames. + of the protein at every frame from :math:`t_0` to :math:`t_0 + \tau` in order for it to be considered + present at :math:`t_0 + \tau`. In the intermittent case, the water molecule is allowed to + leave the region of interest for up to a specified consecutive number of frames whilst still + being considered present at :math:`t_0 + \tau`. This function pre-processes data, such as the atom ids of water molecules within a cutoff distance of a protein at each frame, in order to allow for intermittent behaviour, with a single pass over the data. For example, if an atom is absent for a number of frames equal or smaller than the parameter - `intermittency`, then this absence will be removed and thus the atom is considered to have + :attr:`intermittency`, then this absence will be removed and thus the atom is considered to have not left. e.g 7,A,A,7 with `intermittency=2` will be replaced by 7,7,7,7, where A=absence. - The returned data can be used as input to the function `autocorrelation` in order to calculate - the survival probability with a given intermittency. + The returned data can be used as input to the function :func:`autocorrelation` in order + to calculate the survival probability with a given intermittency. See Gowers and Carbonne, 2015, (DOI:10.1063.1.4922445) for a description of intermittency in the calculation of hydrogen bond lifetimes. @@ -155,13 +197,14 @@ def correct_intermittency(list_of_sets, intermittency): In the simple case of e.g survival probability, a list of sets of atom ids present at each frame, where a single set contains atom ids at a given frame, e.g [{0, 1}, {0}, {0}, {0, 1}] intermittency : int - The maximum gap allowed. The default intermittency=0 means that if the datapoint is missing at any frame, no + The maximum gap allowed. The default `intermittency=0` means that if the datapoint is missing at any frame, no changes are made to the data. With the value of `intermittency=2`, all datapoints missing for up to two consecutive frames will be instead be considered present. Returns ------- - returns a new list with the IDs with added IDs which disappeared for <= :param intermittency. + list_of_sets: list + returns a new list with the IDs with added IDs which disappeared for <= :attr:`intermittency`. e.g If [{0, 1}, {0}, {0}, {0, 1}] is a list of sets of atom ids present at each frame and `intermittency=2`, both atoms will be considered present throughout and thus the returned list of sets will be [{0, 1}, {0, 1}, {0, 1}, {0, 1}]. diff --git a/package/doc/sphinx/source/documentation_pages/analysis/autocorrelation.rst b/package/doc/sphinx/source/documentation_pages/analysis/autocorrelation.rst new file mode 100644 index 00000000000..857f8c000f5 --- /dev/null +++ b/package/doc/sphinx/source/documentation_pages/analysis/autocorrelation.rst @@ -0,0 +1,9 @@ +.. automodule:: MDAnalysis.analysis.utils.autocorrelation + + Autocorrelation Function + ------------------------ + .. autofunction:: MDAnalysis.analysis.utils.autocorrelation.autocorrelation + + Intermittency Function + ---------------------- + .. autofunction:: MDAnalysis.analysis.utils.autocorrelation.correct_intermittency \ No newline at end of file diff --git a/package/doc/sphinx/source/documentation_pages/analysis/utils.rst b/package/doc/sphinx/source/documentation_pages/analysis/utils.rst deleted file mode 100644 index 048e8216099..00000000000 --- a/package/doc/sphinx/source/documentation_pages/analysis/utils.rst +++ /dev/null @@ -1 +0,0 @@ -.. automodule:: MDAnalysis.analysis.utils \ No newline at end of file diff --git a/package/doc/sphinx/source/documentation_pages/analysis_modules.rst b/package/doc/sphinx/source/documentation_pages/analysis_modules.rst index 6c19528a3e8..d4bdced3046 100644 --- a/package/doc/sphinx/source/documentation_pages/analysis_modules.rst +++ b/package/doc/sphinx/source/documentation_pages/analysis_modules.rst @@ -152,6 +152,6 @@ Utils ===== .. toctree:: - :maxdepth: 1 + :maxdepth: 2 - analysis/utils + analysis/autocorrelation From cddf0a5d67bee652d164b6c1e19b53394922c976 Mon Sep 17 00:00:00 2001 From: bieniekmat Date: Tue, 14 Apr 2020 19:16:23 +0100 Subject: [PATCH 35/40] Moving the package to .lib.correlations + reST citations (PR #2256) co-authored-by: p-j-smith --- .../analysis/hbonds/hbond_analysis.py | 3 +-- package/MDAnalysis/analysis/waterdynamics.py | 5 ++-- .../correlations.py} | 26 ++++++++++++------- .../analysis/autocorrelation.rst | 9 ------- .../documentation_pages/analysis_modules.rst | 8 ------ .../documentation_pages/lib/correlations.rst | 9 +++++++ .../documentation_pages/lib_modules.rst | 3 ++- .../analysis/test_waterdynamics.py | 2 +- 8 files changed, 33 insertions(+), 32 deletions(-) rename package/MDAnalysis/{analysis/utils/autocorrelation.py => lib/correlations.py} (90%) delete mode 100644 package/doc/sphinx/source/documentation_pages/analysis/autocorrelation.rst create mode 100644 package/doc/sphinx/source/documentation_pages/lib/correlations.rst diff --git a/package/MDAnalysis/analysis/hbonds/hbond_analysis.py b/package/MDAnalysis/analysis/hbonds/hbond_analysis.py index 374d65a9d32..6ec450b9a52 100644 --- a/package/MDAnalysis/analysis/hbonds/hbond_analysis.py +++ b/package/MDAnalysis/analysis/hbonds/hbond_analysis.py @@ -334,8 +334,7 @@ class HydrogenBondAnalysis_OtherFF(HydrogenBondAnalysis): from MDAnalysis.lib.log import ProgressBar from MDAnalysis.lib.NeighborSearch import AtomNeighborSearch from MDAnalysis.lib import distances - -from ..utils.autocorrelation import autocorrelation, correct_intermittency +from MDAnalysis.lib.correlations import autocorrelation, correct_intermittency logger = logging.getLogger('MDAnalysis.analysis.hbonds') diff --git a/package/MDAnalysis/analysis/waterdynamics.py b/package/MDAnalysis/analysis/waterdynamics.py index c7fb43a5685..4498356d3ae 100644 --- a/package/MDAnalysis/analysis/waterdynamics.py +++ b/package/MDAnalysis/analysis/waterdynamics.py @@ -445,12 +445,13 @@ """ from __future__ import print_function, division, absolute_import +from MDAnalysis.lib.correlations import autocorrelation, correct_intermittency +import MDAnalysis.analysis.hbonds from six.moves import range, zip_longest import logging import warnings import numpy as np -import MDAnalysis.analysis.hbonds -from .utils.autocorrelation import autocorrelation, correct_intermittency + logger = logging.getLogger('MDAnalysis.analysis.waterdynamics') from MDAnalysis.lib.log import ProgressBar diff --git a/package/MDAnalysis/analysis/utils/autocorrelation.py b/package/MDAnalysis/lib/correlations.py similarity index 90% rename from package/MDAnalysis/analysis/utils/autocorrelation.py rename to package/MDAnalysis/lib/correlations.py index c8b751bfad8..0b6af25c231 100644 --- a/package/MDAnalysis/analysis/utils/autocorrelation.py +++ b/package/MDAnalysis/lib/correlations.py @@ -21,7 +21,7 @@ # J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 # -"""Autocorrelation utilities --- :mod:`MDAnalysis.analysis.utils.autocorrelation` +"""Correlations utilities --- :mod:`MDAnalysis.lib.correlations` ================================================================================= @@ -42,11 +42,10 @@ acount for intermittency before passing the results to :func:`autocorrelation`. -See Gowers and Carbonne, 2015, (DOI:10.1063.1.4922445) for a further -discussion on the time continuous and intermittent autocorrelation of -hydrogen bond lifetimes. +See [Gowers2015]_ for a further discussion on the time continuous and +intermittent autocorrelation of hydrogen bond lifetimes. -**Analysis tools that make use of modules** +.. seeAlso:: Analysis tools that make use of modules * :class:`MDAnalysis.analysis.waterdynamics.SurvivalProbability` Calculates the continuous or intermittent survival probability @@ -56,6 +55,15 @@ Calculates the continuous of intermittent hydrogen bond lifetime. +.. rubric:: References + +.. [Gowers2015] Gowers, R. J., & Carbone, P. (2015). A multiscale approach to model hydrogen bonding: + The case of polyamide. The Journal of Chemical Physics, 142(22), 224907. + https://doi.org/10.1063/1.4922445 +.. [Araya-Secchi2014] Araya-Secchi, R., Perez-Acle, T., Kang, S., Huynh, T., Bernardin, A., Escalona, Y., Garate, J.-A., Martínez, A. D., García, I. E., Sáez, J. C., & Zhou, R. (2014). + Characterization of a Novel Water Pocket Inside the Human Cx26 Hemichannel Structure. + Biophysical Journal, 107(3), 599–612. https://doi.org/10.1016/J.BPJ.2014.05.037 + """ from __future__ import absolute_import, division @@ -80,7 +88,7 @@ def autocorrelation(list_of_sets, tau_max, window_step=1): be encoded with indicator variables, :math:`0` and :math:`1`, to represent the binary state of said property. This special case is often referred to as the survival probability (:math:`S(\tau)`). As an example, in calculating the survival - probability of water molecules within :math:`5 \unicode{x212B}` of a protein, each water + probability of water molecules within 5 Å of a protein, each water molecule will either be within this cutoff range (:math:`1`) or not (:math:`0`). The total number of water molecules within the cutoff at time :math:`t_0` will be given by :math:`N(t_0)`. Other cases include the Hydrogen Bond Lifetime as @@ -97,8 +105,7 @@ def autocorrelation(list_of_sets, tau_max, window_step=1): this feature is present at every frame from :math:`t_0` to :math:`N(t0, t_0 + \tau)`. The angular brackets represent an average over all time origins, :math:`t_0`. - See Araya-Secchi et al., 2014, (https://doi.org/10.1016/j.bpj.2014.05.037) - for a description survival probability. + See [Araya-Secchi2014]_ for a description survival probability. Parameters ---------- @@ -186,7 +193,7 @@ def correct_intermittency(list_of_sets, intermittency): The returned data can be used as input to the function :func:`autocorrelation` in order to calculate the survival probability with a given intermittency. - See Gowers and Carbonne, 2015, (DOI:10.1063.1.4922445) for a description of + See [Gowers2015]_ for a description of intermittency in the calculation of hydrogen bond lifetimes. # TODO - is intermittency consitent with list of sets of sets? (hydrogen bonds) @@ -247,3 +254,4 @@ def correct_intermittency(list_of_sets, intermittency): seen_frames_ago[atomid] = 0 return list_of_sets + diff --git a/package/doc/sphinx/source/documentation_pages/analysis/autocorrelation.rst b/package/doc/sphinx/source/documentation_pages/analysis/autocorrelation.rst deleted file mode 100644 index 857f8c000f5..00000000000 --- a/package/doc/sphinx/source/documentation_pages/analysis/autocorrelation.rst +++ /dev/null @@ -1,9 +0,0 @@ -.. automodule:: MDAnalysis.analysis.utils.autocorrelation - - Autocorrelation Function - ------------------------ - .. autofunction:: MDAnalysis.analysis.utils.autocorrelation.autocorrelation - - Intermittency Function - ---------------------- - .. autofunction:: MDAnalysis.analysis.utils.autocorrelation.correct_intermittency \ No newline at end of file diff --git a/package/doc/sphinx/source/documentation_pages/analysis_modules.rst b/package/doc/sphinx/source/documentation_pages/analysis_modules.rst index d4bdced3046..19081f1b5dd 100644 --- a/package/doc/sphinx/source/documentation_pages/analysis_modules.rst +++ b/package/doc/sphinx/source/documentation_pages/analysis_modules.rst @@ -147,11 +147,3 @@ Data :maxdepth: 1 analysis/data - -Utils -===== - -.. toctree:: - :maxdepth: 2 - - analysis/autocorrelation diff --git a/package/doc/sphinx/source/documentation_pages/lib/correlations.rst b/package/doc/sphinx/source/documentation_pages/lib/correlations.rst new file mode 100644 index 00000000000..e68b0eba864 --- /dev/null +++ b/package/doc/sphinx/source/documentation_pages/lib/correlations.rst @@ -0,0 +1,9 @@ +.. automodule:: MDAnalysis.lib.correlations + + Autocorrelation Function + ------------------------ + .. autofunction:: MDAnalysis.lib.correlations.autocorrelation + + Intermittency Function + ---------------------- + .. autofunction:: MDAnalysis.lib.correlations.correct_intermittency \ No newline at end of file diff --git a/package/doc/sphinx/source/documentation_pages/lib_modules.rst b/package/doc/sphinx/source/documentation_pages/lib_modules.rst index 0394d3fa28e..29ba1a05e8a 100644 --- a/package/doc/sphinx/source/documentation_pages/lib_modules.rst +++ b/package/doc/sphinx/source/documentation_pages/lib_modules.rst @@ -59,6 +59,7 @@ List of modules ./lib/transformations ./lib/qcprot ./lib/util + ./lib/correlations Low level file formats ---------------------- @@ -74,4 +75,4 @@ Python-based projects. :maxdepth: 1 ./lib/formats/libmdaxdr - ./lib/formats/libdcd + ./lib/formats/libdcd \ No newline at end of file diff --git a/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py b/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py index e4c39889537..3d2325e1a21 100644 --- a/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py +++ b/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py @@ -23,7 +23,7 @@ from __future__ import print_function, absolute_import import MDAnalysis from MDAnalysis.analysis import waterdynamics -from MDAnalysis.analysis.utils.autocorrelation import autocorrelation, correct_intermittency +from MDAnalysis.lib.correlations import autocorrelation, correct_intermittency from MDAnalysisTests.datafiles import waterPSF, waterDCD From 89991c379dbfb32b461047b89cec3a13ac2b6747 Mon Sep 17 00:00:00 2001 From: bieniekmat Date: Tue, 14 Apr 2020 19:48:01 +0100 Subject: [PATCH 36/40] Further clarifications #2256 co-authored-by: p-j-smith --- package/MDAnalysis/lib/correlations.py | 36 ++++++++++++++++---------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/package/MDAnalysis/lib/correlations.py b/package/MDAnalysis/lib/correlations.py index 0b6af25c231..8555eb257f4 100644 --- a/package/MDAnalysis/lib/correlations.py +++ b/package/MDAnalysis/lib/correlations.py @@ -42,27 +42,37 @@ acount for intermittency before passing the results to :func:`autocorrelation`. -See [Gowers2015]_ for a further discussion on the time continuous and -intermittent autocorrelation of hydrogen bond lifetimes. +This module is inspired by seemingly disparate analyses that rely on the same +underlying calculation, including the survival probability of water around +proteins [Araya-Secchi2014]_, hydrogen bond lifetimes [Gowers2015]_, [Araya-Secchi2014]_, +and the rate of cholesterol flip-flop in lipid bilayers [Gu2019]_. -.. seeAlso:: Analysis tools that make use of modules +.. seeAlso:: -* :class:`MDAnalysis.analysis.waterdynamics.SurvivalProbability` - Calculates the continuous or intermittent survival probability - of an atom group in a region of interest. + Analysis tools that make use of modules: -* :class:`MDAnalysis.analysis.hbonds.hbond_analysis` - Calculates the continuous of intermittent hydrogen bond - lifetime. + * :class:`MDAnalysis.analysis.waterdynamics.SurvivalProbability` + Calculates the continuous or intermittent survival probability + of an atom group in a region of interest. + + * :class:`MDAnalysis.analysis.hbonds.hbond_analysis` + Calculates the continuous or intermittent hydrogen bond + lifetime. .. rubric:: References -.. [Gowers2015] Gowers, R. J., & Carbone, P. (2015). A multiscale approach to model hydrogen bonding: - The case of polyamide. The Journal of Chemical Physics, 142(22), 224907. +.. [Gowers2015] Gowers, R. J., & Carbone, P. (2015). + A multiscale approach to model hydrogen bonding: The case of polyamide. + The Journal of Chemical Physics, 142(22), 224907. https://doi.org/10.1063/1.4922445 -.. [Araya-Secchi2014] Araya-Secchi, R., Perez-Acle, T., Kang, S., Huynh, T., Bernardin, A., Escalona, Y., Garate, J.-A., Martínez, A. D., García, I. E., Sáez, J. C., & Zhou, R. (2014). +.. [Araya-Secchi2014] Araya-Secchi, R., Perez-Acle, T., Kang, S., Huynh, T., Bernardin, A., Escalona, Y., Garate, J.-A., Martínez, A. D., García, I. E., Sáez, J. C., & Zhou, R. (2014) Characterization of a Novel Water Pocket Inside the Human Cx26 Hemichannel Structure. - Biophysical Journal, 107(3), 599–612. https://doi.org/10.1016/J.BPJ.2014.05.037 + Biophysical Journal. 107(3), 599–612. + https://doi.org/10.1016/J.BPJ.2014.05.037 +.. [Gu2019] Gu, R.-X.; Baoukina, S.; Tieleman, D. P. (2019) + Cholesterol Flip-Flop in Heterogeneous Membranes. + J. Chem. Theory Comput. 15 (3), 2064–2070. + https://doi.org/10.1021/acs.jctc.8b00933. """ From 4acb174ee7e530aa19cd0b7e4a300697b72616b7 Mon Sep 17 00:00:00 2001 From: bieniekmat Date: Wed, 15 Apr 2020 10:56:51 +0100 Subject: [PATCH 37/40] Indentation corrections. co-authored-by: p-j-smith --- package/MDAnalysis/lib/correlations.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/package/MDAnalysis/lib/correlations.py b/package/MDAnalysis/lib/correlations.py index 8555eb257f4..130f1ef57ad 100644 --- a/package/MDAnalysis/lib/correlations.py +++ b/package/MDAnalysis/lib/correlations.py @@ -214,9 +214,9 @@ def correct_intermittency(list_of_sets, intermittency): In the simple case of e.g survival probability, a list of sets of atom ids present at each frame, where a single set contains atom ids at a given frame, e.g [{0, 1}, {0}, {0}, {0, 1}] intermittency : int - The maximum gap allowed. The default `intermittency=0` means that if the datapoint is missing at any frame, no - changes are made to the data. With the value of `intermittency=2`, all datapoints missing for up to two - consecutive frames will be instead be considered present. + The maximum gap allowed. The default `intermittency=0` means that if the datapoint is missing at any frame, no + changes are made to the data. With the value of `intermittency=2`, all datapoints missing for up to two + consecutive frames will be instead be considered present. Returns ------- From 11e242b289b45b3cb12299130a5903af19277095 Mon Sep 17 00:00:00 2001 From: bieniekmat Date: Wed, 15 Apr 2020 14:05:26 +0100 Subject: [PATCH 38/40] Removing duplicate citation creating a sphinx warning #2256 co-authored-by: p-j-smith --- package/MDAnalysis/lib/correlations.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/package/MDAnalysis/lib/correlations.py b/package/MDAnalysis/lib/correlations.py index 130f1ef57ad..056722065a7 100644 --- a/package/MDAnalysis/lib/correlations.py +++ b/package/MDAnalysis/lib/correlations.py @@ -61,10 +61,6 @@ .. rubric:: References -.. [Gowers2015] Gowers, R. J., & Carbone, P. (2015). - A multiscale approach to model hydrogen bonding: The case of polyamide. - The Journal of Chemical Physics, 142(22), 224907. - https://doi.org/10.1063/1.4922445 .. [Araya-Secchi2014] Araya-Secchi, R., Perez-Acle, T., Kang, S., Huynh, T., Bernardin, A., Escalona, Y., Garate, J.-A., Martínez, A. D., García, I. E., Sáez, J. C., & Zhou, R. (2014) Characterization of a Novel Water Pocket Inside the Human Cx26 Hemichannel Structure. Biophysical Journal. 107(3), 599–612. From 6f3846a81c689b17a3cdc0bc64c34b343057ad85 Mon Sep 17 00:00:00 2001 From: bieniekmat Date: Wed, 15 Apr 2020 16:37:28 +0100 Subject: [PATCH 39/40] Another duplicate citation (sphinx) PR #2256 co-authored-by: p-j-smith --- package/MDAnalysis/lib/correlations.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/package/MDAnalysis/lib/correlations.py b/package/MDAnalysis/lib/correlations.py index 056722065a7..483b5fcf97e 100644 --- a/package/MDAnalysis/lib/correlations.py +++ b/package/MDAnalysis/lib/correlations.py @@ -61,10 +61,6 @@ .. rubric:: References -.. [Araya-Secchi2014] Araya-Secchi, R., Perez-Acle, T., Kang, S., Huynh, T., Bernardin, A., Escalona, Y., Garate, J.-A., Martínez, A. D., García, I. E., Sáez, J. C., & Zhou, R. (2014) - Characterization of a Novel Water Pocket Inside the Human Cx26 Hemichannel Structure. - Biophysical Journal. 107(3), 599–612. - https://doi.org/10.1016/J.BPJ.2014.05.037 .. [Gu2019] Gu, R.-X.; Baoukina, S.; Tieleman, D. P. (2019) Cholesterol Flip-Flop in Heterogeneous Membranes. J. Chem. Theory Comput. 15 (3), 2064–2070. From 95641de08566cb7d028f61d1d93d2c1c7e29c73d Mon Sep 17 00:00:00 2001 From: bieniekmat Date: Thu, 16 Apr 2020 19:45:37 +0100 Subject: [PATCH 40/40] Adding tags and summerising PR #2256 co-authored-by: p-j-smith --- package/CHANGELOG | 3 +++ package/MDAnalysis/analysis/hbonds/hbond_analysis.py | 3 +++ package/MDAnalysis/analysis/waterdynamics.py | 6 ++++-- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/package/CHANGELOG b/package/CHANGELOG index c1f0127c1b4..77a2288672e 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -116,6 +116,9 @@ Enhancements * Added weights_groupselections option in RMSD for mapping custom weights to groupselections (PR #2610, Issue #2429) * PersistenceLength.plot() now create new axes if current axes not provided (Issue #2590) + * Added a correlations module to provide functionality for analysis modules to + calculate the discrete autocorrelation function in a standardised way. Added + the capability to allow intermittent behaviour (PR #2256) Changes * Deprecated :class:`ProgressMeter` and replaced it with :class:`ProgressBar` using diff --git a/package/MDAnalysis/analysis/hbonds/hbond_analysis.py b/package/MDAnalysis/analysis/hbonds/hbond_analysis.py index 6ec450b9a52..8b89425c961 100644 --- a/package/MDAnalysis/analysis/hbonds/hbond_analysis.py +++ b/package/MDAnalysis/analysis/hbonds/hbond_analysis.py @@ -383,6 +383,9 @@ class HydrogenBondAnalysis(base.AnalysisBase): DEFAULT_DONORS/ACCEPTORS is now embedded in a dict to switch between default values for different force fields. + .. versionchanged:: 0.21.0 + Added autocorrelation (MDAnalysis.lib.correlations.py) for calculating hydrogen bond lifetime + .. versionchanged:: 1.0.0 ``save_table()`` method has been removed. You can use ``np.save()`` or ``cPickle.dump()`` on :attr:`HydrogenBondAnalysis.table` instead. diff --git a/package/MDAnalysis/analysis/waterdynamics.py b/package/MDAnalysis/analysis/waterdynamics.py index 4498356d3ae..70d42a26a93 100644 --- a/package/MDAnalysis/analysis/waterdynamics.py +++ b/package/MDAnalysis/analysis/waterdynamics.py @@ -1172,7 +1172,8 @@ class SurvivalProbability(object): .. versionadded:: 0.11.0 - + .. versionchanged:: 0.21.0 + Using the MDAnalysis.lib.correlations.py to carry out the intermittency and autocorrelation calculations .. versionchanged:: 1.0.0 Changed `selection` keyword to `select` """ @@ -1259,7 +1260,8 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermitte # fixme - to parallise: the section should be rewritten so that this loop only creates a list of indices, # on which the parallel _single_frame can be applied. - # skip frames that will not be used + # skip frames that will not be used in order to improve performance + # because AtomGroup.select_atoms is the most expensive part of this calculation # Example: step 5 and tau 2: LLLSS LLLSS, ... where L = Load, and S = Skip # Intermittency means that we have to load the extra frames to know if the atom is actually missing. # Say step=5 and tau=1, intermittency=0: LLSSS LLSSS