From 8bc9d9f8aaf479b865e8b80f4b6f7be252e48894 Mon Sep 17 00:00:00 2001
From: Mateusz Bieniek <bieniekmat@gmail.com>
Date: Wed, 10 Apr 2019 14:23:56 +0100
Subject: [PATCH 01/40] Autocorrelation extracted from SP.

---
 package/MDAnalysis/analysis/utils/__init__.py |   0
 .../analysis/utils/autocorrelation.py         | 105 ++++++++++++++++++
 package/MDAnalysis/analysis/waterdynamics.py  |  27 +----
 3 files changed, 109 insertions(+), 23 deletions(-)
 create mode 100644 package/MDAnalysis/analysis/utils/__init__.py
 create mode 100644 package/MDAnalysis/analysis/utils/autocorrelation.py

diff --git a/package/MDAnalysis/analysis/utils/__init__.py b/package/MDAnalysis/analysis/utils/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/package/MDAnalysis/analysis/utils/autocorrelation.py b/package/MDAnalysis/analysis/utils/autocorrelation.py
new file mode 100644
index 00000000000..867eee2fa39
--- /dev/null
+++ b/package/MDAnalysis/analysis/utils/autocorrelation.py
@@ -0,0 +1,105 @@
+# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
+# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
+#
+# MDAnalysis --- https://www.mdanalysis.org
+# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors
+# (see the file AUTHORS for the full list of names)
+#
+# Released under the GNU Public Licence, v2 or any higher version
+#
+# Please cite your use of MDAnalysis in published work:
+#
+# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler,
+# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein.
+# MDAnalysis: A Python package for the rapid analysis of molecular dynamics
+# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th
+# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy.
+# doi: 10.25080/majora-629e541a-00e
+#
+# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
+# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
+# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
+#
+
+"""
+A flexible implementation of an autocorrelation function.
+"""
+
+import numpy as np
+
+# TODO add documentation
+def autocorrelation(list_of_sets, tau_max, window_jump=1, intermittency=0):
+    # FIXME - make sure that it is not just a list of numbers
+
+    # correct the dataset for gaps (intermittency)
+    _correct_intermittency(intermittency, list_of_sets)
+
+    # calculate Survival Probability
+    tau_timeseries = np.arange(1, tau_max + 1)
+    sp_timeseries_data = [[] for _ in range(tau_max)]
+
+    for t in range(0, len(list_of_sets), window_jump):
+        Nt = len(list_of_sets[t])
+
+        if Nt == 0:
+            continue
+
+        for tau in tau_timeseries:
+            if t + tau >= len(list_of_sets):
+                break
+
+            # continuous: IDs that survive from t to t + tau and at every frame in between
+            Ntau = len(set.intersection(*list_of_sets[t:t + tau + 1]))
+            sp_timeseries_data[tau - 1].append(Ntau / float(Nt))
+
+    sp_timeseries = [np.mean(sp) for sp in sp_timeseries_data]
+    return tau_timeseries, sp_timeseries, sp_timeseries_data
+
+
+def _correct_intermittency(intermittency, id_list, verbose=False):
+    """
+    Pre-process Consecutive Intermittency with a single pass over the data.
+    If an atom is absent for a number of frames equal or smaller
+    than the parameter intermittency, then correct the data and remove the absence.
+    ie 7,A,A,7 with intermittency=2 will be replaced by 7,7,7,7, where A=absence
+
+    :param intermittency: the max gap allowed and to be corrected
+    :param id_list: modifies the selecteded IDs in place by adding atoms which left for <= :param intermittency
+    """
+    if intermittency == 0:
+        return
+
+    if verbose:
+        print('Correcting the selected IDs for intermittancy (gaps). ')
+
+    for i, ids in enumerate(id_list):
+        # initially update each frame as seen 0 ago (now)
+        seen_frames_ago = {i: 0 for i in ids}
+        for j in range(1, intermittency + 2):
+            for atomid in seen_frames_ago.keys():
+                # no more frames
+                if i + j >= len(id_list):
+                    continue
+
+                # if the atom is absent now
+                if not atomid in id_list[i + j]:
+                    # increase its absence counter
+                    seen_frames_ago[atomid] += 1
+                    continue
+
+                # the atom is found
+                if seen_frames_ago[atomid] == 0:
+                    # the atom was present in the last frame
+                    continue
+
+                # it was absent more times than allowed
+                if seen_frames_ago[atomid] > intermittency:
+                    continue
+
+                # the atom was absent but returned (within <= intermittency)
+                # add it to the frames where it was absent.
+                # Introduce the corrections.
+                for k in range(seen_frames_ago[atomid], 0, -1):
+                    id_list[i + j - k].add(atomid)
+
+                seen_frames_ago[atomid] = 0
\ No newline at end of file
diff --git a/package/MDAnalysis/analysis/waterdynamics.py b/package/MDAnalysis/analysis/waterdynamics.py
index a0f3b8cd379..553f5903cf7 100644
--- a/package/MDAnalysis/analysis/waterdynamics.py
+++ b/package/MDAnalysis/analysis/waterdynamics.py
@@ -458,6 +458,7 @@
 
 import MDAnalysis.analysis.hbonds
 from MDAnalysis.lib.log import ProgressMeter
+from .utils.autocorrelation import autocorrelation
 
 
 class HydrogenBondLifetimes(object):
@@ -1408,36 +1409,16 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermitte
             frame_no += 1
             frame_loaded_counter += 1
 
-        # correct the dataset for gaps (intermittency)
-        self._correct_intermittency(intermittency, self.selected_ids)
-
-        # calculate Survival Probability
-        tau_timeseries = np.arange(1, tau_max + 1)
-        sp_timeseries_data = [[] for _ in range(tau_max)]
-
         # adjust for the frames that were not loaded (step>tau_max + 1),
         # and for extra frames that were loaded (intermittency)
         window_jump = step - num_frames_to_skip
 
-        for t in range(0, len(self.selected_ids), window_jump):
-            Nt = len(self.selected_ids[t])
-
-            if Nt == 0:
-                self.print(verbose,
-                           "At frame {} the selection did not find any molecule. Moving on to the next frame".format(t))
-                continue
-
-            for tau in tau_timeseries:
-                if t + tau >= len(self.selected_ids):
-                    break
-
-                # continuous: IDs that survive from t to t + tau and at every frame in between
-                Ntau = len(set.intersection(*self.selected_ids[t:t + tau + 1]))
-                sp_timeseries_data[tau - 1].append(Ntau / float(Nt))
+        tau_timeseries, sp_timeseries, sp_timeseries_data = \
+            autocorrelation(self.selected_ids, tau_max, window_jump,intermittency)
 
         # user can investigate the distribution and sample size
         self.sp_timeseries_data = sp_timeseries_data
 
         self.tau_timeseries = tau_timeseries
-        self.sp_timeseries = [np.mean(sp) for sp in sp_timeseries_data]
+        self.sp_timeseries = sp_timeseries
         return self

From 29a30278fdfcbea48e75bfd1df12341469fca02d Mon Sep 17 00:00:00 2001
From: Mateusz Bieniek <bieniekmat@gmail.com>
Date: Sun, 14 Apr 2019 13:55:47 +0100
Subject: [PATCH 02/40] Exhaustive testing for autocorrelation. It will now
 return 1 at tau=0. Now the function can be embedded into
 HydrogenBondLifetime.

---
 .../analysis/utils/autocorrelation.py         | 15 ++-
 package/MDAnalysis/analysis/waterdynamics.py  | 18 ++--
 .../analysis/test_waterdynamics.py            | 98 ++++++++++++++++---
 3 files changed, 111 insertions(+), 20 deletions(-)

diff --git a/package/MDAnalysis/analysis/utils/autocorrelation.py b/package/MDAnalysis/analysis/utils/autocorrelation.py
index 867eee2fa39..6f162e15222 100644
--- a/package/MDAnalysis/analysis/utils/autocorrelation.py
+++ b/package/MDAnalysis/analysis/utils/autocorrelation.py
@@ -29,13 +29,21 @@
 
 # TODO add documentation
 def autocorrelation(list_of_sets, tau_max, window_jump=1, intermittency=0):
+    """
+
+    :param list_of_sets: Modifies in place!
+    :param tau_max:
+    :param window_jump:
+    :param intermittency:
+    :return:
+    """
     # FIXME - make sure that it is not just a list of numbers
 
     # correct the dataset for gaps (intermittency)
     _correct_intermittency(intermittency, list_of_sets)
 
     # calculate Survival Probability
-    tau_timeseries = np.arange(1, tau_max + 1)
+    tau_timeseries = list(range(1, tau_max + 1))
     sp_timeseries_data = [[] for _ in range(tau_max)]
 
     for t in range(0, len(list_of_sets), window_jump):
@@ -53,6 +61,11 @@ def autocorrelation(list_of_sets, tau_max, window_jump=1, intermittency=0):
             sp_timeseries_data[tau - 1].append(Ntau / float(Nt))
 
     sp_timeseries = [np.mean(sp) for sp in sp_timeseries_data]
+
+    # at time 0 the value has to be one
+    tau_timeseries.insert(0, 0)
+    sp_timeseries.insert(0, 1)
+
     return tau_timeseries, sp_timeseries, sp_timeseries_data
 
 
diff --git a/package/MDAnalysis/analysis/waterdynamics.py b/package/MDAnalysis/analysis/waterdynamics.py
index 553f5903cf7..d5143fa989d 100644
--- a/package/MDAnalysis/analysis/waterdynamics.py
+++ b/package/MDAnalysis/analysis/waterdynamics.py
@@ -297,10 +297,6 @@
   for tau, sp in zip(tau_timeseries, sp_timeseries):
         print("{time} {sp}".format(time=tau, sp=sp))
 
-  # SP is not calculated at tau=0, but if you would like to plot SP=1 at tau=0:
-  tau_timeseries.insert(0, 0)
-  sp_timeseries.insert(1, 0)
-
   # plot
   plt.xlabel('Time')
   plt.ylabel('SP')
@@ -464,19 +460,19 @@
 class HydrogenBondLifetimes(object):
     r"""Hydrogen bond lifetime analysis
 
-    This is a autocorrelation function that gives the "Hydrogen Bond Lifetimes"
+    This is an autocorrelation function that gives the "Hydrogen Bond Lifetimes"
     (HBL) proposed by D.C. Rapaport [Rapaport1983]_. From this function we can
     obtain the continuous and intermittent behavior of hydrogen bonds in
     time. A fast decay in these parameters indicate a fast change in HBs
     connectivity. A slow decay indicate very stables hydrogen bonds, like in
     ice. The HBL is also know as "Hydrogen Bond Population Relaxation"
-    (HBPR). In the continuos case we have:
+    (HBPR). In the continuous case we have:
 
     .. math::
        C_{HB}^c(\tau) = \frac{\sum_{ij}h_{ij}(t_0)h'_{ij}(t_0+\tau)}{\sum_{ij}h_{ij}(t_0)}
 
     where :math:`h'_{ij}(t_0+\tau)=1` if there is a H-bond between a pair
-    :math:`ij` during time interval :math:`t_0+\tau` (continuos) and
+    :math:`ij` during time interval :math:`t_0+\tau` (continuous) and
     :math:`h'_{ij}(t_0+\tau)=0` otherwise. In the intermittent case we have:
 
     .. math::
@@ -1373,6 +1369,9 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermitte
         # preload the frames (atom IDs) to a list of sets
         self.selected_ids = []
 
+        # fixme - to parallise: the section should be rewritten so that this loop only creates a list of indices,
+        # on which the parallel _single_frame can be applied.
+
         # skip frames that will not be used
         # Example: step 5 and tau 2: LLLSS LLLSS, ... where L = Load, and S = Skip
         # Intermittency means that we have to load the extra frames to know if the atom is actually missing.
@@ -1416,6 +1415,11 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermitte
         tau_timeseries, sp_timeseries, sp_timeseries_data = \
             autocorrelation(self.selected_ids, tau_max, window_jump,intermittency)
 
+        # warn the user if the NaN are found
+        if all(np.isnan(sp_timeseries[1:])):
+            # fixme - warn the user that the dataset. Use a standardised warning.
+            print('NaN Error: Most likely data was not found. Check your atom selections. ')
+
         # user can investigate the distribution and sample size
         self.sp_timeseries_data = sp_timeseries_data
 
diff --git a/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py b/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py
index e6dc29b0f35..04e09c48611 100644
--- a/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py
+++ b/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py
@@ -23,9 +23,9 @@
 from __future__ import print_function, absolute_import
 import MDAnalysis
 from MDAnalysis.analysis import waterdynamics
+from MDAnalysis.analysis.utils.autocorrelation import autocorrelation
 
 from MDAnalysisTests.datafiles import waterPSF, waterDCD
-from MDAnalysisTests.datafiles import PDB, XTC
 
 import pytest
 import numpy as np
@@ -92,7 +92,7 @@ def test_SurvivalProbability_t0tf(universe):
         select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop(2))   # atom IDs fed set by set
         sp = waterdynamics.SurvivalProbability(universe, "")
         sp.run(tau_max=3, start=2, stop=6)
-        assert_almost_equal(sp.sp_timeseries, [2 / 3.0, 1 / 3.0, 0])
+        assert_almost_equal(sp.sp_timeseries, [1, 2 / 3.0, 1 / 3.0, 0])
 
 
 def test_SurvivalProbability_definedTaus(universe):
@@ -101,7 +101,7 @@ def test_SurvivalProbability_definedTaus(universe):
         select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop())   # atom IDs fed set by set
         sp = waterdynamics.SurvivalProbability(universe, "")
         sp.run(tau_max=3, start=0, stop=6, verbose=True)
-        assert_almost_equal(sp.sp_timeseries, [2 / 3.0, 1 / 3.0, 0])
+        assert_almost_equal(sp.sp_timeseries, [1, 2 / 3.0, 1 / 3.0, 0])
 
 
 def test_SurvivalProbability_zeroMolecules(universe):
@@ -109,7 +109,7 @@ def test_SurvivalProbability_zeroMolecules(universe):
     with patch.object(universe, 'select_atoms', return_value=Mock(ids=[])) as select_atoms_mock:
         sp = waterdynamics.SurvivalProbability(universe, "")
         sp.run(tau_max=3, start=3, stop=6, verbose=True)
-        assert all(np.isnan(sp.sp_timeseries))
+        assert all(np.isnan(sp.sp_timeseries[1:]))
 
 
 def test_SurvivalProbability_alwaysPresent(universe):
@@ -125,7 +125,7 @@ def test_SurvivalProbability_stepLargerThanDtmax(universe):
     with patch.object(universe, 'select_atoms', return_value=Mock(ids=(1,))) as select_atoms_mock:
         sp = waterdynamics.SurvivalProbability(universe, "")
         sp.run(tau_max=2, step=5, stop=9, verbose=True)
-        assert_equal(sp.sp_timeseries, [1, 1])
+        assert_equal(sp.sp_timeseries, [1, 1, 1])
         # with tau_max=2 for all the frames we only read 6 of them
         # this is because the frames which are not used are skipped, and therefore 'select_atoms'
         assert universe.trajectory.n_frames > 6
@@ -150,8 +150,8 @@ def test_SurvivalProbability_intermittency1and2(universe):
         select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop())   # atom IDs fed set by set
         sp = waterdynamics.SurvivalProbability(universe, "")
         sp.run(tau_max=3, stop=9, verbose=True, intermittency=2)
-        assert all((x == set([9, 8]) for x in sp.selected_ids))
-        assert_almost_equal(sp.sp_timeseries, [1, 1, 1])
+        assert all((x == {9, 8} for x in sp.selected_ids))
+        assert_almost_equal(sp.sp_timeseries, [1, 1, 1, 1])
 
 
 def test_SurvivalProbability_intermittency2lacking(universe):
@@ -163,7 +163,7 @@ def test_SurvivalProbability_intermittency2lacking(universe):
         select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop())   # atom IDs fed set by set
         sp = waterdynamics.SurvivalProbability(universe, "")
         sp.run(tau_max=3, stop=8, verbose=True, intermittency=2)
-        assert_almost_equal(sp.sp_timeseries, [0, 0, 0])
+        assert_almost_equal(sp.sp_timeseries, [1, 0, 0, 0])
 
 
 def test_SurvivalProbability_intermittency1_step5_noSkipping(universe):
@@ -176,8 +176,9 @@ def test_SurvivalProbability_intermittency1_step5_noSkipping(universe):
         select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop())   # atom IDs fed set by set
         sp = waterdynamics.SurvivalProbability(universe, "")
         sp.run(tau_max=2, stop=9, verbose=True, intermittency=1, step=5)
-        assert all((x == set([2, 3]) for x in sp.selected_ids))
-        assert_almost_equal(sp.sp_timeseries, [1, 1])
+        assert all((x == {2, 3} for x in sp.selected_ids))
+        assert_almost_equal(sp.sp_timeseries, [1, 1, 1])
+
 
 def test_SurvivalProbability_intermittency1_step5_Skipping(universe):
     """
@@ -190,6 +191,79 @@ def test_SurvivalProbability_intermittency1_step5_Skipping(universe):
         select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop())   # atom IDs fed set by set
         sp = waterdynamics.SurvivalProbability(universe, "")
         sp.run(tau_max=1, stop=9, verbose=True, intermittency=1, step=5)
-        assert all((x == set([1]) for x in sp.selected_ids))
+        assert all((x == {1} for x in sp.selected_ids))
         assert len(sp.selected_ids) == beforepopsing
-        assert_almost_equal(sp.sp_timeseries, [1])
\ No newline at end of file
+        assert_almost_equal(sp.sp_timeseries, [1, 1])
+
+
+def test_autocorrelation_alwaysPresent():
+    input = [{1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}]
+    tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input, tau_max=3)
+    assert all(np.equal(sp_timeseries, 1))
+
+
+def test_autocorrelation_definedTaus():
+    input_ids = [{9, 8, 7}, {8, 7, 6}, {7, 6, 5}, {6, 5, 4}, {5, 4, 3}, {4, 3, 2}, {3, 2, 1}]
+    tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=3)
+    assert_almost_equal(sp_timeseries, [1, 2/3., 1/3., 0])
+
+
+def test_autocorrelation_intermittency1and2():
+    """
+    Intermittency of 2 means that we still count an atom if it is not present for up to 2 consecutive frames,
+    but then returns at the following step.
+    """
+    input_ids = [{9, 8}, set(), {8,}, {9,}, {8,}, set(), {9, 8}, set(), {8,}, {9, 8,}]
+    tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=3, intermittency=2)
+    # input_ids are modified in place, check if the gaps have been removed correctly
+    assert all((x == {9, 8} for x in input_ids))
+    assert_almost_equal(sp_timeseries, [1, 1, 1, 1])
+    
+
+def test_autocorrelation_intermittency2lacking():
+    """
+    If an atom is not present for more than 2 consecutive frames, it is considered to have left the region.
+    """
+    input_ids = [{9,}, {}, {}, {}, {9,}, {}, {}, {}, {9,}]
+    tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=3, intermittency=2)
+    assert_almost_equal(sp_timeseries, [1, 0, 0, 0])
+
+
+def test_autocorrelation_intermittency1_windowJump_intermittencyAll():
+    """
+    Step leads to skipping frames if (tau_max + 1) + (intermittency * 2) < step.
+    No frames should be skipped so intermittency should be applied to all.
+    """
+    input_ids = [{2, 3}, {3,}, {2, 3}, {3,}, {2,}, {3,}, {2, 3}, {3,}, {2, 3}, {2, 3}]
+    tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=2,
+                                                                        intermittency=1, window_jump=5)
+    assert all((x == {2, 3} for x in input_ids))
+    assert_almost_equal(sp_timeseries, [1, 1, 1])
+
+
+def test_autocorrelation_intermittency0_windowBigJump():
+    """
+    The empty sets are ignored (no interrttency)
+    """
+    input_ids = [{1}, {1}, {1}, set(), set(), {1}, {1}, {1}, set(), set(), {1}, {1}, {1}]
+    tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=2, window_jump=5)
+    assert_almost_equal(sp_timeseries, [1, 1, 1])
+
+
+def test_autocorrelation_intermittency0_windowBigJump_absence():
+    """
+    In the last frame the molecules are absent
+    """
+    input_ids = [{1}, {1}, {1}, set(), set(), {1}, {1}, {1}, set(), set(), {1}, set(), set()]
+    tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=2, window_jump=5)
+    assert_almost_equal(sp_timeseries, [1, 2/3., 2/3.])
+
+
+def test_autocorrelation_intermittency2_windowBigJump():
+    """
+    The intermittency corrects the last frame
+    """
+    input_ids = [{1}, {1}, {1}, set(), set(), {1}, {1}, {1}, set(), set(), {1}, set(), set(), {1}]
+    tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=2,
+                                                                        intermittency=2, window_jump=5)
+    assert_almost_equal(sp_timeseries, [1, 1, 1])

From b9d5996e6938abf6f0c066690b009d9ce3bd4ecc Mon Sep 17 00:00:00 2001
From: Mateusz Bieniek <bieniekmat@gmail.com>
Date: Sun, 14 Apr 2019 16:42:32 +0100
Subject: [PATCH 03/40] A more general documentation in the autocorrelation.

---
 .../analysis/utils/autocorrelation.py         | 22 +++++++++----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/package/MDAnalysis/analysis/utils/autocorrelation.py b/package/MDAnalysis/analysis/utils/autocorrelation.py
index 6f162e15222..dc4a186f040 100644
--- a/package/MDAnalysis/analysis/utils/autocorrelation.py
+++ b/package/MDAnalysis/analysis/utils/autocorrelation.py
@@ -21,15 +21,14 @@
 # J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
 #
 
-"""
-A flexible implementation of an autocorrelation function.
-"""
-
 import numpy as np
 
+
 # TODO add documentation
 def autocorrelation(list_of_sets, tau_max, window_jump=1, intermittency=0):
     """
+    The descrete implementation of the autocorrelation function.
+    fixme - change to the numpy documentation
 
     :param list_of_sets: Modifies in place!
     :param tau_max:
@@ -37,15 +36,16 @@ def autocorrelation(list_of_sets, tau_max, window_jump=1, intermittency=0):
     :param intermittency:
     :return:
     """
-    # FIXME - make sure that it is not just a list of numbers
+    # fixme - add checks if this is a list of sets
+    # fixme - check dimensions
 
     # correct the dataset for gaps (intermittency)
     _correct_intermittency(intermittency, list_of_sets)
 
-    # calculate Survival Probability
     tau_timeseries = list(range(1, tau_max + 1))
-    sp_timeseries_data = [[] for _ in range(tau_max)]
+    timeseries_data = [[] for _ in range(tau_max)]
 
+    # calculate autocorrelation
     for t in range(0, len(list_of_sets), window_jump):
         Nt = len(list_of_sets[t])
 
@@ -58,15 +58,15 @@ def autocorrelation(list_of_sets, tau_max, window_jump=1, intermittency=0):
 
             # continuous: IDs that survive from t to t + tau and at every frame in between
             Ntau = len(set.intersection(*list_of_sets[t:t + tau + 1]))
-            sp_timeseries_data[tau - 1].append(Ntau / float(Nt))
+            timeseries_data[tau - 1].append(Ntau / float(Nt))
 
-    sp_timeseries = [np.mean(sp) for sp in sp_timeseries_data]
+    timeseries = [np.mean(x) for x in timeseries_data]
 
     # at time 0 the value has to be one
     tau_timeseries.insert(0, 0)
-    sp_timeseries.insert(0, 1)
+    timeseries.insert(0, 1)
 
-    return tau_timeseries, sp_timeseries, sp_timeseries_data
+    return tau_timeseries, timeseries, timeseries_data
 
 
 def _correct_intermittency(intermittency, id_list, verbose=False):

From f4e3570abb21ba055465c101bd5b48dfff761427 Mon Sep 17 00:00:00 2001
From: Mateusz Bieniek <bieniekmat@gmail.com>
Date: Sun, 14 Apr 2019 16:43:03 +0100
Subject: [PATCH 04/40] Updated test cases to illuminate the bug in #2247.

---
 .../MDAnalysisTests/analysis/test_waterdynamics.py    | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py b/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py
index 04e09c48611..1317c77afa2 100644
--- a/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py
+++ b/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py
@@ -45,8 +45,17 @@ def universe():
 def test_HydrogenBondLifetimes(universe):
     hbl = waterdynamics.HydrogenBondLifetimes(
         universe, SELECTION1, SELECTION1, 0, 5, 3)
+    hbl.run(stop=5)
+    assert_almost_equal(hbl.timeseries[2], 0.75)
+
+
+def test_HydrogenBondLifetimes_growing_continuous(universe):
+    # The autocorrelation cannot grow
+    hbl = waterdynamics.HydrogenBondLifetimes(
+        universe, SELECTION1, SELECTION1, t0=0, tf=9, dtmax=5)
     hbl.run()
-    assert_almost_equal(hbl.timeseries[2][1], 0.75, 5)
+    # Index 0 in frameX[0] refers to the continuous, 1 to intermittent
+    assert all([frameX >= frameXplus1 for frameX, frameXplus1 in zip(hbl.timeseries, hbl.timeseries[1:])])
 
 
 def test_WaterOrientationalRelaxation(universe):

From d3a6a8a9eb8a9c18adf4aff16730d7681df646bc Mon Sep 17 00:00:00 2001
From: Mateusz Bieniek <bieniekmat@gmail.com>
Date: Sun, 14 Apr 2019 16:43:55 +0100
Subject: [PATCH 05/40] First switch to using autocorrelation that passes the
 tests.

---
 package/MDAnalysis/analysis/waterdynamics.py | 213 +++----------------
 1 file changed, 27 insertions(+), 186 deletions(-)

diff --git a/package/MDAnalysis/analysis/waterdynamics.py b/package/MDAnalysis/analysis/waterdynamics.py
index d5143fa989d..ad32d75074f 100644
--- a/package/MDAnalysis/analysis/waterdynamics.py
+++ b/package/MDAnalysis/analysis/waterdynamics.py
@@ -464,8 +464,8 @@ class HydrogenBondLifetimes(object):
     (HBL) proposed by D.C. Rapaport [Rapaport1983]_. From this function we can
     obtain the continuous and intermittent behavior of hydrogen bonds in
     time. A fast decay in these parameters indicate a fast change in HBs
-    connectivity. A slow decay indicate very stables hydrogen bonds, like in
-    ice. The HBL is also know as "Hydrogen Bond Population Relaxation"
+    connectivity. A slow decay indicate stable hydrogen bonds (e.g. ice).
+    The HBL is also know as "Hydrogen Bond Population Relaxation"
     (HBPR). In the continuous case we have:
 
     .. math::
@@ -507,6 +507,7 @@ class HydrogenBondLifetimes(object):
 
     def __init__(self, universe, selection1, selection2, t0, tf, dtmax,
                  nproc=1):
+        # fixme - use the same interface you used in SP
         self.universe = universe
         self.selection1 = selection1
         self.selection2 = selection2
@@ -516,196 +517,33 @@ def __init__(self, universe, selection1, selection2, t0, tf, dtmax,
         self.nproc = nproc
         self.timeseries = None
 
-    def _getC_i(self, HBP, t0, t):
-        """
-        This function give the intermitent Hydrogen Bond Lifetime
-        C_i = <h(t0)h(t)>/<h(t0)> between t0 and t
-        """
-        C_i = 0
-        for i in range(len(HBP[t0])):
-            for j in range(len(HBP[t])):
-                if (HBP[t0][i][0] == HBP[t][j][0] and HBP[t0][i][1] == HBP[t][j][1]):
-                    C_i += 1
-                    break
-        if len(HBP[t0]) == 0:
-            return 0.0
-        else:
-            return float(C_i) / len(HBP[t0])
 
-    def _getC_c(self, HBP, t0, t):
-        """
-        This function give the continous Hydrogen Bond Lifetime
-        C_c = <h(t0)h'(t)>/<h(t0)> between t0 and t
-        """
-        C_c = 0
-        dt = 1
-        begt0 = t0
-        HBP_cp = HBP
-        HBP_t0 = HBP[t0]
-        newHBP = []
-        if t0 == t:
-            return 1.0
-        while t0 + dt <= t:
-            for i in range(len(HBP_t0)):
-                for j in range(len(HBP_cp[t0 + dt])):
-                    if (HBP_t0[i][0] == HBP_cp[t0 + dt][j][0] and
-                        HBP_t0[i][1] == HBP_cp[t0 + dt][j][1]):
-                        newHBP.append(HBP_t0[i])
-                        break
-            C_c = len(newHBP)
-            t0 += dt
-            HBP_t0 = newHBP
-            newHBP = []
-        if len(HBP[begt0]) == 0:
-            return 0
-        else:
-            return C_c / float(len(HBP[begt0]))
+    def run(self, **kwargs):
+        # Get all hydrogen bonds
+        hydrogen_bonds = MDAnalysis.analysis.hbonds.HydrogenBondAnalysis(self.universe,
+                                                                 self.selection1,
+                                                                 self.selection2,
+                                                                 distance=3.5,
+                                                                 angle=120.0)
+        # fixme - use it only for the time requested
+        # fixme - this currently ignores the constructor parameters t0 tf etc
+        hydrogen_bonds.run(**kwargs)
 
-    def _intervC_c(self, HBP, t0, tf, dt):
-        """
-        This function gets all the data for the h(t0)h(t0+dt)', where
-        t0 = 1,2,3,...,tf. This function give us one point of the final plot
-        HBL vs t
-        """
-        a = 0
-        count = 0
-        for i in range(len(HBP)):
-            if (t0 + dt <= tf):
-                if t0 == t0 + dt:
-                    b = self._getC_c(HBP, t0, t0)
-                    break
-                b = self._getC_c(HBP, t0, t0 + dt)
-                t0 += dt
-                a += b
-                count += 1
-        if count == 0:
-            return 1.0
-        return a / count
-
-    def _intervC_i(self, HBP, t0, tf, dt):
-        """
-        This function gets all the data for the h(t0)h(t0+dt), where
-        t0 = 1,2,3,...,tf. This function give us a point of the final plot
-        HBL vs t
-        """
-        a = 0
-        count = 0
-        for i in range(len(HBP)):
-            if (t0 + dt <= tf):
-                b = self._getC_i(HBP, t0, t0 + dt)
-                t0 += dt
-                a += b
-                count += 1
-        return a / count
-
-    def _finalGraphGetC_i(self, HBP, t0, tf, maxdt):
-        """
-        This function gets the final data of the C_i graph.
-        """
-        output = []
-        for dt in range(maxdt):
-            a = self._intervC_i(HBP, t0, tf, dt)
-            output.append(a)
-        return output
+        # Extract the hydrogen bonds IDs only in the format [set(superset(x1,x2), superset(x3,x4)), set(), set()]
+        hb_ids = [{frozenset(bond[0:2]) for bond in frame} for frame in hydrogen_bonds.timeseries]
 
-    def _finalGraphGetC_c(self, HBP, t0, tf, maxdt):
-        """
-        This function gets the final data of the C_c graph.
-        """
-        output = []
-        for dt in range(maxdt):
-            a = self._intervC_c(HBP, t0, tf, dt)
-            output.append(a)
-        return output
+        # Calculate the
+        tau_timeseries, timeseries, timeseries_data = autocorrelation(hb_ids, self.dtmax)
 
-    def _getGraphics(self, HBP, t0, tf, maxdt):
-        """
-        Function that join all the results into a plot.
-        """
-        a = []
-        cont = self._finalGraphGetC_c(HBP, t0, tf, maxdt)
-        inte = self._finalGraphGetC_i(HBP, t0, tf, maxdt)
-        for i in range(len(cont)):
-            fix = [cont[i], inte[i]]
-            a.append(fix)
-        return a
+        # fixme - document
+        self.hydrogen_bonds = hydrogen_bonds.timeseries
 
-    def _HBA(self, ts, conn, universe, selAtom1, selAtom2,
-             verbose=False):
-        """
-        Main function for calculate C_i and C_c in parallel.
-        """
-        finalGetResidue1 = selAtom1
-        finalGetResidue2 = selAtom2
-        frame = ts.frame
-        h = MDAnalysis.analysis.hbonds.HydrogenBondAnalysis(universe,
-                                                            finalGetResidue1,
-                                                            finalGetResidue2,
-                                                            distance=3.5,
-                                                            angle=120.0,
-                                                            start=frame - 1,
-                                                            stop=frame)
-        while True:
-            try:
-                h.run(verbose=verbose)
-                break
-            except:
-                print("error")
-                print("trying again")
-                sys.stdout.flush()
-        sys.stdout.flush()
-        conn.send(h.timeseries[0])
-        conn.close()
+        # user can investigate the distribution and sample size
+        self.sp_timeseries_data = timeseries_data
+        self.tau_timeseries = tau_timeseries
+        self.timeseries = timeseries
+        return self
 
-    def run(self, **kwargs):
-        """Analyze trajectory and produce timeseries"""
-        h_list = []
-        i = 0
-        if (self.nproc > 1):
-            while i < len(self.universe.trajectory):
-                jobs = []
-                k = i
-                for j in range(self.nproc):
-                        # start
-                    print("ts=", i + 1)
-                    if i >= len(self.universe.trajectory):
-                        break
-                    conn_parent, conn_child = multiprocessing.Pipe(False)
-                    while True:
-                        try:
-                            # new thread
-                            jobs.append(
-                                (multiprocessing.Process(
-                                    target=self._HBA,
-                                    args=(self.universe.trajectory[i],
-                                          conn_child, self.universe,
-                                          self.selection1, self.selection2,)),
-                                 conn_parent))
-                            break
-                        except:
-                            print("error in jobs.append")
-                    jobs[j][0].start()
-                    i = i + 1
-
-                for j in range(self.nproc):
-                    if k >= len(self.universe.trajectory):
-                        break
-                    rec01 = jobs[j][1]
-                    received = rec01.recv()
-                    h_list.append(received)
-                    jobs[j][0].join()
-                    k += 1
-            self.timeseries = self._getGraphics(
-                h_list, 0, self.tf - 1, self.dtmax)
-        else:
-            h_list = MDAnalysis.analysis.hbonds.HydrogenBondAnalysis(self.universe,
-                                                                     self.selection1,
-                                                                     self.selection2,
-                                                                     distance=3.5,
-                                                                     angle=120.0)
-            h_list.run(**kwargs)
-            self.timeseries = self._getGraphics(
-                h_list.timeseries, self.t0, self.tf, self.dtmax)
 
 
 class WaterOrientationalRelaxation(object):
@@ -1347,6 +1185,9 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermitte
             tau from 1 to `tau_max`. Saved in the field tau_timeseries.
         sp_timeseries : list
             survival probability for each value of `tau`. Saved in the field sp_timeseries.
+        sp_timeseries_data: list
+            raw datapoints from which the average is taken (sp_timeseries).
+            Time dependancy and distribution can be extracted.
         """
 
         # backward compatibility (and priority)

From 236fad64b8a902744dd425988f698703e7eed841 Mon Sep 17 00:00:00 2001
From: Mateusz Bieniek <bieniekmat@gmail.com>
Date: Mon, 15 Apr 2019 23:28:11 +0100
Subject: [PATCH 06/40] The documentation updated with the code. The
 HydrogenBondAnalysis should ideally be extracted from the
 HydrogenBondLifetime and fed into it in the constructor.

---
 .../analysis/utils/autocorrelation.py         |  59 +++++--
 package/MDAnalysis/analysis/waterdynamics.py  | 154 +++++++++---------
 2 files changed, 124 insertions(+), 89 deletions(-)

diff --git a/package/MDAnalysis/analysis/utils/autocorrelation.py b/package/MDAnalysis/analysis/utils/autocorrelation.py
index dc4a186f040..50891765009 100644
--- a/package/MDAnalysis/analysis/utils/autocorrelation.py
+++ b/package/MDAnalysis/analysis/utils/autocorrelation.py
@@ -24,23 +24,48 @@
 import numpy as np
 
 
-# TODO add documentation
 def autocorrelation(list_of_sets, tau_max, window_jump=1, intermittency=0):
-    """
-    The descrete implementation of the autocorrelation function.
-    fixme - change to the numpy documentation
-
-    :param list_of_sets: Modifies in place!
-    :param tau_max:
-    :param window_jump:
-    :param intermittency:
-    :return:
+    r"""The descrete implementation of the autocorrelation function.
+
+    Here is a random equation that shows something.
+
+    .. math::
+       C_{HB}^c(\tau) = \frac{\sum_{ij}h_{ij}(t_0)h'_{ij}(t_0+\tau)}{\sum_{ij}h_{ij}(t_0)}
+
+
+
+    fixme -  list_of_sets: Modifies in place!
+    Parameters
+    ----------
+    list_of_sets : list
+      List of sets,
+    tau_max : int
+      The last tau (inclusive) for which to carry out autocorrelation.
+    window_jump : int, optional
+      The step for the t0 to perform autocorrelation (without the overlap). Default is 1.
+    intermittency : int, optional
+      Helps with the graps in the data. If we want to remove some of the fluctuations and focus on the patterns,
+      intermittency removes the gaps. The default intermittency=0 which means that if the datapoint is missing at any
+      frame, it is not counted. With the value of 2 the datapoint can be missing for 2 consecutive frames but it will
+      be counted as present.
+
+    Returns
+    --------
+    tau_timeseries : list of int
+        the tau for which the autocorrelation was calculated
+    timeseries : list of int
+        the autocorelation values for each of the tau values
+    timeseries_data : list of list of int
+        the raw data from which the autocorrelation is computed. The time dependant evolution can be investigated.
+
+    .. versionadded:: 0.19.2
     """
     # fixme - add checks if this is a list of sets
     # fixme - check dimensions
+    # fixme - check parameters?
 
     # correct the dataset for gaps (intermittency)
-    _correct_intermittency(intermittency, list_of_sets)
+    correct_intermittency(intermittency, list_of_sets)
 
     tau_timeseries = list(range(1, tau_max + 1))
     timeseries_data = [[] for _ in range(tau_max)]
@@ -69,15 +94,21 @@ def autocorrelation(list_of_sets, tau_max, window_jump=1, intermittency=0):
     return tau_timeseries, timeseries, timeseries_data
 
 
-def _correct_intermittency(intermittency, id_list, verbose=False):
+def correct_intermittency(intermittency, id_list, verbose=False):
     """
     Pre-process Consecutive Intermittency with a single pass over the data.
     If an atom is absent for a number of frames equal or smaller
     than the parameter intermittency, then correct the data and remove the absence.
     ie 7,A,A,7 with intermittency=2 will be replaced by 7,7,7,7, where A=absence
 
-    :param intermittency: the max gap allowed and to be corrected
-    :param id_list: modifies the selecteded IDs in place by adding atoms which left for <= :param intermittency
+    Parameters
+    ----------
+    intermittency: int
+        the max gap allowed and to be corrected
+    id_list: fixme
+        modifies the selecteded IDs in place by adding atoms which left for <= :param intermittency
+    verbose: Boolean, optional
+        print
     """
     if intermittency == 0:
         return
diff --git a/package/MDAnalysis/analysis/waterdynamics.py b/package/MDAnalysis/analysis/waterdynamics.py
index ad32d75074f..2635bacbbb3 100644
--- a/package/MDAnalysis/analysis/waterdynamics.py
+++ b/package/MDAnalysis/analysis/waterdynamics.py
@@ -450,15 +450,16 @@
 from six.moves import range, zip_longest
 
 import numpy as np
-import multiprocessing
 
 import MDAnalysis.analysis.hbonds
 from MDAnalysis.lib.log import ProgressMeter
 from .utils.autocorrelation import autocorrelation
 
 
+
 class HydrogenBondLifetimes(object):
     r"""Hydrogen bond lifetime analysis
+    fixme - update the definition of intermittency (refer the user to autocorrelation)
 
     This is an autocorrelation function that gives the "Hydrogen Bond Lifetimes"
     (HBL) proposed by D.C. Rapaport [Rapaport1983]_. From this function we can
@@ -492,48 +493,100 @@ class HydrogenBondLifetimes(object):
       It could be any selection available in MDAnalysis, not just water.
     selection2 : str
       Selection string to analize its HBL against selection1
-    t0 : int
-      frame  where analysis begins
-    tf : int
-      frame where analysis ends
-    dtmax : int
-      Maximum dt size, `dtmax` < `tf` or it will crash.
-    nproc : int
-      Number of processors to use, by default is 1.
-
+    verbose : Boolean, optional
+      When True, prints progress and comments to the console.
 
     .. versionadded:: 0.11.0
     """
 
-    def __init__(self, universe, selection1, selection2, t0, tf, dtmax,
-                 nproc=1):
-        # fixme - use the same interface you used in SP
+    def __init__(self, universe, selection1, selection2, t0=None, tf=None, dtmax=None, verbose=False):
         self.universe = universe
         self.selection1 = selection1
         self.selection2 = selection2
-        self.t0 = t0
-        self.tf = tf - 1
-        self.dtmax = dtmax
-        self.nproc = nproc
-        self.timeseries = None
+        self.verbose = verbose
 
+        # backward compatibility
+        self.start = self.stop = self.tau_max = None
+        if t0 is not None:
+            self.start = t0
+            warnings.warn("t0 is deprecated, use run(start) instead", category=DeprecationWarning)
+
+        if tf is not None:
+            self.stop = tf
+            warnings.warn("tf is deprecated, use run(stop) instead", category=DeprecationWarning)
+
+        if dtmax is not None:
+            self.tau_max = dtmax
+            warnings.warn("dtmax is deprecated, use run(tau_max) instead", category=DeprecationWarning)
+
+
+    def run(self, tau_max=20, start=0, stop=None, step=1, intermittency=0, verbose=False, **kwargs):
+        """
+        Computes and returns the Survival Probability (SP) timeseries
+
+        Parameters
+        ----------
+        start : int, optional
+            Zero-based index of the first frame to be analysed
+        stop : int, optional
+            Zero-based index of the last frame to be analysed (inclusive)
+        step : int, optional
+            Jump every `step`-th frame. This is compatible but independant of the taus used.
+            Note that `step` and `tau_max` work consistently with intermittency.
+        tau_max : int, optional
+            Survival probability is calculated for the range 1 <= `tau` <= `tau_max`
+        intermittency : int, optional
+            The maximum number of consecutive frames for which a bond can disappear but be counted as present if it
+            returns at the next frame. An intermittency of `0` is equivalent to a continuous autocorrelation, which does
+            not allow for the hydrogen bond disappearance. For example, for `intermittency=2`, any given hbond may
+            disappear for up to two consecutive frames yet be treated as being present at all frames.
+            The default is continuous (0).
+        verbose : Boolean, optional
+            Print the progress to the console
+
+        Returns
+        -------
+        tau_timeseries : list
+            tau from 1 to `tau_max`. Saved in the field tau_timeseries.
+        timeseries : list
+            survival probability for each value of `tau`. Saved in the field sp_timeseries.
+        timeseries_data: list
+            raw datapoints from which the average is taken (sp_timeseries).
+            Time dependancy and distribution can be extracted.
+        """
+
+        # backward compatibility (and priority)
+        start = self.start if self.start is not None else start
+        stop = self.stop if self.stop is not None else stop
+        tau_max = self.tau_max if self.tau_max is not None else tau_max
+
+        # sanity checks
+        if stop is not None and stop >= len(self.universe.trajectory):
+            raise ValueError("\"stop\" must be smaller than the number of frames in the trajectory.")
+
+        if stop is None:
+            stop = len(self.universe.trajectory)
+        else:
+            stop = stop + 1
+
+        if tau_max > (stop - start):
+            raise ValueError("Too few frames selected for given tau_max.")
 
-    def run(self, **kwargs):
         # Get all hydrogen bonds
+        # fixme - we should not have the frozen definition of the hydrogen bonds
+        # fixme - this will be upated with the new HydrogenBond interface
         hydrogen_bonds = MDAnalysis.analysis.hbonds.HydrogenBondAnalysis(self.universe,
                                                                  self.selection1,
                                                                  self.selection2,
                                                                  distance=3.5,
                                                                  angle=120.0)
-        # fixme - use it only for the time requested
-        # fixme - this currently ignores the constructor parameters t0 tf etc
-        hydrogen_bonds.run(**kwargs)
+        # Compute the minimal number of hydrogen bonds
+        hydrogen_bonds.run(start=start, stop=stop, step=step, verbose=verbose, **kwargs)
 
         # Extract the hydrogen bonds IDs only in the format [set(superset(x1,x2), superset(x3,x4)), set(), set()]
-        hb_ids = [{frozenset(bond[0:2]) for bond in frame} for frame in hydrogen_bonds.timeseries]
+        found_hydrogen_bonds = [{frozenset(bond[0:2]) for bond in frame} for frame in hydrogen_bonds.timeseries]
 
-        # Calculate the
-        tau_timeseries, timeseries, timeseries_data = autocorrelation(hb_ids, self.dtmax)
+        tau_timeseries, timeseries, timeseries_data = autocorrelation(found_hydrogen_bonds, self.dtmax)
 
         # fixme - document
         self.hydrogen_bonds = hydrogen_bonds.timeseries
@@ -1094,62 +1147,13 @@ def __init__(self, universe, selection, t0=None, tf=None, dtmax=None, verbose=Fa
             self.tau_max = dtmax
             warnings.warn("dtmax is deprecated, use run(tau_max=dtmax) instead", category=DeprecationWarning)
 
+
     def print(self, verbose, *args):
         if self.verbose:
             print(args)
         elif verbose:
             print(args)
 
-    def _correct_intermittency(self, intermittency, selected_ids):
-        """
-        Pre-process Consecutive Intermittency with a single pass over the data.
-        If an atom is absent for a number of frames equal or smaller
-        than the `intermittency`, then correct the data and remove the absence.
-        ie 7,A,A,7 with intermittency=2 will be replaced by 7,7,7,7, where A=absence
-
-        Parameters
-        ----------
-        intermittency : int
-            the max gap allowed and to be corrected
-        selected_ids: list of ids
-            modifies the selecteded IDs in place by adding atoms which left for <= `intermittency`
-        """
-        self.print('Correcting the selected IDs for intermittancy (gaps). ')
-        if intermittency == 0:
-            return
-
-        for i, ids in enumerate(selected_ids):
-            # initially update each frame as seen 0 ago (now)
-            seen_frames_ago = {i: 0 for i in ids}
-            for j in range(1, intermittency + 2):
-                for atomid in seen_frames_ago.keys():
-                    # no more frames
-                    if i + j >= len(selected_ids):
-                        continue
-
-                    # if the atom is absent now
-                    if not atomid in selected_ids[i + j]:
-                        # increase its absence counter
-                        seen_frames_ago[atomid] += 1
-                        continue
-
-                    # the atom is found
-                    if seen_frames_ago[atomid] == 0:
-                        # the atom was present in the last frame
-                        continue
-
-                    # it was absent more times than allowed
-                    if seen_frames_ago[atomid] > intermittency:
-                        continue
-
-                    # the atom was absent but returned (within <= intermittency)
-                    # add it to the frames where it was absent.
-                    # Introduce the corrections.
-                    for k in range(seen_frames_ago[atomid], 0, -1):
-                        selected_ids[i + j - k].add(atomid)
-
-                    seen_frames_ago[atomid] = 0
-
 
     def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermittency=0, verbose=False):
         """

From b3acf590d7ef319fba60e6eb596fe729a302142f Mon Sep 17 00:00:00 2001
From: Mateusz Bieniek <bieniekmat@gmail.com>
Date: Thu, 18 Apr 2019 13:20:34 +0100
Subject: [PATCH 07/40] Correction for the test to make it consistent with the
 previous author's test. Our end index is inclusive (5th frame, giving 6
 frames). But the other author's analysis was done on 0-4 frames (5 frames).

---
 package/MDAnalysis/analysis/waterdynamics.py  | 24 +++++++------------
 .../analysis/test_waterdynamics.py            |  5 ++--
 2 files changed, 12 insertions(+), 17 deletions(-)

diff --git a/package/MDAnalysis/analysis/waterdynamics.py b/package/MDAnalysis/analysis/waterdynamics.py
index 2635bacbbb3..a77751f3618 100644
--- a/package/MDAnalysis/analysis/waterdynamics.py
+++ b/package/MDAnalysis/analysis/waterdynamics.py
@@ -445,16 +445,16 @@
 """
 from __future__ import print_function, division, absolute_import
 
-import warnings
-
 from six.moves import range, zip_longest
-
+import logging
+import warnings
 import numpy as np
 
 import MDAnalysis.analysis.hbonds
 from MDAnalysis.lib.log import ProgressMeter
 from .utils.autocorrelation import autocorrelation
 
+logger = logging.getLogger('MDAnalysis.analysis.waterdynamics')
 
 
 class HydrogenBondLifetimes(object):
@@ -475,6 +475,7 @@ class HydrogenBondLifetimes(object):
     where :math:`h'_{ij}(t_0+\tau)=1` if there is a H-bond between a pair
     :math:`ij` during time interval :math:`t_0+\tau` (continuous) and
     :math:`h'_{ij}(t_0+\tau)=0` otherwise. In the intermittent case we have:
+    -fixme - update the intermittent definition
 
     .. math::
        C_{HB}^i(\tau) = \frac{\sum_{ij}h_{ij}(t_0)h_{ij}(t_0+\tau)}{\sum_{ij}h_{ij}(t_0)}
@@ -575,6 +576,7 @@ def run(self, tau_max=20, start=0, stop=None, step=1, intermittency=0, verbose=F
         # Get all hydrogen bonds
         # fixme - we should not have the frozen definition of the hydrogen bonds
         # fixme - this will be upated with the new HydrogenBond interface
+        # fixme - remove this from here and insert it into the example
         hydrogen_bonds = MDAnalysis.analysis.hbonds.HydrogenBondAnalysis(self.universe,
                                                                  self.selection1,
                                                                  self.selection2,
@@ -586,7 +588,7 @@ def run(self, tau_max=20, start=0, stop=None, step=1, intermittency=0, verbose=F
         # Extract the hydrogen bonds IDs only in the format [set(superset(x1,x2), superset(x3,x4)), set(), set()]
         found_hydrogen_bonds = [{frozenset(bond[0:2]) for bond in frame} for frame in hydrogen_bonds.timeseries]
 
-        tau_timeseries, timeseries, timeseries_data = autocorrelation(found_hydrogen_bonds, self.dtmax)
+        tau_timeseries, timeseries, timeseries_data = autocorrelation(found_hydrogen_bonds, self.tau_max)
 
         # fixme - document
         self.hydrogen_bonds = hydrogen_bonds.timeseries
@@ -1148,13 +1150,6 @@ def __init__(self, universe, selection, t0=None, tf=None, dtmax=None, verbose=Fa
             warnings.warn("dtmax is deprecated, use run(tau_max=dtmax) instead", category=DeprecationWarning)
 
 
-    def print(self, verbose, *args):
-        if self.verbose:
-            print(args)
-        elif verbose:
-            print(args)
-
-
     def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermittency=0, verbose=False):
         """
         Computes and returns the Survival Probability (SP) timeseries
@@ -1232,7 +1227,7 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermitte
         frame_no = start
         while frame_no < stop:      # we have already added 1 to stop, therefore <
             if num_frames_to_skip != 0 and frame_loaded_counter == frames_per_window:
-                self.print(verbose, "Skipping the next %d frames:" % num_frames_to_skip)
+                logger.info("Skipping the next %d frames:" % num_frames_to_skip)
                 frame_no += num_frames_to_skip
                 frame_loaded_counter = 0
                 # Correct the number of frames to be loaded after the first window (which starts at t=0, and
@@ -1243,7 +1238,7 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermitte
             # update the frame number
             self.universe.trajectory[frame_no]
 
-            self.print(verbose, "Loading frame:", self.universe.trajectory.ts)
+            logging.info("Loading frame:", self.universe.trajectory.ts)
             atoms = self.universe.select_atoms(self.selection)
 
             # SP of residues or of atoms
@@ -1262,8 +1257,7 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermitte
 
         # warn the user if the NaN are found
         if all(np.isnan(sp_timeseries[1:])):
-            # fixme - warn the user that the dataset. Use a standardised warning.
-            print('NaN Error: Most likely data was not found. Check your atom selections. ')
+            logging.warning('NaN Error: Most likely data was not found. Check your atom selections. ')
 
         # user can investigate the distribution and sample size
         self.sp_timeseries_data = sp_timeseries_data
diff --git a/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py b/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py
index 1317c77afa2..978b82bc29b 100644
--- a/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py
+++ b/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py
@@ -44,9 +44,10 @@ def universe():
 
 def test_HydrogenBondLifetimes(universe):
     hbl = waterdynamics.HydrogenBondLifetimes(
-        universe, SELECTION1, SELECTION1, 0, 5, 3)
+        universe, SELECTION1, SELECTION1, 0, 4, 3)
     hbl.run(stop=5)
-    assert_almost_equal(hbl.timeseries[2], 0.75)
+    assert_almost_equal(hbl.timeseries[3], 0.75)
+
 
 
 def test_HydrogenBondLifetimes_growing_continuous(universe):

From edb1f8ffb57243ce4c0afad094504a13ce9f1c8f Mon Sep 17 00:00:00 2001
From: Mateusz Bieniek <bieniekmat@gmail.com>
Date: Thu, 18 Apr 2019 18:15:26 +0100
Subject: [PATCH 08/40] Extracting the autocorrelation and intermittency into
 their own functions with their own test cases.

Removed the class HydrogenBondLifetimes and moved the autocorrelation
functionality into the hbond_analysis class as a .autocorrelation
method. This method should replace also hbond_autocorrel.py file. Test
cases for the new method .autocorrelation are added.

Added a test cases that, with our definition
of intermittency, would fail in the case of the hbond_autocorrel.py.
---
 .../analysis/hbonds/hbond_analysis.py         |  55 +++++
 .../analysis/hbonds/hbond_autocorrel.py       |   5 +
 .../analysis/utils/autocorrelation.py         |  40 ++-
 package/MDAnalysis/analysis/waterdynamics.py  | 138 ++---------
 .../MDAnalysisTests/analysis/test_hbonds.py   |  33 +++
 .../analysis/test_waterdynamics.py            | 232 +++++++++---------
 6 files changed, 251 insertions(+), 252 deletions(-)

diff --git a/package/MDAnalysis/analysis/hbonds/hbond_analysis.py b/package/MDAnalysis/analysis/hbonds/hbond_analysis.py
index b0aefe862f6..6ac363ff59c 100644
--- a/package/MDAnalysis/analysis/hbonds/hbond_analysis.py
+++ b/package/MDAnalysis/analysis/hbonds/hbond_analysis.py
@@ -333,6 +333,8 @@ class HydrogenBondAnalysis_OtherFF(HydrogenBondAnalysis):
 from MDAnalysis.lib import distances
 from MDAnalysis.lib.util import deprecate
 
+from ..utils.autocorrelation import autocorrelation, correct_intermittency
+
 
 logger = logging.getLogger('MDAnalysis.analysis.hbonds')
 
@@ -1425,3 +1427,56 @@ def _make_dict(donors, hydrogens):
         h2donor.update(_make_dict(s1d, s1h))
 
         return h2donor
+
+
+    def autocorrelation(self, tau_max=20, window_step=1, intermittency=0):
+        """
+        Computes and returns the autocorrelation (HydrogenBondLifetimes ) on the computed hydrogen bonds.
+
+        Parameters
+        ----------
+        window_step : int, optional
+            Jump every `step`-th frame. This is compatible but independant of the taus used.
+            Note that `step` and `tau_max` work consistently with intermittency.
+        tau_max : int, optional
+            Survival probability is calculated for the range 1 <= `tau` <= `tau_max`
+        intermittency : int, optional
+            The maximum number of consecutive frames for which a bond can disappear but be counted as present if it
+            returns at the next frame. An intermittency of `0` is equivalent to a continuous autocorrelation, which does
+            not allow for the hydrogen bond disappearance. For example, for `intermittency=2`, any given hbond may
+            disappear for up to two consecutive frames yet be treated as being present at all frames.
+            The default is continuous (0).
+
+        Returns
+        -------
+        tau_timeseries : list
+            tau from 1 to `tau_max`. Saved in the field tau_timeseries.
+        timeseries : list
+            autcorrelation value for each value of `tau`. Saved in the field timeseries.
+        timeseries_data: list
+            raw datapoints from which the average is taken (timeseries).
+            Time dependancy and distribution can be extracted.
+        """
+
+        if self._timeseries is None:
+            logging.error("Autocorrelation analysis of hydrogen bonds cannot be done before the hydrogen bonds are found")
+            logging.error("Autocorrelation: Please use the .run() before calling this function")
+            return
+
+        if self.step != 1:
+            logging.warning("Autocorrelation function should be carried out on consecutive frames. ")
+            logging.warning("Autocorrelation: if you would like to allow bonds to break and reform, please use 'intermittency'")
+
+        # Extract the hydrogen bonds IDs only in the format [set(superset(x1,x2), superset(x3,x4)), ..]
+        hydrogen_bonds = self.timeseries
+        found_hydrogen_bonds = [{frozenset(bond[0:2]) for bond in frame} for frame in hydrogen_bonds]
+
+        intermittent_hbonds = correct_intermittency(found_hydrogen_bonds, intermittency=intermittency)
+        tau_timeseries, timeseries, timeseries_data = autocorrelation(intermittent_hbonds, tau_max,
+                                                                      window_step=window_step)
+        self.acf_tau_timeseries = tau_timeseries
+        self.acf_timeseries = timeseries
+        # user can investigate the distribution and sample size
+        self.acf_timeseries_data = timeseries_data
+
+        return self
diff --git a/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py b/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py
index 920ce718707..67f03a325c3 100644
--- a/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py
+++ b/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py
@@ -296,6 +296,11 @@ def __init__(self, universe,
                  nruns=1,  # number of times to iterate through the trajectory
                  nsamples=50,  # number of different points to sample in a run
                  pbc=True):
+
+        warnings.warn("This class is deprecated, use analysis.hbonds.HydrogenBondAnalysis "
+                      "which has .autocorrelation function",
+                      category=DeprecationWarning)
+
         self.u = universe
         # check that slicing is possible
         try:
diff --git a/package/MDAnalysis/analysis/utils/autocorrelation.py b/package/MDAnalysis/analysis/utils/autocorrelation.py
index 50891765009..efb25c40623 100644
--- a/package/MDAnalysis/analysis/utils/autocorrelation.py
+++ b/package/MDAnalysis/analysis/utils/autocorrelation.py
@@ -22,9 +22,10 @@
 #
 
 import numpy as np
+from copy import deepcopy
 
 
-def autocorrelation(list_of_sets, tau_max, window_jump=1, intermittency=0):
+def autocorrelation(list_of_sets, tau_max, window_step=1):
     r"""The descrete implementation of the autocorrelation function.
 
     Here is a random equation that shows something.
@@ -33,7 +34,6 @@ def autocorrelation(list_of_sets, tau_max, window_jump=1, intermittency=0):
        C_{HB}^c(\tau) = \frac{\sum_{ij}h_{ij}(t_0)h'_{ij}(t_0+\tau)}{\sum_{ij}h_{ij}(t_0)}
 
 
-
     fixme -  list_of_sets: Modifies in place!
     Parameters
     ----------
@@ -41,8 +41,9 @@ def autocorrelation(list_of_sets, tau_max, window_jump=1, intermittency=0):
       List of sets,
     tau_max : int
       The last tau (inclusive) for which to carry out autocorrelation.
-    window_jump : int, optional
+    window_step : int, optional
       The step for the t0 to perform autocorrelation (without the overlap). Default is 1.
+    fixme - move intermittency to the right place
     intermittency : int, optional
       Helps with the graps in the data. If we want to remove some of the fluctuations and focus on the patterns,
       intermittency removes the gaps. The default intermittency=0 which means that if the datapoint is missing at any
@@ -64,14 +65,11 @@ def autocorrelation(list_of_sets, tau_max, window_jump=1, intermittency=0):
     # fixme - check dimensions
     # fixme - check parameters?
 
-    # correct the dataset for gaps (intermittency)
-    correct_intermittency(intermittency, list_of_sets)
-
     tau_timeseries = list(range(1, tau_max + 1))
     timeseries_data = [[] for _ in range(tau_max)]
 
     # calculate autocorrelation
-    for t in range(0, len(list_of_sets), window_jump):
+    for t in range(0, len(list_of_sets), window_step):
         Nt = len(list_of_sets[t])
 
         if Nt == 0:
@@ -93,8 +91,8 @@ def autocorrelation(list_of_sets, tau_max, window_jump=1, intermittency=0):
 
     return tau_timeseries, timeseries, timeseries_data
 
-
-def correct_intermittency(intermittency, id_list, verbose=False):
+# fixme - is intermittency consitent with list of sets of sets? (hydrogen bonds)
+def correct_intermittency(list_of_sets, intermittency):
     """
     Pre-process Consecutive Intermittency with a single pass over the data.
     If an atom is absent for a number of frames equal or smaller
@@ -103,30 +101,27 @@ def correct_intermittency(intermittency, id_list, verbose=False):
 
     Parameters
     ----------
+    id_list: list of sets
+        returns a new list with the IDs with added IDs which disappeared for <= :param intermittency
     intermittency: int
         the max gap allowed and to be corrected
-    id_list: fixme
-        modifies the selecteded IDs in place by adding atoms which left for <= :param intermittency
-    verbose: Boolean, optional
-        print
     """
     if intermittency == 0:
-        return
+        return list_of_sets
 
-    if verbose:
-        print('Correcting the selected IDs for intermittancy (gaps). ')
+    list_of_sets = deepcopy(list_of_sets)
 
-    for i, ids in enumerate(id_list):
+    for i, ids in enumerate(list_of_sets):
         # initially update each frame as seen 0 ago (now)
         seen_frames_ago = {i: 0 for i in ids}
         for j in range(1, intermittency + 2):
             for atomid in seen_frames_ago.keys():
                 # no more frames
-                if i + j >= len(id_list):
+                if i + j >= len(list_of_sets):
                     continue
 
                 # if the atom is absent now
-                if not atomid in id_list[i + j]:
+                if not atomid in list_of_sets[i + j]:
                     # increase its absence counter
                     seen_frames_ago[atomid] += 1
                     continue
@@ -140,10 +135,11 @@ def correct_intermittency(intermittency, id_list, verbose=False):
                 if seen_frames_ago[atomid] > intermittency:
                     continue
 
-                # the atom was absent but returned (within <= intermittency)
+                # the atom was absent but returned (within <= intermittency_value)
                 # add it to the frames where it was absent.
                 # Introduce the corrections.
                 for k in range(seen_frames_ago[atomid], 0, -1):
-                    id_list[i + j - k].add(atomid)
+                    list_of_sets[i + j - k].add(atomid)
 
-                seen_frames_ago[atomid] = 0
\ No newline at end of file
+                seen_frames_ago[atomid] = 0
+    return list_of_sets
\ No newline at end of file
diff --git a/package/MDAnalysis/analysis/waterdynamics.py b/package/MDAnalysis/analysis/waterdynamics.py
index a77751f3618..1f578f2a7ed 100644
--- a/package/MDAnalysis/analysis/waterdynamics.py
+++ b/package/MDAnalysis/analysis/waterdynamics.py
@@ -450,32 +450,30 @@
 import warnings
 import numpy as np
 
-import MDAnalysis.analysis.hbonds
 from MDAnalysis.lib.log import ProgressMeter
-from .utils.autocorrelation import autocorrelation
+from .utils.autocorrelation import autocorrelation, correct_intermittency
 
 logger = logging.getLogger('MDAnalysis.analysis.waterdynamics')
 
 
 class HydrogenBondLifetimes(object):
     r"""Hydrogen bond lifetime analysis
-    fixme - update the definition of intermittency (refer the user to autocorrelation)
+    fixme - deprecate it
 
-    This is an autocorrelation function that gives the "Hydrogen Bond Lifetimes"
+    This is a autocorrelation function that gives the "Hydrogen Bond Lifetimes"
     (HBL) proposed by D.C. Rapaport [Rapaport1983]_. From this function we can
     obtain the continuous and intermittent behavior of hydrogen bonds in
     time. A fast decay in these parameters indicate a fast change in HBs
-    connectivity. A slow decay indicate stable hydrogen bonds (e.g. ice).
-    The HBL is also know as "Hydrogen Bond Population Relaxation"
-    (HBPR). In the continuous case we have:
+    connectivity. A slow decay indicate very stables hydrogen bonds, like in
+    ice. The HBL is also know as "Hydrogen Bond Population Relaxation"
+    (HBPR). In the continuos case we have:
 
     .. math::
        C_{HB}^c(\tau) = \frac{\sum_{ij}h_{ij}(t_0)h'_{ij}(t_0+\tau)}{\sum_{ij}h_{ij}(t_0)}
 
     where :math:`h'_{ij}(t_0+\tau)=1` if there is a H-bond between a pair
-    :math:`ij` during time interval :math:`t_0+\tau` (continuous) and
+    :math:`ij` during time interval :math:`t_0+\tau` (continuos) and
     :math:`h'_{ij}(t_0+\tau)=0` otherwise. In the intermittent case we have:
-    -fixme - update the intermittent definition
 
     .. math::
        C_{HB}^i(\tau) = \frac{\sum_{ij}h_{ij}(t_0)h_{ij}(t_0+\tau)}{\sum_{ij}h_{ij}(t_0)}
@@ -494,111 +492,24 @@ class HydrogenBondLifetimes(object):
       It could be any selection available in MDAnalysis, not just water.
     selection2 : str
       Selection string to analize its HBL against selection1
-    verbose : Boolean, optional
-      When True, prints progress and comments to the console.
+    t0 : int
+      frame  where analysis begins
+    tf : int
+      frame where analysis ends
+    dtmax : int
+      Maximum dt size, `dtmax` < `tf` or it will crash.
+    nproc : int
+      Number of processors to use, by default is 1.
+
 
     .. versionadded:: 0.11.0
     """
 
     def __init__(self, universe, selection1, selection2, t0=None, tf=None, dtmax=None, verbose=False):
-        self.universe = universe
-        self.selection1 = selection1
-        self.selection2 = selection2
-        self.verbose = verbose
-
-        # backward compatibility
-        self.start = self.stop = self.tau_max = None
-        if t0 is not None:
-            self.start = t0
-            warnings.warn("t0 is deprecated, use run(start) instead", category=DeprecationWarning)
-
-        if tf is not None:
-            self.stop = tf
-            warnings.warn("tf is deprecated, use run(stop) instead", category=DeprecationWarning)
-
-        if dtmax is not None:
-            self.tau_max = dtmax
-            warnings.warn("dtmax is deprecated, use run(tau_max) instead", category=DeprecationWarning)
-
-
-    def run(self, tau_max=20, start=0, stop=None, step=1, intermittency=0, verbose=False, **kwargs):
-        """
-        Computes and returns the Survival Probability (SP) timeseries
-
-        Parameters
-        ----------
-        start : int, optional
-            Zero-based index of the first frame to be analysed
-        stop : int, optional
-            Zero-based index of the last frame to be analysed (inclusive)
-        step : int, optional
-            Jump every `step`-th frame. This is compatible but independant of the taus used.
-            Note that `step` and `tau_max` work consistently with intermittency.
-        tau_max : int, optional
-            Survival probability is calculated for the range 1 <= `tau` <= `tau_max`
-        intermittency : int, optional
-            The maximum number of consecutive frames for which a bond can disappear but be counted as present if it
-            returns at the next frame. An intermittency of `0` is equivalent to a continuous autocorrelation, which does
-            not allow for the hydrogen bond disappearance. For example, for `intermittency=2`, any given hbond may
-            disappear for up to two consecutive frames yet be treated as being present at all frames.
-            The default is continuous (0).
-        verbose : Boolean, optional
-            Print the progress to the console
-
-        Returns
-        -------
-        tau_timeseries : list
-            tau from 1 to `tau_max`. Saved in the field tau_timeseries.
-        timeseries : list
-            survival probability for each value of `tau`. Saved in the field sp_timeseries.
-        timeseries_data: list
-            raw datapoints from which the average is taken (sp_timeseries).
-            Time dependancy and distribution can be extracted.
-        """
-
-        # backward compatibility (and priority)
-        start = self.start if self.start is not None else start
-        stop = self.stop if self.stop is not None else stop
-        tau_max = self.tau_max if self.tau_max is not None else tau_max
-
-        # sanity checks
-        if stop is not None and stop >= len(self.universe.trajectory):
-            raise ValueError("\"stop\" must be smaller than the number of frames in the trajectory.")
-
-        if stop is None:
-            stop = len(self.universe.trajectory)
-        else:
-            stop = stop + 1
-
-        if tau_max > (stop - start):
-            raise ValueError("Too few frames selected for given tau_max.")
-
-        # Get all hydrogen bonds
-        # fixme - we should not have the frozen definition of the hydrogen bonds
-        # fixme - this will be upated with the new HydrogenBond interface
-        # fixme - remove this from here and insert it into the example
-        hydrogen_bonds = MDAnalysis.analysis.hbonds.HydrogenBondAnalysis(self.universe,
-                                                                 self.selection1,
-                                                                 self.selection2,
-                                                                 distance=3.5,
-                                                                 angle=120.0)
-        # Compute the minimal number of hydrogen bonds
-        hydrogen_bonds.run(start=start, stop=stop, step=step, verbose=verbose, **kwargs)
-
-        # Extract the hydrogen bonds IDs only in the format [set(superset(x1,x2), superset(x3,x4)), set(), set()]
-        found_hydrogen_bonds = [{frozenset(bond[0:2]) for bond in frame} for frame in hydrogen_bonds.timeseries]
-
-        tau_timeseries, timeseries, timeseries_data = autocorrelation(found_hydrogen_bonds, self.tau_max)
-
-        # fixme - document
-        self.hydrogen_bonds = hydrogen_bonds.timeseries
-
-        # user can investigate the distribution and sample size
-        self.sp_timeseries_data = timeseries_data
-        self.tau_timeseries = tau_timeseries
-        self.timeseries = timeseries
-        return self
-
+        warnings.warn("This class is deprecated, use analysis.hbonds.HydrogenBondAnalysis "
+                      "which has .autocorrelation function",
+                      category=DeprecationWarning)
+        pass
 
 
 class WaterOrientationalRelaxation(object):
@@ -1207,7 +1118,7 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermitte
             raise ValueError("Too few frames selected for given tau_max.")
 
         # preload the frames (atom IDs) to a list of sets
-        self.selected_ids = []
+        self._selected_ids = []
 
         # fixme - to parallise: the section should be rewritten so that this loop only creates a list of indices,
         # on which the parallel _single_frame can be applied.
@@ -1243,7 +1154,7 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermitte
 
             # SP of residues or of atoms
             ids = atoms.residues.resids if residues else atoms.ids
-            self.selected_ids.append(set(ids))
+            self._selected_ids.append(set(ids))
 
             frame_no += 1
             frame_loaded_counter += 1
@@ -1252,8 +1163,9 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermitte
         # and for extra frames that were loaded (intermittency)
         window_jump = step - num_frames_to_skip
 
-        tau_timeseries, sp_timeseries, sp_timeseries_data = \
-            autocorrelation(self.selected_ids, tau_max, window_jump,intermittency)
+        self._intermittent_selected_ids = correct_intermittency(self._selected_ids, intermittency=intermittency)
+        tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(self._intermittent_selected_ids,
+                                                                            tau_max, window_jump)
 
         # warn the user if the NaN are found
         if all(np.isnan(sp_timeseries[1:])):
diff --git a/testsuite/MDAnalysisTests/analysis/test_hbonds.py b/testsuite/MDAnalysisTests/analysis/test_hbonds.py
index 14223178abe..7b0076ae1cd 100644
--- a/testsuite/MDAnalysisTests/analysis/test_hbonds.py
+++ b/testsuite/MDAnalysisTests/analysis/test_hbonds.py
@@ -397,3 +397,36 @@ def test_table_atoms(self, h, normalized_timeseries, reference_table):
         # https://github.com/MDAnalysis/mdanalysis/issues/801)
         for name, ref in reference_table.items():
             assert_array_equal(h.table.field(name), ref, err_msg="resname for {0} do not match (Issue #801)")
+
+
+class TestHydrogenBondAnalysisAutocorrelation(object):
+    @staticmethod
+    @pytest.fixture(scope='class')
+    def universe():
+        return MDAnalysis.Universe(waterPSF, waterDCD)
+
+    @staticmethod
+    @pytest.fixture(scope='class')
+    def selection():
+        return "byres name OH2"
+
+    def test_HydrogenBondLifetimes(self, universe, selection):
+        hbonds = MDAnalysis.analysis.hbonds.HydrogenBondAnalysis(universe,
+                                                                selection,
+                                                                selection,
+                                                                distance=3.5,
+                                                                angle=120.0)
+        hbonds.run(start=0, stop=4)
+        hbonds.autocorrelation(tau_max=3)
+        assert_almost_equal(hbonds.acf_timeseries[3], 0.75)
+
+    def test_HydrogenBondLifetimes_growing_continuous(self, universe, selection):
+        hbonds = MDAnalysis.analysis.hbonds.HydrogenBondAnalysis(universe,
+                                                                selection,
+                                                                selection,
+                                                                distance=3.5,
+                                                                angle=120.0)
+        hbonds.run(start=0, stop=9)
+        hbonds.autocorrelation(tau_max=5)
+        assert all([frame >= framePlus1 for frame, framePlus1 in zip(hbonds.acf_timeseries, hbonds.acf_timeseries[1:])])
+
diff --git a/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py b/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py
index 978b82bc29b..e9a4440c7c5 100644
--- a/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py
+++ b/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py
@@ -23,7 +23,7 @@
 from __future__ import print_function, absolute_import
 import MDAnalysis
 from MDAnalysis.analysis import waterdynamics
-from MDAnalysis.analysis.utils.autocorrelation import autocorrelation
+from MDAnalysis.analysis.utils.autocorrelation import autocorrelation, correct_intermittency
 
 from MDAnalysisTests.datafiles import waterPSF, waterDCD
 
@@ -42,23 +42,6 @@ def universe():
     return MDAnalysis.Universe(waterPSF, waterDCD)
 
 
-def test_HydrogenBondLifetimes(universe):
-    hbl = waterdynamics.HydrogenBondLifetimes(
-        universe, SELECTION1, SELECTION1, 0, 4, 3)
-    hbl.run(stop=5)
-    assert_almost_equal(hbl.timeseries[3], 0.75)
-
-
-
-def test_HydrogenBondLifetimes_growing_continuous(universe):
-    # The autocorrelation cannot grow
-    hbl = waterdynamics.HydrogenBondLifetimes(
-        universe, SELECTION1, SELECTION1, t0=0, tf=9, dtmax=5)
-    hbl.run()
-    # Index 0 in frameX[0] refers to the continuous, 1 to intermittent
-    assert all([frameX >= frameXplus1 for frameX, frameXplus1 in zip(hbl.timeseries, hbl.timeseries[1:])])
-
-
 def test_WaterOrientationalRelaxation(universe):
     wor = waterdynamics.WaterOrientationalRelaxation(
         universe, SELECTION1, 0, 5, 2)
@@ -96,77 +79,24 @@ def test_MeanSquareDisplacement_zeroMolecules(universe):
     assert_almost_equal(msd_zero.timeseries[1], 0.0)
 
 
-def test_SurvivalProbability_t0tf(universe):
-    with patch.object(universe, 'select_atoms') as select_atoms_mock:
-        ids = [(0, ), (0, ), (7, 6, 5), (6, 5, 4), (5, 4, 3), (4, 3, 2), (3, 2, 1), (0, )]
-        select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop(2))   # atom IDs fed set by set
-        sp = waterdynamics.SurvivalProbability(universe, "")
-        sp.run(tau_max=3, start=2, stop=6)
-        assert_almost_equal(sp.sp_timeseries, [1, 2 / 3.0, 1 / 3.0, 0])
-
-
-def test_SurvivalProbability_definedTaus(universe):
-    with patch.object(universe, 'select_atoms') as select_atoms_mock:
-        ids = [(9, 8, 7), (8, 7, 6), (7, 6, 5), (6, 5, 4), (5, 4, 3), (4, 3, 2), (3, 2, 1)]
-        select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop())   # atom IDs fed set by set
-        sp = waterdynamics.SurvivalProbability(universe, "")
-        sp.run(tau_max=3, start=0, stop=6, verbose=True)
-        assert_almost_equal(sp.sp_timeseries, [1, 2 / 3.0, 1 / 3.0, 0])
-
-
-def test_SurvivalProbability_zeroMolecules(universe):
-    # no atom IDs found
-    with patch.object(universe, 'select_atoms', return_value=Mock(ids=[])) as select_atoms_mock:
-        sp = waterdynamics.SurvivalProbability(universe, "")
-        sp.run(tau_max=3, start=3, stop=6, verbose=True)
-        assert all(np.isnan(sp.sp_timeseries[1:]))
-
-
-def test_SurvivalProbability_alwaysPresent(universe):
-    # always the same atom IDs found, 7 and 8
-    with patch.object(universe, 'select_atoms', return_value=Mock(ids=[7, 8])) as select_atoms_mock:
-        sp = waterdynamics.SurvivalProbability(universe, "")
-        sp.run(tau_max=3, start=0, stop=6, verbose=True)
-        assert all(np.equal(sp.sp_timeseries, 1))
-
-
-def test_SurvivalProbability_stepLargerThanDtmax(universe):
-    # Testing if the frames are skipped correctly
-    with patch.object(universe, 'select_atoms', return_value=Mock(ids=(1,))) as select_atoms_mock:
-        sp = waterdynamics.SurvivalProbability(universe, "")
-        sp.run(tau_max=2, step=5, stop=9, verbose=True)
-        assert_equal(sp.sp_timeseries, [1, 1, 1])
-        # with tau_max=2 for all the frames we only read 6 of them
-        # this is because the frames which are not used are skipped, and therefore 'select_atoms'
-        assert universe.trajectory.n_frames > 6
-        assert_equal(select_atoms_mock.call_count, 6)
-
-
-def test_SurvivalProbability_stepEqualDtMax(universe):
-    with patch.object(universe, 'select_atoms', return_value=Mock(ids=(1,))) as select_atoms_mock:
-        sp = waterdynamics.SurvivalProbability(universe, "")
-        sp.run(tau_max=4, step=5, stop=9, verbose=True)
-        # all frames from 0, with 9 inclusive
-        assert_equal(select_atoms_mock.call_count, 10)
-
-
 def test_SurvivalProbability_intermittency1and2(universe):
     """
     Intermittency of 2 means that we still count an atom if it is not present for up to 2 consecutive frames,
     but then returns at the following step.
     """
     with patch.object(universe, 'select_atoms') as select_atoms_mock:
-        ids = [(9, 8), (), (8,), (9,), (8,), (), (9,8), (), (8,), (9,8,)]
+        ids = [(9, 8), (), (8,), (9,), (8,), (), (9, 8), (), (8,), (9, 8)]
         select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop())   # atom IDs fed set by set
         sp = waterdynamics.SurvivalProbability(universe, "")
         sp.run(tau_max=3, stop=9, verbose=True, intermittency=2)
-        assert all((x == {9, 8} for x in sp.selected_ids))
+        assert all(x == {9, 8} for x in sp._intermittent_selected_ids)
         assert_almost_equal(sp.sp_timeseries, [1, 1, 1, 1])
 
 
 def test_SurvivalProbability_intermittency2lacking(universe):
     """
-    If an atom is not present for more than 2 consecutive frames, it is considered to have left the region.
+    If an atom is not present for more than 2 consecutive frames,
+    it is considered to have left the region.
     """
     with patch.object(universe, 'select_atoms') as select_atoms_mock:
         ids = [(9,), (), (), (), (9,), (), (), (), (9,)]
@@ -186,7 +116,7 @@ def test_SurvivalProbability_intermittency1_step5_noSkipping(universe):
         select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop())   # atom IDs fed set by set
         sp = waterdynamics.SurvivalProbability(universe, "")
         sp.run(tau_max=2, stop=9, verbose=True, intermittency=1, step=5)
-        assert all((x == {2, 3} for x in sp.selected_ids))
+        assert all((x == {2, 3} for x in sp._intermittent_selected_ids))
         assert_almost_equal(sp.sp_timeseries, [1, 1, 1])
 
 
@@ -201,11 +131,42 @@ def test_SurvivalProbability_intermittency1_step5_Skipping(universe):
         select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop())   # atom IDs fed set by set
         sp = waterdynamics.SurvivalProbability(universe, "")
         sp.run(tau_max=1, stop=9, verbose=True, intermittency=1, step=5)
-        assert all((x == {1} for x in sp.selected_ids))
-        assert len(sp.selected_ids) == beforepopsing
+        assert all((x == {1} for x in sp._intermittent_selected_ids))
+        assert len(sp._selected_ids) == beforepopsing
         assert_almost_equal(sp.sp_timeseries, [1, 1])
 
 
+def test_intermittency_none():
+    # No changes asked - returns the same data
+    input_ids = [{1}, {1}, {1}, set(), set(), {1}, {1}, {1}, set(), set(), {1}, set(), set(), {1}]
+    corrected = correct_intermittency(input_ids, intermittency=0)
+    assert all(x == y for x,y in zip(input_ids, corrected))
+
+
+def test_intermittency_1and2():
+    # The maximum gap in the dataset is 2, so the IDs are always present after correction
+    input_ids = [{9, 8}, set(), {8, }, {9, }, {8, }, set(), {9, 8}, set(), {8, }, {9, 8, }]
+    corrected = correct_intermittency(input_ids, intermittency=2)
+    assert all((x == {9, 8} for x in corrected))
+
+
+def test_intermittency_2tooShort():
+    #The IDs are abscent for too long/
+    input_ids = [{9,}, {}, {}, {}, {9,}, {}, {}, {}, {9,}]
+    corrected = correct_intermittency(input_ids, intermittency=2)
+    assert all(x == y for x, y in zip(input_ids, corrected))
+
+
+def test_intermittency_setsOfSets():
+    # Verificaiton for the case of hydrogen bonds (sets of sets)
+    input_ids = [{frozenset({1,2}), frozenset({3, 4})},set(), set(),
+                 {frozenset({1, 2}), frozenset({3, 4})}, set(), set(),
+                 {frozenset({1, 2}), frozenset({3, 4})}, set(), set(),
+                 {frozenset({1, 2}), frozenset({3, 4})}]
+    corrected = correct_intermittency(input_ids, intermittency=2)
+    assert all((x == {frozenset({1, 2}), frozenset({3, 4})} for x in corrected))
+
+
 def test_autocorrelation_alwaysPresent():
     input = [{1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}]
     tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input, tau_max=3)
@@ -218,62 +179,99 @@ def test_autocorrelation_definedTaus():
     assert_almost_equal(sp_timeseries, [1, 2/3., 1/3., 0])
 
 
-def test_autocorrelation_intermittency1and2():
-    """
-    Intermittency of 2 means that we still count an atom if it is not present for up to 2 consecutive frames,
-    but then returns at the following step.
-    """
-    input_ids = [{9, 8}, set(), {8,}, {9,}, {8,}, set(), {9, 8}, set(), {8,}, {9, 8,}]
-    tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=3, intermittency=2)
-    # input_ids are modified in place, check if the gaps have been removed correctly
-    assert all((x == {9, 8} for x in input_ids))
-    assert_almost_equal(sp_timeseries, [1, 1, 1, 1])
-    
-
-def test_autocorrelation_intermittency2lacking():
-    """
-    If an atom is not present for more than 2 consecutive frames, it is considered to have left the region.
-    """
-    input_ids = [{9,}, {}, {}, {}, {9,}, {}, {}, {}, {9,}]
-    tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=3, intermittency=2)
-    assert_almost_equal(sp_timeseries, [1, 0, 0, 0])
-
-
 def test_autocorrelation_intermittency1_windowJump_intermittencyAll():
     """
     Step leads to skipping frames if (tau_max + 1) + (intermittency * 2) < step.
     No frames should be skipped so intermittency should be applied to all.
     """
     input_ids = [{2, 3}, {3,}, {2, 3}, {3,}, {2,}, {3,}, {2, 3}, {3,}, {2, 3}, {2, 3}]
-    tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=2,
-                                                                        intermittency=1, window_jump=5)
-    assert all((x == {2, 3} for x in input_ids))
+    corrected = correct_intermittency(input_ids, intermittency=1)
+    tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(corrected, tau_max=2,
+                                                                        window_step=5)
+    assert all((x == {2, 3} for x in corrected))
     assert_almost_equal(sp_timeseries, [1, 1, 1])
 
 
-def test_autocorrelation_intermittency0_windowBigJump():
-    """
-    The empty sets are ignored (no interrttency)
-    """
+def test_autocorrelation_windowBigJump():
+    #The empty sets are ignored (no intermittency)
     input_ids = [{1}, {1}, {1}, set(), set(), {1}, {1}, {1}, set(), set(), {1}, {1}, {1}]
-    tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=2, window_jump=5)
+    tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=2, window_step=5)
     assert_almost_equal(sp_timeseries, [1, 1, 1])
 
 
-def test_autocorrelation_intermittency0_windowBigJump_absence():
-    """
-    In the last frame the molecules are absent
-    """
+def test_autocorrelation_windowBigJump_absence():
+    # In the last frame the molecules are absent
     input_ids = [{1}, {1}, {1}, set(), set(), {1}, {1}, {1}, set(), set(), {1}, set(), set()]
-    tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=2, window_jump=5)
+    tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=2, window_step=5)
     assert_almost_equal(sp_timeseries, [1, 2/3., 2/3.])
 
 
+def test_autocorrelation_intermittency1_many():
+    input_ids = [{1}, set(), {1}, set(), {1}, set(), {1}, set(), {1}, set(), {1}, set(), {1}, set(), {1}]
+    corrected = correct_intermittency(input_ids, intermittency=1)
+    tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(corrected, tau_max=14,
+                                                                        window_step=5)
+    assert_almost_equal(sp_timeseries, [1] * 15)
+
+
 def test_autocorrelation_intermittency2_windowBigJump():
-    """
-    The intermittency corrects the last frame
-    """
+    # The intermittency corrects the last frame
     input_ids = [{1}, {1}, {1}, set(), set(), {1}, {1}, {1}, set(), set(), {1}, set(), set(), {1}]
-    tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=2,
-                                                                        intermittency=2, window_jump=5)
+    corrected = correct_intermittency(input_ids, intermittency=2)
+    tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(corrected, tau_max=2,
+                                                                        window_step=5)
     assert_almost_equal(sp_timeseries, [1, 1, 1])
+
+
+def test_SurvivalProbability_t0tf(universe):
+    with patch.object(universe, 'select_atoms') as select_atoms_mock:
+        ids = [(0, ), (0, ), (7, 6, 5), (6, 5, 4), (5, 4, 3), (4, 3, 2), (3, 2, 1), (0, )]
+        select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop(2))   # atom IDs fed set by set
+        sp = waterdynamics.SurvivalProbability(universe, "")
+        sp.run(tau_max=3, start=2, stop=6)
+        assert_almost_equal(sp.sp_timeseries, [1, 2 / 3.0, 1 / 3.0, 0])
+
+
+def test_SurvivalProbability_definedTaus(universe):
+    with patch.object(universe, 'select_atoms') as select_atoms_mock:
+        ids = [(9, 8, 7), (8, 7, 6), (7, 6, 5), (6, 5, 4), (5, 4, 3), (4, 3, 2), (3, 2, 1)]
+        select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop())   # atom IDs fed set by set
+        sp = waterdynamics.SurvivalProbability(universe, "")
+        sp.run(tau_max=3, start=0, stop=6, verbose=True)
+        assert_almost_equal(sp.sp_timeseries, [1, 2 / 3.0, 1 / 3.0, 0])
+
+
+def test_SurvivalProbability_zeroMolecules(universe):
+    # no atom IDs found
+    with patch.object(universe, 'select_atoms', return_value=Mock(ids=[])) as select_atoms_mock:
+        sp = waterdynamics.SurvivalProbability(universe, "")
+        sp.run(tau_max=3, start=3, stop=6, verbose=True)
+        assert all(np.isnan(sp.sp_timeseries[1:]))
+
+
+def test_SurvivalProbability_alwaysPresent(universe):
+    # always the same atom IDs found, 7 and 8
+    with patch.object(universe, 'select_atoms', return_value=Mock(ids=[7, 8])) as select_atoms_mock:
+        sp = waterdynamics.SurvivalProbability(universe, "")
+        sp.run(tau_max=3, start=0, stop=6, verbose=True)
+        assert all(np.equal(sp.sp_timeseries, 1))
+
+
+def test_SurvivalProbability_stepLargerThanDtmax(universe):
+    # Testing if the frames are skipped correctly
+    with patch.object(universe, 'select_atoms', return_value=Mock(ids=(1,))) as select_atoms_mock:
+        sp = waterdynamics.SurvivalProbability(universe, "")
+        sp.run(tau_max=2, step=5, stop=9, verbose=True)
+        assert_equal(sp.sp_timeseries, [1, 1, 1])
+        # with tau_max=2 for all the frames we only read 6 of them
+        # this is because the frames which are not used are skipped, and therefore 'select_atoms'
+        assert universe.trajectory.n_frames > 6
+        assert_equal(select_atoms_mock.call_count, 6)
+
+
+def test_SurvivalProbability_stepEqualDtMax(universe):
+    with patch.object(universe, 'select_atoms', return_value=Mock(ids=(1,))) as select_atoms_mock:
+        sp = waterdynamics.SurvivalProbability(universe, "")
+        sp.run(tau_max=4, step=5, stop=9, verbose=True)
+        # all frames from 0, with 9 inclusive
+        assert_equal(select_atoms_mock.call_count, 10)

From 968059b97a8022594111786fa11c194d02f61087 Mon Sep 17 00:00:00 2001
From: Mateusz Bieniek <bieniekmat@gmail.com>
Date: Wed, 10 Apr 2019 14:23:56 +0100
Subject: [PATCH 09/40] Autocorrelation extracted from SP.

---
 package/MDAnalysis/analysis/utils/__init__.py |   0
 .../analysis/utils/autocorrelation.py         | 105 ++++++++++++++++++
 package/MDAnalysis/analysis/waterdynamics.py  |  27 +----
 3 files changed, 109 insertions(+), 23 deletions(-)
 create mode 100644 package/MDAnalysis/analysis/utils/__init__.py
 create mode 100644 package/MDAnalysis/analysis/utils/autocorrelation.py

diff --git a/package/MDAnalysis/analysis/utils/__init__.py b/package/MDAnalysis/analysis/utils/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/package/MDAnalysis/analysis/utils/autocorrelation.py b/package/MDAnalysis/analysis/utils/autocorrelation.py
new file mode 100644
index 00000000000..867eee2fa39
--- /dev/null
+++ b/package/MDAnalysis/analysis/utils/autocorrelation.py
@@ -0,0 +1,105 @@
+# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
+# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
+#
+# MDAnalysis --- https://www.mdanalysis.org
+# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors
+# (see the file AUTHORS for the full list of names)
+#
+# Released under the GNU Public Licence, v2 or any higher version
+#
+# Please cite your use of MDAnalysis in published work:
+#
+# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler,
+# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein.
+# MDAnalysis: A Python package for the rapid analysis of molecular dynamics
+# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th
+# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy.
+# doi: 10.25080/majora-629e541a-00e
+#
+# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
+# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
+# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
+#
+
+"""
+A flexible implementation of an autocorrelation function.
+"""
+
+import numpy as np
+
+# TODO add documentation
+def autocorrelation(list_of_sets, tau_max, window_jump=1, intermittency=0):
+    # FIXME - make sure that it is not just a list of numbers
+
+    # correct the dataset for gaps (intermittency)
+    _correct_intermittency(intermittency, list_of_sets)
+
+    # calculate Survival Probability
+    tau_timeseries = np.arange(1, tau_max + 1)
+    sp_timeseries_data = [[] for _ in range(tau_max)]
+
+    for t in range(0, len(list_of_sets), window_jump):
+        Nt = len(list_of_sets[t])
+
+        if Nt == 0:
+            continue
+
+        for tau in tau_timeseries:
+            if t + tau >= len(list_of_sets):
+                break
+
+            # continuous: IDs that survive from t to t + tau and at every frame in between
+            Ntau = len(set.intersection(*list_of_sets[t:t + tau + 1]))
+            sp_timeseries_data[tau - 1].append(Ntau / float(Nt))
+
+    sp_timeseries = [np.mean(sp) for sp in sp_timeseries_data]
+    return tau_timeseries, sp_timeseries, sp_timeseries_data
+
+
+def _correct_intermittency(intermittency, id_list, verbose=False):
+    """
+    Pre-process Consecutive Intermittency with a single pass over the data.
+    If an atom is absent for a number of frames equal or smaller
+    than the parameter intermittency, then correct the data and remove the absence.
+    ie 7,A,A,7 with intermittency=2 will be replaced by 7,7,7,7, where A=absence
+
+    :param intermittency: the max gap allowed and to be corrected
+    :param id_list: modifies the selecteded IDs in place by adding atoms which left for <= :param intermittency
+    """
+    if intermittency == 0:
+        return
+
+    if verbose:
+        print('Correcting the selected IDs for intermittancy (gaps). ')
+
+    for i, ids in enumerate(id_list):
+        # initially update each frame as seen 0 ago (now)
+        seen_frames_ago = {i: 0 for i in ids}
+        for j in range(1, intermittency + 2):
+            for atomid in seen_frames_ago.keys():
+                # no more frames
+                if i + j >= len(id_list):
+                    continue
+
+                # if the atom is absent now
+                if not atomid in id_list[i + j]:
+                    # increase its absence counter
+                    seen_frames_ago[atomid] += 1
+                    continue
+
+                # the atom is found
+                if seen_frames_ago[atomid] == 0:
+                    # the atom was present in the last frame
+                    continue
+
+                # it was absent more times than allowed
+                if seen_frames_ago[atomid] > intermittency:
+                    continue
+
+                # the atom was absent but returned (within <= intermittency)
+                # add it to the frames where it was absent.
+                # Introduce the corrections.
+                for k in range(seen_frames_ago[atomid], 0, -1):
+                    id_list[i + j - k].add(atomid)
+
+                seen_frames_ago[atomid] = 0
\ No newline at end of file
diff --git a/package/MDAnalysis/analysis/waterdynamics.py b/package/MDAnalysis/analysis/waterdynamics.py
index a0f3b8cd379..553f5903cf7 100644
--- a/package/MDAnalysis/analysis/waterdynamics.py
+++ b/package/MDAnalysis/analysis/waterdynamics.py
@@ -458,6 +458,7 @@
 
 import MDAnalysis.analysis.hbonds
 from MDAnalysis.lib.log import ProgressMeter
+from .utils.autocorrelation import autocorrelation
 
 
 class HydrogenBondLifetimes(object):
@@ -1408,36 +1409,16 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermitte
             frame_no += 1
             frame_loaded_counter += 1
 
-        # correct the dataset for gaps (intermittency)
-        self._correct_intermittency(intermittency, self.selected_ids)
-
-        # calculate Survival Probability
-        tau_timeseries = np.arange(1, tau_max + 1)
-        sp_timeseries_data = [[] for _ in range(tau_max)]
-
         # adjust for the frames that were not loaded (step>tau_max + 1),
         # and for extra frames that were loaded (intermittency)
         window_jump = step - num_frames_to_skip
 
-        for t in range(0, len(self.selected_ids), window_jump):
-            Nt = len(self.selected_ids[t])
-
-            if Nt == 0:
-                self.print(verbose,
-                           "At frame {} the selection did not find any molecule. Moving on to the next frame".format(t))
-                continue
-
-            for tau in tau_timeseries:
-                if t + tau >= len(self.selected_ids):
-                    break
-
-                # continuous: IDs that survive from t to t + tau and at every frame in between
-                Ntau = len(set.intersection(*self.selected_ids[t:t + tau + 1]))
-                sp_timeseries_data[tau - 1].append(Ntau / float(Nt))
+        tau_timeseries, sp_timeseries, sp_timeseries_data = \
+            autocorrelation(self.selected_ids, tau_max, window_jump,intermittency)
 
         # user can investigate the distribution and sample size
         self.sp_timeseries_data = sp_timeseries_data
 
         self.tau_timeseries = tau_timeseries
-        self.sp_timeseries = [np.mean(sp) for sp in sp_timeseries_data]
+        self.sp_timeseries = sp_timeseries
         return self

From c2b90297cce52d2657527d04d46e28e76195cf61 Mon Sep 17 00:00:00 2001
From: Mateusz Bieniek <bieniekmat@gmail.com>
Date: Sun, 14 Apr 2019 13:55:47 +0100
Subject: [PATCH 10/40] Exhaustive testing for autocorrelation. It will now
 return 1 at tau=0. Now the function can be embedded into
 HydrogenBondLifetime.

---
 .../analysis/utils/autocorrelation.py         | 15 ++-
 package/MDAnalysis/analysis/waterdynamics.py  | 18 ++--
 .../analysis/test_waterdynamics.py            | 98 ++++++++++++++++---
 3 files changed, 111 insertions(+), 20 deletions(-)

diff --git a/package/MDAnalysis/analysis/utils/autocorrelation.py b/package/MDAnalysis/analysis/utils/autocorrelation.py
index 867eee2fa39..6f162e15222 100644
--- a/package/MDAnalysis/analysis/utils/autocorrelation.py
+++ b/package/MDAnalysis/analysis/utils/autocorrelation.py
@@ -29,13 +29,21 @@
 
 # TODO add documentation
 def autocorrelation(list_of_sets, tau_max, window_jump=1, intermittency=0):
+    """
+
+    :param list_of_sets: Modifies in place!
+    :param tau_max:
+    :param window_jump:
+    :param intermittency:
+    :return:
+    """
     # FIXME - make sure that it is not just a list of numbers
 
     # correct the dataset for gaps (intermittency)
     _correct_intermittency(intermittency, list_of_sets)
 
     # calculate Survival Probability
-    tau_timeseries = np.arange(1, tau_max + 1)
+    tau_timeseries = list(range(1, tau_max + 1))
     sp_timeseries_data = [[] for _ in range(tau_max)]
 
     for t in range(0, len(list_of_sets), window_jump):
@@ -53,6 +61,11 @@ def autocorrelation(list_of_sets, tau_max, window_jump=1, intermittency=0):
             sp_timeseries_data[tau - 1].append(Ntau / float(Nt))
 
     sp_timeseries = [np.mean(sp) for sp in sp_timeseries_data]
+
+    # at time 0 the value has to be one
+    tau_timeseries.insert(0, 0)
+    sp_timeseries.insert(0, 1)
+
     return tau_timeseries, sp_timeseries, sp_timeseries_data
 
 
diff --git a/package/MDAnalysis/analysis/waterdynamics.py b/package/MDAnalysis/analysis/waterdynamics.py
index 553f5903cf7..d5143fa989d 100644
--- a/package/MDAnalysis/analysis/waterdynamics.py
+++ b/package/MDAnalysis/analysis/waterdynamics.py
@@ -297,10 +297,6 @@
   for tau, sp in zip(tau_timeseries, sp_timeseries):
         print("{time} {sp}".format(time=tau, sp=sp))
 
-  # SP is not calculated at tau=0, but if you would like to plot SP=1 at tau=0:
-  tau_timeseries.insert(0, 0)
-  sp_timeseries.insert(1, 0)
-
   # plot
   plt.xlabel('Time')
   plt.ylabel('SP')
@@ -464,19 +460,19 @@
 class HydrogenBondLifetimes(object):
     r"""Hydrogen bond lifetime analysis
 
-    This is a autocorrelation function that gives the "Hydrogen Bond Lifetimes"
+    This is an autocorrelation function that gives the "Hydrogen Bond Lifetimes"
     (HBL) proposed by D.C. Rapaport [Rapaport1983]_. From this function we can
     obtain the continuous and intermittent behavior of hydrogen bonds in
     time. A fast decay in these parameters indicate a fast change in HBs
     connectivity. A slow decay indicate very stables hydrogen bonds, like in
     ice. The HBL is also know as "Hydrogen Bond Population Relaxation"
-    (HBPR). In the continuos case we have:
+    (HBPR). In the continuous case we have:
 
     .. math::
        C_{HB}^c(\tau) = \frac{\sum_{ij}h_{ij}(t_0)h'_{ij}(t_0+\tau)}{\sum_{ij}h_{ij}(t_0)}
 
     where :math:`h'_{ij}(t_0+\tau)=1` if there is a H-bond between a pair
-    :math:`ij` during time interval :math:`t_0+\tau` (continuos) and
+    :math:`ij` during time interval :math:`t_0+\tau` (continuous) and
     :math:`h'_{ij}(t_0+\tau)=0` otherwise. In the intermittent case we have:
 
     .. math::
@@ -1373,6 +1369,9 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermitte
         # preload the frames (atom IDs) to a list of sets
         self.selected_ids = []
 
+        # fixme - to parallise: the section should be rewritten so that this loop only creates a list of indices,
+        # on which the parallel _single_frame can be applied.
+
         # skip frames that will not be used
         # Example: step 5 and tau 2: LLLSS LLLSS, ... where L = Load, and S = Skip
         # Intermittency means that we have to load the extra frames to know if the atom is actually missing.
@@ -1416,6 +1415,11 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermitte
         tau_timeseries, sp_timeseries, sp_timeseries_data = \
             autocorrelation(self.selected_ids, tau_max, window_jump,intermittency)
 
+        # warn the user if the NaN are found
+        if all(np.isnan(sp_timeseries[1:])):
+            # fixme - warn the user that the dataset. Use a standardised warning.
+            print('NaN Error: Most likely data was not found. Check your atom selections. ')
+
         # user can investigate the distribution and sample size
         self.sp_timeseries_data = sp_timeseries_data
 
diff --git a/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py b/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py
index e6dc29b0f35..04e09c48611 100644
--- a/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py
+++ b/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py
@@ -23,9 +23,9 @@
 from __future__ import print_function, absolute_import
 import MDAnalysis
 from MDAnalysis.analysis import waterdynamics
+from MDAnalysis.analysis.utils.autocorrelation import autocorrelation
 
 from MDAnalysisTests.datafiles import waterPSF, waterDCD
-from MDAnalysisTests.datafiles import PDB, XTC
 
 import pytest
 import numpy as np
@@ -92,7 +92,7 @@ def test_SurvivalProbability_t0tf(universe):
         select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop(2))   # atom IDs fed set by set
         sp = waterdynamics.SurvivalProbability(universe, "")
         sp.run(tau_max=3, start=2, stop=6)
-        assert_almost_equal(sp.sp_timeseries, [2 / 3.0, 1 / 3.0, 0])
+        assert_almost_equal(sp.sp_timeseries, [1, 2 / 3.0, 1 / 3.0, 0])
 
 
 def test_SurvivalProbability_definedTaus(universe):
@@ -101,7 +101,7 @@ def test_SurvivalProbability_definedTaus(universe):
         select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop())   # atom IDs fed set by set
         sp = waterdynamics.SurvivalProbability(universe, "")
         sp.run(tau_max=3, start=0, stop=6, verbose=True)
-        assert_almost_equal(sp.sp_timeseries, [2 / 3.0, 1 / 3.0, 0])
+        assert_almost_equal(sp.sp_timeseries, [1, 2 / 3.0, 1 / 3.0, 0])
 
 
 def test_SurvivalProbability_zeroMolecules(universe):
@@ -109,7 +109,7 @@ def test_SurvivalProbability_zeroMolecules(universe):
     with patch.object(universe, 'select_atoms', return_value=Mock(ids=[])) as select_atoms_mock:
         sp = waterdynamics.SurvivalProbability(universe, "")
         sp.run(tau_max=3, start=3, stop=6, verbose=True)
-        assert all(np.isnan(sp.sp_timeseries))
+        assert all(np.isnan(sp.sp_timeseries[1:]))
 
 
 def test_SurvivalProbability_alwaysPresent(universe):
@@ -125,7 +125,7 @@ def test_SurvivalProbability_stepLargerThanDtmax(universe):
     with patch.object(universe, 'select_atoms', return_value=Mock(ids=(1,))) as select_atoms_mock:
         sp = waterdynamics.SurvivalProbability(universe, "")
         sp.run(tau_max=2, step=5, stop=9, verbose=True)
-        assert_equal(sp.sp_timeseries, [1, 1])
+        assert_equal(sp.sp_timeseries, [1, 1, 1])
         # with tau_max=2 for all the frames we only read 6 of them
         # this is because the frames which are not used are skipped, and therefore 'select_atoms'
         assert universe.trajectory.n_frames > 6
@@ -150,8 +150,8 @@ def test_SurvivalProbability_intermittency1and2(universe):
         select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop())   # atom IDs fed set by set
         sp = waterdynamics.SurvivalProbability(universe, "")
         sp.run(tau_max=3, stop=9, verbose=True, intermittency=2)
-        assert all((x == set([9, 8]) for x in sp.selected_ids))
-        assert_almost_equal(sp.sp_timeseries, [1, 1, 1])
+        assert all((x == {9, 8} for x in sp.selected_ids))
+        assert_almost_equal(sp.sp_timeseries, [1, 1, 1, 1])
 
 
 def test_SurvivalProbability_intermittency2lacking(universe):
@@ -163,7 +163,7 @@ def test_SurvivalProbability_intermittency2lacking(universe):
         select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop())   # atom IDs fed set by set
         sp = waterdynamics.SurvivalProbability(universe, "")
         sp.run(tau_max=3, stop=8, verbose=True, intermittency=2)
-        assert_almost_equal(sp.sp_timeseries, [0, 0, 0])
+        assert_almost_equal(sp.sp_timeseries, [1, 0, 0, 0])
 
 
 def test_SurvivalProbability_intermittency1_step5_noSkipping(universe):
@@ -176,8 +176,9 @@ def test_SurvivalProbability_intermittency1_step5_noSkipping(universe):
         select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop())   # atom IDs fed set by set
         sp = waterdynamics.SurvivalProbability(universe, "")
         sp.run(tau_max=2, stop=9, verbose=True, intermittency=1, step=5)
-        assert all((x == set([2, 3]) for x in sp.selected_ids))
-        assert_almost_equal(sp.sp_timeseries, [1, 1])
+        assert all((x == {2, 3} for x in sp.selected_ids))
+        assert_almost_equal(sp.sp_timeseries, [1, 1, 1])
+
 
 def test_SurvivalProbability_intermittency1_step5_Skipping(universe):
     """
@@ -190,6 +191,79 @@ def test_SurvivalProbability_intermittency1_step5_Skipping(universe):
         select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop())   # atom IDs fed set by set
         sp = waterdynamics.SurvivalProbability(universe, "")
         sp.run(tau_max=1, stop=9, verbose=True, intermittency=1, step=5)
-        assert all((x == set([1]) for x in sp.selected_ids))
+        assert all((x == {1} for x in sp.selected_ids))
         assert len(sp.selected_ids) == beforepopsing
-        assert_almost_equal(sp.sp_timeseries, [1])
\ No newline at end of file
+        assert_almost_equal(sp.sp_timeseries, [1, 1])
+
+
+def test_autocorrelation_alwaysPresent():
+    input = [{1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}]
+    tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input, tau_max=3)
+    assert all(np.equal(sp_timeseries, 1))
+
+
+def test_autocorrelation_definedTaus():
+    input_ids = [{9, 8, 7}, {8, 7, 6}, {7, 6, 5}, {6, 5, 4}, {5, 4, 3}, {4, 3, 2}, {3, 2, 1}]
+    tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=3)
+    assert_almost_equal(sp_timeseries, [1, 2/3., 1/3., 0])
+
+
+def test_autocorrelation_intermittency1and2():
+    """
+    Intermittency of 2 means that we still count an atom if it is not present for up to 2 consecutive frames,
+    but then returns at the following step.
+    """
+    input_ids = [{9, 8}, set(), {8,}, {9,}, {8,}, set(), {9, 8}, set(), {8,}, {9, 8,}]
+    tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=3, intermittency=2)
+    # input_ids are modified in place, check if the gaps have been removed correctly
+    assert all((x == {9, 8} for x in input_ids))
+    assert_almost_equal(sp_timeseries, [1, 1, 1, 1])
+    
+
+def test_autocorrelation_intermittency2lacking():
+    """
+    If an atom is not present for more than 2 consecutive frames, it is considered to have left the region.
+    """
+    input_ids = [{9,}, {}, {}, {}, {9,}, {}, {}, {}, {9,}]
+    tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=3, intermittency=2)
+    assert_almost_equal(sp_timeseries, [1, 0, 0, 0])
+
+
+def test_autocorrelation_intermittency1_windowJump_intermittencyAll():
+    """
+    Step leads to skipping frames if (tau_max + 1) + (intermittency * 2) < step.
+    No frames should be skipped so intermittency should be applied to all.
+    """
+    input_ids = [{2, 3}, {3,}, {2, 3}, {3,}, {2,}, {3,}, {2, 3}, {3,}, {2, 3}, {2, 3}]
+    tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=2,
+                                                                        intermittency=1, window_jump=5)
+    assert all((x == {2, 3} for x in input_ids))
+    assert_almost_equal(sp_timeseries, [1, 1, 1])
+
+
+def test_autocorrelation_intermittency0_windowBigJump():
+    """
+    The empty sets are ignored (no interrttency)
+    """
+    input_ids = [{1}, {1}, {1}, set(), set(), {1}, {1}, {1}, set(), set(), {1}, {1}, {1}]
+    tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=2, window_jump=5)
+    assert_almost_equal(sp_timeseries, [1, 1, 1])
+
+
+def test_autocorrelation_intermittency0_windowBigJump_absence():
+    """
+    In the last frame the molecules are absent
+    """
+    input_ids = [{1}, {1}, {1}, set(), set(), {1}, {1}, {1}, set(), set(), {1}, set(), set()]
+    tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=2, window_jump=5)
+    assert_almost_equal(sp_timeseries, [1, 2/3., 2/3.])
+
+
+def test_autocorrelation_intermittency2_windowBigJump():
+    """
+    The intermittency corrects the last frame
+    """
+    input_ids = [{1}, {1}, {1}, set(), set(), {1}, {1}, {1}, set(), set(), {1}, set(), set(), {1}]
+    tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=2,
+                                                                        intermittency=2, window_jump=5)
+    assert_almost_equal(sp_timeseries, [1, 1, 1])

From bba1474b7e5c4d4cc5624e47ce500482bff48018 Mon Sep 17 00:00:00 2001
From: Mateusz Bieniek <bieniekmat@gmail.com>
Date: Sun, 14 Apr 2019 16:42:32 +0100
Subject: [PATCH 11/40] A more general documentation in the autocorrelation.

---
 .../analysis/utils/autocorrelation.py         | 22 +++++++++----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/package/MDAnalysis/analysis/utils/autocorrelation.py b/package/MDAnalysis/analysis/utils/autocorrelation.py
index 6f162e15222..dc4a186f040 100644
--- a/package/MDAnalysis/analysis/utils/autocorrelation.py
+++ b/package/MDAnalysis/analysis/utils/autocorrelation.py
@@ -21,15 +21,14 @@
 # J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
 #
 
-"""
-A flexible implementation of an autocorrelation function.
-"""
-
 import numpy as np
 
+
 # TODO add documentation
 def autocorrelation(list_of_sets, tau_max, window_jump=1, intermittency=0):
     """
+    The descrete implementation of the autocorrelation function.
+    fixme - change to the numpy documentation
 
     :param list_of_sets: Modifies in place!
     :param tau_max:
@@ -37,15 +36,16 @@ def autocorrelation(list_of_sets, tau_max, window_jump=1, intermittency=0):
     :param intermittency:
     :return:
     """
-    # FIXME - make sure that it is not just a list of numbers
+    # fixme - add checks if this is a list of sets
+    # fixme - check dimensions
 
     # correct the dataset for gaps (intermittency)
     _correct_intermittency(intermittency, list_of_sets)
 
-    # calculate Survival Probability
     tau_timeseries = list(range(1, tau_max + 1))
-    sp_timeseries_data = [[] for _ in range(tau_max)]
+    timeseries_data = [[] for _ in range(tau_max)]
 
+    # calculate autocorrelation
     for t in range(0, len(list_of_sets), window_jump):
         Nt = len(list_of_sets[t])
 
@@ -58,15 +58,15 @@ def autocorrelation(list_of_sets, tau_max, window_jump=1, intermittency=0):
 
             # continuous: IDs that survive from t to t + tau and at every frame in between
             Ntau = len(set.intersection(*list_of_sets[t:t + tau + 1]))
-            sp_timeseries_data[tau - 1].append(Ntau / float(Nt))
+            timeseries_data[tau - 1].append(Ntau / float(Nt))
 
-    sp_timeseries = [np.mean(sp) for sp in sp_timeseries_data]
+    timeseries = [np.mean(x) for x in timeseries_data]
 
     # at time 0 the value has to be one
     tau_timeseries.insert(0, 0)
-    sp_timeseries.insert(0, 1)
+    timeseries.insert(0, 1)
 
-    return tau_timeseries, sp_timeseries, sp_timeseries_data
+    return tau_timeseries, timeseries, timeseries_data
 
 
 def _correct_intermittency(intermittency, id_list, verbose=False):

From 3056a7ee2ce4477c652eae2f7eaab693dc9e7b87 Mon Sep 17 00:00:00 2001
From: Mateusz Bieniek <bieniekmat@gmail.com>
Date: Sun, 14 Apr 2019 16:43:03 +0100
Subject: [PATCH 12/40] Updated test cases to illuminate the bug in #2247.

---
 .../MDAnalysisTests/analysis/test_waterdynamics.py    | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py b/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py
index 04e09c48611..1317c77afa2 100644
--- a/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py
+++ b/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py
@@ -45,8 +45,17 @@ def universe():
 def test_HydrogenBondLifetimes(universe):
     hbl = waterdynamics.HydrogenBondLifetimes(
         universe, SELECTION1, SELECTION1, 0, 5, 3)
+    hbl.run(stop=5)
+    assert_almost_equal(hbl.timeseries[2], 0.75)
+
+
+def test_HydrogenBondLifetimes_growing_continuous(universe):
+    # The autocorrelation cannot grow
+    hbl = waterdynamics.HydrogenBondLifetimes(
+        universe, SELECTION1, SELECTION1, t0=0, tf=9, dtmax=5)
     hbl.run()
-    assert_almost_equal(hbl.timeseries[2][1], 0.75, 5)
+    # Index 0 in frameX[0] refers to the continuous, 1 to intermittent
+    assert all([frameX >= frameXplus1 for frameX, frameXplus1 in zip(hbl.timeseries, hbl.timeseries[1:])])
 
 
 def test_WaterOrientationalRelaxation(universe):

From 22d642162c186232a4018fd388f797d7c1368c09 Mon Sep 17 00:00:00 2001
From: Mateusz Bieniek <bieniekmat@gmail.com>
Date: Sun, 14 Apr 2019 16:43:55 +0100
Subject: [PATCH 13/40] First switch to using autocorrelation that passes the
 tests.

---
 package/MDAnalysis/analysis/waterdynamics.py | 213 +++----------------
 1 file changed, 27 insertions(+), 186 deletions(-)

diff --git a/package/MDAnalysis/analysis/waterdynamics.py b/package/MDAnalysis/analysis/waterdynamics.py
index d5143fa989d..ad32d75074f 100644
--- a/package/MDAnalysis/analysis/waterdynamics.py
+++ b/package/MDAnalysis/analysis/waterdynamics.py
@@ -464,8 +464,8 @@ class HydrogenBondLifetimes(object):
     (HBL) proposed by D.C. Rapaport [Rapaport1983]_. From this function we can
     obtain the continuous and intermittent behavior of hydrogen bonds in
     time. A fast decay in these parameters indicate a fast change in HBs
-    connectivity. A slow decay indicate very stables hydrogen bonds, like in
-    ice. The HBL is also know as "Hydrogen Bond Population Relaxation"
+    connectivity. A slow decay indicate stable hydrogen bonds (e.g. ice).
+    The HBL is also know as "Hydrogen Bond Population Relaxation"
     (HBPR). In the continuous case we have:
 
     .. math::
@@ -507,6 +507,7 @@ class HydrogenBondLifetimes(object):
 
     def __init__(self, universe, selection1, selection2, t0, tf, dtmax,
                  nproc=1):
+        # fixme - use the same interface you used in SP
         self.universe = universe
         self.selection1 = selection1
         self.selection2 = selection2
@@ -516,196 +517,33 @@ def __init__(self, universe, selection1, selection2, t0, tf, dtmax,
         self.nproc = nproc
         self.timeseries = None
 
-    def _getC_i(self, HBP, t0, t):
-        """
-        This function give the intermitent Hydrogen Bond Lifetime
-        C_i = <h(t0)h(t)>/<h(t0)> between t0 and t
-        """
-        C_i = 0
-        for i in range(len(HBP[t0])):
-            for j in range(len(HBP[t])):
-                if (HBP[t0][i][0] == HBP[t][j][0] and HBP[t0][i][1] == HBP[t][j][1]):
-                    C_i += 1
-                    break
-        if len(HBP[t0]) == 0:
-            return 0.0
-        else:
-            return float(C_i) / len(HBP[t0])
 
-    def _getC_c(self, HBP, t0, t):
-        """
-        This function give the continous Hydrogen Bond Lifetime
-        C_c = <h(t0)h'(t)>/<h(t0)> between t0 and t
-        """
-        C_c = 0
-        dt = 1
-        begt0 = t0
-        HBP_cp = HBP
-        HBP_t0 = HBP[t0]
-        newHBP = []
-        if t0 == t:
-            return 1.0
-        while t0 + dt <= t:
-            for i in range(len(HBP_t0)):
-                for j in range(len(HBP_cp[t0 + dt])):
-                    if (HBP_t0[i][0] == HBP_cp[t0 + dt][j][0] and
-                        HBP_t0[i][1] == HBP_cp[t0 + dt][j][1]):
-                        newHBP.append(HBP_t0[i])
-                        break
-            C_c = len(newHBP)
-            t0 += dt
-            HBP_t0 = newHBP
-            newHBP = []
-        if len(HBP[begt0]) == 0:
-            return 0
-        else:
-            return C_c / float(len(HBP[begt0]))
+    def run(self, **kwargs):
+        # Get all hydrogen bonds
+        hydrogen_bonds = MDAnalysis.analysis.hbonds.HydrogenBondAnalysis(self.universe,
+                                                                 self.selection1,
+                                                                 self.selection2,
+                                                                 distance=3.5,
+                                                                 angle=120.0)
+        # fixme - use it only for the time requested
+        # fixme - this currently ignores the constructor parameters t0 tf etc
+        hydrogen_bonds.run(**kwargs)
 
-    def _intervC_c(self, HBP, t0, tf, dt):
-        """
-        This function gets all the data for the h(t0)h(t0+dt)', where
-        t0 = 1,2,3,...,tf. This function give us one point of the final plot
-        HBL vs t
-        """
-        a = 0
-        count = 0
-        for i in range(len(HBP)):
-            if (t0 + dt <= tf):
-                if t0 == t0 + dt:
-                    b = self._getC_c(HBP, t0, t0)
-                    break
-                b = self._getC_c(HBP, t0, t0 + dt)
-                t0 += dt
-                a += b
-                count += 1
-        if count == 0:
-            return 1.0
-        return a / count
-
-    def _intervC_i(self, HBP, t0, tf, dt):
-        """
-        This function gets all the data for the h(t0)h(t0+dt), where
-        t0 = 1,2,3,...,tf. This function give us a point of the final plot
-        HBL vs t
-        """
-        a = 0
-        count = 0
-        for i in range(len(HBP)):
-            if (t0 + dt <= tf):
-                b = self._getC_i(HBP, t0, t0 + dt)
-                t0 += dt
-                a += b
-                count += 1
-        return a / count
-
-    def _finalGraphGetC_i(self, HBP, t0, tf, maxdt):
-        """
-        This function gets the final data of the C_i graph.
-        """
-        output = []
-        for dt in range(maxdt):
-            a = self._intervC_i(HBP, t0, tf, dt)
-            output.append(a)
-        return output
+        # Extract the hydrogen bonds IDs only in the format [set(superset(x1,x2), superset(x3,x4)), set(), set()]
+        hb_ids = [{frozenset(bond[0:2]) for bond in frame} for frame in hydrogen_bonds.timeseries]
 
-    def _finalGraphGetC_c(self, HBP, t0, tf, maxdt):
-        """
-        This function gets the final data of the C_c graph.
-        """
-        output = []
-        for dt in range(maxdt):
-            a = self._intervC_c(HBP, t0, tf, dt)
-            output.append(a)
-        return output
+        # Calculate the
+        tau_timeseries, timeseries, timeseries_data = autocorrelation(hb_ids, self.dtmax)
 
-    def _getGraphics(self, HBP, t0, tf, maxdt):
-        """
-        Function that join all the results into a plot.
-        """
-        a = []
-        cont = self._finalGraphGetC_c(HBP, t0, tf, maxdt)
-        inte = self._finalGraphGetC_i(HBP, t0, tf, maxdt)
-        for i in range(len(cont)):
-            fix = [cont[i], inte[i]]
-            a.append(fix)
-        return a
+        # fixme - document
+        self.hydrogen_bonds = hydrogen_bonds.timeseries
 
-    def _HBA(self, ts, conn, universe, selAtom1, selAtom2,
-             verbose=False):
-        """
-        Main function for calculate C_i and C_c in parallel.
-        """
-        finalGetResidue1 = selAtom1
-        finalGetResidue2 = selAtom2
-        frame = ts.frame
-        h = MDAnalysis.analysis.hbonds.HydrogenBondAnalysis(universe,
-                                                            finalGetResidue1,
-                                                            finalGetResidue2,
-                                                            distance=3.5,
-                                                            angle=120.0,
-                                                            start=frame - 1,
-                                                            stop=frame)
-        while True:
-            try:
-                h.run(verbose=verbose)
-                break
-            except:
-                print("error")
-                print("trying again")
-                sys.stdout.flush()
-        sys.stdout.flush()
-        conn.send(h.timeseries[0])
-        conn.close()
+        # user can investigate the distribution and sample size
+        self.sp_timeseries_data = timeseries_data
+        self.tau_timeseries = tau_timeseries
+        self.timeseries = timeseries
+        return self
 
-    def run(self, **kwargs):
-        """Analyze trajectory and produce timeseries"""
-        h_list = []
-        i = 0
-        if (self.nproc > 1):
-            while i < len(self.universe.trajectory):
-                jobs = []
-                k = i
-                for j in range(self.nproc):
-                        # start
-                    print("ts=", i + 1)
-                    if i >= len(self.universe.trajectory):
-                        break
-                    conn_parent, conn_child = multiprocessing.Pipe(False)
-                    while True:
-                        try:
-                            # new thread
-                            jobs.append(
-                                (multiprocessing.Process(
-                                    target=self._HBA,
-                                    args=(self.universe.trajectory[i],
-                                          conn_child, self.universe,
-                                          self.selection1, self.selection2,)),
-                                 conn_parent))
-                            break
-                        except:
-                            print("error in jobs.append")
-                    jobs[j][0].start()
-                    i = i + 1
-
-                for j in range(self.nproc):
-                    if k >= len(self.universe.trajectory):
-                        break
-                    rec01 = jobs[j][1]
-                    received = rec01.recv()
-                    h_list.append(received)
-                    jobs[j][0].join()
-                    k += 1
-            self.timeseries = self._getGraphics(
-                h_list, 0, self.tf - 1, self.dtmax)
-        else:
-            h_list = MDAnalysis.analysis.hbonds.HydrogenBondAnalysis(self.universe,
-                                                                     self.selection1,
-                                                                     self.selection2,
-                                                                     distance=3.5,
-                                                                     angle=120.0)
-            h_list.run(**kwargs)
-            self.timeseries = self._getGraphics(
-                h_list.timeseries, self.t0, self.tf, self.dtmax)
 
 
 class WaterOrientationalRelaxation(object):
@@ -1347,6 +1185,9 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermitte
             tau from 1 to `tau_max`. Saved in the field tau_timeseries.
         sp_timeseries : list
             survival probability for each value of `tau`. Saved in the field sp_timeseries.
+        sp_timeseries_data: list
+            raw datapoints from which the average is taken (sp_timeseries).
+            Time dependancy and distribution can be extracted.
         """
 
         # backward compatibility (and priority)

From 9188f115ee0af2c8ef77fa055186651dbf334c30 Mon Sep 17 00:00:00 2001
From: Mateusz Bieniek <bieniekmat@gmail.com>
Date: Mon, 15 Apr 2019 23:28:11 +0100
Subject: [PATCH 14/40] The documentation updated with the code. The
 HydrogenBondAnalysis should ideally be extracted from the
 HydrogenBondLifetime and fed into it in the constructor.

---
 .../analysis/utils/autocorrelation.py         |  59 +++++--
 package/MDAnalysis/analysis/waterdynamics.py  | 154 +++++++++---------
 2 files changed, 124 insertions(+), 89 deletions(-)

diff --git a/package/MDAnalysis/analysis/utils/autocorrelation.py b/package/MDAnalysis/analysis/utils/autocorrelation.py
index dc4a186f040..50891765009 100644
--- a/package/MDAnalysis/analysis/utils/autocorrelation.py
+++ b/package/MDAnalysis/analysis/utils/autocorrelation.py
@@ -24,23 +24,48 @@
 import numpy as np
 
 
-# TODO add documentation
 def autocorrelation(list_of_sets, tau_max, window_jump=1, intermittency=0):
-    """
-    The descrete implementation of the autocorrelation function.
-    fixme - change to the numpy documentation
-
-    :param list_of_sets: Modifies in place!
-    :param tau_max:
-    :param window_jump:
-    :param intermittency:
-    :return:
+    r"""The descrete implementation of the autocorrelation function.
+
+    Here is a random equation that shows something.
+
+    .. math::
+       C_{HB}^c(\tau) = \frac{\sum_{ij}h_{ij}(t_0)h'_{ij}(t_0+\tau)}{\sum_{ij}h_{ij}(t_0)}
+
+
+
+    fixme -  list_of_sets: Modifies in place!
+    Parameters
+    ----------
+    list_of_sets : list
+      List of sets,
+    tau_max : int
+      The last tau (inclusive) for which to carry out autocorrelation.
+    window_jump : int, optional
+      The step for the t0 to perform autocorrelation (without the overlap). Default is 1.
+    intermittency : int, optional
+      Helps with the graps in the data. If we want to remove some of the fluctuations and focus on the patterns,
+      intermittency removes the gaps. The default intermittency=0 which means that if the datapoint is missing at any
+      frame, it is not counted. With the value of 2 the datapoint can be missing for 2 consecutive frames but it will
+      be counted as present.
+
+    Returns
+    --------
+    tau_timeseries : list of int
+        the tau for which the autocorrelation was calculated
+    timeseries : list of int
+        the autocorelation values for each of the tau values
+    timeseries_data : list of list of int
+        the raw data from which the autocorrelation is computed. The time dependant evolution can be investigated.
+
+    .. versionadded:: 0.19.2
     """
     # fixme - add checks if this is a list of sets
     # fixme - check dimensions
+    # fixme - check parameters?
 
     # correct the dataset for gaps (intermittency)
-    _correct_intermittency(intermittency, list_of_sets)
+    correct_intermittency(intermittency, list_of_sets)
 
     tau_timeseries = list(range(1, tau_max + 1))
     timeseries_data = [[] for _ in range(tau_max)]
@@ -69,15 +94,21 @@ def autocorrelation(list_of_sets, tau_max, window_jump=1, intermittency=0):
     return tau_timeseries, timeseries, timeseries_data
 
 
-def _correct_intermittency(intermittency, id_list, verbose=False):
+def correct_intermittency(intermittency, id_list, verbose=False):
     """
     Pre-process Consecutive Intermittency with a single pass over the data.
     If an atom is absent for a number of frames equal or smaller
     than the parameter intermittency, then correct the data and remove the absence.
     ie 7,A,A,7 with intermittency=2 will be replaced by 7,7,7,7, where A=absence
 
-    :param intermittency: the max gap allowed and to be corrected
-    :param id_list: modifies the selecteded IDs in place by adding atoms which left for <= :param intermittency
+    Parameters
+    ----------
+    intermittency: int
+        the max gap allowed and to be corrected
+    id_list: fixme
+        modifies the selecteded IDs in place by adding atoms which left for <= :param intermittency
+    verbose: Boolean, optional
+        print
     """
     if intermittency == 0:
         return
diff --git a/package/MDAnalysis/analysis/waterdynamics.py b/package/MDAnalysis/analysis/waterdynamics.py
index ad32d75074f..2635bacbbb3 100644
--- a/package/MDAnalysis/analysis/waterdynamics.py
+++ b/package/MDAnalysis/analysis/waterdynamics.py
@@ -450,15 +450,16 @@
 from six.moves import range, zip_longest
 
 import numpy as np
-import multiprocessing
 
 import MDAnalysis.analysis.hbonds
 from MDAnalysis.lib.log import ProgressMeter
 from .utils.autocorrelation import autocorrelation
 
 
+
 class HydrogenBondLifetimes(object):
     r"""Hydrogen bond lifetime analysis
+    fixme - update the definition of intermittency (refer the user to autocorrelation)
 
     This is an autocorrelation function that gives the "Hydrogen Bond Lifetimes"
     (HBL) proposed by D.C. Rapaport [Rapaport1983]_. From this function we can
@@ -492,48 +493,100 @@ class HydrogenBondLifetimes(object):
       It could be any selection available in MDAnalysis, not just water.
     selection2 : str
       Selection string to analize its HBL against selection1
-    t0 : int
-      frame  where analysis begins
-    tf : int
-      frame where analysis ends
-    dtmax : int
-      Maximum dt size, `dtmax` < `tf` or it will crash.
-    nproc : int
-      Number of processors to use, by default is 1.
-
+    verbose : Boolean, optional
+      When True, prints progress and comments to the console.
 
     .. versionadded:: 0.11.0
     """
 
-    def __init__(self, universe, selection1, selection2, t0, tf, dtmax,
-                 nproc=1):
-        # fixme - use the same interface you used in SP
+    def __init__(self, universe, selection1, selection2, t0=None, tf=None, dtmax=None, verbose=False):
         self.universe = universe
         self.selection1 = selection1
         self.selection2 = selection2
-        self.t0 = t0
-        self.tf = tf - 1
-        self.dtmax = dtmax
-        self.nproc = nproc
-        self.timeseries = None
+        self.verbose = verbose
 
+        # backward compatibility
+        self.start = self.stop = self.tau_max = None
+        if t0 is not None:
+            self.start = t0
+            warnings.warn("t0 is deprecated, use run(start) instead", category=DeprecationWarning)
+
+        if tf is not None:
+            self.stop = tf
+            warnings.warn("tf is deprecated, use run(stop) instead", category=DeprecationWarning)
+
+        if dtmax is not None:
+            self.tau_max = dtmax
+            warnings.warn("dtmax is deprecated, use run(tau_max) instead", category=DeprecationWarning)
+
+
+    def run(self, tau_max=20, start=0, stop=None, step=1, intermittency=0, verbose=False, **kwargs):
+        """
+        Computes and returns the Survival Probability (SP) timeseries
+
+        Parameters
+        ----------
+        start : int, optional
+            Zero-based index of the first frame to be analysed
+        stop : int, optional
+            Zero-based index of the last frame to be analysed (inclusive)
+        step : int, optional
+            Jump every `step`-th frame. This is compatible but independant of the taus used.
+            Note that `step` and `tau_max` work consistently with intermittency.
+        tau_max : int, optional
+            Survival probability is calculated for the range 1 <= `tau` <= `tau_max`
+        intermittency : int, optional
+            The maximum number of consecutive frames for which a bond can disappear but be counted as present if it
+            returns at the next frame. An intermittency of `0` is equivalent to a continuous autocorrelation, which does
+            not allow for the hydrogen bond disappearance. For example, for `intermittency=2`, any given hbond may
+            disappear for up to two consecutive frames yet be treated as being present at all frames.
+            The default is continuous (0).
+        verbose : Boolean, optional
+            Print the progress to the console
+
+        Returns
+        -------
+        tau_timeseries : list
+            tau from 1 to `tau_max`. Saved in the field tau_timeseries.
+        timeseries : list
+            survival probability for each value of `tau`. Saved in the field sp_timeseries.
+        timeseries_data: list
+            raw datapoints from which the average is taken (sp_timeseries).
+            Time dependancy and distribution can be extracted.
+        """
+
+        # backward compatibility (and priority)
+        start = self.start if self.start is not None else start
+        stop = self.stop if self.stop is not None else stop
+        tau_max = self.tau_max if self.tau_max is not None else tau_max
+
+        # sanity checks
+        if stop is not None and stop >= len(self.universe.trajectory):
+            raise ValueError("\"stop\" must be smaller than the number of frames in the trajectory.")
+
+        if stop is None:
+            stop = len(self.universe.trajectory)
+        else:
+            stop = stop + 1
+
+        if tau_max > (stop - start):
+            raise ValueError("Too few frames selected for given tau_max.")
 
-    def run(self, **kwargs):
         # Get all hydrogen bonds
+        # fixme - we should not have the frozen definition of the hydrogen bonds
+        # fixme - this will be upated with the new HydrogenBond interface
         hydrogen_bonds = MDAnalysis.analysis.hbonds.HydrogenBondAnalysis(self.universe,
                                                                  self.selection1,
                                                                  self.selection2,
                                                                  distance=3.5,
                                                                  angle=120.0)
-        # fixme - use it only for the time requested
-        # fixme - this currently ignores the constructor parameters t0 tf etc
-        hydrogen_bonds.run(**kwargs)
+        # Compute the minimal number of hydrogen bonds
+        hydrogen_bonds.run(start=start, stop=stop, step=step, verbose=verbose, **kwargs)
 
         # Extract the hydrogen bonds IDs only in the format [set(superset(x1,x2), superset(x3,x4)), set(), set()]
-        hb_ids = [{frozenset(bond[0:2]) for bond in frame} for frame in hydrogen_bonds.timeseries]
+        found_hydrogen_bonds = [{frozenset(bond[0:2]) for bond in frame} for frame in hydrogen_bonds.timeseries]
 
-        # Calculate the
-        tau_timeseries, timeseries, timeseries_data = autocorrelation(hb_ids, self.dtmax)
+        tau_timeseries, timeseries, timeseries_data = autocorrelation(found_hydrogen_bonds, self.dtmax)
 
         # fixme - document
         self.hydrogen_bonds = hydrogen_bonds.timeseries
@@ -1094,62 +1147,13 @@ def __init__(self, universe, selection, t0=None, tf=None, dtmax=None, verbose=Fa
             self.tau_max = dtmax
             warnings.warn("dtmax is deprecated, use run(tau_max=dtmax) instead", category=DeprecationWarning)
 
+
     def print(self, verbose, *args):
         if self.verbose:
             print(args)
         elif verbose:
             print(args)
 
-    def _correct_intermittency(self, intermittency, selected_ids):
-        """
-        Pre-process Consecutive Intermittency with a single pass over the data.
-        If an atom is absent for a number of frames equal or smaller
-        than the `intermittency`, then correct the data and remove the absence.
-        ie 7,A,A,7 with intermittency=2 will be replaced by 7,7,7,7, where A=absence
-
-        Parameters
-        ----------
-        intermittency : int
-            the max gap allowed and to be corrected
-        selected_ids: list of ids
-            modifies the selecteded IDs in place by adding atoms which left for <= `intermittency`
-        """
-        self.print('Correcting the selected IDs for intermittancy (gaps). ')
-        if intermittency == 0:
-            return
-
-        for i, ids in enumerate(selected_ids):
-            # initially update each frame as seen 0 ago (now)
-            seen_frames_ago = {i: 0 for i in ids}
-            for j in range(1, intermittency + 2):
-                for atomid in seen_frames_ago.keys():
-                    # no more frames
-                    if i + j >= len(selected_ids):
-                        continue
-
-                    # if the atom is absent now
-                    if not atomid in selected_ids[i + j]:
-                        # increase its absence counter
-                        seen_frames_ago[atomid] += 1
-                        continue
-
-                    # the atom is found
-                    if seen_frames_ago[atomid] == 0:
-                        # the atom was present in the last frame
-                        continue
-
-                    # it was absent more times than allowed
-                    if seen_frames_ago[atomid] > intermittency:
-                        continue
-
-                    # the atom was absent but returned (within <= intermittency)
-                    # add it to the frames where it was absent.
-                    # Introduce the corrections.
-                    for k in range(seen_frames_ago[atomid], 0, -1):
-                        selected_ids[i + j - k].add(atomid)
-
-                    seen_frames_ago[atomid] = 0
-
 
     def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermittency=0, verbose=False):
         """

From 594183dfef0ec0c2a57b6d5fd64ad1f55ce9ea08 Mon Sep 17 00:00:00 2001
From: Mateusz Bieniek <bieniekmat@gmail.com>
Date: Thu, 18 Apr 2019 13:20:34 +0100
Subject: [PATCH 15/40] Correction for the test to make it consistent with the
 previous author's test. Our end index is inclusive (5th frame, giving 6
 frames). But the other author's analysis was done on 0-4 frames (5 frames).

---
 package/MDAnalysis/analysis/waterdynamics.py  | 24 +++++++------------
 .../analysis/test_waterdynamics.py            |  5 ++--
 2 files changed, 12 insertions(+), 17 deletions(-)

diff --git a/package/MDAnalysis/analysis/waterdynamics.py b/package/MDAnalysis/analysis/waterdynamics.py
index 2635bacbbb3..a77751f3618 100644
--- a/package/MDAnalysis/analysis/waterdynamics.py
+++ b/package/MDAnalysis/analysis/waterdynamics.py
@@ -445,16 +445,16 @@
 """
 from __future__ import print_function, division, absolute_import
 
-import warnings
-
 from six.moves import range, zip_longest
-
+import logging
+import warnings
 import numpy as np
 
 import MDAnalysis.analysis.hbonds
 from MDAnalysis.lib.log import ProgressMeter
 from .utils.autocorrelation import autocorrelation
 
+logger = logging.getLogger('MDAnalysis.analysis.waterdynamics')
 
 
 class HydrogenBondLifetimes(object):
@@ -475,6 +475,7 @@ class HydrogenBondLifetimes(object):
     where :math:`h'_{ij}(t_0+\tau)=1` if there is a H-bond between a pair
     :math:`ij` during time interval :math:`t_0+\tau` (continuous) and
     :math:`h'_{ij}(t_0+\tau)=0` otherwise. In the intermittent case we have:
+    -fixme - update the intermittent definition
 
     .. math::
        C_{HB}^i(\tau) = \frac{\sum_{ij}h_{ij}(t_0)h_{ij}(t_0+\tau)}{\sum_{ij}h_{ij}(t_0)}
@@ -575,6 +576,7 @@ def run(self, tau_max=20, start=0, stop=None, step=1, intermittency=0, verbose=F
         # Get all hydrogen bonds
         # fixme - we should not have the frozen definition of the hydrogen bonds
         # fixme - this will be upated with the new HydrogenBond interface
+        # fixme - remove this from here and insert it into the example
         hydrogen_bonds = MDAnalysis.analysis.hbonds.HydrogenBondAnalysis(self.universe,
                                                                  self.selection1,
                                                                  self.selection2,
@@ -586,7 +588,7 @@ def run(self, tau_max=20, start=0, stop=None, step=1, intermittency=0, verbose=F
         # Extract the hydrogen bonds IDs only in the format [set(superset(x1,x2), superset(x3,x4)), set(), set()]
         found_hydrogen_bonds = [{frozenset(bond[0:2]) for bond in frame} for frame in hydrogen_bonds.timeseries]
 
-        tau_timeseries, timeseries, timeseries_data = autocorrelation(found_hydrogen_bonds, self.dtmax)
+        tau_timeseries, timeseries, timeseries_data = autocorrelation(found_hydrogen_bonds, self.tau_max)
 
         # fixme - document
         self.hydrogen_bonds = hydrogen_bonds.timeseries
@@ -1148,13 +1150,6 @@ def __init__(self, universe, selection, t0=None, tf=None, dtmax=None, verbose=Fa
             warnings.warn("dtmax is deprecated, use run(tau_max=dtmax) instead", category=DeprecationWarning)
 
 
-    def print(self, verbose, *args):
-        if self.verbose:
-            print(args)
-        elif verbose:
-            print(args)
-
-
     def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermittency=0, verbose=False):
         """
         Computes and returns the Survival Probability (SP) timeseries
@@ -1232,7 +1227,7 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermitte
         frame_no = start
         while frame_no < stop:      # we have already added 1 to stop, therefore <
             if num_frames_to_skip != 0 and frame_loaded_counter == frames_per_window:
-                self.print(verbose, "Skipping the next %d frames:" % num_frames_to_skip)
+                logger.info("Skipping the next %d frames:" % num_frames_to_skip)
                 frame_no += num_frames_to_skip
                 frame_loaded_counter = 0
                 # Correct the number of frames to be loaded after the first window (which starts at t=0, and
@@ -1243,7 +1238,7 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermitte
             # update the frame number
             self.universe.trajectory[frame_no]
 
-            self.print(verbose, "Loading frame:", self.universe.trajectory.ts)
+            logging.info("Loading frame:", self.universe.trajectory.ts)
             atoms = self.universe.select_atoms(self.selection)
 
             # SP of residues or of atoms
@@ -1262,8 +1257,7 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermitte
 
         # warn the user if the NaN are found
         if all(np.isnan(sp_timeseries[1:])):
-            # fixme - warn the user that the dataset. Use a standardised warning.
-            print('NaN Error: Most likely data was not found. Check your atom selections. ')
+            logging.warning('NaN Error: Most likely data was not found. Check your atom selections. ')
 
         # user can investigate the distribution and sample size
         self.sp_timeseries_data = sp_timeseries_data
diff --git a/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py b/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py
index 1317c77afa2..978b82bc29b 100644
--- a/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py
+++ b/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py
@@ -44,9 +44,10 @@ def universe():
 
 def test_HydrogenBondLifetimes(universe):
     hbl = waterdynamics.HydrogenBondLifetimes(
-        universe, SELECTION1, SELECTION1, 0, 5, 3)
+        universe, SELECTION1, SELECTION1, 0, 4, 3)
     hbl.run(stop=5)
-    assert_almost_equal(hbl.timeseries[2], 0.75)
+    assert_almost_equal(hbl.timeseries[3], 0.75)
+
 
 
 def test_HydrogenBondLifetimes_growing_continuous(universe):

From c065f7e9122a84edbb39f1a7dd60a3eeae86d482 Mon Sep 17 00:00:00 2001
From: Mateusz Bieniek <bieniekmat@gmail.com>
Date: Thu, 18 Apr 2019 18:15:26 +0100
Subject: [PATCH 16/40] Extracting the autocorrelation and intermittency into
 their own functions with their own test cases.

Removed the class HydrogenBondLifetimes and moved the autocorrelation
functionality into the hbond_analysis class as a .autocorrelation
method. This method should replace also hbond_autocorrel.py file. Test
cases for the new method .autocorrelation are added.

Added a test cases that, with our definition
of intermittency, would fail in the case of the hbond_autocorrel.py.
---
 .../analysis/hbonds/hbond_analysis.py         |  55 +++++
 .../analysis/hbonds/hbond_autocorrel.py       |   5 +
 .../analysis/utils/autocorrelation.py         |  40 ++-
 package/MDAnalysis/analysis/waterdynamics.py  | 138 ++---------
 .../MDAnalysisTests/analysis/test_hbonds.py   |  33 +++
 .../analysis/test_waterdynamics.py            | 232 +++++++++---------
 6 files changed, 251 insertions(+), 252 deletions(-)

diff --git a/package/MDAnalysis/analysis/hbonds/hbond_analysis.py b/package/MDAnalysis/analysis/hbonds/hbond_analysis.py
index 99fb6287b2c..4240662ee34 100644
--- a/package/MDAnalysis/analysis/hbonds/hbond_analysis.py
+++ b/package/MDAnalysis/analysis/hbonds/hbond_analysis.py
@@ -336,6 +336,8 @@ class HydrogenBondAnalysis_OtherFF(HydrogenBondAnalysis):
 from MDAnalysis.lib import distances
 from MDAnalysis.lib.util import deprecate
 
+from ..utils.autocorrelation import autocorrelation, correct_intermittency
+
 
 logger = logging.getLogger('MDAnalysis.analysis.hbonds')
 
@@ -1440,3 +1442,56 @@ def _make_dict(donors, hydrogens):
         h2donor.update(_make_dict(s1d, s1h))
 
         return h2donor
+
+
+    def autocorrelation(self, tau_max=20, window_step=1, intermittency=0):
+        """
+        Computes and returns the autocorrelation (HydrogenBondLifetimes ) on the computed hydrogen bonds.
+
+        Parameters
+        ----------
+        window_step : int, optional
+            Jump every `step`-th frame. This is compatible but independant of the taus used.
+            Note that `step` and `tau_max` work consistently with intermittency.
+        tau_max : int, optional
+            Survival probability is calculated for the range 1 <= `tau` <= `tau_max`
+        intermittency : int, optional
+            The maximum number of consecutive frames for which a bond can disappear but be counted as present if it
+            returns at the next frame. An intermittency of `0` is equivalent to a continuous autocorrelation, which does
+            not allow for the hydrogen bond disappearance. For example, for `intermittency=2`, any given hbond may
+            disappear for up to two consecutive frames yet be treated as being present at all frames.
+            The default is continuous (0).
+
+        Returns
+        -------
+        tau_timeseries : list
+            tau from 1 to `tau_max`. Saved in the field tau_timeseries.
+        timeseries : list
+            autcorrelation value for each value of `tau`. Saved in the field timeseries.
+        timeseries_data: list
+            raw datapoints from which the average is taken (timeseries).
+            Time dependancy and distribution can be extracted.
+        """
+
+        if self._timeseries is None:
+            logging.error("Autocorrelation analysis of hydrogen bonds cannot be done before the hydrogen bonds are found")
+            logging.error("Autocorrelation: Please use the .run() before calling this function")
+            return
+
+        if self.step != 1:
+            logging.warning("Autocorrelation function should be carried out on consecutive frames. ")
+            logging.warning("Autocorrelation: if you would like to allow bonds to break and reform, please use 'intermittency'")
+
+        # Extract the hydrogen bonds IDs only in the format [set(superset(x1,x2), superset(x3,x4)), ..]
+        hydrogen_bonds = self.timeseries
+        found_hydrogen_bonds = [{frozenset(bond[0:2]) for bond in frame} for frame in hydrogen_bonds]
+
+        intermittent_hbonds = correct_intermittency(found_hydrogen_bonds, intermittency=intermittency)
+        tau_timeseries, timeseries, timeseries_data = autocorrelation(intermittent_hbonds, tau_max,
+                                                                      window_step=window_step)
+        self.acf_tau_timeseries = tau_timeseries
+        self.acf_timeseries = timeseries
+        # user can investigate the distribution and sample size
+        self.acf_timeseries_data = timeseries_data
+
+        return self
diff --git a/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py b/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py
index ebb2933c317..93b332e6ec7 100644
--- a/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py
+++ b/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py
@@ -296,6 +296,11 @@ def __init__(self, universe,
                  nruns=1,  # number of times to iterate through the trajectory
                  nsamples=50,  # number of different points to sample in a run
                  pbc=True):
+
+        warnings.warn("This class is deprecated, use analysis.hbonds.HydrogenBondAnalysis "
+                      "which has .autocorrelation function",
+                      category=DeprecationWarning)
+
         self.u = universe
         # check that slicing is possible
         try:
diff --git a/package/MDAnalysis/analysis/utils/autocorrelation.py b/package/MDAnalysis/analysis/utils/autocorrelation.py
index 50891765009..efb25c40623 100644
--- a/package/MDAnalysis/analysis/utils/autocorrelation.py
+++ b/package/MDAnalysis/analysis/utils/autocorrelation.py
@@ -22,9 +22,10 @@
 #
 
 import numpy as np
+from copy import deepcopy
 
 
-def autocorrelation(list_of_sets, tau_max, window_jump=1, intermittency=0):
+def autocorrelation(list_of_sets, tau_max, window_step=1):
     r"""The descrete implementation of the autocorrelation function.
 
     Here is a random equation that shows something.
@@ -33,7 +34,6 @@ def autocorrelation(list_of_sets, tau_max, window_jump=1, intermittency=0):
        C_{HB}^c(\tau) = \frac{\sum_{ij}h_{ij}(t_0)h'_{ij}(t_0+\tau)}{\sum_{ij}h_{ij}(t_0)}
 
 
-
     fixme -  list_of_sets: Modifies in place!
     Parameters
     ----------
@@ -41,8 +41,9 @@ def autocorrelation(list_of_sets, tau_max, window_jump=1, intermittency=0):
       List of sets,
     tau_max : int
       The last tau (inclusive) for which to carry out autocorrelation.
-    window_jump : int, optional
+    window_step : int, optional
       The step for the t0 to perform autocorrelation (without the overlap). Default is 1.
+    fixme - move intermittency to the right place
     intermittency : int, optional
       Helps with the graps in the data. If we want to remove some of the fluctuations and focus on the patterns,
       intermittency removes the gaps. The default intermittency=0 which means that if the datapoint is missing at any
@@ -64,14 +65,11 @@ def autocorrelation(list_of_sets, tau_max, window_jump=1, intermittency=0):
     # fixme - check dimensions
     # fixme - check parameters?
 
-    # correct the dataset for gaps (intermittency)
-    correct_intermittency(intermittency, list_of_sets)
-
     tau_timeseries = list(range(1, tau_max + 1))
     timeseries_data = [[] for _ in range(tau_max)]
 
     # calculate autocorrelation
-    for t in range(0, len(list_of_sets), window_jump):
+    for t in range(0, len(list_of_sets), window_step):
         Nt = len(list_of_sets[t])
 
         if Nt == 0:
@@ -93,8 +91,8 @@ def autocorrelation(list_of_sets, tau_max, window_jump=1, intermittency=0):
 
     return tau_timeseries, timeseries, timeseries_data
 
-
-def correct_intermittency(intermittency, id_list, verbose=False):
+# fixme - is intermittency consitent with list of sets of sets? (hydrogen bonds)
+def correct_intermittency(list_of_sets, intermittency):
     """
     Pre-process Consecutive Intermittency with a single pass over the data.
     If an atom is absent for a number of frames equal or smaller
@@ -103,30 +101,27 @@ def correct_intermittency(intermittency, id_list, verbose=False):
 
     Parameters
     ----------
+    id_list: list of sets
+        returns a new list with the IDs with added IDs which disappeared for <= :param intermittency
     intermittency: int
         the max gap allowed and to be corrected
-    id_list: fixme
-        modifies the selecteded IDs in place by adding atoms which left for <= :param intermittency
-    verbose: Boolean, optional
-        print
     """
     if intermittency == 0:
-        return
+        return list_of_sets
 
-    if verbose:
-        print('Correcting the selected IDs for intermittancy (gaps). ')
+    list_of_sets = deepcopy(list_of_sets)
 
-    for i, ids in enumerate(id_list):
+    for i, ids in enumerate(list_of_sets):
         # initially update each frame as seen 0 ago (now)
         seen_frames_ago = {i: 0 for i in ids}
         for j in range(1, intermittency + 2):
             for atomid in seen_frames_ago.keys():
                 # no more frames
-                if i + j >= len(id_list):
+                if i + j >= len(list_of_sets):
                     continue
 
                 # if the atom is absent now
-                if not atomid in id_list[i + j]:
+                if not atomid in list_of_sets[i + j]:
                     # increase its absence counter
                     seen_frames_ago[atomid] += 1
                     continue
@@ -140,10 +135,11 @@ def correct_intermittency(intermittency, id_list, verbose=False):
                 if seen_frames_ago[atomid] > intermittency:
                     continue
 
-                # the atom was absent but returned (within <= intermittency)
+                # the atom was absent but returned (within <= intermittency_value)
                 # add it to the frames where it was absent.
                 # Introduce the corrections.
                 for k in range(seen_frames_ago[atomid], 0, -1):
-                    id_list[i + j - k].add(atomid)
+                    list_of_sets[i + j - k].add(atomid)
 
-                seen_frames_ago[atomid] = 0
\ No newline at end of file
+                seen_frames_ago[atomid] = 0
+    return list_of_sets
\ No newline at end of file
diff --git a/package/MDAnalysis/analysis/waterdynamics.py b/package/MDAnalysis/analysis/waterdynamics.py
index a77751f3618..1f578f2a7ed 100644
--- a/package/MDAnalysis/analysis/waterdynamics.py
+++ b/package/MDAnalysis/analysis/waterdynamics.py
@@ -450,32 +450,30 @@
 import warnings
 import numpy as np
 
-import MDAnalysis.analysis.hbonds
 from MDAnalysis.lib.log import ProgressMeter
-from .utils.autocorrelation import autocorrelation
+from .utils.autocorrelation import autocorrelation, correct_intermittency
 
 logger = logging.getLogger('MDAnalysis.analysis.waterdynamics')
 
 
 class HydrogenBondLifetimes(object):
     r"""Hydrogen bond lifetime analysis
-    fixme - update the definition of intermittency (refer the user to autocorrelation)
+    fixme - deprecate it
 
-    This is an autocorrelation function that gives the "Hydrogen Bond Lifetimes"
+    This is a autocorrelation function that gives the "Hydrogen Bond Lifetimes"
     (HBL) proposed by D.C. Rapaport [Rapaport1983]_. From this function we can
     obtain the continuous and intermittent behavior of hydrogen bonds in
     time. A fast decay in these parameters indicate a fast change in HBs
-    connectivity. A slow decay indicate stable hydrogen bonds (e.g. ice).
-    The HBL is also know as "Hydrogen Bond Population Relaxation"
-    (HBPR). In the continuous case we have:
+    connectivity. A slow decay indicate very stables hydrogen bonds, like in
+    ice. The HBL is also know as "Hydrogen Bond Population Relaxation"
+    (HBPR). In the continuos case we have:
 
     .. math::
        C_{HB}^c(\tau) = \frac{\sum_{ij}h_{ij}(t_0)h'_{ij}(t_0+\tau)}{\sum_{ij}h_{ij}(t_0)}
 
     where :math:`h'_{ij}(t_0+\tau)=1` if there is a H-bond between a pair
-    :math:`ij` during time interval :math:`t_0+\tau` (continuous) and
+    :math:`ij` during time interval :math:`t_0+\tau` (continuos) and
     :math:`h'_{ij}(t_0+\tau)=0` otherwise. In the intermittent case we have:
-    -fixme - update the intermittent definition
 
     .. math::
        C_{HB}^i(\tau) = \frac{\sum_{ij}h_{ij}(t_0)h_{ij}(t_0+\tau)}{\sum_{ij}h_{ij}(t_0)}
@@ -494,111 +492,24 @@ class HydrogenBondLifetimes(object):
       It could be any selection available in MDAnalysis, not just water.
     selection2 : str
       Selection string to analize its HBL against selection1
-    verbose : Boolean, optional
-      When True, prints progress and comments to the console.
+    t0 : int
+      frame  where analysis begins
+    tf : int
+      frame where analysis ends
+    dtmax : int
+      Maximum dt size, `dtmax` < `tf` or it will crash.
+    nproc : int
+      Number of processors to use, by default is 1.
+
 
     .. versionadded:: 0.11.0
     """
 
     def __init__(self, universe, selection1, selection2, t0=None, tf=None, dtmax=None, verbose=False):
-        self.universe = universe
-        self.selection1 = selection1
-        self.selection2 = selection2
-        self.verbose = verbose
-
-        # backward compatibility
-        self.start = self.stop = self.tau_max = None
-        if t0 is not None:
-            self.start = t0
-            warnings.warn("t0 is deprecated, use run(start) instead", category=DeprecationWarning)
-
-        if tf is not None:
-            self.stop = tf
-            warnings.warn("tf is deprecated, use run(stop) instead", category=DeprecationWarning)
-
-        if dtmax is not None:
-            self.tau_max = dtmax
-            warnings.warn("dtmax is deprecated, use run(tau_max) instead", category=DeprecationWarning)
-
-
-    def run(self, tau_max=20, start=0, stop=None, step=1, intermittency=0, verbose=False, **kwargs):
-        """
-        Computes and returns the Survival Probability (SP) timeseries
-
-        Parameters
-        ----------
-        start : int, optional
-            Zero-based index of the first frame to be analysed
-        stop : int, optional
-            Zero-based index of the last frame to be analysed (inclusive)
-        step : int, optional
-            Jump every `step`-th frame. This is compatible but independant of the taus used.
-            Note that `step` and `tau_max` work consistently with intermittency.
-        tau_max : int, optional
-            Survival probability is calculated for the range 1 <= `tau` <= `tau_max`
-        intermittency : int, optional
-            The maximum number of consecutive frames for which a bond can disappear but be counted as present if it
-            returns at the next frame. An intermittency of `0` is equivalent to a continuous autocorrelation, which does
-            not allow for the hydrogen bond disappearance. For example, for `intermittency=2`, any given hbond may
-            disappear for up to two consecutive frames yet be treated as being present at all frames.
-            The default is continuous (0).
-        verbose : Boolean, optional
-            Print the progress to the console
-
-        Returns
-        -------
-        tau_timeseries : list
-            tau from 1 to `tau_max`. Saved in the field tau_timeseries.
-        timeseries : list
-            survival probability for each value of `tau`. Saved in the field sp_timeseries.
-        timeseries_data: list
-            raw datapoints from which the average is taken (sp_timeseries).
-            Time dependancy and distribution can be extracted.
-        """
-
-        # backward compatibility (and priority)
-        start = self.start if self.start is not None else start
-        stop = self.stop if self.stop is not None else stop
-        tau_max = self.tau_max if self.tau_max is not None else tau_max
-
-        # sanity checks
-        if stop is not None and stop >= len(self.universe.trajectory):
-            raise ValueError("\"stop\" must be smaller than the number of frames in the trajectory.")
-
-        if stop is None:
-            stop = len(self.universe.trajectory)
-        else:
-            stop = stop + 1
-
-        if tau_max > (stop - start):
-            raise ValueError("Too few frames selected for given tau_max.")
-
-        # Get all hydrogen bonds
-        # fixme - we should not have the frozen definition of the hydrogen bonds
-        # fixme - this will be upated with the new HydrogenBond interface
-        # fixme - remove this from here and insert it into the example
-        hydrogen_bonds = MDAnalysis.analysis.hbonds.HydrogenBondAnalysis(self.universe,
-                                                                 self.selection1,
-                                                                 self.selection2,
-                                                                 distance=3.5,
-                                                                 angle=120.0)
-        # Compute the minimal number of hydrogen bonds
-        hydrogen_bonds.run(start=start, stop=stop, step=step, verbose=verbose, **kwargs)
-
-        # Extract the hydrogen bonds IDs only in the format [set(superset(x1,x2), superset(x3,x4)), set(), set()]
-        found_hydrogen_bonds = [{frozenset(bond[0:2]) for bond in frame} for frame in hydrogen_bonds.timeseries]
-
-        tau_timeseries, timeseries, timeseries_data = autocorrelation(found_hydrogen_bonds, self.tau_max)
-
-        # fixme - document
-        self.hydrogen_bonds = hydrogen_bonds.timeseries
-
-        # user can investigate the distribution and sample size
-        self.sp_timeseries_data = timeseries_data
-        self.tau_timeseries = tau_timeseries
-        self.timeseries = timeseries
-        return self
-
+        warnings.warn("This class is deprecated, use analysis.hbonds.HydrogenBondAnalysis "
+                      "which has .autocorrelation function",
+                      category=DeprecationWarning)
+        pass
 
 
 class WaterOrientationalRelaxation(object):
@@ -1207,7 +1118,7 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermitte
             raise ValueError("Too few frames selected for given tau_max.")
 
         # preload the frames (atom IDs) to a list of sets
-        self.selected_ids = []
+        self._selected_ids = []
 
         # fixme - to parallise: the section should be rewritten so that this loop only creates a list of indices,
         # on which the parallel _single_frame can be applied.
@@ -1243,7 +1154,7 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermitte
 
             # SP of residues or of atoms
             ids = atoms.residues.resids if residues else atoms.ids
-            self.selected_ids.append(set(ids))
+            self._selected_ids.append(set(ids))
 
             frame_no += 1
             frame_loaded_counter += 1
@@ -1252,8 +1163,9 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermitte
         # and for extra frames that were loaded (intermittency)
         window_jump = step - num_frames_to_skip
 
-        tau_timeseries, sp_timeseries, sp_timeseries_data = \
-            autocorrelation(self.selected_ids, tau_max, window_jump,intermittency)
+        self._intermittent_selected_ids = correct_intermittency(self._selected_ids, intermittency=intermittency)
+        tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(self._intermittent_selected_ids,
+                                                                            tau_max, window_jump)
 
         # warn the user if the NaN are found
         if all(np.isnan(sp_timeseries[1:])):
diff --git a/testsuite/MDAnalysisTests/analysis/test_hbonds.py b/testsuite/MDAnalysisTests/analysis/test_hbonds.py
index b10cce2624e..cb00248b087 100644
--- a/testsuite/MDAnalysisTests/analysis/test_hbonds.py
+++ b/testsuite/MDAnalysisTests/analysis/test_hbonds.py
@@ -399,6 +399,39 @@ def test_table_atoms(self, h, normalized_timeseries, reference_table):
             assert_array_equal(h.table.field(name), ref, err_msg="resname for {0} do not match (Issue #801)")
 
 
+class TestHydrogenBondAnalysisAutocorrelation(object):
+    @staticmethod
+    @pytest.fixture(scope='class')
+    def universe():
+        return MDAnalysis.Universe(waterPSF, waterDCD)
+
+    @staticmethod
+    @pytest.fixture(scope='class')
+    def selection():
+        return "byres name OH2"
+
+    def test_HydrogenBondLifetimes(self, universe, selection):
+        hbonds = MDAnalysis.analysis.hbonds.HydrogenBondAnalysis(universe,
+                                                                selection,
+                                                                selection,
+                                                                distance=3.5,
+                                                                angle=120.0)
+        hbonds.run(start=0, stop=4)
+        hbonds.autocorrelation(tau_max=3)
+        assert_almost_equal(hbonds.acf_timeseries[3], 0.75)
+
+    def test_HydrogenBondLifetimes_growing_continuous(self, universe, selection):
+        hbonds = MDAnalysis.analysis.hbonds.HydrogenBondAnalysis(universe,
+                                                                selection,
+                                                                selection,
+                                                                distance=3.5,
+                                                                angle=120.0)
+        hbonds.run(start=0, stop=9)
+        hbonds.autocorrelation(tau_max=5)
+        assert all([frame >= framePlus1 for frame, framePlus1 in zip(hbonds.acf_timeseries, hbonds.acf_timeseries[1:])])
+
+
+
 class TestHydrogenBondAnalysisTIP3PHeavyPBC(object):
     @staticmethod
     @pytest.fixture(scope='class')
diff --git a/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py b/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py
index 978b82bc29b..e9a4440c7c5 100644
--- a/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py
+++ b/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py
@@ -23,7 +23,7 @@
 from __future__ import print_function, absolute_import
 import MDAnalysis
 from MDAnalysis.analysis import waterdynamics
-from MDAnalysis.analysis.utils.autocorrelation import autocorrelation
+from MDAnalysis.analysis.utils.autocorrelation import autocorrelation, correct_intermittency
 
 from MDAnalysisTests.datafiles import waterPSF, waterDCD
 
@@ -42,23 +42,6 @@ def universe():
     return MDAnalysis.Universe(waterPSF, waterDCD)
 
 
-def test_HydrogenBondLifetimes(universe):
-    hbl = waterdynamics.HydrogenBondLifetimes(
-        universe, SELECTION1, SELECTION1, 0, 4, 3)
-    hbl.run(stop=5)
-    assert_almost_equal(hbl.timeseries[3], 0.75)
-
-
-
-def test_HydrogenBondLifetimes_growing_continuous(universe):
-    # The autocorrelation cannot grow
-    hbl = waterdynamics.HydrogenBondLifetimes(
-        universe, SELECTION1, SELECTION1, t0=0, tf=9, dtmax=5)
-    hbl.run()
-    # Index 0 in frameX[0] refers to the continuous, 1 to intermittent
-    assert all([frameX >= frameXplus1 for frameX, frameXplus1 in zip(hbl.timeseries, hbl.timeseries[1:])])
-
-
 def test_WaterOrientationalRelaxation(universe):
     wor = waterdynamics.WaterOrientationalRelaxation(
         universe, SELECTION1, 0, 5, 2)
@@ -96,77 +79,24 @@ def test_MeanSquareDisplacement_zeroMolecules(universe):
     assert_almost_equal(msd_zero.timeseries[1], 0.0)
 
 
-def test_SurvivalProbability_t0tf(universe):
-    with patch.object(universe, 'select_atoms') as select_atoms_mock:
-        ids = [(0, ), (0, ), (7, 6, 5), (6, 5, 4), (5, 4, 3), (4, 3, 2), (3, 2, 1), (0, )]
-        select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop(2))   # atom IDs fed set by set
-        sp = waterdynamics.SurvivalProbability(universe, "")
-        sp.run(tau_max=3, start=2, stop=6)
-        assert_almost_equal(sp.sp_timeseries, [1, 2 / 3.0, 1 / 3.0, 0])
-
-
-def test_SurvivalProbability_definedTaus(universe):
-    with patch.object(universe, 'select_atoms') as select_atoms_mock:
-        ids = [(9, 8, 7), (8, 7, 6), (7, 6, 5), (6, 5, 4), (5, 4, 3), (4, 3, 2), (3, 2, 1)]
-        select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop())   # atom IDs fed set by set
-        sp = waterdynamics.SurvivalProbability(universe, "")
-        sp.run(tau_max=3, start=0, stop=6, verbose=True)
-        assert_almost_equal(sp.sp_timeseries, [1, 2 / 3.0, 1 / 3.0, 0])
-
-
-def test_SurvivalProbability_zeroMolecules(universe):
-    # no atom IDs found
-    with patch.object(universe, 'select_atoms', return_value=Mock(ids=[])) as select_atoms_mock:
-        sp = waterdynamics.SurvivalProbability(universe, "")
-        sp.run(tau_max=3, start=3, stop=6, verbose=True)
-        assert all(np.isnan(sp.sp_timeseries[1:]))
-
-
-def test_SurvivalProbability_alwaysPresent(universe):
-    # always the same atom IDs found, 7 and 8
-    with patch.object(universe, 'select_atoms', return_value=Mock(ids=[7, 8])) as select_atoms_mock:
-        sp = waterdynamics.SurvivalProbability(universe, "")
-        sp.run(tau_max=3, start=0, stop=6, verbose=True)
-        assert all(np.equal(sp.sp_timeseries, 1))
-
-
-def test_SurvivalProbability_stepLargerThanDtmax(universe):
-    # Testing if the frames are skipped correctly
-    with patch.object(universe, 'select_atoms', return_value=Mock(ids=(1,))) as select_atoms_mock:
-        sp = waterdynamics.SurvivalProbability(universe, "")
-        sp.run(tau_max=2, step=5, stop=9, verbose=True)
-        assert_equal(sp.sp_timeseries, [1, 1, 1])
-        # with tau_max=2 for all the frames we only read 6 of them
-        # this is because the frames which are not used are skipped, and therefore 'select_atoms'
-        assert universe.trajectory.n_frames > 6
-        assert_equal(select_atoms_mock.call_count, 6)
-
-
-def test_SurvivalProbability_stepEqualDtMax(universe):
-    with patch.object(universe, 'select_atoms', return_value=Mock(ids=(1,))) as select_atoms_mock:
-        sp = waterdynamics.SurvivalProbability(universe, "")
-        sp.run(tau_max=4, step=5, stop=9, verbose=True)
-        # all frames from 0, with 9 inclusive
-        assert_equal(select_atoms_mock.call_count, 10)
-
-
 def test_SurvivalProbability_intermittency1and2(universe):
     """
     Intermittency of 2 means that we still count an atom if it is not present for up to 2 consecutive frames,
     but then returns at the following step.
     """
     with patch.object(universe, 'select_atoms') as select_atoms_mock:
-        ids = [(9, 8), (), (8,), (9,), (8,), (), (9,8), (), (8,), (9,8,)]
+        ids = [(9, 8), (), (8,), (9,), (8,), (), (9, 8), (), (8,), (9, 8)]
         select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop())   # atom IDs fed set by set
         sp = waterdynamics.SurvivalProbability(universe, "")
         sp.run(tau_max=3, stop=9, verbose=True, intermittency=2)
-        assert all((x == {9, 8} for x in sp.selected_ids))
+        assert all(x == {9, 8} for x in sp._intermittent_selected_ids)
         assert_almost_equal(sp.sp_timeseries, [1, 1, 1, 1])
 
 
 def test_SurvivalProbability_intermittency2lacking(universe):
     """
-    If an atom is not present for more than 2 consecutive frames, it is considered to have left the region.
+    If an atom is not present for more than 2 consecutive frames,
+    it is considered to have left the region.
     """
     with patch.object(universe, 'select_atoms') as select_atoms_mock:
         ids = [(9,), (), (), (), (9,), (), (), (), (9,)]
@@ -186,7 +116,7 @@ def test_SurvivalProbability_intermittency1_step5_noSkipping(universe):
         select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop())   # atom IDs fed set by set
         sp = waterdynamics.SurvivalProbability(universe, "")
         sp.run(tau_max=2, stop=9, verbose=True, intermittency=1, step=5)
-        assert all((x == {2, 3} for x in sp.selected_ids))
+        assert all((x == {2, 3} for x in sp._intermittent_selected_ids))
         assert_almost_equal(sp.sp_timeseries, [1, 1, 1])
 
 
@@ -201,11 +131,42 @@ def test_SurvivalProbability_intermittency1_step5_Skipping(universe):
         select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop())   # atom IDs fed set by set
         sp = waterdynamics.SurvivalProbability(universe, "")
         sp.run(tau_max=1, stop=9, verbose=True, intermittency=1, step=5)
-        assert all((x == {1} for x in sp.selected_ids))
-        assert len(sp.selected_ids) == beforepopsing
+        assert all((x == {1} for x in sp._intermittent_selected_ids))
+        assert len(sp._selected_ids) == beforepopsing
         assert_almost_equal(sp.sp_timeseries, [1, 1])
 
 
+def test_intermittency_none():
+    # No changes asked - returns the same data
+    input_ids = [{1}, {1}, {1}, set(), set(), {1}, {1}, {1}, set(), set(), {1}, set(), set(), {1}]
+    corrected = correct_intermittency(input_ids, intermittency=0)
+    assert all(x == y for x,y in zip(input_ids, corrected))
+
+
+def test_intermittency_1and2():
+    # The maximum gap in the dataset is 2, so the IDs are always present after correction
+    input_ids = [{9, 8}, set(), {8, }, {9, }, {8, }, set(), {9, 8}, set(), {8, }, {9, 8, }]
+    corrected = correct_intermittency(input_ids, intermittency=2)
+    assert all((x == {9, 8} for x in corrected))
+
+
+def test_intermittency_2tooShort():
+    #The IDs are abscent for too long/
+    input_ids = [{9,}, {}, {}, {}, {9,}, {}, {}, {}, {9,}]
+    corrected = correct_intermittency(input_ids, intermittency=2)
+    assert all(x == y for x, y in zip(input_ids, corrected))
+
+
+def test_intermittency_setsOfSets():
+    # Verificaiton for the case of hydrogen bonds (sets of sets)
+    input_ids = [{frozenset({1,2}), frozenset({3, 4})},set(), set(),
+                 {frozenset({1, 2}), frozenset({3, 4})}, set(), set(),
+                 {frozenset({1, 2}), frozenset({3, 4})}, set(), set(),
+                 {frozenset({1, 2}), frozenset({3, 4})}]
+    corrected = correct_intermittency(input_ids, intermittency=2)
+    assert all((x == {frozenset({1, 2}), frozenset({3, 4})} for x in corrected))
+
+
 def test_autocorrelation_alwaysPresent():
     input = [{1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}]
     tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input, tau_max=3)
@@ -218,62 +179,99 @@ def test_autocorrelation_definedTaus():
     assert_almost_equal(sp_timeseries, [1, 2/3., 1/3., 0])
 
 
-def test_autocorrelation_intermittency1and2():
-    """
-    Intermittency of 2 means that we still count an atom if it is not present for up to 2 consecutive frames,
-    but then returns at the following step.
-    """
-    input_ids = [{9, 8}, set(), {8,}, {9,}, {8,}, set(), {9, 8}, set(), {8,}, {9, 8,}]
-    tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=3, intermittency=2)
-    # input_ids are modified in place, check if the gaps have been removed correctly
-    assert all((x == {9, 8} for x in input_ids))
-    assert_almost_equal(sp_timeseries, [1, 1, 1, 1])
-    
-
-def test_autocorrelation_intermittency2lacking():
-    """
-    If an atom is not present for more than 2 consecutive frames, it is considered to have left the region.
-    """
-    input_ids = [{9,}, {}, {}, {}, {9,}, {}, {}, {}, {9,}]
-    tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=3, intermittency=2)
-    assert_almost_equal(sp_timeseries, [1, 0, 0, 0])
-
-
 def test_autocorrelation_intermittency1_windowJump_intermittencyAll():
     """
     Step leads to skipping frames if (tau_max + 1) + (intermittency * 2) < step.
     No frames should be skipped so intermittency should be applied to all.
     """
     input_ids = [{2, 3}, {3,}, {2, 3}, {3,}, {2,}, {3,}, {2, 3}, {3,}, {2, 3}, {2, 3}]
-    tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=2,
-                                                                        intermittency=1, window_jump=5)
-    assert all((x == {2, 3} for x in input_ids))
+    corrected = correct_intermittency(input_ids, intermittency=1)
+    tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(corrected, tau_max=2,
+                                                                        window_step=5)
+    assert all((x == {2, 3} for x in corrected))
     assert_almost_equal(sp_timeseries, [1, 1, 1])
 
 
-def test_autocorrelation_intermittency0_windowBigJump():
-    """
-    The empty sets are ignored (no interrttency)
-    """
+def test_autocorrelation_windowBigJump():
+    #The empty sets are ignored (no intermittency)
     input_ids = [{1}, {1}, {1}, set(), set(), {1}, {1}, {1}, set(), set(), {1}, {1}, {1}]
-    tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=2, window_jump=5)
+    tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=2, window_step=5)
     assert_almost_equal(sp_timeseries, [1, 1, 1])
 
 
-def test_autocorrelation_intermittency0_windowBigJump_absence():
-    """
-    In the last frame the molecules are absent
-    """
+def test_autocorrelation_windowBigJump_absence():
+    # In the last frame the molecules are absent
     input_ids = [{1}, {1}, {1}, set(), set(), {1}, {1}, {1}, set(), set(), {1}, set(), set()]
-    tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=2, window_jump=5)
+    tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=2, window_step=5)
     assert_almost_equal(sp_timeseries, [1, 2/3., 2/3.])
 
 
+def test_autocorrelation_intermittency1_many():
+    input_ids = [{1}, set(), {1}, set(), {1}, set(), {1}, set(), {1}, set(), {1}, set(), {1}, set(), {1}]
+    corrected = correct_intermittency(input_ids, intermittency=1)
+    tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(corrected, tau_max=14,
+                                                                        window_step=5)
+    assert_almost_equal(sp_timeseries, [1] * 15)
+
+
 def test_autocorrelation_intermittency2_windowBigJump():
-    """
-    The intermittency corrects the last frame
-    """
+    # The intermittency corrects the last frame
     input_ids = [{1}, {1}, {1}, set(), set(), {1}, {1}, {1}, set(), set(), {1}, set(), set(), {1}]
-    tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(input_ids, tau_max=2,
-                                                                        intermittency=2, window_jump=5)
+    corrected = correct_intermittency(input_ids, intermittency=2)
+    tau_timeseries, sp_timeseries, sp_timeseries_data = autocorrelation(corrected, tau_max=2,
+                                                                        window_step=5)
     assert_almost_equal(sp_timeseries, [1, 1, 1])
+
+
+def test_SurvivalProbability_t0tf(universe):
+    with patch.object(universe, 'select_atoms') as select_atoms_mock:
+        ids = [(0, ), (0, ), (7, 6, 5), (6, 5, 4), (5, 4, 3), (4, 3, 2), (3, 2, 1), (0, )]
+        select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop(2))   # atom IDs fed set by set
+        sp = waterdynamics.SurvivalProbability(universe, "")
+        sp.run(tau_max=3, start=2, stop=6)
+        assert_almost_equal(sp.sp_timeseries, [1, 2 / 3.0, 1 / 3.0, 0])
+
+
+def test_SurvivalProbability_definedTaus(universe):
+    with patch.object(universe, 'select_atoms') as select_atoms_mock:
+        ids = [(9, 8, 7), (8, 7, 6), (7, 6, 5), (6, 5, 4), (5, 4, 3), (4, 3, 2), (3, 2, 1)]
+        select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop())   # atom IDs fed set by set
+        sp = waterdynamics.SurvivalProbability(universe, "")
+        sp.run(tau_max=3, start=0, stop=6, verbose=True)
+        assert_almost_equal(sp.sp_timeseries, [1, 2 / 3.0, 1 / 3.0, 0])
+
+
+def test_SurvivalProbability_zeroMolecules(universe):
+    # no atom IDs found
+    with patch.object(universe, 'select_atoms', return_value=Mock(ids=[])) as select_atoms_mock:
+        sp = waterdynamics.SurvivalProbability(universe, "")
+        sp.run(tau_max=3, start=3, stop=6, verbose=True)
+        assert all(np.isnan(sp.sp_timeseries[1:]))
+
+
+def test_SurvivalProbability_alwaysPresent(universe):
+    # always the same atom IDs found, 7 and 8
+    with patch.object(universe, 'select_atoms', return_value=Mock(ids=[7, 8])) as select_atoms_mock:
+        sp = waterdynamics.SurvivalProbability(universe, "")
+        sp.run(tau_max=3, start=0, stop=6, verbose=True)
+        assert all(np.equal(sp.sp_timeseries, 1))
+
+
+def test_SurvivalProbability_stepLargerThanDtmax(universe):
+    # Testing if the frames are skipped correctly
+    with patch.object(universe, 'select_atoms', return_value=Mock(ids=(1,))) as select_atoms_mock:
+        sp = waterdynamics.SurvivalProbability(universe, "")
+        sp.run(tau_max=2, step=5, stop=9, verbose=True)
+        assert_equal(sp.sp_timeseries, [1, 1, 1])
+        # with tau_max=2 for all the frames we only read 6 of them
+        # this is because the frames which are not used are skipped, and therefore 'select_atoms'
+        assert universe.trajectory.n_frames > 6
+        assert_equal(select_atoms_mock.call_count, 6)
+
+
+def test_SurvivalProbability_stepEqualDtMax(universe):
+    with patch.object(universe, 'select_atoms', return_value=Mock(ids=(1,))) as select_atoms_mock:
+        sp = waterdynamics.SurvivalProbability(universe, "")
+        sp.run(tau_max=4, step=5, stop=9, verbose=True)
+        # all frames from 0, with 9 inclusive
+        assert_equal(select_atoms_mock.call_count, 10)

From 7514377698b43787760512b55cec99c520ca4d3f Mon Sep 17 00:00:00 2001
From: Paul Smith <paul.smith@kcl.ac.uk>
Date: Mon, 29 Apr 2019 13:08:31 +0100
Subject: [PATCH 17/40] Updated logging.

---
 package/MDAnalysis/analysis/waterdynamics.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/package/MDAnalysis/analysis/waterdynamics.py b/package/MDAnalysis/analysis/waterdynamics.py
index 1f578f2a7ed..06476dc4346 100644
--- a/package/MDAnalysis/analysis/waterdynamics.py
+++ b/package/MDAnalysis/analysis/waterdynamics.py
@@ -509,7 +509,6 @@ def __init__(self, universe, selection1, selection2, t0=None, tf=None, dtmax=Non
         warnings.warn("This class is deprecated, use analysis.hbonds.HydrogenBondAnalysis "
                       "which has .autocorrelation function",
                       category=DeprecationWarning)
-        pass
 
 
 class WaterOrientationalRelaxation(object):
@@ -1138,7 +1137,7 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermitte
         frame_no = start
         while frame_no < stop:      # we have already added 1 to stop, therefore <
             if num_frames_to_skip != 0 and frame_loaded_counter == frames_per_window:
-                logger.info("Skipping the next %d frames:" % num_frames_to_skip)
+                logger.info("Skipping the next %d frames:", num_frames_to_skip)
                 frame_no += num_frames_to_skip
                 frame_loaded_counter = 0
                 # Correct the number of frames to be loaded after the first window (which starts at t=0, and
@@ -1149,7 +1148,7 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermitte
             # update the frame number
             self.universe.trajectory[frame_no]
 
-            logging.info("Loading frame:", self.universe.trajectory.ts)
+            logger.info("Loading frame: %d", self.universe.trajectory.frame)
             atoms = self.universe.select_atoms(self.selection)
 
             # SP of residues or of atoms
@@ -1169,7 +1168,7 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermitte
 
         # warn the user if the NaN are found
         if all(np.isnan(sp_timeseries[1:])):
-            logging.warning('NaN Error: Most likely data was not found. Check your atom selections. ')
+            logger.warning('NaN Error: Most likely data was not found. Check your atom selections. ')
 
         # user can investigate the distribution and sample size
         self.sp_timeseries_data = sp_timeseries_data

From f04eb690db02bc458aac05674a798aea83bdaa11 Mon Sep 17 00:00:00 2001
From: Paul Smith <paul.smith@kcl.ac.uk>
Date: Mon, 29 Apr 2019 13:10:17 +0100
Subject: [PATCH 18/40] Added __future__ imports.

---
 package/MDAnalysis/analysis/utils/autocorrelation.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/package/MDAnalysis/analysis/utils/autocorrelation.py b/package/MDAnalysis/analysis/utils/autocorrelation.py
index efb25c40623..5349eebf6b9 100644
--- a/package/MDAnalysis/analysis/utils/autocorrelation.py
+++ b/package/MDAnalysis/analysis/utils/autocorrelation.py
@@ -21,6 +21,7 @@
 # J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
 #
 
+from __future__ import absolute_import, division
 import numpy as np
 from copy import deepcopy
 

From 2f0d37dff412a2b79a9bf8c106d9fcd309d79a34 Mon Sep 17 00:00:00 2001
From: Paul Smith <paul.smith@kcl.ac.uk>
Date: Sun, 1 Mar 2020 12:39:44 +0000
Subject: [PATCH 19/40] Remove deprecation warning for HydrogenBondLifetime.

This function will be removed in a future PR once a replacement that uses the new hydrogen bond class is in place.
---
 package/MDAnalysis/analysis/waterdynamics.py | 208 ++++++++++++++++++-
 1 file changed, 203 insertions(+), 5 deletions(-)

diff --git a/package/MDAnalysis/analysis/waterdynamics.py b/package/MDAnalysis/analysis/waterdynamics.py
index 06476dc4346..ac5beaa7ef2 100644
--- a/package/MDAnalysis/analysis/waterdynamics.py
+++ b/package/MDAnalysis/analysis/waterdynamics.py
@@ -458,7 +458,6 @@
 
 class HydrogenBondLifetimes(object):
     r"""Hydrogen bond lifetime analysis
-    fixme - deprecate it
 
     This is a autocorrelation function that gives the "Hydrogen Bond Lifetimes"
     (HBL) proposed by D.C. Rapaport [Rapaport1983]_. From this function we can
@@ -505,10 +504,209 @@ class HydrogenBondLifetimes(object):
     .. versionadded:: 0.11.0
     """
 
-    def __init__(self, universe, selection1, selection2, t0=None, tf=None, dtmax=None, verbose=False):
-        warnings.warn("This class is deprecated, use analysis.hbonds.HydrogenBondAnalysis "
-                      "which has .autocorrelation function",
-                      category=DeprecationWarning)
+    def __init__(self, universe, selection1, selection2, t0, tf, dtmax,
+                 nproc=1):
+        #warnings.warn("This class is deprecated; use analysis.hydrogen_bonds.hbond_analysis.HydrogenBondAnalysis ",
+        #              category=DeprecationWarning)
+        self.universe = universe
+        self.selection1 = selection1
+        self.selection2 = selection2
+        self.t0 = t0
+        self.tf = tf - 1
+        self.dtmax = dtmax
+        self.nproc = nproc
+        self.timeseries = None
+
+    def _getC_i(self, HBP, t0, t):
+        """
+        This function give the intermitent Hydrogen Bond Lifetime
+        C_i = <h(t0)h(t)>/<h(t0)> between t0 and t
+        """
+        C_i = 0
+        for i in range(len(HBP[t0])):
+            for j in range(len(HBP[t])):
+                if (HBP[t0][i][0] == HBP[t][j][0] and HBP[t0][i][1] == HBP[t][j][1]):
+                    C_i += 1
+                    break
+        if len(HBP[t0]) == 0:
+            return 0.0
+        else:
+            return float(C_i) / len(HBP[t0])
+
+    def _getC_c(self, HBP, t0, t):
+        """
+        This function give the continous Hydrogen Bond Lifetime
+        C_c = <h(t0)h'(t)>/<h(t0)> between t0 and t
+        """
+        C_c = 0
+        dt = 1
+        begt0 = t0
+        HBP_cp = HBP
+        HBP_t0 = HBP[t0]
+        newHBP = []
+        if t0 == t:
+            return 1.0
+        while t0 + dt <= t:
+            for i in range(len(HBP_t0)):
+                for j in range(len(HBP_cp[t0 + dt])):
+                    if (HBP_t0[i][0] == HBP_cp[t0 + dt][j][0] and
+                        HBP_t0[i][1] == HBP_cp[t0 + dt][j][1]):
+                        newHBP.append(HBP_t0[i])
+                        break
+            C_c = len(newHBP)
+            t0 += dt
+            HBP_t0 = newHBP
+            newHBP = []
+        if len(HBP[begt0]) == 0:
+            return 0
+        else:
+            return C_c / float(len(HBP[begt0]))
+
+    def _intervC_c(self, HBP, t0, tf, dt):
+        """
+        This function gets all the data for the h(t0)h(t0+dt)', where
+        t0 = 1,2,3,...,tf. This function give us one point of the final plot
+        HBL vs t
+        """
+        a = 0
+        count = 0
+        for i in range(len(HBP)):
+            if (t0 + dt <= tf):
+                if t0 == t0 + dt:
+                    b = self._getC_c(HBP, t0, t0)
+                    break
+                b = self._getC_c(HBP, t0, t0 + dt)
+                t0 += dt
+                a += b
+                count += 1
+        if count == 0:
+            return 1.0
+        return a / count
+
+    def _intervC_i(self, HBP, t0, tf, dt):
+        """
+        This function gets all the data for the h(t0)h(t0+dt), where
+        t0 = 1,2,3,...,tf. This function give us a point of the final plot
+        HBL vs t
+        """
+        a = 0
+        count = 0
+        for i in range(len(HBP)):
+            if (t0 + dt <= tf):
+                b = self._getC_i(HBP, t0, t0 + dt)
+                t0 += dt
+                a += b
+                count += 1
+        return a / count
+
+    def _finalGraphGetC_i(self, HBP, t0, tf, maxdt):
+        """
+        This function gets the final data of the C_i graph.
+        """
+        output = []
+        for dt in range(maxdt):
+            a = self._intervC_i(HBP, t0, tf, dt)
+            output.append(a)
+        return output
+
+    def _finalGraphGetC_c(self, HBP, t0, tf, maxdt):
+        """
+        This function gets the final data of the C_c graph.
+        """
+        output = []
+        for dt in range(maxdt):
+            a = self._intervC_c(HBP, t0, tf, dt)
+            output.append(a)
+        return output
+
+    def _getGraphics(self, HBP, t0, tf, maxdt):
+        """
+        Function that join all the results into a plot.
+        """
+        a = []
+        cont = self._finalGraphGetC_c(HBP, t0, tf, maxdt)
+        inte = self._finalGraphGetC_i(HBP, t0, tf, maxdt)
+        for i in range(len(cont)):
+            fix = [cont[i], inte[i]]
+            a.append(fix)
+        return a
+
+    def _HBA(self, ts, conn, universe, selAtom1, selAtom2,
+             verbose=False):
+        """
+        Main function for calculate C_i and C_c in parallel.
+        """
+        finalGetResidue1 = selAtom1
+        finalGetResidue2 = selAtom2
+        frame = ts.frame
+        h = MDAnalysis.analysis.hbonds.HydrogenBondAnalysis(universe,
+                                                            finalGetResidue1,
+                                                            finalGetResidue2,
+                                                            distance=3.5,
+                                                            angle=120.0,
+                                                            start=frame - 1,
+                                                            stop=frame)
+        while True:
+            try:
+                h.run(verbose=verbose)
+                break
+            except:
+                print("error")
+                print("trying again")
+                sys.stdout.flush()
+        sys.stdout.flush()
+        conn.send(h.timeseries[0])
+        conn.close()
+
+    def run(self, **kwargs):
+        """Analyze trajectory and produce timeseries"""
+        h_list = []
+        i = 0
+        if (self.nproc > 1):
+            while i < len(self.universe.trajectory):
+                jobs = []
+                k = i
+                for j in range(self.nproc):
+                        # start
+                    print("ts=", i + 1)
+                    if i >= len(self.universe.trajectory):
+                        break
+                    conn_parent, conn_child = multiprocessing.Pipe(False)
+                    while True:
+                        try:
+                            # new thread
+                            jobs.append(
+                                (multiprocessing.Process(
+                                    target=self._HBA,
+                                    args=(self.universe.trajectory[i],
+                                          conn_child, self.universe,
+                                          self.selection1, self.selection2,)),
+                                 conn_parent))
+                            break
+                        except:
+                            print("error in jobs.append")
+                    jobs[j][0].start()
+                    i = i + 1
+
+                for j in range(self.nproc):
+                    if k >= len(self.universe.trajectory):
+                        break
+                    rec01 = jobs[j][1]
+                    received = rec01.recv()
+                    h_list.append(received)
+                    jobs[j][0].join()
+                    k += 1
+            self.timeseries = self._getGraphics(
+                h_list, 0, self.tf - 1, self.dtmax)
+        else:
+            h_list = MDAnalysis.analysis.hbonds.HydrogenBondAnalysis(self.universe,
+                                                                     self.selection1,
+                                                                     self.selection2,
+                                                                     distance=3.5,
+                                                                     angle=120.0)
+            h_list.run(**kwargs)
+            self.timeseries = self._getGraphics(
+                h_list.timeseries, self.t0, self.tf, self.dtmax)
 
 
 class WaterOrientationalRelaxation(object):

From efa8998d02be0fd0d032e4ebdb8da26982941470 Mon Sep 17 00:00:00 2001
From: Paul Smith <paul.smith@kcl.ac.uk>
Date: Sun, 1 Mar 2020 12:59:40 +0000
Subject: [PATCH 20/40] Updated docs.

---
 .../analysis/utils/autocorrelation.py         | 45 ++++++++++---------
 1 file changed, 25 insertions(+), 20 deletions(-)

diff --git a/package/MDAnalysis/analysis/utils/autocorrelation.py b/package/MDAnalysis/analysis/utils/autocorrelation.py
index 5349eebf6b9..f4e841972a9 100644
--- a/package/MDAnalysis/analysis/utils/autocorrelation.py
+++ b/package/MDAnalysis/analysis/utils/autocorrelation.py
@@ -29,13 +29,9 @@
 def autocorrelation(list_of_sets, tau_max, window_step=1):
     r"""The descrete implementation of the autocorrelation function.
 
-    Here is a random equation that shows something.
-
     .. math::
        C_{HB}^c(\tau) = \frac{\sum_{ij}h_{ij}(t_0)h'_{ij}(t_0+\tau)}{\sum_{ij}h_{ij}(t_0)}
 
-
-    fixme -  list_of_sets: Modifies in place!
     Parameters
     ----------
     list_of_sets : list
@@ -44,12 +40,6 @@ def autocorrelation(list_of_sets, tau_max, window_step=1):
       The last tau (inclusive) for which to carry out autocorrelation.
     window_step : int, optional
       The step for the t0 to perform autocorrelation (without the overlap). Default is 1.
-    fixme - move intermittency to the right place
-    intermittency : int, optional
-      Helps with the graps in the data. If we want to remove some of the fluctuations and focus on the patterns,
-      intermittency removes the gaps. The default intermittency=0 which means that if the datapoint is missing at any
-      frame, it is not counted. With the value of 2 the datapoint can be missing for 2 consecutive frames but it will
-      be counted as present.
 
     Returns
     --------
@@ -92,20 +82,35 @@ def autocorrelation(list_of_sets, tau_max, window_step=1):
 
     return tau_timeseries, timeseries, timeseries_data
 
-# fixme - is intermittency consitent with list of sets of sets? (hydrogen bonds)
+
 def correct_intermittency(list_of_sets, intermittency):
     """
-    Pre-process Consecutive Intermittency with a single pass over the data.
-    If an atom is absent for a number of frames equal or smaller
-    than the parameter intermittency, then correct the data and remove the absence.
-    ie 7,A,A,7 with intermittency=2 will be replaced by 7,7,7,7, where A=absence
+    Process consecutive intermittency with a single pass over the data. The returned data can be used as input to
+    the function `autocorrelation` in order to calculate the discrete autocorrelation function with a given
+    intermittency.
+
+    If an atom is absent for a number of frames equal or smaller than the parameter `intermittency`, then correct
+    the data and remove the absence. e.g 7,A,A,7 with `intermittency=2` will be replaced by 7,7,7,7, where A=absence.
+
+    # TODO - is intermittency consitent with list of sets of sets? (hydrogen bonds)
 
     Parameters
     ----------
-    id_list: list of sets
-        returns a new list with the IDs with added IDs which disappeared for <= :param intermittency
-    intermittency: int
-        the max gap allowed and to be corrected
+    list_of_sets: list
+        In the simple case of e.g survival probability, a list of sets of atom ids present at each frame, where a
+        single set contains atom ids at a given frame, e.g [{0, 1}, {0}, {0}, {0, 1}]
+    intermittency : int
+      The maximum gap allowed. The default intermittency=0 means that if the datapoint is missing at any frame, no
+      changes are made to the data. With the value of `intermittency=2`, all datapoints missing for up to two
+       consecutive frames will be instead be considered present.
+
+    Returns
+    -------
+        returns a new list with the IDs with added IDs which disappeared for <= :param intermittency.
+        e.g If [{0, 1}, {0}, {0}, {0, 1}] is a list of sets of atom ids present at each frame and `intermittency=2`,
+        both atoms will be considered present throughout and thus the returned list of sets will be
+        [{0, 1}, {0, 1}, {0, 1}, {0, 1}].
+
     """
     if intermittency == 0:
         return list_of_sets
@@ -143,4 +148,4 @@ def correct_intermittency(list_of_sets, intermittency):
                     list_of_sets[i + j - k].add(atomid)
 
                 seen_frames_ago[atomid] = 0
-    return list_of_sets
\ No newline at end of file
+    return list_of_sets

From abe62cb603c5587ccb031702b7c9316ca32193f9 Mon Sep 17 00:00:00 2001
From: Paul Smith <paul.smith@kcl.ac.uk>
Date: Sun, 1 Mar 2020 13:09:45 +0000
Subject: [PATCH 21/40] Added checks for parameters types and dimensions.

---
 package/MDAnalysis/analysis/utils/autocorrelation.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/package/MDAnalysis/analysis/utils/autocorrelation.py b/package/MDAnalysis/analysis/utils/autocorrelation.py
index f4e841972a9..f28e64bd8cb 100644
--- a/package/MDAnalysis/analysis/utils/autocorrelation.py
+++ b/package/MDAnalysis/analysis/utils/autocorrelation.py
@@ -52,9 +52,14 @@ def autocorrelation(list_of_sets, tau_max, window_step=1):
 
     .. versionadded:: 0.19.2
     """
-    # fixme - add checks if this is a list of sets
-    # fixme - check dimensions
-    # fixme - check parameters?
+
+    # Check types
+    assert type(list_of_sets) == list, "list_of_sets must be a one-dimensional list of sets"
+    assert type(list_of_sets[0]) == set,  "list_of_sets must be a one-dimensional list of sets"
+
+    # Check dimensions of parameters
+    assert len(list_of_sets) > tau_max > window_step, "window_step cannot be greater than tau_max, which cannot" \
+                                                       "be greater than the length of list_of_sets"
 
     tau_timeseries = list(range(1, tau_max + 1))
     timeseries_data = [[] for _ in range(tau_max)]

From bef47c21117d5c2779045658d49b2751c7d06911 Mon Sep 17 00:00:00 2001
From: Paul Smith <paul.smith@kcl.ac.uk>
Date: Sun, 1 Mar 2020 13:15:18 +0000
Subject: [PATCH 22/40] Fixed imports.

---
 package/MDAnalysis/analysis/waterdynamics.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/package/MDAnalysis/analysis/waterdynamics.py b/package/MDAnalysis/analysis/waterdynamics.py
index ac5beaa7ef2..d74ecfe310d 100644
--- a/package/MDAnalysis/analysis/waterdynamics.py
+++ b/package/MDAnalysis/analysis/waterdynamics.py
@@ -449,7 +449,9 @@
 import logging
 import warnings
 import numpy as np
+import multiprocessing
 
+import MDAnalysis.analysis.hbonds
 from MDAnalysis.lib.log import ProgressMeter
 from .utils.autocorrelation import autocorrelation, correct_intermittency
 

From 60b2b8a16038c023a8215860644b8dd29249c1f1 Mon Sep 17 00:00:00 2001
From: Paul Smith <paul.smith@kcl.ac.uk>
Date: Sun, 1 Mar 2020 13:26:01 +0000
Subject: [PATCH 23/40] Removed deprecation warning.

The hydrogen bond lifetime calculation will be replaced in a future PR>.

Co-authored-by: pjsmith <paul.smith@kcl.ac.uk>
Co-authored-by: bieniekmateusz <bieniekmat@gmail.com>
---
 package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py b/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py
index 93b332e6ec7..3f98bba8f34 100644
--- a/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py
+++ b/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py
@@ -297,9 +297,9 @@ def __init__(self, universe,
                  nsamples=50,  # number of different points to sample in a run
                  pbc=True):
 
-        warnings.warn("This class is deprecated, use analysis.hbonds.HydrogenBondAnalysis "
-                      "which has .autocorrelation function",
-                      category=DeprecationWarning)
+        #warnings.warn("This class is deprecated, use analysis.hbonds.HydrogenBondAnalysis "
+        #              "which has .autocorrelation function",
+        #              category=DeprecationWarning)
 
         self.u = universe
         # check that slicing is possible

From 725c2fc5c8f9602894ae0bea46120c40a27a3c4c Mon Sep 17 00:00:00 2001
From: Paul Smith <paul.smith@kcl.ac.uk>
Date: Mon, 2 Mar 2020 10:55:42 +0000
Subject: [PATCH 24/40] Removed incorrect Assertion Error.

Co-authored-by: pjsmith <paul.smith@kcl.ac.uk>
Co-authored-by: bieniekmateusz <bieniekmat@gmail.com>
---
 package/MDAnalysis/analysis/utils/autocorrelation.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/package/MDAnalysis/analysis/utils/autocorrelation.py b/package/MDAnalysis/analysis/utils/autocorrelation.py
index f28e64bd8cb..3b174a76b32 100644
--- a/package/MDAnalysis/analysis/utils/autocorrelation.py
+++ b/package/MDAnalysis/analysis/utils/autocorrelation.py
@@ -58,8 +58,7 @@ def autocorrelation(list_of_sets, tau_max, window_step=1):
     assert type(list_of_sets[0]) == set,  "list_of_sets must be a one-dimensional list of sets"
 
     # Check dimensions of parameters
-    assert len(list_of_sets) > tau_max > window_step, "window_step cannot be greater than tau_max, which cannot" \
-                                                       "be greater than the length of list_of_sets"
+    assert len(list_of_sets) > tau_max, "tau_max cannot be greater than the length of list_of_sets"
 
     tau_timeseries = list(range(1, tau_max + 1))
     timeseries_data = [[] for _ in range(tau_max)]

From 221034449bf4fca486c47390b0ba75bccffb30fd Mon Sep 17 00:00:00 2001
From: bieniekmat <bieniekmat@gmail.com>
Date: Mon, 6 Apr 2020 19:26:30 +0100
Subject: [PATCH 25/40] Adding the test case back to the hydrogenBondLifetime.

---
 testsuite/MDAnalysisTests/analysis/test_waterdynamics.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py b/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py
index e9a4440c7c5..e4c39889537 100644
--- a/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py
+++ b/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py
@@ -42,6 +42,13 @@ def universe():
     return MDAnalysis.Universe(waterPSF, waterDCD)
 
 
+def test_HydrogenBondLifetimes(universe):
+    hbl = waterdynamics.HydrogenBondLifetimes(
+        universe, SELECTION1, SELECTION1, 0, 5, 3)
+    hbl.run()
+    assert_almost_equal(hbl.timeseries[2][1], 0.75, 5)
+
+
 def test_WaterOrientationalRelaxation(universe):
     wor = waterdynamics.WaterOrientationalRelaxation(
         universe, SELECTION1, 0, 5, 2)

From 0fa3c2bff4d95de8fcaf3e697bfd51b31a3cde7d Mon Sep 17 00:00:00 2001
From: bieniekmat <bieniekmat@gmail.com>
Date: Mon, 6 Apr 2020 19:58:56 +0100
Subject: [PATCH 26/40] Inserted back the accidentally removed PR #2617

---
 package/MDAnalysis/analysis/waterdynamics.py | 18 ++++++------------
 1 file changed, 6 insertions(+), 12 deletions(-)

diff --git a/package/MDAnalysis/analysis/waterdynamics.py b/package/MDAnalysis/analysis/waterdynamics.py
index 772e50bf8d0..8df5492fc51 100644
--- a/package/MDAnalysis/analysis/waterdynamics.py
+++ b/package/MDAnalysis/analysis/waterdynamics.py
@@ -811,11 +811,9 @@ def _sameMolecTandDT(self, selection, t0d, tf):
 
     def _selection_serial(self, universe, selection_str):
         selection = []
-        pm = ProgressMeter(universe.trajectory.n_frames,
-                           interval=10, verbose=True)
-        for ts in universe.trajectory:
+        for ts in ProgressBar(universe.trajectory, verbose=True,
+                              total=universe.trajectory.n_frames):
             selection.append(universe.select_atoms(selection_str))
-            pm.echo(ts.frame)
         return selection
 
     @staticmethod
@@ -986,11 +984,9 @@ def run(self, **kwargs):
 
     def _selection_serial(self, universe, selection_str):
         selection = []
-        pm = ProgressMeter(universe.trajectory.n_frames,
-                           interval=10, verbose=True)
-        for ts in universe.trajectory:
+        for ts in ProgressBar(universe.trajectory, verbose=True,
+                              total=universe.trajectory.n_frames):
             selection.append(universe.select_atoms(selection_str))
-            pm.echo(ts.frame)
         return selection
 
 
@@ -1128,11 +1124,9 @@ def _sameMolecTandDT(self, selection, t0d, tf):
 
     def _selection_serial(self, universe, selection_str):
         selection = []
-        pm = ProgressMeter(universe.trajectory.n_frames,
-                           interval=10, verbose=True)
-        for ts in universe.trajectory:
+        for ts in ProgressBar(universe.trajectory, verbose=True,
+                              total=universe.trajectory.n_frames):
             selection.append(universe.select_atoms(selection_str))
-            pm.echo(ts.frame)
         return selection
 
     def run(self, **kwargs):

From 1c12463536dec09abba7ae0760c4460fe453f9a3 Mon Sep 17 00:00:00 2001
From: Mateusz Bieniek <bieniekmateusz@users.noreply.github.com>
Date: Mon, 6 Apr 2020 20:00:58 +0100
Subject: [PATCH 27/40] Update
 package/MDAnalysis/analysis/utils/autocorrelation.py

Co-Authored-By: Oliver Beckstein <orbeckst@gmail.com>
---
 package/MDAnalysis/analysis/utils/autocorrelation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package/MDAnalysis/analysis/utils/autocorrelation.py b/package/MDAnalysis/analysis/utils/autocorrelation.py
index 3b174a76b32..60b0750cd4b 100644
--- a/package/MDAnalysis/analysis/utils/autocorrelation.py
+++ b/package/MDAnalysis/analysis/utils/autocorrelation.py
@@ -27,7 +27,7 @@
 
 
 def autocorrelation(list_of_sets, tau_max, window_step=1):
-    r"""The descrete implementation of the autocorrelation function.
+    r"""The discrete implementation of the autocorrelation function.
 
     .. math::
        C_{HB}^c(\tau) = \frac{\sum_{ij}h_{ij}(t_0)h'_{ij}(t_0+\tau)}{\sum_{ij}h_{ij}(t_0)}

From bdd967c36922714e1f517a4c9100fa97e61cc349 Mon Sep 17 00:00:00 2001
From: Mateusz Bieniek <bieniekmateusz@users.noreply.github.com>
Date: Mon, 6 Apr 2020 20:01:38 +0100
Subject: [PATCH 28/40] Update
 package/MDAnalysis/analysis/utils/autocorrelation.py

Co-Authored-By: Oliver Beckstein <orbeckst@gmail.com>
---
 package/MDAnalysis/analysis/utils/autocorrelation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package/MDAnalysis/analysis/utils/autocorrelation.py b/package/MDAnalysis/analysis/utils/autocorrelation.py
index 60b0750cd4b..c7e43e6fcb3 100644
--- a/package/MDAnalysis/analysis/utils/autocorrelation.py
+++ b/package/MDAnalysis/analysis/utils/autocorrelation.py
@@ -35,7 +35,7 @@ def autocorrelation(list_of_sets, tau_max, window_step=1):
     Parameters
     ----------
     list_of_sets : list
-      List of sets,
+      List of sets
     tau_max : int
       The last tau (inclusive) for which to carry out autocorrelation.
     window_step : int, optional

From 30cf75a339303f94ad69db5522780649af06e028 Mon Sep 17 00:00:00 2001
From: bieniekmat <bieniekmat@gmail.com>
Date: Mon, 6 Apr 2020 20:11:34 +0100
Subject: [PATCH 29/40] Inserted back the accidentally removed PR #2617
 Correction for PR #2256

---
 package/MDAnalysis/analysis/hbonds/hbond_analysis.py | 2 --
 package/MDAnalysis/analysis/waterdynamics.py         | 1 -
 2 files changed, 3 deletions(-)

diff --git a/package/MDAnalysis/analysis/hbonds/hbond_analysis.py b/package/MDAnalysis/analysis/hbonds/hbond_analysis.py
index 6421506cb00..374d65a9d32 100644
--- a/package/MDAnalysis/analysis/hbonds/hbond_analysis.py
+++ b/package/MDAnalysis/analysis/hbonds/hbond_analysis.py
@@ -337,8 +337,6 @@ class HydrogenBondAnalysis_OtherFF(HydrogenBondAnalysis):
 
 from ..utils.autocorrelation import autocorrelation, correct_intermittency
 
-from ..utils.autocorrelation import autocorrelation, correct_intermittency
-
 
 logger = logging.getLogger('MDAnalysis.analysis.hbonds')
 
diff --git a/package/MDAnalysis/analysis/waterdynamics.py b/package/MDAnalysis/analysis/waterdynamics.py
index 8df5492fc51..b030a219c21 100644
--- a/package/MDAnalysis/analysis/waterdynamics.py
+++ b/package/MDAnalysis/analysis/waterdynamics.py
@@ -450,7 +450,6 @@
 import warnings
 import numpy as np
 import MDAnalysis.analysis.hbonds
-from MDAnalysis.lib.log import ProgressMeter
 from .utils.autocorrelation import autocorrelation, correct_intermittency
 
 logger = logging.getLogger('MDAnalysis.analysis.waterdynamics')

From 67a965d0b0c531b7c275b160acba12eee9d3c6b2 Mon Sep 17 00:00:00 2001
From: bieniekmat <bieniekmat@gmail.com>
Date: Sat, 11 Apr 2020 18:46:47 +0100
Subject: [PATCH 30/40] Fixed and expanded the documentation for the
 autocorrelation. PR #2256

co-authored-by: p-j-smith <paul.smith@kcl.ac.uk>
---
 .../analysis/utils/autocorrelation.py         | 61 +++++++++++++++----
 .../documentation_pages/analysis/utils.rst    |  1 +
 .../documentation_pages/analysis_modules.rst  |  8 +++
 3 files changed, 59 insertions(+), 11 deletions(-)
 create mode 100644 package/doc/sphinx/source/documentation_pages/analysis/utils.rst

diff --git a/package/MDAnalysis/analysis/utils/autocorrelation.py b/package/MDAnalysis/analysis/utils/autocorrelation.py
index c7e43e6fcb3..820a66f5ea0 100644
--- a/package/MDAnalysis/analysis/utils/autocorrelation.py
+++ b/package/MDAnalysis/analysis/utils/autocorrelation.py
@@ -27,28 +27,62 @@
 
 
 def autocorrelation(list_of_sets, tau_max, window_step=1):
-    r"""The discrete implementation of the autocorrelation function.
+    r"""Implementation of a discrete autocorrelation function.
 
+    The autocorrelation of a property $x$ from a time $t=t_0$ to $t=t_0 + \tau$
+    is given by:
     .. math::
-       C_{HB}^c(\tau) = \frac{\sum_{ij}h_{ij}(t_0)h'_{ij}(t_0+\tau)}{\sum_{ij}h_{ij}(t_0)}
+        C(\tau) = \langle \frac{ x(t_0)x(t_0 +\tau) }{ x(t_0)x(t_0) } \rangle
+
+    where $x$ may represent any property of a particle, such as velocity or
+    potential energy.
+
+    The survival probability, $S(\tau)$, is a special case of the time
+    autocorrelation function in which the property under consideration can
+    be encoded with indicator variables, $0$ and $1$, to represent the binary
+    state of said property. For instance, in calculating the survival probability
+    of water molecules within $5 \rm \AA$, each water molecule will either be
+    within this cutoff range ($1$) or not ($0$). The total number of water
+    molecules within the cutoff at time $t_0$ will be given by $N(t_0)$.
+
+    The survival probability of a property of a set of particles is
+    given by:
+
+    .. math::
+        S(\tau) =  \langle \frac{ N(t_0, t_0 + \tau )} { N(t_0) }\rangle
+
+    where $N(t0)$ is the number of particles at time $t_0$ for which the feature
+    is observed, and $N(t0, t_0 + \tau)$ is the number of particles for which
+    this feature is present at every frame from $t_0$ to $N(t0, t_0 + \tau)$.
+    The angular brackets represent an average over all time origins, $t_0$.
+
+    See Araya-Secchi et al., 2014, (https://doi.org/10.1016/j.bpj.2014.05.037)
+    for a description survival probability.
 
     Parameters
     ----------
     list_of_sets : list
-      List of sets
+      List of sets. Each set corresponds to data from a single frame. Each element in a set
+      may be, for example, an atom id or a tuple of atoms ids. In the case of calculating the
+      survival probability of water around a protein, these atom ids in a given set will be
+      those of the atoms which are within a cutoff distance of the protein at a given frame.
     tau_max : int
-      The last tau (inclusive) for which to carry out autocorrelation.
+      The last tau (inclusive) for which to calculate the autocorrelation. e.g if tau_max = 20,
+      the survival probability will be calculated over 20 frames.
     window_step : int, optional
-      The step for the t0 to perform autocorrelation (without the overlap). Default is 1.
+      The step size for t0 to perform autocorrelation. Ideally, window_step will be larger than
+       tau_max to ensure independence of each window for which the calculation is performed.
+       Default is 1.
 
     Returns
     --------
     tau_timeseries : list of int
-        the tau for which the autocorrelation was calculated
+        the values of tau for which the autocorrelation was calculated
     timeseries : list of int
         the autocorelation values for each of the tau values
     timeseries_data : list of list of int
-        the raw data from which the autocorrelation is computed. The time dependant evolution can be investigated.
+        the raw data from which the autocorrelation is computed, i.e $S(\tau)$ at each window.
+        This allows the time dependant evolution of $S(\tau)$ to be investigated.
 
     .. versionadded:: 0.19.2
     """
@@ -88,13 +122,18 @@ def autocorrelation(list_of_sets, tau_max, window_step=1):
 
 
 def correct_intermittency(list_of_sets, intermittency):
-    """
+    """Preprocess data to allow intermittent behaviour prior to calling `autocorrelation`.
+
     Process consecutive intermittency with a single pass over the data. The returned data can be used as input to
-    the function `autocorrelation` in order to calculate the discrete autocorrelation function with a given
+    the function `autocorrelation` in order to calculate the survival probability with a given
     intermittency.
 
-    If an atom is absent for a number of frames equal or smaller than the parameter `intermittency`, then correct
-    the data and remove the absence. e.g 7,A,A,7 with `intermittency=2` will be replaced by 7,7,7,7, where A=absence.
+    For example, if an atom is absent for a number of frames equal or smaller than the parameter `intermittency`,
+    then correct the data and remove the absence.
+    e.g 7,A,A,7 with `intermittency=2` will be replaced by 7,7,7,7, where A=absence.
+
+    See Gowers and Carbonne, 2015, (DOI:10.1063.1.4922445) for a description of
+    intermittency in the calculation of hydrogen bond lifetimes.
 
     # TODO - is intermittency consitent with list of sets of sets? (hydrogen bonds)
 
diff --git a/package/doc/sphinx/source/documentation_pages/analysis/utils.rst b/package/doc/sphinx/source/documentation_pages/analysis/utils.rst
new file mode 100644
index 00000000000..048e8216099
--- /dev/null
+++ b/package/doc/sphinx/source/documentation_pages/analysis/utils.rst
@@ -0,0 +1 @@
+.. automodule:: MDAnalysis.analysis.utils
\ No newline at end of file
diff --git a/package/doc/sphinx/source/documentation_pages/analysis_modules.rst b/package/doc/sphinx/source/documentation_pages/analysis_modules.rst
index 19081f1b5dd..6c19528a3e8 100644
--- a/package/doc/sphinx/source/documentation_pages/analysis_modules.rst
+++ b/package/doc/sphinx/source/documentation_pages/analysis_modules.rst
@@ -147,3 +147,11 @@ Data
    :maxdepth: 1
 
    analysis/data
+
+Utils
+=====
+
+.. toctree::
+   :maxdepth: 1
+
+   analysis/utils

From fa8df30c8591fa7233bea13875c5440b75435112 Mon Sep 17 00:00:00 2001
From: bieniekmat <bieniekmat@gmail.com>
Date: Sun, 12 Apr 2020 17:29:13 +0100
Subject: [PATCH 31/40] Introducing the requested corrections PR #2256

co-authored-by: p-j-smith <paul.smith@kcl.ac.uk>
---
 package/MDAnalysis/analysis/utils/autocorrelation.py | 11 ++++++-----
 package/MDAnalysis/analysis/waterdynamics.py         |  2 --
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/package/MDAnalysis/analysis/utils/autocorrelation.py b/package/MDAnalysis/analysis/utils/autocorrelation.py
index 820a66f5ea0..b7cb4284651 100644
--- a/package/MDAnalysis/analysis/utils/autocorrelation.py
+++ b/package/MDAnalysis/analysis/utils/autocorrelation.py
@@ -67,7 +67,7 @@ def autocorrelation(list_of_sets, tau_max, window_step=1):
       survival probability of water around a protein, these atom ids in a given set will be
       those of the atoms which are within a cutoff distance of the protein at a given frame.
     tau_max : int
-      The last tau (inclusive) for which to calculate the autocorrelation. e.g if tau_max = 20,
+      The last tau (lag time, inclusive) for which to calculate the autocorrelation. e.g if tau_max = 20,
       the survival probability will be calculated over 20 frames.
     window_step : int, optional
       The step size for t0 to perform autocorrelation. Ideally, window_step will be larger than
@@ -87,12 +87,13 @@ def autocorrelation(list_of_sets, tau_max, window_step=1):
     .. versionadded:: 0.19.2
     """
 
-    # Check types
-    assert type(list_of_sets) == list, "list_of_sets must be a one-dimensional list of sets"
-    assert type(list_of_sets[0]) == set,  "list_of_sets must be a one-dimensional list of sets"
+    # check types
+    if (type(list_of_sets) != list and len(list_of_sets) != 0) or type(list_of_sets[0]) != set:
+        raise TypeError("list_of_sets must be a one-dimensional list of sets")
 
     # Check dimensions of parameters
-    assert len(list_of_sets) > tau_max, "tau_max cannot be greater than the length of list_of_sets"
+    if len(list_of_sets) < tau_max:
+        raise ValueError("tau_max cannot be greater than the length of list_of_sets")
 
     tau_timeseries = list(range(1, tau_max + 1))
     timeseries_data = [[] for _ in range(tau_max)]
diff --git a/package/MDAnalysis/analysis/waterdynamics.py b/package/MDAnalysis/analysis/waterdynamics.py
index b030a219c21..c7fb43a5685 100644
--- a/package/MDAnalysis/analysis/waterdynamics.py
+++ b/package/MDAnalysis/analysis/waterdynamics.py
@@ -508,8 +508,6 @@ class HydrogenBondLifetimes(object):
 
     def __init__(self, universe, selection1, selection2, t0, tf, dtmax,
                  nproc=1):
-        #warnings.warn("This class is deprecated; use analysis.hydrogen_bonds.hbond_analysis.HydrogenBondAnalysis ",
-        #              category=DeprecationWarning)
         self.universe = universe
         self.selection1 = selection1
         self.selection2 = selection2

From 45ff277ad7668308c52b507a320b915652eb7b07 Mon Sep 17 00:00:00 2001
From: bieniekmat <bieniekmat@gmail.com>
Date: Sun, 12 Apr 2020 17:40:29 +0100
Subject: [PATCH 32/40] Exceptions marked as not covered #2256

co-authored-by: p-j-smith <paul.smith@kcl.ac.uk>
---
 package/MDAnalysis/analysis/utils/autocorrelation.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/package/MDAnalysis/analysis/utils/autocorrelation.py b/package/MDAnalysis/analysis/utils/autocorrelation.py
index b7cb4284651..0a8eb9dc1b2 100644
--- a/package/MDAnalysis/analysis/utils/autocorrelation.py
+++ b/package/MDAnalysis/analysis/utils/autocorrelation.py
@@ -89,11 +89,11 @@ def autocorrelation(list_of_sets, tau_max, window_step=1):
 
     # check types
     if (type(list_of_sets) != list and len(list_of_sets) != 0) or type(list_of_sets[0]) != set:
-        raise TypeError("list_of_sets must be a one-dimensional list of sets")
+        raise TypeError("list_of_sets must be a one-dimensional list of sets")  # pragma: no cover
 
     # Check dimensions of parameters
     if len(list_of_sets) < tau_max:
-        raise ValueError("tau_max cannot be greater than the length of list_of_sets")
+        raise ValueError("tau_max cannot be greater than the length of list_of_sets") # pragma: no cover
 
     tau_timeseries = list(range(1, tau_max + 1))
     timeseries_data = [[] for _ in range(tau_max)]

From e262cd3464fc517087abfcab7a0d86a4e62d8e93 Mon Sep 17 00:00:00 2001
From: bieniekmat <bieniekmat@gmail.com>
Date: Sun, 12 Apr 2020 17:50:22 +0100
Subject: [PATCH 33/40] Further clarifications #2256

co-authored-by: p-j-smith <paul.smith@kcl.ac.uk>
---
 .../analysis/utils/autocorrelation.py         | 23 ++++++++++++++-----
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/package/MDAnalysis/analysis/utils/autocorrelation.py b/package/MDAnalysis/analysis/utils/autocorrelation.py
index 0a8eb9dc1b2..041e24fcc2d 100644
--- a/package/MDAnalysis/analysis/utils/autocorrelation.py
+++ b/package/MDAnalysis/analysis/utils/autocorrelation.py
@@ -125,14 +125,25 @@ def autocorrelation(list_of_sets, tau_max, window_step=1):
 def correct_intermittency(list_of_sets, intermittency):
     """Preprocess data to allow intermittent behaviour prior to calling `autocorrelation`.
 
-    Process consecutive intermittency with a single pass over the data. The returned data can be used as input to
-    the function `autocorrelation` in order to calculate the survival probability with a given
-    intermittency.
-
-    For example, if an atom is absent for a number of frames equal or smaller than the parameter `intermittency`,
-    then correct the data and remove the absence.
+    Survival probabilty may be calculated either with a strict continuous requirement or
+    a less strict intermittency. If calculating the survival probability water around a
+    protein for example, in the former case the water must be within a cutoff distance
+    of the protein a every frame from $t_0$ to $t_0 + \tau$ in order for it to be considered
+    present at $t_0 + \tau$. In the intermittent case, the water molecule is allowed to
+    leave the region of interest for up to a specified consecutive number of frames.
+
+    This function pre-processes data, such as the atom ids of water molecules within a cutoff
+    distance of a protein at each frame, in order to allow for intermittent behaviour, with a
+    single pass over the data.
+
+    For example, if an atom is absent for a number of frames equal or smaller than the parameter
+    `intermittency`, then this absence will be removed and thus the atom is considered to have
+    not left.
     e.g 7,A,A,7 with `intermittency=2` will be replaced by 7,7,7,7, where A=absence.
 
+    The returned data can be used as input to the function `autocorrelation` in order to calculate
+    the survival probability with a given intermittency.
+
     See Gowers and Carbonne, 2015, (DOI:10.1063.1.4922445) for a description of
     intermittency in the calculation of hydrogen bond lifetimes.
 

From a798d1e9d56d9acd85242c9c907f089f16272155 Mon Sep 17 00:00:00 2001
From: bieniekmat <bieniekmat@gmail.com>
Date: Mon, 13 Apr 2020 15:38:48 +0100
Subject: [PATCH 34/40] Complete documentation for PR #2256

co-authored-by: p-j-smith <paul.smith@kcl.ac.uk>
---
 .../analysis/utils/autocorrelation.py         | 89 ++++++++++++++-----
 .../analysis/autocorrelation.rst              |  9 ++
 .../documentation_pages/analysis/utils.rst    |  1 -
 .../documentation_pages/analysis_modules.rst  |  4 +-
 4 files changed, 77 insertions(+), 26 deletions(-)
 create mode 100644 package/doc/sphinx/source/documentation_pages/analysis/autocorrelation.rst
 delete mode 100644 package/doc/sphinx/source/documentation_pages/analysis/utils.rst

diff --git a/package/MDAnalysis/analysis/utils/autocorrelation.py b/package/MDAnalysis/analysis/utils/autocorrelation.py
index 041e24fcc2d..c8b751bfad8 100644
--- a/package/MDAnalysis/analysis/utils/autocorrelation.py
+++ b/package/MDAnalysis/analysis/utils/autocorrelation.py
@@ -21,6 +21,43 @@
 # J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
 #
 
+"""Autocorrelation utilities --- :mod:`MDAnalysis.analysis.utils.autocorrelation`
+=================================================================================
+
+
+:Authors: Paul Smith & Mateusz Bieniek
+:Year: 2020
+:Copyright: GNU Public License v2
+
+.. versionadded:: 0.21.0
+
+This module is primarily for internal use by other analysis modules. It
+provides functionality for calculating the time autocorrelation function
+of a binary variable (i.e one that is either true or false at each
+frame for a given atom/molecule/set of molecules). This module includes
+functions for calculating both the time continuous autocorrelation and
+the intermittent autocorrelation. The function :func:`autocorrelation`
+calculates the continuous autocorrelation only. The data may be
+pre-processed using the function :func:`intermittency` in order to
+acount for intermittency before passing the results to
+:func:`autocorrelation`.
+
+See Gowers and Carbonne, 2015, (DOI:10.1063.1.4922445) for a further
+discussion on the time continuous and intermittent autocorrelation of
+hydrogen bond lifetimes.
+
+**Analysis tools that make use of modules**
+
+* :class:`MDAnalysis.analysis.waterdynamics.SurvivalProbability`
+    Calculates the continuous or intermittent survival probability
+    of an atom group in a region of interest.
+
+* :class:`MDAnalysis.analysis.hbonds.hbond_analysis`
+    Calculates the continuous of intermittent hydrogen bond
+    lifetime.
+
+"""
+
 from __future__ import absolute_import, division
 import numpy as np
 from copy import deepcopy
@@ -29,21 +66,25 @@
 def autocorrelation(list_of_sets, tau_max, window_step=1):
     r"""Implementation of a discrete autocorrelation function.
 
-    The autocorrelation of a property $x$ from a time $t=t_0$ to $t=t_0 + \tau$
+    The autocorrelation of a property :math:`x` from a time :math:`t=t_0` to :math:`t=t_0 + \tau`
     is given by:
+
     .. math::
         C(\tau) = \langle \frac{ x(t_0)x(t_0 +\tau) }{ x(t_0)x(t_0) } \rangle
 
-    where $x$ may represent any property of a particle, such as velocity or
+    where :math:`x` may represent any property of a particle, such as velocity or
     potential energy.
 
-    The survival probability, $S(\tau)$, is a special case of the time
+    This function is an implementation of a special case of the time
     autocorrelation function in which the property under consideration can
-    be encoded with indicator variables, $0$ and $1$, to represent the binary
-    state of said property. For instance, in calculating the survival probability
-    of water molecules within $5 \rm \AA$, each water molecule will either be
-    within this cutoff range ($1$) or not ($0$). The total number of water
-    molecules within the cutoff at time $t_0$ will be given by $N(t_0)$.
+    be encoded with indicator variables, :math:`0` and :math:`1`, to represent the binary
+    state of said property. This special case is often referred to as the
+    survival probability (:math:`S(\tau)`). As an example, in calculating the survival
+    probability of water molecules within :math:`5 \unicode{x212B}` of a protein, each water
+    molecule will either be within this cutoff range (:math:`1`) or not (:math:`0`). The
+    total number of water molecules within the cutoff at time :math:`t_0` will be
+    given by :math:`N(t_0)`. Other cases include the Hydrogen Bond Lifetime as
+    well as the translocation rate of cholesterol across a bilayer.
 
     The survival probability of a property of a set of particles is
     given by:
@@ -51,10 +92,10 @@ def autocorrelation(list_of_sets, tau_max, window_step=1):
     .. math::
         S(\tau) =  \langle \frac{ N(t_0, t_0 + \tau )} { N(t_0) }\rangle
 
-    where $N(t0)$ is the number of particles at time $t_0$ for which the feature
-    is observed, and $N(t0, t_0 + \tau)$ is the number of particles for which
-    this feature is present at every frame from $t_0$ to $N(t0, t_0 + \tau)$.
-    The angular brackets represent an average over all time origins, $t_0$.
+    where :math:`N(t0)` is the number of particles at time :math:`t_0` for which the feature
+    is observed, and :math:`N(t0, t_0 + \tau)` is the number of particles for which
+    this feature is present at every frame from :math:`t_0` to :math:`N(t0, t_0 + \tau)`.
+    The angular brackets represent an average over all time origins, :math:`t_0`.
 
     See Araya-Secchi et al., 2014, (https://doi.org/10.1016/j.bpj.2014.05.037)
     for a description survival probability.
@@ -81,8 +122,8 @@ def autocorrelation(list_of_sets, tau_max, window_step=1):
     timeseries : list of int
         the autocorelation values for each of the tau values
     timeseries_data : list of list of int
-        the raw data from which the autocorrelation is computed, i.e $S(\tau)$ at each window.
-        This allows the time dependant evolution of $S(\tau)$ to be investigated.
+        the raw data from which the autocorrelation is computed, i.e :math:`S(\tau)` at each window.
+        This allows the time dependant evolution of :math:`S(\tau)` to be investigated.
 
     .. versionadded:: 0.19.2
     """
@@ -123,26 +164,27 @@ def autocorrelation(list_of_sets, tau_max, window_step=1):
 
 
 def correct_intermittency(list_of_sets, intermittency):
-    """Preprocess data to allow intermittent behaviour prior to calling `autocorrelation`.
+    r"""Preprocess data to allow intermittent behaviour prior to calling :func:`autocorrelation`.
 
     Survival probabilty may be calculated either with a strict continuous requirement or
     a less strict intermittency. If calculating the survival probability water around a
     protein for example, in the former case the water must be within a cutoff distance
-    of the protein a every frame from $t_0$ to $t_0 + \tau$ in order for it to be considered
-    present at $t_0 + \tau$. In the intermittent case, the water molecule is allowed to
-    leave the region of interest for up to a specified consecutive number of frames.
+    of the protein at every frame from :math:`t_0` to :math:`t_0 + \tau` in order for it to be considered
+    present at :math:`t_0 + \tau`. In the intermittent case, the water molecule is allowed to
+    leave the region of interest for up to a specified consecutive number of frames whilst still
+    being considered present at :math:`t_0 + \tau`.
 
     This function pre-processes data, such as the atom ids of water molecules within a cutoff
     distance of a protein at each frame, in order to allow for intermittent behaviour, with a
     single pass over the data.
 
     For example, if an atom is absent for a number of frames equal or smaller than the parameter
-    `intermittency`, then this absence will be removed and thus the atom is considered to have
+    :attr:`intermittency`, then this absence will be removed and thus the atom is considered to have
     not left.
     e.g 7,A,A,7 with `intermittency=2` will be replaced by 7,7,7,7, where A=absence.
 
-    The returned data can be used as input to the function `autocorrelation` in order to calculate
-    the survival probability with a given intermittency.
+    The returned data can be used as input to the function :func:`autocorrelation` in order
+    to calculate the survival probability with a given intermittency.
 
     See Gowers and Carbonne, 2015, (DOI:10.1063.1.4922445) for a description of
     intermittency in the calculation of hydrogen bond lifetimes.
@@ -155,13 +197,14 @@ def correct_intermittency(list_of_sets, intermittency):
         In the simple case of e.g survival probability, a list of sets of atom ids present at each frame, where a
         single set contains atom ids at a given frame, e.g [{0, 1}, {0}, {0}, {0, 1}]
     intermittency : int
-      The maximum gap allowed. The default intermittency=0 means that if the datapoint is missing at any frame, no
+      The maximum gap allowed. The default `intermittency=0` means that if the datapoint is missing at any frame, no
       changes are made to the data. With the value of `intermittency=2`, all datapoints missing for up to two
        consecutive frames will be instead be considered present.
 
     Returns
     -------
-        returns a new list with the IDs with added IDs which disappeared for <= :param intermittency.
+    list_of_sets: list
+        returns a new list with the IDs with added IDs which disappeared for <= :attr:`intermittency`.
         e.g If [{0, 1}, {0}, {0}, {0, 1}] is a list of sets of atom ids present at each frame and `intermittency=2`,
         both atoms will be considered present throughout and thus the returned list of sets will be
         [{0, 1}, {0, 1}, {0, 1}, {0, 1}].
diff --git a/package/doc/sphinx/source/documentation_pages/analysis/autocorrelation.rst b/package/doc/sphinx/source/documentation_pages/analysis/autocorrelation.rst
new file mode 100644
index 00000000000..857f8c000f5
--- /dev/null
+++ b/package/doc/sphinx/source/documentation_pages/analysis/autocorrelation.rst
@@ -0,0 +1,9 @@
+.. automodule:: MDAnalysis.analysis.utils.autocorrelation
+
+	Autocorrelation Function
+	------------------------
+	.. autofunction:: MDAnalysis.analysis.utils.autocorrelation.autocorrelation
+
+	Intermittency Function
+	----------------------
+	.. autofunction:: MDAnalysis.analysis.utils.autocorrelation.correct_intermittency
\ No newline at end of file
diff --git a/package/doc/sphinx/source/documentation_pages/analysis/utils.rst b/package/doc/sphinx/source/documentation_pages/analysis/utils.rst
deleted file mode 100644
index 048e8216099..00000000000
--- a/package/doc/sphinx/source/documentation_pages/analysis/utils.rst
+++ /dev/null
@@ -1 +0,0 @@
-.. automodule:: MDAnalysis.analysis.utils
\ No newline at end of file
diff --git a/package/doc/sphinx/source/documentation_pages/analysis_modules.rst b/package/doc/sphinx/source/documentation_pages/analysis_modules.rst
index 6c19528a3e8..d4bdced3046 100644
--- a/package/doc/sphinx/source/documentation_pages/analysis_modules.rst
+++ b/package/doc/sphinx/source/documentation_pages/analysis_modules.rst
@@ -152,6 +152,6 @@ Utils
 =====
 
 .. toctree::
-   :maxdepth: 1
+   :maxdepth: 2
 
-   analysis/utils
+   analysis/autocorrelation

From cddf0a5d67bee652d164b6c1e19b53394922c976 Mon Sep 17 00:00:00 2001
From: bieniekmat <bieniekmat@gmail.com>
Date: Tue, 14 Apr 2020 19:16:23 +0100
Subject: [PATCH 35/40] Moving the package to .lib.correlations + reST
 citations (PR #2256)

co-authored-by: p-j-smith <paul.smith@kcl.ac.uk>
---
 .../analysis/hbonds/hbond_analysis.py         |  3 +--
 package/MDAnalysis/analysis/waterdynamics.py  |  5 ++--
 .../correlations.py}                          | 26 ++++++++++++-------
 .../analysis/autocorrelation.rst              |  9 -------
 .../documentation_pages/analysis_modules.rst  |  8 ------
 .../documentation_pages/lib/correlations.rst  |  9 +++++++
 .../documentation_pages/lib_modules.rst       |  3 ++-
 .../analysis/test_waterdynamics.py            |  2 +-
 8 files changed, 33 insertions(+), 32 deletions(-)
 rename package/MDAnalysis/{analysis/utils/autocorrelation.py => lib/correlations.py} (90%)
 delete mode 100644 package/doc/sphinx/source/documentation_pages/analysis/autocorrelation.rst
 create mode 100644 package/doc/sphinx/source/documentation_pages/lib/correlations.rst

diff --git a/package/MDAnalysis/analysis/hbonds/hbond_analysis.py b/package/MDAnalysis/analysis/hbonds/hbond_analysis.py
index 374d65a9d32..6ec450b9a52 100644
--- a/package/MDAnalysis/analysis/hbonds/hbond_analysis.py
+++ b/package/MDAnalysis/analysis/hbonds/hbond_analysis.py
@@ -334,8 +334,7 @@ class HydrogenBondAnalysis_OtherFF(HydrogenBondAnalysis):
 from MDAnalysis.lib.log import ProgressBar
 from MDAnalysis.lib.NeighborSearch import AtomNeighborSearch
 from MDAnalysis.lib import distances
-
-from ..utils.autocorrelation import autocorrelation, correct_intermittency
+from MDAnalysis.lib.correlations import autocorrelation, correct_intermittency
 
 
 logger = logging.getLogger('MDAnalysis.analysis.hbonds')
diff --git a/package/MDAnalysis/analysis/waterdynamics.py b/package/MDAnalysis/analysis/waterdynamics.py
index c7fb43a5685..4498356d3ae 100644
--- a/package/MDAnalysis/analysis/waterdynamics.py
+++ b/package/MDAnalysis/analysis/waterdynamics.py
@@ -445,12 +445,13 @@
 """
 from __future__ import print_function, division, absolute_import
 
+from MDAnalysis.lib.correlations import autocorrelation, correct_intermittency
+import MDAnalysis.analysis.hbonds
 from six.moves import range, zip_longest
 import logging
 import warnings
 import numpy as np
-import MDAnalysis.analysis.hbonds
-from .utils.autocorrelation import autocorrelation, correct_intermittency
+
 
 logger = logging.getLogger('MDAnalysis.analysis.waterdynamics')
 from MDAnalysis.lib.log import ProgressBar
diff --git a/package/MDAnalysis/analysis/utils/autocorrelation.py b/package/MDAnalysis/lib/correlations.py
similarity index 90%
rename from package/MDAnalysis/analysis/utils/autocorrelation.py
rename to package/MDAnalysis/lib/correlations.py
index c8b751bfad8..0b6af25c231 100644
--- a/package/MDAnalysis/analysis/utils/autocorrelation.py
+++ b/package/MDAnalysis/lib/correlations.py
@@ -21,7 +21,7 @@
 # J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
 #
 
-"""Autocorrelation utilities --- :mod:`MDAnalysis.analysis.utils.autocorrelation`
+"""Correlations utilities --- :mod:`MDAnalysis.lib.correlations`
 =================================================================================
 
 
@@ -42,11 +42,10 @@
 acount for intermittency before passing the results to
 :func:`autocorrelation`.
 
-See Gowers and Carbonne, 2015, (DOI:10.1063.1.4922445) for a further
-discussion on the time continuous and intermittent autocorrelation of
-hydrogen bond lifetimes.
+See [Gowers2015]_ for a further discussion on the time continuous and
+intermittent autocorrelation of hydrogen bond lifetimes.
 
-**Analysis tools that make use of modules**
+.. seeAlso:: Analysis tools that make use of modules
 
 * :class:`MDAnalysis.analysis.waterdynamics.SurvivalProbability`
     Calculates the continuous or intermittent survival probability
@@ -56,6 +55,15 @@
     Calculates the continuous of intermittent hydrogen bond
     lifetime.
 
+.. rubric:: References
+
+.. [Gowers2015] Gowers, R. J., & Carbone, P. (2015). A multiscale approach to model hydrogen bonding:
+            The case of polyamide. The Journal of Chemical Physics, 142(22), 224907.
+            https://doi.org/10.1063/1.4922445
+.. [Araya-Secchi2014] Araya-Secchi, R., Perez-Acle, T., Kang, S., Huynh, T., Bernardin, A., Escalona, Y., Garate, J.-A., Martínez, A. D., García, I. E., Sáez, J. C., & Zhou, R. (2014).
+            Characterization of a Novel Water Pocket Inside the Human Cx26 Hemichannel Structure.
+            Biophysical Journal, 107(3), 599–612. https://doi.org/10.1016/J.BPJ.2014.05.037
+
 """
 
 from __future__ import absolute_import, division
@@ -80,7 +88,7 @@ def autocorrelation(list_of_sets, tau_max, window_step=1):
     be encoded with indicator variables, :math:`0` and :math:`1`, to represent the binary
     state of said property. This special case is often referred to as the
     survival probability (:math:`S(\tau)`). As an example, in calculating the survival
-    probability of water molecules within :math:`5 \unicode{x212B}` of a protein, each water
+    probability of water molecules within 5 Å of a protein, each water
     molecule will either be within this cutoff range (:math:`1`) or not (:math:`0`). The
     total number of water molecules within the cutoff at time :math:`t_0` will be
     given by :math:`N(t_0)`. Other cases include the Hydrogen Bond Lifetime as
@@ -97,8 +105,7 @@ def autocorrelation(list_of_sets, tau_max, window_step=1):
     this feature is present at every frame from :math:`t_0` to :math:`N(t0, t_0 + \tau)`.
     The angular brackets represent an average over all time origins, :math:`t_0`.
 
-    See Araya-Secchi et al., 2014, (https://doi.org/10.1016/j.bpj.2014.05.037)
-    for a description survival probability.
+    See [Araya-Secchi2014]_ for a description survival probability.
 
     Parameters
     ----------
@@ -186,7 +193,7 @@ def correct_intermittency(list_of_sets, intermittency):
     The returned data can be used as input to the function :func:`autocorrelation` in order
     to calculate the survival probability with a given intermittency.
 
-    See Gowers and Carbonne, 2015, (DOI:10.1063.1.4922445) for a description of
+    See [Gowers2015]_ for a description of
     intermittency in the calculation of hydrogen bond lifetimes.
 
     # TODO - is intermittency consitent with list of sets of sets? (hydrogen bonds)
@@ -247,3 +254,4 @@ def correct_intermittency(list_of_sets, intermittency):
 
                 seen_frames_ago[atomid] = 0
     return list_of_sets
+
diff --git a/package/doc/sphinx/source/documentation_pages/analysis/autocorrelation.rst b/package/doc/sphinx/source/documentation_pages/analysis/autocorrelation.rst
deleted file mode 100644
index 857f8c000f5..00000000000
--- a/package/doc/sphinx/source/documentation_pages/analysis/autocorrelation.rst
+++ /dev/null
@@ -1,9 +0,0 @@
-.. automodule:: MDAnalysis.analysis.utils.autocorrelation
-
-	Autocorrelation Function
-	------------------------
-	.. autofunction:: MDAnalysis.analysis.utils.autocorrelation.autocorrelation
-
-	Intermittency Function
-	----------------------
-	.. autofunction:: MDAnalysis.analysis.utils.autocorrelation.correct_intermittency
\ No newline at end of file
diff --git a/package/doc/sphinx/source/documentation_pages/analysis_modules.rst b/package/doc/sphinx/source/documentation_pages/analysis_modules.rst
index d4bdced3046..19081f1b5dd 100644
--- a/package/doc/sphinx/source/documentation_pages/analysis_modules.rst
+++ b/package/doc/sphinx/source/documentation_pages/analysis_modules.rst
@@ -147,11 +147,3 @@ Data
    :maxdepth: 1
 
    analysis/data
-
-Utils
-=====
-
-.. toctree::
-   :maxdepth: 2
-
-   analysis/autocorrelation
diff --git a/package/doc/sphinx/source/documentation_pages/lib/correlations.rst b/package/doc/sphinx/source/documentation_pages/lib/correlations.rst
new file mode 100644
index 00000000000..e68b0eba864
--- /dev/null
+++ b/package/doc/sphinx/source/documentation_pages/lib/correlations.rst
@@ -0,0 +1,9 @@
+.. automodule:: MDAnalysis.lib.correlations
+
+	Autocorrelation Function
+	------------------------
+	.. autofunction:: MDAnalysis.lib.correlations.autocorrelation
+
+	Intermittency Function
+	----------------------
+	.. autofunction:: MDAnalysis.lib.correlations.correct_intermittency
\ No newline at end of file
diff --git a/package/doc/sphinx/source/documentation_pages/lib_modules.rst b/package/doc/sphinx/source/documentation_pages/lib_modules.rst
index 0394d3fa28e..29ba1a05e8a 100644
--- a/package/doc/sphinx/source/documentation_pages/lib_modules.rst
+++ b/package/doc/sphinx/source/documentation_pages/lib_modules.rst
@@ -59,6 +59,7 @@ List of modules
    ./lib/transformations
    ./lib/qcprot
    ./lib/util
+   ./lib/correlations
 
 Low level file formats
 ----------------------
@@ -74,4 +75,4 @@ Python-based projects.
    :maxdepth: 1
 
    ./lib/formats/libmdaxdr
-   ./lib/formats/libdcd
+   ./lib/formats/libdcd
\ No newline at end of file
diff --git a/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py b/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py
index e4c39889537..3d2325e1a21 100644
--- a/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py
+++ b/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py
@@ -23,7 +23,7 @@
 from __future__ import print_function, absolute_import
 import MDAnalysis
 from MDAnalysis.analysis import waterdynamics
-from MDAnalysis.analysis.utils.autocorrelation import autocorrelation, correct_intermittency
+from MDAnalysis.lib.correlations import autocorrelation, correct_intermittency
 
 from MDAnalysisTests.datafiles import waterPSF, waterDCD
 

From 89991c379dbfb32b461047b89cec3a13ac2b6747 Mon Sep 17 00:00:00 2001
From: bieniekmat <bieniekmat@gmail.com>
Date: Tue, 14 Apr 2020 19:48:01 +0100
Subject: [PATCH 36/40] Further clarifications #2256

co-authored-by: p-j-smith <paul.smith@kcl.ac.uk>
---
 package/MDAnalysis/lib/correlations.py | 36 ++++++++++++++++----------
 1 file changed, 23 insertions(+), 13 deletions(-)

diff --git a/package/MDAnalysis/lib/correlations.py b/package/MDAnalysis/lib/correlations.py
index 0b6af25c231..8555eb257f4 100644
--- a/package/MDAnalysis/lib/correlations.py
+++ b/package/MDAnalysis/lib/correlations.py
@@ -42,27 +42,37 @@
 acount for intermittency before passing the results to
 :func:`autocorrelation`.
 
-See [Gowers2015]_ for a further discussion on the time continuous and
-intermittent autocorrelation of hydrogen bond lifetimes.
+This module is inspired by seemingly disparate analyses that rely on the same
+underlying calculation, including the survival probability of water around
+proteins [Araya-Secchi2014]_, hydrogen bond lifetimes [Gowers2015]_, [Araya-Secchi2014]_,
+and the rate of cholesterol flip-flop in lipid bilayers [Gu2019]_.
 
-.. seeAlso:: Analysis tools that make use of modules
+.. seeAlso::
 
-* :class:`MDAnalysis.analysis.waterdynamics.SurvivalProbability`
-    Calculates the continuous or intermittent survival probability
-    of an atom group in a region of interest.
+    Analysis tools that make use of modules:
 
-* :class:`MDAnalysis.analysis.hbonds.hbond_analysis`
-    Calculates the continuous of intermittent hydrogen bond
-    lifetime.
+        * :class:`MDAnalysis.analysis.waterdynamics.SurvivalProbability`
+            Calculates the continuous or intermittent survival probability
+            of an atom group in a region of interest.
+
+        * :class:`MDAnalysis.analysis.hbonds.hbond_analysis`
+            Calculates the continuous or intermittent hydrogen bond
+            lifetime.
 
 .. rubric:: References
 
-.. [Gowers2015] Gowers, R. J., & Carbone, P. (2015). A multiscale approach to model hydrogen bonding:
-            The case of polyamide. The Journal of Chemical Physics, 142(22), 224907.
+.. [Gowers2015] Gowers, R. J., & Carbone, P. (2015).
+            A multiscale approach to model hydrogen bonding: The case of polyamide.
+            The Journal of Chemical Physics, 142(22), 224907.
             https://doi.org/10.1063/1.4922445
-.. [Araya-Secchi2014] Araya-Secchi, R., Perez-Acle, T., Kang, S., Huynh, T., Bernardin, A., Escalona, Y., Garate, J.-A., Martínez, A. D., García, I. E., Sáez, J. C., & Zhou, R. (2014).
+.. [Araya-Secchi2014] Araya-Secchi, R., Perez-Acle, T., Kang, S., Huynh, T., Bernardin, A., Escalona, Y., Garate, J.-A., Martínez, A. D., García, I. E., Sáez, J. C., & Zhou, R. (2014)
             Characterization of a Novel Water Pocket Inside the Human Cx26 Hemichannel Structure.
-            Biophysical Journal, 107(3), 599–612. https://doi.org/10.1016/J.BPJ.2014.05.037
+            Biophysical Journal. 107(3), 599–612.
+            https://doi.org/10.1016/J.BPJ.2014.05.037
+.. [Gu2019] Gu, R.-X.; Baoukina, S.; Tieleman, D. P. (2019)
+            Cholesterol Flip-Flop in Heterogeneous Membranes.
+            J. Chem. Theory Comput. 15 (3), 2064–2070.
+            https://doi.org/10.1021/acs.jctc.8b00933.
 
 """
 

From 4acb174ee7e530aa19cd0b7e4a300697b72616b7 Mon Sep 17 00:00:00 2001
From: bieniekmat <bieniekmat@gmail.com>
Date: Wed, 15 Apr 2020 10:56:51 +0100
Subject: [PATCH 37/40] Indentation corrections.

co-authored-by: p-j-smith <paul.smith@kcl.ac.uk>
---
 package/MDAnalysis/lib/correlations.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/package/MDAnalysis/lib/correlations.py b/package/MDAnalysis/lib/correlations.py
index 8555eb257f4..130f1ef57ad 100644
--- a/package/MDAnalysis/lib/correlations.py
+++ b/package/MDAnalysis/lib/correlations.py
@@ -214,9 +214,9 @@ def correct_intermittency(list_of_sets, intermittency):
         In the simple case of e.g survival probability, a list of sets of atom ids present at each frame, where a
         single set contains atom ids at a given frame, e.g [{0, 1}, {0}, {0}, {0, 1}]
     intermittency : int
-      The maximum gap allowed. The default `intermittency=0` means that if the datapoint is missing at any frame, no
-      changes are made to the data. With the value of `intermittency=2`, all datapoints missing for up to two
-       consecutive frames will be instead be considered present.
+        The maximum gap allowed. The default `intermittency=0` means that if the datapoint is missing at any frame, no
+        changes are made to the data. With the value of `intermittency=2`, all datapoints missing for up to two
+        consecutive frames will be instead be considered present.
 
     Returns
     -------

From 11e242b289b45b3cb12299130a5903af19277095 Mon Sep 17 00:00:00 2001
From: bieniekmat <bieniekmat@gmail.com>
Date: Wed, 15 Apr 2020 14:05:26 +0100
Subject: [PATCH 38/40] Removing duplicate citation creating a sphinx warning
 #2256

co-authored-by: p-j-smith <paul.smith@kcl.ac.uk>
---
 package/MDAnalysis/lib/correlations.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/package/MDAnalysis/lib/correlations.py b/package/MDAnalysis/lib/correlations.py
index 130f1ef57ad..056722065a7 100644
--- a/package/MDAnalysis/lib/correlations.py
+++ b/package/MDAnalysis/lib/correlations.py
@@ -61,10 +61,6 @@
 
 .. rubric:: References
 
-.. [Gowers2015] Gowers, R. J., & Carbone, P. (2015).
-            A multiscale approach to model hydrogen bonding: The case of polyamide.
-            The Journal of Chemical Physics, 142(22), 224907.
-            https://doi.org/10.1063/1.4922445
 .. [Araya-Secchi2014] Araya-Secchi, R., Perez-Acle, T., Kang, S., Huynh, T., Bernardin, A., Escalona, Y., Garate, J.-A., Martínez, A. D., García, I. E., Sáez, J. C., & Zhou, R. (2014)
             Characterization of a Novel Water Pocket Inside the Human Cx26 Hemichannel Structure.
             Biophysical Journal. 107(3), 599–612.

From 6f3846a81c689b17a3cdc0bc64c34b343057ad85 Mon Sep 17 00:00:00 2001
From: bieniekmat <bieniekmat@gmail.com>
Date: Wed, 15 Apr 2020 16:37:28 +0100
Subject: [PATCH 39/40] Another duplicate citation (sphinx) PR #2256

co-authored-by: p-j-smith <paul.smith@kcl.ac.uk>
---
 package/MDAnalysis/lib/correlations.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/package/MDAnalysis/lib/correlations.py b/package/MDAnalysis/lib/correlations.py
index 056722065a7..483b5fcf97e 100644
--- a/package/MDAnalysis/lib/correlations.py
+++ b/package/MDAnalysis/lib/correlations.py
@@ -61,10 +61,6 @@
 
 .. rubric:: References
 
-.. [Araya-Secchi2014] Araya-Secchi, R., Perez-Acle, T., Kang, S., Huynh, T., Bernardin, A., Escalona, Y., Garate, J.-A., Martínez, A. D., García, I. E., Sáez, J. C., & Zhou, R. (2014)
-            Characterization of a Novel Water Pocket Inside the Human Cx26 Hemichannel Structure.
-            Biophysical Journal. 107(3), 599–612.
-            https://doi.org/10.1016/J.BPJ.2014.05.037
 .. [Gu2019] Gu, R.-X.; Baoukina, S.; Tieleman, D. P. (2019)
             Cholesterol Flip-Flop in Heterogeneous Membranes.
             J. Chem. Theory Comput. 15 (3), 2064–2070.

From 95641de08566cb7d028f61d1d93d2c1c7e29c73d Mon Sep 17 00:00:00 2001
From: bieniekmat <bieniekmat@gmail.com>
Date: Thu, 16 Apr 2020 19:45:37 +0100
Subject: [PATCH 40/40] Adding tags and summerising PR #2256

co-authored-by: p-j-smith <paul.smith@kcl.ac.uk>
---
 package/CHANGELOG                                    | 3 +++
 package/MDAnalysis/analysis/hbonds/hbond_analysis.py | 3 +++
 package/MDAnalysis/analysis/waterdynamics.py         | 6 ++++--
 3 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/package/CHANGELOG b/package/CHANGELOG
index c1f0127c1b4..77a2288672e 100644
--- a/package/CHANGELOG
+++ b/package/CHANGELOG
@@ -116,6 +116,9 @@ Enhancements
   * Added weights_groupselections option in RMSD for mapping custom weights
     to groupselections (PR #2610, Issue #2429)
   * PersistenceLength.plot() now create new axes if current axes not provided (Issue #2590)
+  * Added a correlations module to provide functionality for analysis modules to
+    calculate the discrete autocorrelation function in a standardised way. Added
+    the capability to allow intermittent behaviour (PR #2256)
 
 Changes
   * Deprecated :class:`ProgressMeter` and replaced it with :class:`ProgressBar` using
diff --git a/package/MDAnalysis/analysis/hbonds/hbond_analysis.py b/package/MDAnalysis/analysis/hbonds/hbond_analysis.py
index 6ec450b9a52..8b89425c961 100644
--- a/package/MDAnalysis/analysis/hbonds/hbond_analysis.py
+++ b/package/MDAnalysis/analysis/hbonds/hbond_analysis.py
@@ -383,6 +383,9 @@ class HydrogenBondAnalysis(base.AnalysisBase):
        DEFAULT_DONORS/ACCEPTORS is now embedded in a dict to switch between
        default values for different force fields.
 
+    .. versionchanged:: 0.21.0
+        Added autocorrelation (MDAnalysis.lib.correlations.py) for calculating hydrogen bond lifetime
+
     .. versionchanged:: 1.0.0
        ``save_table()`` method has been removed. You can use ``np.save()`` or
        ``cPickle.dump()`` on :attr:`HydrogenBondAnalysis.table` instead.
diff --git a/package/MDAnalysis/analysis/waterdynamics.py b/package/MDAnalysis/analysis/waterdynamics.py
index 4498356d3ae..70d42a26a93 100644
--- a/package/MDAnalysis/analysis/waterdynamics.py
+++ b/package/MDAnalysis/analysis/waterdynamics.py
@@ -1172,7 +1172,8 @@ class SurvivalProbability(object):
 
 
     .. versionadded:: 0.11.0
-
+    .. versionchanged:: 0.21.0
+        Using the MDAnalysis.lib.correlations.py to carry out the intermittency and autocorrelation calculations
     .. versionchanged:: 1.0.0
        Changed `selection` keyword to `select`
     """
@@ -1259,7 +1260,8 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermitte
         # fixme - to parallise: the section should be rewritten so that this loop only creates a list of indices,
         # on which the parallel _single_frame can be applied.
 
-        # skip frames that will not be used
+        # skip frames that will not be used in order to improve performance
+        # because AtomGroup.select_atoms is the most expensive part of this calculation
         # Example: step 5 and tau 2: LLLSS LLLSS, ... where L = Load, and S = Skip
         # Intermittency means that we have to load the extra frames to know if the atom is actually missing.
         # Say step=5 and tau=1, intermittency=0: LLSSS LLSSS