From 66f9a5a559c8d8c4d0142a74b6e4560ef5dca164 Mon Sep 17 00:00:00 2001 From: IAlibay Date: Sun, 2 May 2021 20:59:23 +0100 Subject: [PATCH 1/7] Moves hbond_autocorrel and removes old hbond code --- .../MDAnalysis/analysis/hbonds/__init__.py | 2 - .../analysis/hbonds/hbond_analysis.py | 1462 ----------------- .../analysis/hbonds/hbond_autocorrel.py | 589 +------ .../analysis/hydrogenbonds/__init__.py | 4 +- .../hydrogenbonds/hbond_autocorrel.py | 608 +++++++ .../MDAnalysisTests/analysis/test_hbonds.py | 552 ------- .../analysis/test_hydrogenbondautocorrel.py | 17 +- 7 files changed, 631 insertions(+), 2603 deletions(-) delete mode 100644 package/MDAnalysis/analysis/hbonds/hbond_analysis.py create mode 100644 package/MDAnalysis/analysis/hydrogenbonds/hbond_autocorrel.py delete mode 100644 testsuite/MDAnalysisTests/analysis/test_hbonds.py diff --git a/package/MDAnalysis/analysis/hbonds/__init__.py b/package/MDAnalysis/analysis/hbonds/__init__.py index 1d895669687..8dc8091e969 100644 --- a/package/MDAnalysis/analysis/hbonds/__init__.py +++ b/package/MDAnalysis/analysis/hbonds/__init__.py @@ -22,9 +22,7 @@ # __all__ = [ - 'HydrogenBondAnalysis', 'HydrogenBondAutoCorrel', 'find_hydrogen_donors', ] -from .hbond_analysis import HydrogenBondAnalysis from .hbond_autocorrel import HydrogenBondAutoCorrel, find_hydrogen_donors diff --git a/package/MDAnalysis/analysis/hbonds/hbond_analysis.py b/package/MDAnalysis/analysis/hbonds/hbond_analysis.py deleted file mode 100644 index d475e738bcd..00000000000 --- a/package/MDAnalysis/analysis/hbonds/hbond_analysis.py +++ /dev/null @@ -1,1462 +0,0 @@ -# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- -# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 -# -# MDAnalysis --- https://www.mdanalysis.org -# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors -# (see the file AUTHORS for the full list of names) -# -# Released under the GNU Public Licence, v2 or any higher version -# -# Please cite your use of MDAnalysis in published work: -# -# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, -# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. -# MDAnalysis: A Python package for the rapid analysis of molecular dynamics -# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th -# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. -# doi: 10.25080/majora-629e541a-00e -# -# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. -# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. -# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 -# - -# Hydrogen Bonding Analysis -r"""Hydrogen Bond analysis (Deprecated) --- :mod:`MDAnalysis.analysis.hbonds.hbond_analysis` -============================================================================================ - -:Author: David Caplan, Lukas Grossar, Oliver Beckstein -:Year: 2010-2017 -:Copyright: GNU Public License v3 - -..Warning: - This module is deprecated and will be removed in version 2.0. - Please use :mod:`MDAnalysis.analysis.hydrogenbonds.hbond_analysis` instead. - -Given a :class:`~MDAnalysis.core.universe.Universe` (simulation -trajectory with 1 or more frames) measure all hydrogen bonds for each -frame between selections 1 and 2. - -The :class:`HydrogenBondAnalysis` class is modeled after the `VMD -HBONDS plugin`_. - -.. _`VMD HBONDS plugin`: http://www.ks.uiuc.edu/Research/vmd/plugins/hbonds/ - -Options: - - *update_selections* (``True``): update selections at each frame? - - *selection_1_type* ("both"): selection 1 is the: "donor", "acceptor", "both" - - donor-acceptor *distance* (Å): 3.0 - - Angle *cutoff* (degrees): 120.0 - - *forcefield* to switch between default values for different force fields - - *donors* and *acceptors* atom types (to add additional atom names) - -.. _Analysis Output: - -Output ------- - -The results are - - the **identities** of donor and acceptor heavy-atoms, - - the **distance** between the heavy atom acceptor atom and the hydrogen atom - that is bonded to the heavy atom donor, - - the **angle** donor-hydrogen-acceptor angle (180º is linear). - -Hydrogen bond data are returned per frame, which is stored in -:attr:`HydrogenBondAnalysis.timeseries` (In the following description, ``#`` -indicates comments that are not part of the output.):: - - results = [ - [ # frame 1 - [ # hbond 1 - , - , , , - , - ], - [ # hbond 2 - , - , , , - , - ], - .... - ], - [ # frame 2 - [ ... ], [ ... ], ... - ], - ... - ] - -Using the :meth:`HydrogenBondAnalysis.generate_table` method one can reformat -the results as a flat "normalised" table that is easier to import into a -database or dataframe for further processing. The table itself is a -:class:`numpy.recarray`. - -.. _Detection-of-hydrogen-bonds: - -Detection of hydrogen bonds ---------------------------- - -Hydrogen bonds are recorded based on a geometric criterion: - -1. The distance between acceptor and hydrogen is less than or equal to - `distance` (default is 3 Å). - -2. The angle between donor-hydrogen-acceptor is greater than or equal to - `angle` (default is 120º). - -The cut-off values `angle` and `distance` can be set as keywords to -:class:`HydrogenBondAnalysis`. - -Donor and acceptor heavy atoms are detected from atom names. The current -defaults are appropriate for the CHARMM27 and GLYCAM06 force fields as defined -in Table `Default atom names for hydrogen bonding analysis`_. - -Hydrogen atoms bonded to a donor are searched with one of two algorithms, -selected with the `detect_hydrogens` keyword. - -"distance" - Searches for all hydrogens (name "H*" or name "[123]H" or type "H") in the - same residue as the donor atom within a cut-off distance of 1.2 Å. - -"heuristic" - Looks at the next three atoms in the list of atoms following the donor and - selects any atom whose name matches (name "H*" or name "[123]H"). For - -The "distance" search is more rigorous but slower and is set as the -default. Until release 0.7.6, only the heuristic search was implemented. - -.. versionchanged:: 0.7.6 - Distance search added (see - :meth:`HydrogenBondAnalysis._get_bonded_hydrogens_dist`) and heuristic - search improved (:meth:`HydrogenBondAnalysis._get_bonded_hydrogens_list`) - -.. _Default atom names for hydrogen bonding analysis: - -.. table:: Default heavy atom names for CHARMM27 force field. - - =========== ============== =========== ==================================== - group donor acceptor comments - =========== ============== =========== ==================================== - main chain N O, OC1, OC2 OC1, OC2 from amber99sb-ildn - (Gromacs) - water OH2, OW OH2, OW SPC, TIP3P, TIP4P (CHARMM27,Gromacs) - - ARG NE, NH1, NH2 - ASN ND2 OD1 - ASP OD1, OD2 - CYS SG - CYH SG possible false positives for CYS - GLN NE2 OE1 - GLU OE1, OE2 - HIS ND1, NE2 ND1, NE2 presence of H determines if donor - HSD ND1 NE2 - HSE NE2 ND1 - HSP ND1, NE2 - LYS NZ - MET SD see e.g. [Gregoret1991]_ - SER OG OG - THR OG1 OG1 - TRP NE1 - TYR OH OH - =========== ============== =========== ==================================== - -.. table:: Heavy atom types for GLYCAM06 force field. - - =========== =========== ================== - element donor acceptor - =========== =========== ================== - N N,NT,N3 N,NT - O OH,OW O,O2,OH,OS,OW,OY - S SM - =========== =========== ================== - -Donor and acceptor names for the CHARMM27 force field will also work for e.g. -OPLS/AA (tested in Gromacs). Residue names in the table are for information -only and are not taken into account when determining acceptors and donors. -This can potentially lead to some ambiguity in the assignment of -donors/acceptors for residues such as histidine or cytosine. - -For more information about the naming convention in GLYCAM06 have a look at the -`Carbohydrate Naming Convention in Glycam`_. - -.. _`Carbohydrate Naming Convention in Glycam`: - http://glycam.ccrc.uga.edu/documents/FutureNomenclature.htm - -The lists of donor and acceptor names can be extended by providing lists of -atom names in the `donors` and `acceptors` keywords to -:class:`HydrogenBondAnalysis`. If the lists are entirely inappropriate -(e.g. when analysing simulations done with a force field that uses very -different atom names) then one should either use the value "other" for `forcefield` -to set no default values, or derive a new class and set the default list oneself:: - - class HydrogenBondAnalysis_OtherFF(HydrogenBondAnalysis): - DEFAULT_DONORS = {"OtherFF": tuple(set([...]))} - DEFAULT_ACCEPTORS = {"OtherFF": tuple(set([...]))} - -Then simply use the new class instead of the parent class and call it with -`forcefield` = "OtherFF". Please also consider to contribute the list of heavy -atom names to MDAnalysis. - -.. rubric:: References - -.. [Gregoret1991] L.M. Gregoret, S.D. Rader, R.J. Fletterick, and - F.E. Cohen. Hydrogen bonds involving sulfur atoms in proteins. Proteins, - 9(2):99–107, 1991. `10.1002/prot.340090204`_. - -.. _`10.1002/prot.340090204`: http://dx.doi.org/10.1002/prot.340090204 - - -How to perform HydrogenBondAnalysis ------------------------------------ - -All protein-water hydrogen bonds can be analysed with :: - - import MDAnalysis - import MDAnalysis.analysis.hbonds - - u = MDAnalysis.Universe('topology', 'trajectory') - h = MDAnalysis.analysis.hbonds.HydrogenBondAnalysis(u, 'protein', 'resname HOH', distance=3.0, angle=120.0) - h.run() - -.. Note:: - - Due to the way :class:`HydrogenBondAnalysis` is implemented, it is - more efficient to have the second selection (`selection2`) be the - *larger* group, e.g. the water when looking at water-protein - H-bonds or the whole protein when looking at ligand-protein - interactions. - - -The results are stored as the attribute -:attr:`HydrogenBondAnalysis.timeseries`; see :ref:`Analysis Output` for the -format and further options. - -A number of convenience functions are provided to process the -:attr:`~HydrogenBondAnalysis.timeseries` according to varying criteria: - -:meth:`~HydrogenBondAnalysis.count_by_time` - time series of the number of hydrogen bonds per time step -:meth:`~HydrogenBondAnalysis.count_by_type` - data structure with the frequency of each observed hydrogen bond -:meth:`~HydrogenBondAnalysis.timesteps_by_type` - data structure with a list of time steps for each observed hydrogen bond - -For further data analysis it is convenient to process the -:attr:`~HydrogenBondAnalysis.timeseries` data into a normalized table with the -:meth:`~HydrogenBondAnalysis.generate_table` method, which creates a new data -structure :attr:`HydrogenBondAnalysis.table` that contains one row for each -observation of a hydrogen bond:: - - h.generate_table() - -This table can then be easily turned into, e.g., a `pandas.DataFrame`_, and -further analyzed:: - - import pandas as pd - df = pd.DataFrame.from_records(h.table) - -For example, plotting a histogram of the hydrogen bond angles and lengths is as -simple as :: - - df.hist(column=["angle", "distance"]) - -.. _pandas.DataFrame: http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.html - - -.. TODO: notes on selection updating - - -Classes -------- - -.. autoclass:: HydrogenBondAnalysis - :members: - - .. attribute:: timesteps - - List of the times of each timestep. This can be used together with - :attr:`~HydrogenBondAnalysis.timeseries` to find the specific time point - of a hydrogen bond existence, or see :attr:`~HydrogenBondAnalysis.table`. - - .. attribute:: table - - A normalised table of the data in - :attr:`HydrogenBondAnalysis.timeseries`, generated by - :meth:`HydrogenBondAnalysis.generate_table`. It is a - :class:`numpy.recarray` with the following columns: - - 0. "time" - 1. "donor_index" - 2. "acceptor_index" - 3. "donor_resnm" - 4. "donor_resid" - 5. "donor_atom" - 6. "acceptor_resnm" - 7. "acceptor_resid" - 8. "acceptor_atom" - 9. "distance" - 10. "angle" - - It takes up more space than :attr:`~HydrogenBondAnalysis.timeseries` but - it is easier to analyze and to import into databases or dataframes. - - - .. rubric:: Example - - For example, to create a `pandas.DataFrame`_ from ``h.table``:: - - import pandas as pd - df = pd.DataFrame.from_records(h.table) - - - .. versionchanged:: 0.17.0 - The 1-based donor and acceptor indices (*donor_idx* and - *acceptor_idx*) are deprecated in favor of 0-based indices. - - .. automethod:: _get_bonded_hydrogens - - .. automethod:: _get_bonded_hydrogens_dist - - .. automethod:: _get_bonded_hydrogens_list - -""" -import warnings -import logging -from collections import defaultdict - -import numpy as np - -from MDAnalysis import MissingDataWarning, NoDataError, SelectionError, SelectionWarning -from .. import base -from MDAnalysis.lib.log import ProgressBar -from MDAnalysis.lib.NeighborSearch import AtomNeighborSearch -from MDAnalysis.lib import distances -from MDAnalysis.lib.correlations import autocorrelation, correct_intermittency - - -logger = logging.getLogger('MDAnalysis.analysis.hbonds') - -warnings.warn( - "This module is deprecated as of MDAnalysis version 1.0." - "It will be removed in MDAnalysis version 2.0" - "Please use MDAnalysis.analysis.hydrogenbonds.hbond_analysis instead.", - category=DeprecationWarning - ) - - -class HydrogenBondAnalysis(base.AnalysisBase): - """Perform a hydrogen bond analysis - - The analysis of the trajectory is performed with the - :meth:`HydrogenBondAnalysis.run` method. The result is stored in - :attr:`HydrogenBondAnalysis.timeseries`. See - :meth:`~HydrogenBondAnalysis.run` for the format. - - The default atom names are taken from the CHARMM 27 force field files, which - will also work for e.g. OPLS/AA in Gromacs, and GLYCAM06. - - *Donors* (associated hydrogens are deduced from topology) - *CHARMM 27* - N of the main chain, water OH2/OW, ARG NE/NH1/NH2, ASN ND2, HIS ND1/NE2, - SER OG, TYR OH, CYS SG, THR OG1, GLN NE2, LYS NZ, TRP NE1 - *GLYCAM06* - N,NT,N3,OH,OW - - *Acceptors* - *CHARMM 27* - O of the main chain, water OH2/OW, ASN OD1, ASP OD1/OD2, CYH SG, GLN OE1, - GLU OE1/OE2, HIS ND1/NE2, MET SD, SER OG, THR OG1, TYR OH - *GLYCAM06* - N,NT,O,O2,OH,OS,OW,OY,P,S,SM - *amber99sb-ildn(Gromacs)* - OC1, OC2 of the main chain - - See Also - -------- - :ref:`Default atom names for hydrogen bonding analysis` - - - .. versionchanged:: 0.7.6 - DEFAULT_DONORS/ACCEPTORS is now embedded in a dict to switch between - default values for different force fields. - - .. versionchanged:: 1.0.0 - Added autocorrelation (MDAnalysis.lib.correlations.py) for calculating hydrogen bond lifetime - ``save_table()`` method has been removed. You can use ``np.save()`` or - ``cPickle.dump()`` on :attr:`HydrogenBondAnalysis.table` instead. - """ - - # use tuple(set()) here so that one can just copy&paste names from the - # table; set() takes care for removing duplicates. At the end the - # DEFAULT_DONORS and DEFAULT_ACCEPTORS should simply be tuples. - - #: default heavy atom names whose hydrogens are treated as *donors* - #: (see :ref:`Default atom names for hydrogen bonding analysis`); - #: use the keyword `donors` to add a list of additional donor names. - DEFAULT_DONORS = { - 'CHARMM27': tuple(set([ - 'N', 'OH2', 'OW', 'NE', 'NH1', 'NH2', 'ND2', 'SG', 'NE2', 'ND1', 'NZ', 'OG', 'OG1', 'NE1', 'OH'])), - 'GLYCAM06': tuple(set(['N', 'NT', 'N3', 'OH', 'OW'])), - 'other': tuple(set([]))} - - #: default atom names that are treated as hydrogen *acceptors* - #: (see :ref:`Default atom names for hydrogen bonding analysis`); - #: use the keyword `acceptors` to add a list of additional acceptor names. - DEFAULT_ACCEPTORS = { - 'CHARMM27': tuple(set([ - 'O', 'OC1', 'OC2', 'OH2', 'OW', 'OD1', 'OD2', 'SG', 'OE1', 'OE1', 'OE2', 'ND1', 'NE2', 'SD', 'OG', 'OG1', 'OH'])), - 'GLYCAM06': tuple(set(['N', 'NT', 'O', 'O2', 'OH', 'OS', 'OW', 'OY', 'SM'])), - 'other': tuple(set([]))} - - #: A :class:`collections.defaultdict` of covalent radii of common donors - #: (used in :meth`_get_bonded_hydrogens_list` to check if a hydrogen is - #: sufficiently close to its donor heavy atom). Values are stored for - #: N, O, P, and S. Any other heavy atoms are assumed to have hydrogens - #: covalently bound at a maximum distance of 1.5 Å. - r_cov = defaultdict(lambda: 1.5, # default value - N=1.31, O=1.31, P=1.58, S=1.55) - - def __init__(self, universe, selection1='protein', selection2='all', selection1_type='both', - update_selection1=True, update_selection2=True, filter_first=True, distance_type='hydrogen', - distance=3.0, angle=120.0, - forcefield='CHARMM27', donors=None, acceptors=None, - debug=False, detect_hydrogens='distance', verbose=False, pbc=False, **kwargs): - """Set up calculation of hydrogen bonds between two selections in a universe. - - The timeseries is accessible as the attribute :attr:`HydrogenBondAnalysis.timeseries`. - - Some initial checks are performed. If there are no atoms selected by - `selection1` or `selection2` or if no donor hydrogens or acceptor atoms - are found then a :exc:`SelectionError` is raised for any selection that - does *not* update (`update_selection1` and `update_selection2` - keywords). For selections that are set to update, only a warning is - logged because it is assumed that the selection might contain atoms at - a later frame (e.g. for distance based selections). - - If no hydrogen bonds are detected or if the initial check fails, look - at the log output (enable with :func:`MDAnalysis.start_logging` and set - `verbose` ``=True``). It is likely that the default names for donors - and acceptors are not suitable (especially for non-standard - ligands). In this case, either change the `forcefield` or use - customized `donors` and/or `acceptors`. - - Parameters - ---------- - universe : Universe - Universe object - selection1 : str (optional) - Selection string for first selection ['protein'] - selection2 : str (optional) - Selection string for second selection ['all'] - selection1_type : {"donor", "acceptor", "both"} (optional) - Selection 1 can be 'donor', 'acceptor' or 'both'. Note that the - value for `selection1_type` automatically determines how - `selection2` handles donors and acceptors: If `selection1` contains - 'both' then `selection2` will also contain 'both'. If `selection1` - is set to 'donor' then `selection2` is 'acceptor' (and vice versa). - ['both']. - update_selection1 : bool (optional) - Update selection 1 at each frame? Setting to ``False`` is recommended - for any static selection to increase performance. [``True``] - update_selection2 : bool (optional) - Update selection 2 at each frame? Setting to ``False`` is recommended - for any static selection to increase performance. [``True``] - filter_first : bool (optional) - Filter selection 2 first to only atoms 3 * `distance` away [``True``] - distance : float (optional) - Distance cutoff for hydrogen bonds; only interactions with a H-A distance - <= `distance` (and the appropriate D-H-A angle, see `angle`) are - recorded. (Note: `distance_type` can change this to the D-A distance.) [3.0] - angle : float (optional) - Angle cutoff for hydrogen bonds; an ideal H-bond has an angle of - 180º. A hydrogen bond is only recorded if the D-H-A angle is - >= `angle`. The default of 120º also finds fairly non-specific - hydrogen interactions and a possibly better value is 150º. [120.0] - forcefield : {"CHARMM27", "GLYCAM06", "other"} (optional) - Name of the forcefield used. Switches between different - :attr:`~HydrogenBondAnalysis.DEFAULT_DONORS` and - :attr:`~HydrogenBondAnalysis.DEFAULT_ACCEPTORS` values. - ["CHARMM27"] - donors : sequence (optional) - Extra H donor atom types (in addition to those in - :attr:`~HydrogenBondAnalysis.DEFAULT_DONORS`), must be a sequence. - acceptors : sequence (optional) - Extra H acceptor atom types (in addition to those in - :attr:`~HydrogenBondAnalysis.DEFAULT_ACCEPTORS`), must be a sequence. - detect_hydrogens : {"distance", "heuristic"} (optional) - Determine the algorithm to find hydrogens connected to donor - atoms. Can be "distance" (default; finds all hydrogens in the - donor's residue within a cutoff of the donor) or "heuristic" - (looks for the next few atoms in the atom list). "distance" should - always give the correct answer but "heuristic" is faster, - especially when the donor list is updated each - for each frame. ["distance"] - distance_type : {"hydrogen", "heavy"} (optional) - Measure hydrogen bond lengths between donor and acceptor heavy - attoms ("heavy") or between donor hydrogen and acceptor heavy - atom ("hydrogen"). If using "heavy" then one should set the *distance* - cutoff to a higher value such as 3.5 Å. ["hydrogen"] - debug : bool (optional) - If set to ``True`` enables per-frame debug logging. This is disabled - by default because it generates a very large amount of output in - the log file. (Note that a logger must have been started to see - the output, e.g. using :func:`MDAnalysis.start_logging`.) [``False``] - verbose : bool (optional) - Toggle progress output. (Can also be given as keyword argument to - :meth:`run`.) - pbc : bool (optional) - Whether to consider periodic boundaries in calculations [``False``] - - Raises - ------ - :exc:`SelectionError` - is raised for each static selection without the required - donors and/or acceptors. - - Notes - ----- - In order to speed up processing, atoms are filtered by a coarse - distance criterion before a detailed hydrogen bonding analysis is - performed (`filter_first` = ``True``). If one of your selections is - e.g. the solvent then `update_selection1` (or `update_selection2`) must - also be ``True`` so that the list of candidate atoms is updated at each - step: this is now the default. - - If your selections will essentially remain the same for all time steps - (i.e. residues are not moving farther than 3 x `distance`), for - instance, if neither water nor large conformational changes are - involved or if the optimization is disabled (`filter_first` = - ``False``) then you can improve performance by setting the - `update_selection1` and/or `update_selection2` keywords to ``False``. - - - .. versionchanged:: 0.7.6 - New `verbose` keyword (and per-frame debug logging disabled by - default). - - New `detect_hydrogens` keyword to switch between two different - algorithms to detect hydrogens bonded to donor. "distance" is a new, - rigorous distance search within the residue of the donor atom, - "heuristic" is the previous list scan (improved with an additional - distance check). - - New `forcefield` keyword to switch between different values of - DEFAULT_DONORS/ACCEPTORS to accomodate different force fields. - Also has an option "other" for no default values. - - .. versionchanged:: 0.8 - The new default for `update_selection1` and `update_selection2` is now - ``True`` (see `Issue 138`_). Set to ``False`` if your selections only - need to be determined once (will increase performance). - - .. versionchanged:: 0.9.0 - New keyword `distance_type` to select between calculation between - heavy atoms or hydrogen-acceptor. It defaults to the previous - behavior (i.e. "hydrogen"). - - .. versionchanged:: 0.11.0 - Initial checks for selections that potentially raise :exc:`SelectionError`. - - .. versionchanged:: 0.17.0 - use 0-based indexing - - .. deprecated:: 0.16 - The previous `verbose` keyword argument was replaced by - `debug`. Note that the `verbose` keyword argument is now - consistently used to toggle progress meters throughout the library. - - .. _`Issue 138`: https://github.com/MDAnalysis/mdanalysis/issues/138 - - """ - super(HydrogenBondAnalysis, self).__init__(universe.trajectory, **kwargs) - - warnings.warn( - "This class is deprecated as of MDAnalysis version 1.0 and will " - "be removed in version 2.0." - "Please use MDAnalysis.analysis.hydrogenbonds.hbond_analysis.HydrogenBondAnalysis instead.", - category=DeprecationWarning - ) - - # per-frame debugging output? - self.debug = debug - - self._get_bonded_hydrogens_algorithms = { - "distance": self._get_bonded_hydrogens_dist, # 0.7.6 default - "heuristic": self._get_bonded_hydrogens_list, # pre 0.7.6 - } - if not detect_hydrogens in self._get_bonded_hydrogens_algorithms: - raise ValueError("detect_hydrogens must be one of {0!r}".format( - self._get_bonded_hydrogens_algorithms.keys())) - self.detect_hydrogens = detect_hydrogens - - self.u = universe - self.selection1 = selection1 - self.selection2 = selection2 - self.selection1_type = selection1_type - self.update_selection1 = update_selection1 - self.update_selection2 = update_selection2 - self.filter_first = filter_first - self.distance = distance - self.distance_type = distance_type # note: everything except 'heavy' will give the default behavior - self.angle = angle - self.pbc = pbc and all(self.u.dimensions[:3]) - - # set up the donors/acceptors lists - if donors is None: - donors = [] - if acceptors is None: - acceptors = [] - self.forcefield = forcefield - self.donors = tuple(set(self.DEFAULT_DONORS[forcefield]).union(donors)) - self.acceptors = tuple(set(self.DEFAULT_ACCEPTORS[forcefield]).union(acceptors)) - - if not (self.selection1 and self.selection2): - raise ValueError('HydrogenBondAnalysis: invalid selections') - elif self.selection1_type not in ('both', 'donor', 'acceptor'): - raise ValueError('HydrogenBondAnalysis: Invalid selection type {0!s}'.format(self.selection1_type)) - - self._timeseries = None # final result accessed as self.timeseries - self.timesteps = None # time for each frame - - self.table = None # placeholder for output table - - self._update_selection_1() - self._update_selection_2() - - self._log_parameters() - - if self.selection1_type == 'donor': - self._sanity_check(1, 'donors') - self._sanity_check(2, 'acceptors') - elif self.selection1_type == 'acceptor': - self._sanity_check(1, 'acceptors') - self._sanity_check(2, 'donors') - else: # both - self._sanity_check(1, 'donors') - self._sanity_check(1, 'acceptors') - self._sanity_check(2, 'acceptors') - self._sanity_check(2, 'donors') - logger.info("HBond analysis: initial checks passed.") - - def _sanity_check(self, selection, htype): - """sanity check the selections 1 and 2 - - *selection* is 1 or 2, *htype* is "donors" or "acceptors" - - If selections do not update and the required donor and acceptor - selections are empty then a :exc:`SelectionError` is immediately - raised. - - If selections update dynamically then it is possible that the selection - will yield donors/acceptors at a later step and we only issue a - warning. - - .. versionadded:: 0.11.0 - """ - assert selection in (1, 2) - assert htype in ("donors", "acceptors") - # horrible data organization: _s1_donors, _s2_acceptors, etc, update_selection1, ... - atoms = getattr(self, "_s{0}_{1}".format(selection, htype)) - update = getattr(self, "update_selection{0}".format(selection)) - if not atoms: - errmsg = "No {1} found in selection {0}. " \ - "You might have to specify a custom '{1}' keyword.".format( - selection, htype) - if not update: - logger.error(errmsg) - raise SelectionError(errmsg) - else: - errmsg += " Selection will update so continuing with fingers crossed." - warnings.warn(errmsg, category=SelectionWarning) - logger.warning(errmsg) - - def _log_parameters(self): - """Log important parameters to the logfile.""" - logger.info("HBond analysis: selection1 = %r (update: %r)", self.selection1, self.update_selection1) - logger.info("HBond analysis: selection2 = %r (update: %r)", self.selection2, self.update_selection2) - logger.info("HBond analysis: criterion: donor %s atom and acceptor atom distance <= %.3f A", self.distance_type, - self.distance) - logger.info("HBond analysis: criterion: angle D-H-A >= %.3f degrees", self.angle) - logger.info("HBond analysis: force field %s to guess donor and acceptor names", self.forcefield) - logger.info("HBond analysis: bonded hydrogen detection algorithm: %r", self.detect_hydrogens) - - def _get_bonded_hydrogens(self, atom, **kwargs): - """Find hydrogens bonded to `atom`. - - This method is typically not called by a user but it is documented to - facilitate understanding of the internals of - :class:`HydrogenBondAnalysis`. - - Parameters - ---------- - atom : groups.Atom - heavy atom - **kwargs - passed through to the calculation method that was selected with - the `detect_hydrogens` kwarg of :class:`HydrogenBondAnalysis`. - - Returns - ------- - hydrogen_atoms : AtomGroup or [] - list of hydrogens (can be a :class:`~MDAnalysis.core.groups.AtomGroup`) - or empty list ``[]`` if none were found. - - See Also - -------- - :meth:`_get_bonded_hydrogens_dist` - :meth:`_get_bonded_hydrogens_list` - - - .. versionchanged:: 0.7.6 - Can switch algorithm by using the `detect_hydrogens` keyword to the - constructor. *kwargs* can be used to supply arguments for algorithm. - - """ - return self._get_bonded_hydrogens_algorithms[self.detect_hydrogens](atom, **kwargs) - - def _get_bonded_hydrogens_dist(self, atom): - """Find hydrogens bonded within cutoff to `atom`. - - Hydrogens are detected by either name ("H*", "[123]H*") or type ("H"); - this is not fool-proof as the atom type is not always a character but - the name pattern should catch most typical occurrences. - - The distance from `atom` is calculated for all hydrogens in the residue - and only those within a cutoff are kept. The cutoff depends on the - heavy atom (more precisely, on its element, which is taken as the first - letter of its name ``atom.name[0]``) and is parameterized in - :attr:`HydrogenBondAnalysis.r_cov`. If no match is found then the - default of 1.5 Å is used. - - - Parameters - ---------- - atom : groups.Atom - heavy atom - - Returns - ------- - hydrogen_atoms : AtomGroup or [] - list of hydrogens (can be a :class:`~MDAnalysis.core.groups.AtomGroup`) - or empty list ``[]`` if none were found. - - Notes - ----- - The performance of this implementation could be improved once the - topology always contains bonded information; it currently uses the - selection parser with an "around" selection. - - - .. versionadded:: 0.7.6 - - """ - try: - return atom.residue.atoms.select_atoms( - "(name H* 1H* 2H* 3H* or type H) and around {0:f} name {1!s}" - "".format(self.r_cov[atom.name[0]], atom.name)) - except NoDataError: - return [] - - def _get_bonded_hydrogens_list(self, atom, **kwargs): - """Find "bonded" hydrogens to the donor *atom*. - - At the moment this relies on the **assumption** that the - hydrogens are listed directly after the heavy atom in the - topology. If this is not the case then this function will - fail. - - Hydrogens are detected by name ``H*``, ``[123]H*`` and they have to be - within a maximum distance from the heavy atom. The cutoff distance - depends on the heavy atom and is parameterized in - :attr:`HydrogenBondAnalysis.r_cov`. - - Parameters - ---------- - atom : groups.Atom - heavy atom - **kwargs - ignored - - Returns - ------- - hydrogen_atoms : AtomGroup or [] - list of hydrogens (can be a :class:`~MDAnalysis.core.groups.AtomGroup`) - or empty list ``[]`` if none were found. - - - .. versionchanged:: 0.7.6 - - Added detection of ``[123]H`` and additional check that a - selected hydrogen is bonded to the donor atom (i.e. its - distance to the donor is less than the covalent radius - stored in :attr:`HydrogenBondAnalysis.r_cov` or the default - 1.5 Å). - - Changed name to - :meth:`~HydrogenBondAnalysis._get_bonded_hydrogens_list` - and added *kwargs* so that it can be used instead of - :meth:`~HydrogenBondAnalysis._get_bonded_hydrogens_dist`. - - """ - warnings.warn("_get_bonded_hydrogens_list() (heuristic detection) does " - "not always find " - "all hydrogens; Using detect_hydrogens='distance', when " - "constructing the HydrogenBondAnalysis class is safer. " - "Removal of this feature is targeted for 1.0", - category=DeprecationWarning) - box = self.u.dimensions if self.pbc else None - try: - hydrogens = [ - a for a in self.u.atoms[atom.index + 1:atom.index + 4] - if (a.name.startswith(('H', '1H', '2H', '3H')) and - distances.calc_bonds(atom.position, a.position, box=box) < self.r_cov[atom.name[0]]) - ] - except IndexError: - hydrogens = [] # weird corner case that atom is the last one in universe - return hydrogens - - def _update_selection_1(self): - self._s1 = self.u.select_atoms(self.selection1) - self.logger_debug("Size of selection 1: {0} atoms".format(len(self._s1))) - if not self._s1: - logger.warning("Selection 1 '{0}' did not select any atoms." - .format(str(self.selection1)[:80])) - self._s1_donors = {} - self._s1_donors_h = {} - self._s1_acceptors = {} - if self.selection1_type in ('donor', 'both'): - self._s1_donors = self._s1.select_atoms( - 'name {0}'.format(' '.join(self.donors))) - self._s1_donors_h = {} - for i, d in enumerate(self._s1_donors): - tmp = self._get_bonded_hydrogens(d) - if tmp: - self._s1_donors_h[i] = tmp - self.logger_debug("Selection 1 donors: {0}".format(len(self._s1_donors))) - self.logger_debug("Selection 1 donor hydrogens: {0}".format(len(self._s1_donors_h))) - if self.selection1_type in ('acceptor', 'both'): - self._s1_acceptors = self._s1.select_atoms( - 'name {0}'.format(' '.join(self.acceptors))) - self.logger_debug("Selection 1 acceptors: {0}".format(len(self._s1_acceptors))) - - def _update_selection_2(self): - box = self.u.dimensions if self.pbc else None - self._s2 = self.u.select_atoms(self.selection2) - if self.filter_first and self._s2: - self.logger_debug('Size of selection 2 before filtering:' - ' {} atoms'.format(len(self._s2))) - ns_selection_2 = AtomNeighborSearch(self._s2, box) - self._s2 = ns_selection_2.search(self._s1, 3. * self.distance) - self.logger_debug('Size of selection 2: {0} atoms'.format(len(self._s2))) - if not self._s2: - logger.warning('Selection 2 "{0}" did not select any atoms.' - .format(str(self.selection2)[:80])) - self._s2_donors = {} - self._s2_donors_h = {} - self._s2_acceptors = {} - if not self._s2: - return None - if self.selection1_type in ('donor', 'both'): - self._s2_acceptors = self._s2.select_atoms( - 'name {0}'.format(' '.join(self.acceptors))) - self.logger_debug("Selection 2 acceptors: {0:d}".format(len(self._s2_acceptors))) - if self.selection1_type in ('acceptor', 'both'): - self._s2_donors = self._s2.select_atoms( - 'name {0}'.format(' '.join(self.donors))) - self._s2_donors_h = {} - for i, d in enumerate(self._s2_donors): - tmp = self._get_bonded_hydrogens(d) - if tmp: - self._s2_donors_h[i] = tmp - self.logger_debug("Selection 2 donors: {0:d}".format(len(self._s2_donors))) - self.logger_debug("Selection 2 donor hydrogens: {0:d}".format(len(self._s2_donors_h))) - - def logger_debug(self, *args): - if self.debug: - logger.debug(*args) - - def run(self, start=None, stop=None, step=None, verbose=None, **kwargs): - """Analyze trajectory and produce timeseries. - - Stores the hydrogen bond data per frame as - :attr:`HydrogenBondAnalysis.timeseries` (see there for output - format). - - Parameters - ---------- - start : int (optional) - starting frame-index for analysis, ``None`` is the first one, 0. - `start` and `stop` are 0-based frame indices and are used to slice - the trajectory (if supported) [``None``] - stop : int (optional) - last trajectory frame for analysis, ``None`` is the last one [``None``] - step : int (optional) - read every `step` between `start` (included) and `stop` (excluded), - ``None`` selects 1. [``None``] - verbose : bool (optional) - toggle progress meter output - :class:`~MDAnalysis.lib.log.ProgressBar` [``True``] - debug : bool (optional) - enable detailed logging of debugging information; this can create - *very big* log files so it is disabled (``False``) by default; setting - `debug` toggles the debug status for :class:`HydrogenBondAnalysis`, - namely the value of :attr:`HydrogenBondAnalysis.debug`. - - Other Parameters - ---------------- - remove_duplicates : bool (optional) - duplicate hydrogen bonds are removed from output if set to the - default value ``True``; normally, this should not be changed. - - See Also - -------- - :meth:`HydrogenBondAnalysis.generate_table` : - processing the data into a different format. - - - .. versionchanged:: 0.7.6 - Results are not returned, only stored in - :attr:`~HydrogenBondAnalysis.timeseries` and duplicate hydrogen bonds - are removed from output (can be suppressed with `remove_duplicates` = - ``False``) - - .. versionchanged:: 0.11.0 - Accept `quiet` keyword. Analysis will now proceed through frames even if - no donors or acceptors were found in a particular frame. - - .. deprecated:: 0.16 - The `quiet` keyword argument is deprecated in favor of the `verbose` - one. Previous use of `verbose` now corresponds to the new keyword - argument `debug`. - - """ - # sets self.start/stop/step and _pm - self._setup_frames(self._trajectory, start, stop, step) - - logger.info("HBond analysis: starting") - logger.debug("HBond analysis: donors %r", self.donors) - logger.debug("HBond analysis: acceptors %r", self.acceptors) - - remove_duplicates = kwargs.pop('remove_duplicates', True) # False: old behaviour - if not remove_duplicates: - logger.warning("Hidden feature remove_duplicates=False activated: you will probably get duplicate H-bonds.") - - debug = kwargs.pop('debug', None) - if debug is not None and debug != self.debug: - self.debug = debug - logger.debug("Toggling debug to %r", self.debug) - if not self.debug: - logger.debug("HBond analysis: For full step-by-step debugging output use debug=True") - - self._timeseries = [] - self.timesteps = [] - - try: - self.u.trajectory.time - def _get_timestep(): - return self.u.trajectory.time - logger.debug("HBond analysis is recording time step") - except NotImplementedError: - # chained reader or xyz(?) cannot do time yet - def _get_timestep(): - return self.u.trajectory.frame - logger.warning("HBond analysis is recording frame number instead of time step") - - logger.info("Starting analysis (frame index start=%d stop=%d, step=%d)", - self.start, self.stop, self.step) - - for ts in ProgressBar(self.u.trajectory[self.start:self.stop:self.step], - desc="HBond analysis", - verbose=kwargs.get('verbose', False)): - # all bonds for this timestep - frame_results = [] - # dict of tuples (atom.index, atom.index) for quick check if - # we already have the bond (to avoid duplicates) - already_found = {} - - frame = ts.frame - timestep = _get_timestep() - self.timesteps.append(timestep) - - self.logger_debug("Analyzing frame %(frame)d, timestep %(timestep)f ps", vars()) - if self.update_selection1: - self._update_selection_1() - if self.update_selection2: - self._update_selection_2() - - box = self.u.dimensions if self.pbc else None - if self.selection1_type in ('donor', 'both') and self._s2_acceptors: - self.logger_debug("Selection 1 Donors <-> Acceptors") - ns_acceptors = AtomNeighborSearch(self._s2_acceptors, box) - for i, donor_h_set in self._s1_donors_h.items(): - d = self._s1_donors[i] - for h in donor_h_set: - res = ns_acceptors.search(h, self.distance) - for a in res: - angle = distances.calc_angles(d.position, - h.position, - a.position, box=box) - angle = np.rad2deg(angle) - donor_atom = h if self.distance_type != 'heavy' else d - dist = distances.calc_bonds(donor_atom.position, a.position, box=box) - if angle >= self.angle and dist <= self.distance: - self.logger_debug( - "S1-D: {0!s} <-> S2-A: {1!s} {2:f} A, {3:f} DEG".format(h.index, a.index, dist, angle)) - frame_results.append( - [h.index, a.index, - (h.resname, h.resid, h.name), - (a.resname, a.resid, a.name), - dist, angle]) - - already_found[(h.index, a.index)] = True - if self.selection1_type in ('acceptor', 'both') and self._s1_acceptors: - self.logger_debug("Selection 1 Acceptors <-> Donors") - ns_acceptors = AtomNeighborSearch(self._s1_acceptors, box) - for i, donor_h_set in self._s2_donors_h.items(): - d = self._s2_donors[i] - for h in donor_h_set: - res = ns_acceptors.search(h, self.distance) - for a in res: - if remove_duplicates and ( - (h.index, a.index) in already_found or - (a.index, h.index) in already_found): - continue - angle = distances.calc_angles(d.position, - h.position, - a.position, box=box) - angle = np.rad2deg(angle) - donor_atom = h if self.distance_type != 'heavy' else d - dist = distances.calc_bonds(donor_atom.position, a.position, box=box) - if angle >= self.angle and dist <= self.distance: - self.logger_debug( - "S1-A: {0!s} <-> S2-D: {1!s} {2:f} A, {3:f} DEG".format(a.index, h.index, dist, angle)) - frame_results.append( - [h.index, a.index, - (h.resname, h.resid, h.name), - (a.resname, a.resid, a.name), - dist, angle]) - - self._timeseries.append(frame_results) - logger.info("HBond analysis: complete; timeseries %s.timeseries", - self.__class__.__name__) - - @property - def timeseries(self): - """Time series of hydrogen bonds. - - The results of the hydrogen bond analysis can be accessed as a `list` of `list` of `list`: - - 1. `timeseries[i]`: data for the i-th trajectory frame (at time - `timesteps[i]`, see :attr:`timesteps`) - 2. `timeseries[i][j]`: j-th hydrogen bond that was detected at the i-th - frame. - 3. ``donor_index, acceptor_index, - donor_name_str, acceptor_name_str, distance, angle = - timeseries[i][j]``: structure of one hydrogen bond data item - - - In the following description, ``#`` indicates comments that are not - part of the output:: - - results = [ - [ # frame 1 - [ # hbond 1 - , , - , , , - ], - [ # hbond 2 - , , - , , , - ], - .... - ], - [ # frame 2 - [ ... ], [ ... ], ... - ], - ... - ] - - The time of each step is not stored with each hydrogen bond frame but in - :attr:`~HydrogenBondAnalysis.timesteps`. - - - Note - ---- - For instance, to find an acceptor atom in :attr:`Universe.atoms` by - *index* one would use ``u.atoms[acceptor_index]``. - - The :attr:`timeseries` is a managed attribute and it is generated - from the underlying data in :attr:`_timeseries` every time the - attribute is accessed. It is therefore costly to call and if - :attr:`timeseries` is needed repeatedly it is recommended that you - assign to a variable:: - - h = HydrogenBondAnalysis(u) - h.run() - timeseries = h.timeseries - - - See Also - -------- - :attr:`table` : structured array of the data - - - .. versionchanged:: 0.16.1 - :attr:`timeseries` has become a managed attribute and is generated from the stored - :attr:`_timeseries` when needed. :attr:`_timeseries` contains the donor atom and - acceptor atom specifiers as tuples `(resname, resid, atomid)` instead of strings. - - .. versionchanged:: 0.17.0 - The 1-based indices "donor_idx" and "acceptor_idx" are being - removed in favor of the 0-based indices "donor_index" and - "acceptor_index". - - """ - return [[self._reformat_hb(hb) for hb in hframe] for hframe in self._timeseries] - - @staticmethod - def _reformat_hb(hb, atomformat="{0[0]!s}{0[1]!s}:{0[2]!s}"): - """Convert 0.16.1 _timeseries hbond item to 0.16.0 hbond item. - - In 0.16.1, donor and acceptor are stored as a tuple(resname, - resid, atomid). In 0.16.0 and earlier they were stored as a string. - - """ - return (hb[:2] - + [atomformat.format(hb[2]), atomformat.format(hb[3])] - + hb[4:]) - - def generate_table(self): - """Generate a normalised table of the results. - - The table is stored as a :class:`numpy.recarray` in the - attribute :attr:`~HydrogenBondAnalysis.table`. - - See Also - -------- - HydrogenBondAnalysis.table - - """ - if self._timeseries is None: - msg = "No timeseries computed, do run() first." - warnings.warn(msg, category=MissingDataWarning) - logger.warning(msg) - return - - num_records = np.sum([len(hframe) for hframe in self._timeseries]) - # build empty output table - dtype = [ - ("time", float), - ("donor_index", int), ("acceptor_index", int), - ("donor_resnm", "|U4"), ("donor_resid", int), ("donor_atom", "|U4"), - ("acceptor_resnm", "|U4"), ("acceptor_resid", int), ("acceptor_atom", "|U4"), - ("distance", float), ("angle", float)] - # according to Lukas' notes below, using a recarray at this stage is ineffective - # and speedups of ~x10 can be achieved by filling a standard array, like this: - out = np.empty((num_records,), dtype=dtype) - cursor = 0 # current row - for t, hframe in zip(self.timesteps, self._timeseries): - for (donor_index, acceptor_index, donor, - acceptor, distance, angle) in hframe: - # donor|acceptor = (resname, resid, atomid) - out[cursor] = (t, donor_index, acceptor_index) + \ - donor + acceptor + (distance, angle) - cursor += 1 - assert cursor == num_records, "Internal Error: Not all HB records stored" - self.table = out.view(np.recarray) - logger.debug("HBond: Stored results as table with %(num_records)d entries.", vars()) - - def _has_timeseries(self): - has_timeseries = self._timeseries is not None - if not has_timeseries: - msg = "No timeseries computed, do run() first." - warnings.warn(msg, category=MissingDataWarning) - logger.warning(msg) - return has_timeseries - - def count_by_time(self): - """Counts the number of hydrogen bonds per timestep. - - Processes :attr:`HydrogenBondAnalysis._timeseries` into the time series - ``N(t)`` where ``N`` is the total number of observed hydrogen bonds at - time ``t``. - - Returns - ------- - counts : numpy.recarray - The resulting array can be thought of as rows ``(time, N)`` where - ``time`` is the time (in ps) of the time step and ``N`` is the - total number of hydrogen bonds. - - """ - if not self._has_timeseries(): - return - - out = np.empty((len(self.timesteps),), dtype=[('time', float), ('count', int)]) - for cursor, time_count in enumerate(zip(self.timesteps, - (len(series) for series in self._timeseries))): - out[cursor] = time_count - return out.view(np.recarray) - - def count_by_type(self): - """Counts the frequency of hydrogen bonds of a specific type. - - Processes :attr:`HydrogenBondAnalysis._timeseries` and returns a - :class:`numpy.recarray` containing atom indices, residue names, residue - numbers (for donors and acceptors) and the fraction of the total time - during which the hydrogen bond was detected. - - Returns - ------- - counts : numpy.recarray - Each row of the array contains data to define a unique hydrogen - bond together with the frequency (fraction of the total time) that - it has been observed. - - - .. versionchanged:: 0.17.0 - The 1-based indices "donor_idx" and "acceptor_idx" are being - deprecated in favor of zero-based indices. - """ - if not self._has_timeseries(): - return - - hbonds = defaultdict(int) - for hframe in self._timeseries: - for (donor_index, acceptor_index, donor, - acceptor, distance, angle) in hframe: - donor_resnm, donor_resid, donor_atom = donor - acceptor_resnm, acceptor_resid, acceptor_atom = acceptor - # generate unambigous key for current hbond \ - # (the donor_heavy_atom placeholder '?' is added later) - # idx_zero is redundant for an unambigous key, but included for - # consistency. - hb_key = ( - donor_index, acceptor_index, - donor_resnm, donor_resid, "?", donor_atom, - acceptor_resnm, acceptor_resid, acceptor_atom) - - hbonds[hb_key] += 1 - - # build empty output table - dtype = [ - ("donor_index", int), ("acceptor_index", int), ('donor_resnm', 'U4'), - ('donor_resid', int), ('donor_heavy_atom', 'U4'), ('donor_atom', 'U4'), - ('acceptor_resnm', 'U4'), ('acceptor_resid', int), ('acceptor_atom', 'U4'), - ('frequency', float) - ] - out = np.empty((len(hbonds),), dtype=dtype) - - # float because of division later - tsteps = float(len(self.timesteps)) - for cursor, (key, count) in enumerate(hbonds.items()): - out[cursor] = key + (count / tsteps,) - - # return array as recarray - # The recarray has not been used within the function, because accessing the - # the elements of a recarray (3.65 us) is much slower then accessing those - # of a ndarray (287 ns). - r = out.view(np.recarray) - - # patch in donor heavy atom names (replaces '?' in the key) - h2donor = self._donor_lookup_table_byindex() - r.donor_heavy_atom[:] = [h2donor[idx] for idx in r.donor_index] - - return r - - def timesteps_by_type(self): - """Frames during which each hydrogen bond existed, sorted by hydrogen bond. - - Processes :attr:`HydrogenBondAnalysis._timeseries` and returns a - :class:`numpy.recarray` containing atom indices, residue names, residue - numbers (for donors and acceptors) and each timestep at which the - hydrogen bond was detected. - - In principle, this is the same as :attr:`~HydrogenBondAnalysis.table` - but sorted by hydrogen bond and with additional data for the - *donor_heavy_atom* and angle and distance omitted. - - - Returns - ------- - data : numpy.recarray - - - .. versionchanged:: 0.17.0 - The 1-based indices "donor_idx" and "acceptor_idx" are being - replaced in favor of zero-based indices. - - """ - if not self._has_timeseries(): - return - - hbonds = defaultdict(list) - for (t, hframe) in zip(self.timesteps, self._timeseries): - for (donor_index, acceptor_index, donor, - acceptor, distance, angle) in hframe: - donor_resnm, donor_resid, donor_atom = donor - acceptor_resnm, acceptor_resid, acceptor_atom = acceptor - # generate unambigous key for current hbond - # (the donor_heavy_atom placeholder '?' is added later) - # idx_zero is redundant for key but added for consistency - hb_key = ( - donor_index, acceptor_index, - donor_resnm, donor_resid, "?", donor_atom, - acceptor_resnm, acceptor_resid, acceptor_atom) - hbonds[hb_key].append(t) - - out_nrows = 0 - # count number of timesteps per key to get length of output table - for ts_list in hbonds.values(): - out_nrows += len(ts_list) - - # build empty output table - dtype = [ - ('donor_index', int), - ('acceptor_index', int), ('donor_resnm', 'U4'), ('donor_resid', int), - ('donor_heavy_atom', 'U4'), ('donor_atom', 'U4'),('acceptor_resnm', 'U4'), - ('acceptor_resid', int), ('acceptor_atom', 'U4'), ('time', float)] - out = np.empty((out_nrows,), dtype=dtype) - - out_row = 0 - for (key, times) in hbonds.items(): - for tstep in times: - out[out_row] = key + (tstep,) - out_row += 1 - - # return array as recarray - # The recarray has not been used within the function, because accessing the - # the elements of a recarray (3.65 us) is much slower then accessing those - # of a ndarray (287 ns). - r = out.view(np.recarray) - - # patch in donor heavy atom names (replaces '?' in the key) - h2donor = self._donor_lookup_table_byindex() - r.donor_heavy_atom[:] = [h2donor[idx] for idx in r.donor_index] - - return r - - def _donor_lookup_table_byres(self): - """Look-up table to identify the donor heavy atom from resid and hydrogen name. - - Assumptions: - * resids are unique - * hydrogen atom names are unique within a residue - * selections have not changed (because we are simply looking at the last content - of the donors and donor hydrogen lists) - - Donors from `selection1` and `selection2` are merged. - - Output dictionary ``h2donor`` can be used as:: - - heavy_atom_name = h2donor[resid][hydrogen_name] - - """ - s1d = self._s1_donors # list of donor Atom instances - s1h = self._s1_donors_h # dict indexed by donor position in donor list, containg AtomGroups of H - s2d = self._s2_donors - s2h = self._s2_donors_h - - def _make_dict(donors, hydrogens): - # two steps so that entry for one residue can be UPDATED for multiple donors - d = dict((donors[k].resid, {}) for k in range(len(donors)) if k in hydrogens) - for k in range(len(donors)): - if k in hydrogens: - d[donors[k].resid].update(dict((atom.name, donors[k].name) for atom in hydrogens[k])) - return d - - h2donor = _make_dict(s2d, s2h) # 2 is typically the larger group - # merge (in principle h2donor.update(_make_dict(s1d, s1h) should be sufficient - # with our assumptions but the following should be really safe) - for resid, names in _make_dict(s1d, s1h).items(): - if resid in h2donor: - h2donor[resid].update(names) - else: - h2donor[resid] = names - - return h2donor - - def _donor_lookup_table_byindex(self): - """Look-up table to identify the donor heavy atom from hydrogen atom index. - - Assumptions: - * selections have not changed (because we are simply looking at the last content - of the donors and donor hydrogen lists) - - Donors from `selection1` and `selection2` are merged. - - Output dictionary ``h2donor`` can be used as:: - - heavy_atom_name = h2donor[index] - - """ - s1d = self._s1_donors # list of donor Atom instances - s1h = self._s1_donors_h # dict indexed by donor position in donor list, containg AtomGroups of H - s2d = self._s2_donors - s2h = self._s2_donors_h - - def _make_dict(donors, hydrogens): - #return dict(flatten_1([(atom.id, donors[k].name) for atom in hydrogens[k]] for k in range(len(donors)) - # if k in hydrogens)) - x = [] - for k in range(len(donors)): - if k in hydrogens: - x.extend([(atom.index, donors[k].name) for atom in hydrogens[k]]) - return dict(x) - - h2donor = _make_dict(s2d, s2h) # 2 is typically the larger group - h2donor.update(_make_dict(s1d, s1h)) - - return h2donor - - - def autocorrelation(self, tau_max=20, window_step=1, intermittency=0): - """ - Computes and returns the autocorrelation (HydrogenBondLifetimes ) on the computed hydrogen bonds. - - Parameters - ---------- - window_step : int, optional - Jump every `step`-th frame. This is compatible but independant of the taus used. - Note that `step` and `tau_max` work consistently with intermittency. - tau_max : int, optional - Survival probability is calculated for the range 1 <= `tau` <= `tau_max` - intermittency : int, optional - The maximum number of consecutive frames for which a bond can disappear but be counted as present if it - returns at the next frame. An intermittency of `0` is equivalent to a continuous autocorrelation, which does - not allow for the hydrogen bond disappearance. For example, for `intermittency=2`, any given hbond may - disappear for up to two consecutive frames yet be treated as being present at all frames. - The default is continuous (0). - - Returns - ------- - tau_timeseries : list - tau from 1 to `tau_max`. Saved in the field tau_timeseries. - timeseries : list - autcorrelation value for each value of `tau`. Saved in the field timeseries. - timeseries_data: list - raw datapoints from which the average is taken (timeseries). - Time dependancy and distribution can be extracted. - """ - - if self._timeseries is None: - logging.error("Autocorrelation analysis of hydrogen bonds cannot be done before the hydrogen bonds are found") - logging.error("Autocorrelation: Please use the .run() before calling this function") - return - - if self.step != 1: - logging.warning("Autocorrelation function should be carried out on consecutive frames. ") - logging.warning("Autocorrelation: if you would like to allow bonds to break and reform, please use 'intermittency'") - - # Extract the hydrogen bonds IDs only in the format [set(superset(x1,x2), superset(x3,x4)), ..] - hydrogen_bonds = self.timeseries - found_hydrogen_bonds = [{frozenset(bond[0:2]) for bond in frame} for frame in hydrogen_bonds] - - intermittent_hbonds = correct_intermittency(found_hydrogen_bonds, intermittency=intermittency) - tau_timeseries, timeseries, timeseries_data = autocorrelation(intermittent_hbonds, tau_max, - window_step=window_step) - self.acf_tau_timeseries = tau_timeseries - self.acf_timeseries = timeseries - # user can investigate the distribution and sample size - self.acf_timeseries_data = timeseries_data - - return self diff --git a/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py b/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py index ae993d47c09..0387e0827d4 100644 --- a/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py +++ b/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py @@ -20,589 +20,14 @@ # MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. # J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 # -""" -Hydrogen bond autocorrelation --- :mod:`MDAnalysis.analysis.hbonds.hbond_autocorrel` -==================================================================================== - -:Author: Richard J. Gowers -:Year: 2014 -:Copyright: GNU Public License v3 - -.. versionadded:: 0.9.0 - -Description ------------ - -Calculates the time autocorrelation function, :math:`C_x(t)`, for the hydrogen -bonds in the selections passed to it. The population of hydrogen bonds at a -given startpoint, :math:`t_0`, is evaluated based on geometric criteria and -then the lifetime of these bonds is monitored over time. Multiple passes -through the trajectory are used to build an average of the behaviour. - -.. math:: - C_x(t) = \\left \\langle \\frac{h_{ij}(t_0) h_{ij}(t_0 + t)}{h_{ij}(t_0)^2} \\right\\rangle - -The subscript :math:`x` refers to the definition of lifetime being used, either -continuous or intermittent. The continuous definition measures the time that -a particular hydrogen bond remains continuously attached, whilst the -intermittent definition allows a bond to break and then subsequently reform and -be counted again. The relevent lifetime, :math:`\\tau_x`, can then be found -via integration of this function - -.. math:: - \\tau_x = \\int_0^\\infty C_x(t) dt` - -For this, the observed behaviour is fitted to a multi exponential function, -using 2 exponents for the continuous lifetime and 3 for the intermittent -lifetime. - - :math:`C_x(t) = A_1 \\exp( - t / \\tau_1) - + A_2 \\exp( - t / \\tau_2) - [+ A_3 \\exp( - t / \\tau_3)]` - -Where the final pre expoential factor :math:`A_n` is subject to the condition: - - :math:`A_n = 1 - \\sum\\limits_{i=1}^{n-1} A_i` - -For further details see [Gowers2015]_. - -.. rubric:: References - -.. [Gowers2015] Richard J. Gowers and Paola Carbone, - A multiscale approach to model hydrogen bonding: The case of polyamide - The Journal of Chemical Physics, 142, 224907 (2015), - DOI:http://dx.doi.org/10.1063/1.4922445 - -Input ------ - -Three AtomGroup selections representing the **hydrogens**, **donors** and -**acceptors** that you wish to analyse. Note that the **hydrogens** and -**donors** selections must be aligned, that is **hydrogens[0]** and -**donors[0]** must represent a bonded pair. For systems such as water, -this will mean that each oxygen appears twice in the **donors** AtomGroup. -The function :func:`find_hydrogen_donors` can be used to construct the donor -AtomGroup -:: - - import MDAnalysis as mda - from MDAnalysis.analysis import hbonds - from MDAnalysis.tests.datafiles import waterPSF, waterDCD - u = mda.Universe(waterPSF, waterDCD) - hydrogens = u.select_atoms('name H*') - donors = hbonds.find_hydrogen_donors(hydrogens) - - -Note that this requires the Universe to have bond information. If this isn't -present in the topology file, the -:meth:`MDAnalysis.core.groups.AtomGroup.guess_bonds` method can be used -as so -:: - - import MDAnalysis as mda - from MDAnalysis.analysis import hbonds - from MDAnalysis.tests.datafiles import GRO - # we could load the Universe with guess_bonds=True - # but this would guess **all** bonds - u = mda.Universe(GRO) - water = u.select_atoms('resname SOL and not type DUMMY') - # guess bonds only within our water atoms - # this adds the bond information directly to the Universe - water.guess_bonds() - hydrogens = water.select_atoms('type H') - # this is now possible as we guessed the bonds - donors = hbonds.find_hydrogen_donors(hydrogens) - - -The keyword **exclusions** allows a tuple of array addresses to be provided, -(Hidx, Aidx),these pairs of hydrogen-acceptor are then not permitted to be -counted as part of the analysis. This could be used to exclude the -consideration of hydrogen bonds within the same functional group, or to perform -analysis on strictly intermolecular hydrogen bonding. - -Hydrogen bonds are defined on the basis of geometric criteria; a -Hydrogen-Acceptor distance of less then **dist_crit** and a -Donor-Hydrogen-Acceptor angle of greater than **angle_crit**. - -The length of trajectory to analyse in ps, **sample_time**, is used to choose -what length to analyse. - -Multiple passes, controlled by the keyword **nruns**, through the trajectory -are performed and an average calculated. For each pass, **nsamples** number -of points along the run are calculated. - - -Output ------- - -All results of the analysis are available through the *solution* attribute. -This is a dictionary with the following keys - -- *results* The raw results of the time autocorrelation function. -- *time* Time axis, in ps, for the results. -- *fit* Results of the exponential curve fitting procedure. For the - *continuous* lifetime these are (A1, tau1, tau2), for the - *intermittent* lifetime these are (A1, A2, tau1, tau2, tau3). -- *tau* Calculated time constant from the fit. -- *estimate* Estimated values generated by the calculated fit. - -The *results* and *time* values are only filled after the :meth:`run` method, -*fit*, *tau* and *estimate* are filled after the :meth:`solve` method has been -used. - - -Worked Example for Polyamide ----------------------------- - -This example finds the continuous hydrogen bond lifetime between N-H..O in a -polyamide system. This will use the default geometric definition for hydrogen -bonds of length 3.0 Å and angle of 130 degrees. -It will observe a window of 2.0 ps (`sample_time`) and try to gather 1000 -sample point within this time window (this relies upon the trajectory being -sampled frequently enough). This process is repeated for 20 different start -points to build a better average. - -:: - - import MDAnalysis as mda - from MDAnalysis.analysis import hbonds - from MDAnalysis.tests.datafiles import TRZ_psf, TRZ - import matplotlib.pyplot as plt - # load system - u = mda.Universe(TRZ_psf, TRZ) - # select atoms of interest into AtomGroups - H = u.select_atoms('name Hn') - N = u.select_atoms('name N') - O = u.select_atoms('name O') - # create analysis object - hb_ac = hbonds.HydrogenBondAutoCorrel(u, - acceptors=O, hydrogens=H, donors=N, - bond_type='continuous', - sample_time=2.0, nsamples=1000, nruns=20) - # call run to gather results - hb_ac.run() - # attempt to fit results to exponential equation - hb_ac.solve() - # grab results from inside object - tau = hb_ac.solution['tau'] - time = hb_ac.solution['time'] - results = hb_ac.solution['results'] - estimate = hb_ac.solution['estimate'] - # plot to check! - plt.plot(time, results, 'ro') - plt.plot(time, estimate) - plt.show() - - -Classes -------- - -.. autofunction:: find_hydrogen_donors - -.. autoclass:: HydrogenBondAutoCorrel - :members: - - -""" -import numpy as np -import scipy.optimize import warnings -from MDAnalysis.lib.log import ProgressBar -from MDAnalysis.lib.distances import capped_distance, calc_angles, calc_bonds -from MDAnalysis.core.groups import requires - -from MDAnalysis.due import due, Doi -due.cite(Doi("10.1063/1.4922445"), - description="Hydrogen bonding autocorrelation time", - path='MDAnalysis.analysis.hbonds.hbond_autocorrel', -) -del Doi - - -@requires('bonds') -def find_hydrogen_donors(hydrogens): - """Returns the donor atom for each hydrogen - - Parameters - ---------- - hydrogens : AtomGroup - the hydrogens that will form hydrogen bonds - - Returns - ------- - donors : AtomGroup - the donor atom for each hydrogen, found via bond information - - - .. versionadded:: 0.20.0 - """ - return sum(h.bonded_atoms[0] for h in hydrogens) - - -class HydrogenBondAutoCorrel(object): - """Perform a time autocorrelation of the hydrogen bonds in the system. - - Parameters - ---------- - universe : Universe - MDAnalysis Universe that all selections belong to - hydrogens : AtomGroup - AtomGroup of Hydrogens which can form hydrogen bonds - acceptors : AtomGroup - AtomGroup of all Acceptor atoms - donors : AtomGroup - The atoms which are connected to the hydrogens. This group - must be identical in length to the hydrogen group and matched, - ie hydrogens[0] is bonded to donors[0]. - For water, this will mean a donor appears twice in this - group, once for each hydrogen. - bond_type : str - Which definition of hydrogen bond lifetime to consider, either - 'continuous' or 'intermittent'. - exclusions : ndarray, optional - Indices of Hydrogen-Acceptor pairs to be excluded. - With nH and nA Hydrogens and Acceptors, a (nH x nA) array of distances - is calculated, *exclusions* is used as a mask on this array to exclude - some pairs. - angle_crit : float, optional - The angle (in degrees) which all bonds must be greater than [130.0] - dist_crit : float, optional - The maximum distance (in Angstroms) for a hydrogen bond [3.0] - sample_time : float, optional - The amount of time, in ps, that you wish to observe hydrogen - bonds for [100] - nruns : int, optional - The number of different start points within the trajectory - to use [1] - nsamples : int, optional - Within each run, the number of frames to analyse [50] - pbc : bool, optional - Whether to consider periodic boundaries in calculations [``True``] - - ..versionchanged: 1.0.0 - ``save_results()`` method was removed. You can instead use ``np.savez()`` - on :attr:`HydrogenBondAutoCorrel.solution['time']` and - :attr:`HydrogenBondAutoCorrel.solution['results']` instead. - """ - - def __init__(self, universe, - hydrogens=None, acceptors=None, donors=None, - bond_type=None, - exclusions=None, - angle_crit=130.0, dist_crit=3.0, # geometric criteria - sample_time=100, # expected length of the decay in ps - time_cut=None, # cutoff time for intermittent hbonds - nruns=1, # number of times to iterate through the trajectory - nsamples=50, # number of different points to sample in a run - pbc=True): - - #warnings.warn("This class is deprecated, use analysis.hbonds.HydrogenBondAnalysis " - # "which has .autocorrelation function", - # category=DeprecationWarning) - - self.u = universe - # check that slicing is possible - try: - self.u.trajectory[0] - except Exception: - raise ValueError("Trajectory must support slicing") from None - - self.h = hydrogens - self.a = acceptors - self.d = donors - if not len(self.h) == len(self.d): - raise ValueError("Donors and Hydrogen groups must be identical " - "length. Try using `find_hydrogen_donors`.") - - self.exclusions = exclusions - if self.exclusions: - if not len(self.exclusions[0]) == len(self.exclusions[1]): - raise ValueError( - "'exclusion' must be two arrays of identical length") - - self.bond_type = bond_type - if self.bond_type not in ['continuous', 'intermittent']: - raise ValueError( - "bond_type must be either 'continuous' or 'intermittent'") - - self.a_crit = np.deg2rad(angle_crit) - self.d_crit = dist_crit - self.pbc = pbc - self.sample_time = sample_time - self.nruns = nruns - self.nsamples = nsamples - self._slice_traj(sample_time) - self.time_cut = time_cut - - self.solution = { - 'results': None, # Raw results - 'time': None, # Time axis of raw results - 'fit': None, # coefficients for fit - 'tau': None, # integral of exponential fit - 'estimate': None # y values of fit against time - } - - def _slice_traj(self, sample_time): - """Set up start and end points in the trajectory for the - different passes - """ - dt = self.u.trajectory.dt # frame step size in time - req_frames = int(sample_time / dt) # the number of frames required - - n_frames = len(self.u.trajectory) - if req_frames > n_frames: - warnings.warn("Number of required frames ({}) greater than the" - " number of frames in trajectory ({})" - .format(req_frames, n_frames), RuntimeWarning) - - numruns = self.nruns - if numruns > n_frames: - numruns = n_frames - warnings.warn("Number of runs ({}) greater than the number of" - " frames in trajectory ({})" - .format(self.nruns, n_frames), RuntimeWarning) - - self._starts = np.arange(0, n_frames, n_frames / numruns, dtype=int) - # limit stop points using clip - self._stops = np.clip(self._starts + req_frames, 0, n_frames) - - self._skip = req_frames // self.nsamples - if self._skip == 0: # If nsamples > req_frames - warnings.warn("Desired number of sample points too high, using {0}" - .format(req_frames), RuntimeWarning) - self._skip = 1 - - def run(self, force=False): - """Run all the required passes - - Parameters - ---------- - force : bool, optional - Will overwrite previous results if they exist - """ - # if results exist, don't waste any time - if self.solution['results'] is not None and not force: - return - - main_results = np.zeros_like(np.arange(self._starts[0], - self._stops[0], - self._skip), - dtype=np.float32) - # for normalising later - counter = np.zeros_like(main_results, dtype=np.float32) - - for i, (start, stop) in ProgressBar(enumerate(zip(self._starts, - self._stops)), total=self.nruns, - desc="Performing run"): - - # needed else trj seek thinks a np.int64 isn't an int? - results = self._single_run(int(start), int(stop)) - - nresults = len(results) - if nresults == len(main_results): - main_results += results - counter += 1.0 - else: - main_results[:nresults] += results - counter[:nresults] += 1.0 - - main_results /= counter - - self.solution['time'] = np.arange( - len(main_results), - dtype=np.float32) * self.u.trajectory.dt * self._skip - self.solution['results'] = main_results - - def _single_run(self, start, stop): - """Perform a single pass of the trajectory""" - self.u.trajectory[start] - - # Calculate partners at t=0 - box = self.u.dimensions if self.pbc else None - - # 2d array of all distances - pair, d = capped_distance(self.h.positions, self.a.positions, max_cutoff=self.d_crit, box=box) - if self.exclusions: - # set to above dist crit to exclude - exclude = np.column_stack((self.exclusions[0], self.exclusions[1])) - pair = np.delete(pair, np.where(pair==exclude), 0) - - hidx, aidx = np.transpose(pair) - - - a = calc_angles(self.d.positions[hidx], self.h.positions[hidx], - self.a.positions[aidx], box=box) - # from amongst those, who also satisfiess angle crit - idx2 = np.where(a > self.a_crit) - hidx = hidx[idx2] - aidx = aidx[idx2] - - nbonds = len(hidx) # number of hbonds at t=0 - results = np.zeros_like(np.arange(start, stop, self._skip), - dtype=np.float32) - - if self.time_cut: - # counter for time criteria - count = np.zeros(nbonds, dtype=np.float64) - - for i, ts in enumerate(self.u.trajectory[start:stop:self._skip]): - box = self.u.dimensions if self.pbc else None - - d = calc_bonds(self.h.positions[hidx], self.a.positions[aidx], - box=box) - a = calc_angles(self.d.positions[hidx], self.h.positions[hidx], - self.a.positions[aidx], box=box) - - winners = (d < self.d_crit) & (a > self.a_crit) - results[i] = winners.sum() - - if self.bond_type == 'continuous': - # Remove losers for continuous definition - hidx = hidx[np.where(winners)] - aidx = aidx[np.where(winners)] - elif self.bond_type == 'intermittent': - if self.time_cut: - # Add to counter of where losers are - count[~ winners] += self._skip * self.u.trajectory.dt - count[winners] = 0 # Reset timer for winners - - # Remove if you've lost too many times - # New arrays contain everything but removals - hidx = hidx[count < self.time_cut] - aidx = aidx[count < self.time_cut] - count = count[count < self.time_cut] - else: - pass - - if len(hidx) == 0: # Once everyone has lost, the fun stops - break - - results /= nbonds - - return results - - def solve(self, p_guess=None): - """Fit results to an multi exponential decay and integrate to find - characteristic time - - Parameters - ---------- - p_guess : tuple of floats, optional - Initial guess for the leastsq fit, must match the shape of the - expected coefficients - - - Continuous defition results are fitted to a double exponential with - :func:`scipy.optimize.leastsq`, intermittent definition are fit to a - triple exponential. - - The results of this fitting procedure are saved into the *fit*, - *tau* and *estimate* keywords in the solution dict. - - - *fit* contains the coefficients, (A1, tau1, tau2) or - (A1, A2, tau1, tau2, tau3) - - *tau* contains the calculated lifetime in ps for the hydrogen - bonding - - *estimate* contains the estimate provided by the fit of the time - autocorrelation function - - In addition, the output of the :func:`~scipy.optimize.leastsq` function - is saved into the solution dict - - - *infodict* - - *mesg* - - *ier* - - """ - - if self.solution['results'] is None: - raise ValueError( - "Results have not been generated use, the run method first") - - # Prevents an odd bug with leastsq where it expects - # double precision data sometimes... - time = self.solution['time'].astype(np.float64) - results = self.solution['results'].astype(np.float64) - - def within_bounds(p): - """Returns True/False if boundary conditions are met or not. - Uses length of p to detect whether it's handling continuous / - intermittent - - Boundary conditions are: - 0 < A_x < 1 - sum(A_x) < 1 - 0 < tau_x - """ - if len(p) == 3: - A1, tau1, tau2 = p - return (A1 > 0.0) & (A1 < 1.0) & \ - (tau1 > 0.0) & (tau2 > 0.0) - elif len(p) == 5: - A1, A2, tau1, tau2, tau3 = p - return (A1 > 0.0) & (A1 < 1.0) & (A2 > 0.0) & \ - (A2 < 1.0) & ((A1 + A2) < 1.0) & \ - (tau1 > 0.0) & (tau2 > 0.0) & (tau3 > 0.0) - - def err(p, x, y): - """Custom residual function, returns real residual if all - boundaries are met, else returns a large number to trick the - leastsq algorithm - """ - if within_bounds(p): - return y - self._my_solve(x, *p) - else: - return np.full_like(y, 100000) - - def double(x, A1, tau1, tau2): - """ Sum of two exponential functions """ - A2 = 1 - A1 - return A1 * np.exp(-x / tau1) + A2 * np.exp(-x / tau2) - - def triple(x, A1, A2, tau1, tau2, tau3): - """ Sum of three exponential functions """ - A3 = 1 - (A1 + A2) - return A1 * np.exp(-x / tau1) + A2 * np.exp(-x / tau2) + A3 * np.exp(-x / tau3) - - if self.bond_type == 'continuous': - self._my_solve = double - - if p_guess is None: - p_guess = (0.5, 10 * self.sample_time, self.sample_time) - - p, cov, infodict, mesg, ier = scipy.optimize.leastsq( - err, p_guess, args=(time, results), full_output=True) - self.solution['fit'] = p - A1, tau1, tau2 = p - A2 = 1 - A1 - self.solution['tau'] = A1 * tau1 + A2 * tau2 - else: - self._my_solve = triple - - if p_guess is None: - p_guess = (0.33, 0.33, 10 * self.sample_time, - self.sample_time, 0.1 * self.sample_time) - - p, cov, infodict, mesg, ier = scipy.optimize.leastsq( - err, p_guess, args=(time, results), full_output=True) - self.solution['fit'] = p - A1, A2, tau1, tau2, tau3 = p - A3 = 1 - A1 - A2 - self.solution['tau'] = A1 * tau1 + A2 * tau2 + A3 * tau3 - - self.solution['infodict'] = infodict - self.solution['mesg'] = mesg - self.solution['ier'] = ier - - if ier in [1, 2, 3, 4]: # solution found if ier is one of these values - self.solution['estimate'] = self._my_solve( - self.solution['time'], *p) - else: - warnings.warn("Solution to results not found", RuntimeWarning) +with warnings.catch_warnings(): + warnings.simplefilter('always', DeprecationWarning) + wmsg = ("This module has been moved to " + "MDAnalysis.analysis.hydrogenbonds.hbond_autocorrel. " + "The module will be removed from here in MDAnalysis 2.1.0.") + warnings.warn(wmsg, category=DeprecationWarning) - def __repr__(self): - return ("" - "".format(btype=self.bond_type, n=len(self.h))) +from MDAnalysis.analysis.hydrogenbonds.hbond_autocorrel import * diff --git a/package/MDAnalysis/analysis/hydrogenbonds/__init__.py b/package/MDAnalysis/analysis/hydrogenbonds/__init__.py index d0284cf6f18..9476d064138 100644 --- a/package/MDAnalysis/analysis/hydrogenbonds/__init__.py +++ b/package/MDAnalysis/analysis/hydrogenbonds/__init__.py @@ -23,8 +23,10 @@ __all__ = [ 'HydrogenBondAnalysis', - 'WaterBridgeAnalysis' + 'WaterBridgeAnalysis', + 'HydrogenBondAutoCorrel', 'find_hydrogen_donors' ] from .hbond_analysis import HydrogenBondAnalysis from .wbridge_analysis import WaterBridgeAnalysis +from .hbond_autocorrel import HydrogenBondAutoCorrel, find_hydrogen_donors diff --git a/package/MDAnalysis/analysis/hydrogenbonds/hbond_autocorrel.py b/package/MDAnalysis/analysis/hydrogenbonds/hbond_autocorrel.py new file mode 100644 index 00000000000..ae993d47c09 --- /dev/null +++ b/package/MDAnalysis/analysis/hydrogenbonds/hbond_autocorrel.py @@ -0,0 +1,608 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 +# +# MDAnalysis --- https://www.mdanalysis.org +# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, +# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. +# MDAnalysis: A Python package for the rapid analysis of molecular dynamics +# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th +# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. +# doi: 10.25080/majora-629e541a-00e +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 +# +""" +Hydrogen bond autocorrelation --- :mod:`MDAnalysis.analysis.hbonds.hbond_autocorrel` +==================================================================================== + +:Author: Richard J. Gowers +:Year: 2014 +:Copyright: GNU Public License v3 + +.. versionadded:: 0.9.0 + +Description +----------- + +Calculates the time autocorrelation function, :math:`C_x(t)`, for the hydrogen +bonds in the selections passed to it. The population of hydrogen bonds at a +given startpoint, :math:`t_0`, is evaluated based on geometric criteria and +then the lifetime of these bonds is monitored over time. Multiple passes +through the trajectory are used to build an average of the behaviour. + +.. math:: + C_x(t) = \\left \\langle \\frac{h_{ij}(t_0) h_{ij}(t_0 + t)}{h_{ij}(t_0)^2} \\right\\rangle + +The subscript :math:`x` refers to the definition of lifetime being used, either +continuous or intermittent. The continuous definition measures the time that +a particular hydrogen bond remains continuously attached, whilst the +intermittent definition allows a bond to break and then subsequently reform and +be counted again. The relevent lifetime, :math:`\\tau_x`, can then be found +via integration of this function + +.. math:: + \\tau_x = \\int_0^\\infty C_x(t) dt` + +For this, the observed behaviour is fitted to a multi exponential function, +using 2 exponents for the continuous lifetime and 3 for the intermittent +lifetime. + + :math:`C_x(t) = A_1 \\exp( - t / \\tau_1) + + A_2 \\exp( - t / \\tau_2) + [+ A_3 \\exp( - t / \\tau_3)]` + +Where the final pre expoential factor :math:`A_n` is subject to the condition: + + :math:`A_n = 1 - \\sum\\limits_{i=1}^{n-1} A_i` + +For further details see [Gowers2015]_. + +.. rubric:: References + +.. [Gowers2015] Richard J. Gowers and Paola Carbone, + A multiscale approach to model hydrogen bonding: The case of polyamide + The Journal of Chemical Physics, 142, 224907 (2015), + DOI:http://dx.doi.org/10.1063/1.4922445 + +Input +----- + +Three AtomGroup selections representing the **hydrogens**, **donors** and +**acceptors** that you wish to analyse. Note that the **hydrogens** and +**donors** selections must be aligned, that is **hydrogens[0]** and +**donors[0]** must represent a bonded pair. For systems such as water, +this will mean that each oxygen appears twice in the **donors** AtomGroup. +The function :func:`find_hydrogen_donors` can be used to construct the donor +AtomGroup +:: + + import MDAnalysis as mda + from MDAnalysis.analysis import hbonds + from MDAnalysis.tests.datafiles import waterPSF, waterDCD + u = mda.Universe(waterPSF, waterDCD) + hydrogens = u.select_atoms('name H*') + donors = hbonds.find_hydrogen_donors(hydrogens) + + +Note that this requires the Universe to have bond information. If this isn't +present in the topology file, the +:meth:`MDAnalysis.core.groups.AtomGroup.guess_bonds` method can be used +as so +:: + + import MDAnalysis as mda + from MDAnalysis.analysis import hbonds + from MDAnalysis.tests.datafiles import GRO + # we could load the Universe with guess_bonds=True + # but this would guess **all** bonds + u = mda.Universe(GRO) + water = u.select_atoms('resname SOL and not type DUMMY') + # guess bonds only within our water atoms + # this adds the bond information directly to the Universe + water.guess_bonds() + hydrogens = water.select_atoms('type H') + # this is now possible as we guessed the bonds + donors = hbonds.find_hydrogen_donors(hydrogens) + + +The keyword **exclusions** allows a tuple of array addresses to be provided, +(Hidx, Aidx),these pairs of hydrogen-acceptor are then not permitted to be +counted as part of the analysis. This could be used to exclude the +consideration of hydrogen bonds within the same functional group, or to perform +analysis on strictly intermolecular hydrogen bonding. + +Hydrogen bonds are defined on the basis of geometric criteria; a +Hydrogen-Acceptor distance of less then **dist_crit** and a +Donor-Hydrogen-Acceptor angle of greater than **angle_crit**. + +The length of trajectory to analyse in ps, **sample_time**, is used to choose +what length to analyse. + +Multiple passes, controlled by the keyword **nruns**, through the trajectory +are performed and an average calculated. For each pass, **nsamples** number +of points along the run are calculated. + + +Output +------ + +All results of the analysis are available through the *solution* attribute. +This is a dictionary with the following keys + +- *results* The raw results of the time autocorrelation function. +- *time* Time axis, in ps, for the results. +- *fit* Results of the exponential curve fitting procedure. For the + *continuous* lifetime these are (A1, tau1, tau2), for the + *intermittent* lifetime these are (A1, A2, tau1, tau2, tau3). +- *tau* Calculated time constant from the fit. +- *estimate* Estimated values generated by the calculated fit. + +The *results* and *time* values are only filled after the :meth:`run` method, +*fit*, *tau* and *estimate* are filled after the :meth:`solve` method has been +used. + + +Worked Example for Polyamide +---------------------------- + +This example finds the continuous hydrogen bond lifetime between N-H..O in a +polyamide system. This will use the default geometric definition for hydrogen +bonds of length 3.0 Å and angle of 130 degrees. +It will observe a window of 2.0 ps (`sample_time`) and try to gather 1000 +sample point within this time window (this relies upon the trajectory being +sampled frequently enough). This process is repeated for 20 different start +points to build a better average. + +:: + + import MDAnalysis as mda + from MDAnalysis.analysis import hbonds + from MDAnalysis.tests.datafiles import TRZ_psf, TRZ + import matplotlib.pyplot as plt + # load system + u = mda.Universe(TRZ_psf, TRZ) + # select atoms of interest into AtomGroups + H = u.select_atoms('name Hn') + N = u.select_atoms('name N') + O = u.select_atoms('name O') + # create analysis object + hb_ac = hbonds.HydrogenBondAutoCorrel(u, + acceptors=O, hydrogens=H, donors=N, + bond_type='continuous', + sample_time=2.0, nsamples=1000, nruns=20) + # call run to gather results + hb_ac.run() + # attempt to fit results to exponential equation + hb_ac.solve() + # grab results from inside object + tau = hb_ac.solution['tau'] + time = hb_ac.solution['time'] + results = hb_ac.solution['results'] + estimate = hb_ac.solution['estimate'] + # plot to check! + plt.plot(time, results, 'ro') + plt.plot(time, estimate) + plt.show() + + +Classes +------- + +.. autofunction:: find_hydrogen_donors + +.. autoclass:: HydrogenBondAutoCorrel + :members: + + +""" +import numpy as np +import scipy.optimize + +import warnings + +from MDAnalysis.lib.log import ProgressBar +from MDAnalysis.lib.distances import capped_distance, calc_angles, calc_bonds +from MDAnalysis.core.groups import requires + +from MDAnalysis.due import due, Doi +due.cite(Doi("10.1063/1.4922445"), + description="Hydrogen bonding autocorrelation time", + path='MDAnalysis.analysis.hbonds.hbond_autocorrel', +) +del Doi + + +@requires('bonds') +def find_hydrogen_donors(hydrogens): + """Returns the donor atom for each hydrogen + + Parameters + ---------- + hydrogens : AtomGroup + the hydrogens that will form hydrogen bonds + + Returns + ------- + donors : AtomGroup + the donor atom for each hydrogen, found via bond information + + + .. versionadded:: 0.20.0 + """ + return sum(h.bonded_atoms[0] for h in hydrogens) + + +class HydrogenBondAutoCorrel(object): + """Perform a time autocorrelation of the hydrogen bonds in the system. + + Parameters + ---------- + universe : Universe + MDAnalysis Universe that all selections belong to + hydrogens : AtomGroup + AtomGroup of Hydrogens which can form hydrogen bonds + acceptors : AtomGroup + AtomGroup of all Acceptor atoms + donors : AtomGroup + The atoms which are connected to the hydrogens. This group + must be identical in length to the hydrogen group and matched, + ie hydrogens[0] is bonded to donors[0]. + For water, this will mean a donor appears twice in this + group, once for each hydrogen. + bond_type : str + Which definition of hydrogen bond lifetime to consider, either + 'continuous' or 'intermittent'. + exclusions : ndarray, optional + Indices of Hydrogen-Acceptor pairs to be excluded. + With nH and nA Hydrogens and Acceptors, a (nH x nA) array of distances + is calculated, *exclusions* is used as a mask on this array to exclude + some pairs. + angle_crit : float, optional + The angle (in degrees) which all bonds must be greater than [130.0] + dist_crit : float, optional + The maximum distance (in Angstroms) for a hydrogen bond [3.0] + sample_time : float, optional + The amount of time, in ps, that you wish to observe hydrogen + bonds for [100] + nruns : int, optional + The number of different start points within the trajectory + to use [1] + nsamples : int, optional + Within each run, the number of frames to analyse [50] + pbc : bool, optional + Whether to consider periodic boundaries in calculations [``True``] + + ..versionchanged: 1.0.0 + ``save_results()`` method was removed. You can instead use ``np.savez()`` + on :attr:`HydrogenBondAutoCorrel.solution['time']` and + :attr:`HydrogenBondAutoCorrel.solution['results']` instead. + """ + + def __init__(self, universe, + hydrogens=None, acceptors=None, donors=None, + bond_type=None, + exclusions=None, + angle_crit=130.0, dist_crit=3.0, # geometric criteria + sample_time=100, # expected length of the decay in ps + time_cut=None, # cutoff time for intermittent hbonds + nruns=1, # number of times to iterate through the trajectory + nsamples=50, # number of different points to sample in a run + pbc=True): + + #warnings.warn("This class is deprecated, use analysis.hbonds.HydrogenBondAnalysis " + # "which has .autocorrelation function", + # category=DeprecationWarning) + + self.u = universe + # check that slicing is possible + try: + self.u.trajectory[0] + except Exception: + raise ValueError("Trajectory must support slicing") from None + + self.h = hydrogens + self.a = acceptors + self.d = donors + if not len(self.h) == len(self.d): + raise ValueError("Donors and Hydrogen groups must be identical " + "length. Try using `find_hydrogen_donors`.") + + self.exclusions = exclusions + if self.exclusions: + if not len(self.exclusions[0]) == len(self.exclusions[1]): + raise ValueError( + "'exclusion' must be two arrays of identical length") + + self.bond_type = bond_type + if self.bond_type not in ['continuous', 'intermittent']: + raise ValueError( + "bond_type must be either 'continuous' or 'intermittent'") + + self.a_crit = np.deg2rad(angle_crit) + self.d_crit = dist_crit + self.pbc = pbc + self.sample_time = sample_time + self.nruns = nruns + self.nsamples = nsamples + self._slice_traj(sample_time) + self.time_cut = time_cut + + self.solution = { + 'results': None, # Raw results + 'time': None, # Time axis of raw results + 'fit': None, # coefficients for fit + 'tau': None, # integral of exponential fit + 'estimate': None # y values of fit against time + } + + def _slice_traj(self, sample_time): + """Set up start and end points in the trajectory for the + different passes + """ + dt = self.u.trajectory.dt # frame step size in time + req_frames = int(sample_time / dt) # the number of frames required + + n_frames = len(self.u.trajectory) + if req_frames > n_frames: + warnings.warn("Number of required frames ({}) greater than the" + " number of frames in trajectory ({})" + .format(req_frames, n_frames), RuntimeWarning) + + numruns = self.nruns + if numruns > n_frames: + numruns = n_frames + warnings.warn("Number of runs ({}) greater than the number of" + " frames in trajectory ({})" + .format(self.nruns, n_frames), RuntimeWarning) + + self._starts = np.arange(0, n_frames, n_frames / numruns, dtype=int) + # limit stop points using clip + self._stops = np.clip(self._starts + req_frames, 0, n_frames) + + self._skip = req_frames // self.nsamples + if self._skip == 0: # If nsamples > req_frames + warnings.warn("Desired number of sample points too high, using {0}" + .format(req_frames), RuntimeWarning) + self._skip = 1 + + def run(self, force=False): + """Run all the required passes + + Parameters + ---------- + force : bool, optional + Will overwrite previous results if they exist + """ + # if results exist, don't waste any time + if self.solution['results'] is not None and not force: + return + + main_results = np.zeros_like(np.arange(self._starts[0], + self._stops[0], + self._skip), + dtype=np.float32) + # for normalising later + counter = np.zeros_like(main_results, dtype=np.float32) + + for i, (start, stop) in ProgressBar(enumerate(zip(self._starts, + self._stops)), total=self.nruns, + desc="Performing run"): + + # needed else trj seek thinks a np.int64 isn't an int? + results = self._single_run(int(start), int(stop)) + + nresults = len(results) + if nresults == len(main_results): + main_results += results + counter += 1.0 + else: + main_results[:nresults] += results + counter[:nresults] += 1.0 + + main_results /= counter + + self.solution['time'] = np.arange( + len(main_results), + dtype=np.float32) * self.u.trajectory.dt * self._skip + self.solution['results'] = main_results + + def _single_run(self, start, stop): + """Perform a single pass of the trajectory""" + self.u.trajectory[start] + + # Calculate partners at t=0 + box = self.u.dimensions if self.pbc else None + + # 2d array of all distances + pair, d = capped_distance(self.h.positions, self.a.positions, max_cutoff=self.d_crit, box=box) + if self.exclusions: + # set to above dist crit to exclude + exclude = np.column_stack((self.exclusions[0], self.exclusions[1])) + pair = np.delete(pair, np.where(pair==exclude), 0) + + hidx, aidx = np.transpose(pair) + + + a = calc_angles(self.d.positions[hidx], self.h.positions[hidx], + self.a.positions[aidx], box=box) + # from amongst those, who also satisfiess angle crit + idx2 = np.where(a > self.a_crit) + hidx = hidx[idx2] + aidx = aidx[idx2] + + nbonds = len(hidx) # number of hbonds at t=0 + results = np.zeros_like(np.arange(start, stop, self._skip), + dtype=np.float32) + + if self.time_cut: + # counter for time criteria + count = np.zeros(nbonds, dtype=np.float64) + + for i, ts in enumerate(self.u.trajectory[start:stop:self._skip]): + box = self.u.dimensions if self.pbc else None + + d = calc_bonds(self.h.positions[hidx], self.a.positions[aidx], + box=box) + a = calc_angles(self.d.positions[hidx], self.h.positions[hidx], + self.a.positions[aidx], box=box) + + winners = (d < self.d_crit) & (a > self.a_crit) + results[i] = winners.sum() + + if self.bond_type == 'continuous': + # Remove losers for continuous definition + hidx = hidx[np.where(winners)] + aidx = aidx[np.where(winners)] + elif self.bond_type == 'intermittent': + if self.time_cut: + # Add to counter of where losers are + count[~ winners] += self._skip * self.u.trajectory.dt + count[winners] = 0 # Reset timer for winners + + # Remove if you've lost too many times + # New arrays contain everything but removals + hidx = hidx[count < self.time_cut] + aidx = aidx[count < self.time_cut] + count = count[count < self.time_cut] + else: + pass + + if len(hidx) == 0: # Once everyone has lost, the fun stops + break + + results /= nbonds + + return results + + def solve(self, p_guess=None): + """Fit results to an multi exponential decay and integrate to find + characteristic time + + Parameters + ---------- + p_guess : tuple of floats, optional + Initial guess for the leastsq fit, must match the shape of the + expected coefficients + + + Continuous defition results are fitted to a double exponential with + :func:`scipy.optimize.leastsq`, intermittent definition are fit to a + triple exponential. + + The results of this fitting procedure are saved into the *fit*, + *tau* and *estimate* keywords in the solution dict. + + - *fit* contains the coefficients, (A1, tau1, tau2) or + (A1, A2, tau1, tau2, tau3) + - *tau* contains the calculated lifetime in ps for the hydrogen + bonding + - *estimate* contains the estimate provided by the fit of the time + autocorrelation function + + In addition, the output of the :func:`~scipy.optimize.leastsq` function + is saved into the solution dict + + - *infodict* + - *mesg* + - *ier* + + """ + + if self.solution['results'] is None: + raise ValueError( + "Results have not been generated use, the run method first") + + # Prevents an odd bug with leastsq where it expects + # double precision data sometimes... + time = self.solution['time'].astype(np.float64) + results = self.solution['results'].astype(np.float64) + + def within_bounds(p): + """Returns True/False if boundary conditions are met or not. + Uses length of p to detect whether it's handling continuous / + intermittent + + Boundary conditions are: + 0 < A_x < 1 + sum(A_x) < 1 + 0 < tau_x + """ + if len(p) == 3: + A1, tau1, tau2 = p + return (A1 > 0.0) & (A1 < 1.0) & \ + (tau1 > 0.0) & (tau2 > 0.0) + elif len(p) == 5: + A1, A2, tau1, tau2, tau3 = p + return (A1 > 0.0) & (A1 < 1.0) & (A2 > 0.0) & \ + (A2 < 1.0) & ((A1 + A2) < 1.0) & \ + (tau1 > 0.0) & (tau2 > 0.0) & (tau3 > 0.0) + + def err(p, x, y): + """Custom residual function, returns real residual if all + boundaries are met, else returns a large number to trick the + leastsq algorithm + """ + if within_bounds(p): + return y - self._my_solve(x, *p) + else: + return np.full_like(y, 100000) + + def double(x, A1, tau1, tau2): + """ Sum of two exponential functions """ + A2 = 1 - A1 + return A1 * np.exp(-x / tau1) + A2 * np.exp(-x / tau2) + + def triple(x, A1, A2, tau1, tau2, tau3): + """ Sum of three exponential functions """ + A3 = 1 - (A1 + A2) + return A1 * np.exp(-x / tau1) + A2 * np.exp(-x / tau2) + A3 * np.exp(-x / tau3) + + if self.bond_type == 'continuous': + self._my_solve = double + + if p_guess is None: + p_guess = (0.5, 10 * self.sample_time, self.sample_time) + + p, cov, infodict, mesg, ier = scipy.optimize.leastsq( + err, p_guess, args=(time, results), full_output=True) + self.solution['fit'] = p + A1, tau1, tau2 = p + A2 = 1 - A1 + self.solution['tau'] = A1 * tau1 + A2 * tau2 + else: + self._my_solve = triple + + if p_guess is None: + p_guess = (0.33, 0.33, 10 * self.sample_time, + self.sample_time, 0.1 * self.sample_time) + + p, cov, infodict, mesg, ier = scipy.optimize.leastsq( + err, p_guess, args=(time, results), full_output=True) + self.solution['fit'] = p + A1, A2, tau1, tau2, tau3 = p + A3 = 1 - A1 - A2 + self.solution['tau'] = A1 * tau1 + A2 * tau2 + A3 * tau3 + + self.solution['infodict'] = infodict + self.solution['mesg'] = mesg + self.solution['ier'] = ier + + if ier in [1, 2, 3, 4]: # solution found if ier is one of these values + self.solution['estimate'] = self._my_solve( + self.solution['time'], *p) + else: + warnings.warn("Solution to results not found", RuntimeWarning) + + def __repr__(self): + return ("" + "".format(btype=self.bond_type, n=len(self.h))) diff --git a/testsuite/MDAnalysisTests/analysis/test_hbonds.py b/testsuite/MDAnalysisTests/analysis/test_hbonds.py deleted file mode 100644 index bf1ff37f873..00000000000 --- a/testsuite/MDAnalysisTests/analysis/test_hbonds.py +++ /dev/null @@ -1,552 +0,0 @@ -# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- -# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 fileencoding=utf-8 -# -# MDAnalysis --- https://www.mdanalysis.org -# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors -# (see the file AUTHORS for the full list of names) -# -# Released under the GNU Public Licence, v2 or any higher version -# -# Please cite your use of MDAnalysis in published work: -# -# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, -# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. -# MDAnalysis: A Python package for the rapid analysis of molecular dynamics -# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th -# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. -# doi: 10.25080/majora-629e541a-00e -# -# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. -# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. -# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 -# -# Note: to be removed with MDAnalysis.analysis.hbonds.hbond_analysis in 2.0 -import MDAnalysis -import MDAnalysis.analysis.hbonds -import itertools -import pytest -from MDAnalysis import SelectionError - -from numpy.testing import ( - assert_equal, assert_array_equal, assert_almost_equal, - assert_array_almost_equal, assert_allclose, -) -import numpy as np - - -from io import StringIO - -from MDAnalysisTests.datafiles import PDB_helix, GRO, XTC, waterPSF, waterDCD - -# For type guessing: -from MDAnalysis.topology.core import guess_atom_type -from MDAnalysis.core.topologyattrs import Atomtypes - - -def guess_types(names): - """GRO doesn't supply types, this returns an Attr""" - return Atomtypes(np.array([guess_atom_type(name) for name in names], dtype=object)) - - -class TestHydrogenBondAnalysis(object): - @staticmethod - @pytest.fixture(scope='class') - def universe(): - return MDAnalysis.Universe(PDB_helix) - - @staticmethod - @pytest.fixture(scope='class') - def values(universe): - return { - 'num_bb_hbonds': universe.atoms.n_residues - universe.select_atoms('resname PRO').n_residues - 4, - 'donor_resid': np.array([5, 6, 8, 9, 10, 11, 12, 13]), - 'acceptor_resnm': np.array(['ALA', 'ALA', 'ALA', 'ALA', 'ALA', 'PRO', 'ALA', 'ALA'], dtype='U4'), - } - - kwargs = { - 'selection1': 'protein', - 'selection2': 'protein', - 'detect_hydrogens': "distance", - 'distance': 3.0, - 'angle': 150.0, - } - - @pytest.fixture(scope='class') - def h(self, universe): - kw = self.kwargs.copy() - # kw.update(kwargs) - h = MDAnalysis.analysis.hbonds.HydrogenBondAnalysis(universe, **kw) - if kw['detect_hydrogens'] == 'heuristic': - with pytest.warns(DeprecationWarning): - h.run(verbose=False) - else: - h.run(verbose=False) - return h - - def test_helix_backbone(self, values, h): - - assert len(h.timeseries[0]) == values['num_bb_hbonds'], "wrong number of backbone hydrogen bonds" - assert h.timesteps, [0.0] - - def test_generate_table(self, values, h): - - h.generate_table() - assert len(h.table) == values['num_bb_hbonds'], "wrong number of backbone hydrogen bonds in table" - assert isinstance(h.table, np.core.records.recarray) - assert_array_equal(h.table.donor_resid, values['donor_resid']) - assert_array_equal(h.table.acceptor_resnm, values['acceptor_resnm']) - - def test_atoms_too_far(self): - pdb = ''' -ATOM 1 N LEU 1 32.310 13.778 14.372 1.00 0.00 SYST N 0 -ATOM 2 OW SOL 2 3.024 4.456 4.147 1.00 0.00 SYST H 0''' - - u = MDAnalysis.Universe(StringIO(pdb), format="pdb") - h = MDAnalysis.analysis.hbonds.HydrogenBondAnalysis(u, 'resname SOL', 'protein') - h.run(verbose=False) - assert h.timeseries == [[]] - - def test_acceptor_OC1_OC2(self): - gro = '''test -3 - 1ALA OC1 1 0.000 0.000 0.000 - 2ALA N 2 0.300 0.000 0.000 - 2ALA H1 3 0.200 0.000 0.000 -7.29748 7.66094 9.82962''' - - u = MDAnalysis.Universe(StringIO(gro), format="gro") - h = MDAnalysis.analysis.hbonds.HydrogenBondAnalysis(u) - h.run(verbose=False) - assert h.timeseries[0][0][2] == 'ALA2:H1' - - def test_true_traj(self): - u = MDAnalysis.Universe(GRO, XTC) - u.add_TopologyAttr(guess_types(u.atoms.names)) - h = MDAnalysis.analysis.hbonds.HydrogenBondAnalysis(u,'protein','resname ASP', distance=3.0, angle=120.0) - h.run() - assert len(h.timeseries) == 10 - - def test_count_by_time(self, values, h): - - c = h.count_by_time() - assert c.tolist(), [(0.0, values['num_bb_hbonds'])] - - def test_count_by_type(self, values, h): - - c = h.count_by_type() - assert_equal(c.frequency, values['num_bb_hbonds'] * [1.0]) - - # TODO: Rename - def test_count_by_type2(self, values, h): - - t = h.timesteps_by_type() - assert_equal(t.time, values['num_bb_hbonds'] * [0.0]) - -class TestHydrogenBondAnalysisPBC(TestHydrogenBondAnalysis): - # This system is identical to above class - # But has a box introduced, and atoms moved into neighbouring images - # The results however should remain identical if PBC is used - # If pbc:True in kwargs is changed, these tests should fail - @staticmethod - @pytest.fixture(scope='class') - def universe(): - u = MDAnalysis.Universe(PDB_helix) - # transfer to memory to changes to coordinates are reset - u.transfer_to_memory() - # place in huge oversized box - # real system is about 30A wide at most - boxsize = 150. - box = np.array([boxsize, boxsize, boxsize, 90., 90., 90.]) - u.dimensions = box - - # then scatter the atoms throughout various images of this box - u.atoms[::4].translate([boxsize * 2, 0, 0]) - u.atoms[1::4].translate([0, boxsize *4, 0]) - u.atoms[2::4].translate([-boxsize * 5, 0, -boxsize * 2]) - - return u - - kwargs = { - 'selection1': 'protein', - 'selection2': 'protein', - 'detect_hydrogens': "distance", - 'distance': 3.0, - 'angle': 150.0, - 'pbc': True, - } - - - -class TestHydrogenBondAnalysisHeuristic(TestHydrogenBondAnalysis): - kwargs = { - 'selection1': 'protein', - 'selection2': 'protein', - 'detect_hydrogens': "heuristic", - 'distance': 3.0, - 'angle': 150.0, - } - - -class TestHydrogenBondAnalysisHeavy(TestHydrogenBondAnalysis): - - kwargs = { - 'selection1': 'protein', - 'selection2': 'protein', - 'detect_hydrogens': "heuristic", - 'distance': 3.5, - 'angle': 150.0, - 'distance_type': 'heavy', - } - - -class TestHydrogenBondAnalysisHeavyFail(TestHydrogenBondAnalysisHeavy): - kwargs = { - 'selection1': 'protein', - 'selection2': 'protein', - 'detect_hydrogens': "heuristic", - 'distance': 3.0, - 'angle': 150.0, - 'distance_type': 'heavy', - } - - @staticmethod - @pytest.fixture() - def values(universe): - return { - 'num_bb_hbonds': 0, # no H-bonds with a D-A distance < 3.0 A (they start at 3.05 A) - 'donor_resid': np.array([]), - 'acceptor_resnm': np.array([], dtype="= framePlus1 for frame, framePlus1 in zip(hbonds.acf_timeseries, hbonds.acf_timeseries[1:])]) - - - -class TestHydrogenBondAnalysisTIP3PHeavyPBC(object): - @staticmethod - @pytest.fixture(scope='class') - def universe(): - return MDAnalysis.Universe(waterPSF, waterDCD) - - kwargs = { - 'selection1': 'all', - 'selection2': 'all', - 'detect_hydrogens': "distance", - 'distance': 3.0, - 'angle': 120.0, - 'distance_type': 'heavy', - 'pbc': True - } - - @pytest.fixture(scope='class') - def h(self, universe): - h = MDAnalysis.analysis.hbonds.HydrogenBondAnalysis(universe, **self.kwargs) - h.run(verbose=False) - h.generate_table() - return h - - @pytest.fixture(scope='class') - def normalized_timeseries(self, h): - # timeseries in normalized form: (t, d_indx1, a_indx1, d_index0, a_index0, donor, acceptor, dist, angle) - # array index: 0 1 2 3 4 5 6 7 8 - timeseries = [[t] + item - for t, hframe in zip(h.timesteps, h.timeseries) - for item in hframe] - return timeseries - - def test_timeseries(self, h, normalized_timeseries): - - assert len(h.timeseries) == 10 - assert len(normalized_timeseries) == 32 - - reference = { - 'distance': {'mean': 2.7627309, 'std': 0.0905052}, - 'angle': {'mean': 158.9038039, 'std': 12.0362826}, - } - - assert_allclose(np.mean([hbond[5] for hbond in normalized_timeseries]), reference['distance']['mean']) - assert_allclose(np.std([hbond[5] for hbond in normalized_timeseries]), reference['distance']['std']) - assert_allclose(np.mean([hbond[6] for hbond in normalized_timeseries]), reference['angle']['mean']) - assert_allclose(np.std([hbond[6] for hbond in normalized_timeseries]), reference['angle']['std']) - - def test_count_by_time(self, h): - - ref_times = np.arange(0.02, 0.21, 0.02) - ref_counts = np.array([3, 2, 4, 4, 4, 4, 3, 2, 3, 3]) - - times, counts = np.array([[count[0], count[1]] for count in h.count_by_time()]).T - - assert_array_almost_equal(times, ref_times) - assert_array_equal(counts, ref_counts) - - def test_count_by_type(self, h): - - ref_values = [1.0, 1.0, 0.5, 0.4, 0.2, 0.1] - - c = h.count_by_type() - assert_equal(np.sort(c.frequency)[::-1], ref_values) - - -class TestHydrogenBondAnalysisTIP3PHeavyPBCStartStep(object): - @staticmethod - @pytest.fixture(scope='class') - def universe(): - return MDAnalysis.Universe(waterPSF, waterDCD) - - kwargs = { - 'selection1': 'all', - 'selection2': 'all', - 'detect_hydrogens': "distance", - 'distance': 3.0, - 'angle': 120.0, - 'distance_type': 'heavy', - 'pbc': True - } - - @pytest.fixture(scope='class') - def h(self, universe): - h = MDAnalysis.analysis.hbonds.HydrogenBondAnalysis(universe, **self.kwargs) - h.run(start=1, step=2, verbose=False) - h.generate_table() - return h - - @pytest.fixture(scope='class') - def normalized_timeseries(self, h): - # timeseries in normalized form: (t, d_indx1, a_indx1, d_index0, a_index0, donor, acceptor, dist, angle) - # array index: 0 1 2 3 4 5 6 7 8 - timeseries = [[t] + item - for t, hframe in zip(h.timesteps, h.timeseries) - for item in hframe] - return timeseries - - def test_timeseries(self, h, normalized_timeseries): - - assert len(h.timeseries) == 5 - assert len(normalized_timeseries) == 15 - - reference = { - 'distance': {'mean': 2.73942464, 'std': 0.05867924}, - 'angle': {'mean': 157.07768079, 'std': 9.72636682}, - } - - assert_allclose(np.mean([hbond[5] for hbond in normalized_timeseries]), reference['distance']['mean']) - assert_allclose(np.std([hbond[5] for hbond in normalized_timeseries]), reference['distance']['std']) - assert_allclose(np.mean([hbond[6] for hbond in normalized_timeseries]), reference['angle']['mean']) - assert_allclose(np.std([hbond[6] for hbond in normalized_timeseries]), reference['angle']['std']) - - def test_count_by_time(self, h): - - ref_times = np.array([0.04, 0.08, 0.12, 0.16, 0.20]) - ref_counts = np.array([2, 4, 4, 2, 3]) - - times, counts = np.array([[count[0], count[1]] for count in h.count_by_time()]).T - - assert_array_almost_equal(times, ref_times) - assert_array_equal(counts, ref_counts) diff --git a/testsuite/MDAnalysisTests/analysis/test_hydrogenbondautocorrel.py b/testsuite/MDAnalysisTests/analysis/test_hydrogenbondautocorrel.py index 58fc63d58a1..4968ba363bb 100644 --- a/testsuite/MDAnalysisTests/analysis/test_hydrogenbondautocorrel.py +++ b/testsuite/MDAnalysisTests/analysis/test_hydrogenbondautocorrel.py @@ -33,8 +33,8 @@ import os import MDAnalysis as mda -from MDAnalysis.analysis import hbonds -from MDAnalysis.analysis.hbonds import HydrogenBondAutoCorrel as HBAC +from MDAnalysis.analysis import hydrogenbonds +from MDAnalysis.analysis.hydrogenbonds import HydrogenBondAutoCorrel as HBAC class TestHydrogenBondAutocorrel(object): @@ -275,20 +275,29 @@ def test_repr(self, u, hydrogens, oxygens, nitrogens): ) assert isinstance(repr(hbond), str) + def test_find_donors(): u = mda.Universe(waterPSF, waterDCD) H = u.select_atoms('name H*') - D = hbonds.find_hydrogen_donors(H) + D = hydrogenbonds.find_hydrogen_donors(H) assert len(H) == len(D) # check each O is bonded to the corresponding H for h_atom, o_atom in zip(H, D): assert o_atom in h_atom.bonded_atoms + def test_donors_nobonds(): u = mda.Universe(XYZ_mini) with pytest.raises(mda.NoDataError): - hbonds.find_hydrogen_donors(u.atoms) + hydrogenbonds.find_hydrogen_donors(u.atoms) + + +def test_moved_module_warning(): + wmsg = ("This module has been moved to " + "MDAnalysis.analysis.hydrogenbonds.hbond_autocorrel") + with pytest.warns(DeprecationWarning, match=wmsg): + import MDAnalysis.analysis.hbonds.hbond_autocorrel From fc43502621cddf87326e6b7f5d17ec6573f9097c Mon Sep 17 00:00:00 2001 From: IAlibay Date: Sun, 2 May 2021 21:26:23 +0100 Subject: [PATCH 2/7] Deprecated in situ instead --- package/CHANGELOG | 4 + .../analysis/hbonds/hbond_autocorrel.py | 595 ++++++++++++++++- .../analysis/hydrogenbonds/__init__.py | 1 - .../hydrogenbonds/hbond_autocorrel.py | 608 ------------------ .../analysis/test_hydrogenbondautocorrel.py | 13 +- 5 files changed, 602 insertions(+), 619 deletions(-) delete mode 100644 package/MDAnalysis/analysis/hydrogenbonds/hbond_autocorrel.py diff --git a/package/CHANGELOG b/package/CHANGELOG index 740e41a51c9..43eea09fe4a 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -168,6 +168,8 @@ Enhancements checking if it can be used in parallel analysis. (Issue #2996, PR #2950) Changes + * Deprecated analysis.hbonds.hbond_analysis has been removed in favour of + analysis.hydrogenbonds.hbond_analysis (Issues #2739, #2746) * Fixed inaccurate docstring inside the RMSD class (Issue #2796, PR #3134) * TPRParser now loads TPR files with `tpr_resid_from_one=True` by default, which starts TPR resid indexing from 1 (instead of 0 as in 1.x) (Issue #2364, PR #3152) @@ -211,6 +213,8 @@ Changes * `threadpoolctl` is required for installation (Issue #2860) Deprecations + * The analysis.hbonds.hbond_autocorrel code has been moved to + analysis.hydrogenbonds.hbond_autocorrel (PR #3258) 06/09/20 richardjgowers, kain88-de, lilyminium, p-j-smith, bdice, joaomcteixeira, PicoCentauri, davidercruz, jbarnoud, RMeli, IAlibay, mtiberti, CCook96, diff --git a/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py b/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py index 0387e0827d4..55c75ec72bd 100644 --- a/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py +++ b/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py @@ -20,14 +20,601 @@ # MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. # J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 # +""" +Hydrogen bond autocorrelation --- :mod:`MDAnalysis.analysis.hbonds.hbond_autocorrel` +==================================================================================== + +:Author: Richard J. Gowers +:Year: 2014 +:Copyright: GNU Public License v3 + +.. versionadded:: 0.9.0 + +Description +----------- + +Calculates the time autocorrelation function, :math:`C_x(t)`, for the hydrogen +bonds in the selections passed to it. The population of hydrogen bonds at a +given startpoint, :math:`t_0`, is evaluated based on geometric criteria and +then the lifetime of these bonds is monitored over time. Multiple passes +through the trajectory are used to build an average of the behaviour. + +.. math:: + C_x(t) = \\left \\langle \\frac{h_{ij}(t_0) h_{ij}(t_0 + t)}{h_{ij}(t_0)^2} \\right\\rangle + +The subscript :math:`x` refers to the definition of lifetime being used, either +continuous or intermittent. The continuous definition measures the time that +a particular hydrogen bond remains continuously attached, whilst the +intermittent definition allows a bond to break and then subsequently reform and +be counted again. The relevent lifetime, :math:`\\tau_x`, can then be found +via integration of this function + +.. math:: + \\tau_x = \\int_0^\\infty C_x(t) dt` + +For this, the observed behaviour is fitted to a multi exponential function, +using 2 exponents for the continuous lifetime and 3 for the intermittent +lifetime. + + :math:`C_x(t) = A_1 \\exp( - t / \\tau_1) + + A_2 \\exp( - t / \\tau_2) + [+ A_3 \\exp( - t / \\tau_3)]` + +Where the final pre expoential factor :math:`A_n` is subject to the condition: + + :math:`A_n = 1 - \\sum\\limits_{i=1}^{n-1} A_i` + +For further details see [Gowers2015]_. + +.. rubric:: References + +.. [Gowers2015] Richard J. Gowers and Paola Carbone, + A multiscale approach to model hydrogen bonding: The case of polyamide + The Journal of Chemical Physics, 142, 224907 (2015), + DOI:http://dx.doi.org/10.1063/1.4922445 + +Input +----- + +Three AtomGroup selections representing the **hydrogens**, **donors** and +**acceptors** that you wish to analyse. Note that the **hydrogens** and +**donors** selections must be aligned, that is **hydrogens[0]** and +**donors[0]** must represent a bonded pair. For systems such as water, +this will mean that each oxygen appears twice in the **donors** AtomGroup. +The function :func:`find_hydrogen_donors` can be used to construct the donor +AtomGroup +:: + + import MDAnalysis as mda + from MDAnalysis.analysis import hbonds + from MDAnalysis.tests.datafiles import waterPSF, waterDCD + u = mda.Universe(waterPSF, waterDCD) + hydrogens = u.select_atoms('name H*') + donors = hbonds.find_hydrogen_donors(hydrogens) + + +Note that this requires the Universe to have bond information. If this isn't +present in the topology file, the +:meth:`MDAnalysis.core.groups.AtomGroup.guess_bonds` method can be used +as so +:: + + import MDAnalysis as mda + from MDAnalysis.analysis import hbonds + from MDAnalysis.tests.datafiles import GRO + # we could load the Universe with guess_bonds=True + # but this would guess **all** bonds + u = mda.Universe(GRO) + water = u.select_atoms('resname SOL and not type DUMMY') + # guess bonds only within our water atoms + # this adds the bond information directly to the Universe + water.guess_bonds() + hydrogens = water.select_atoms('type H') + # this is now possible as we guessed the bonds + donors = hbonds.find_hydrogen_donors(hydrogens) + + +The keyword **exclusions** allows a tuple of array addresses to be provided, +(Hidx, Aidx),these pairs of hydrogen-acceptor are then not permitted to be +counted as part of the analysis. This could be used to exclude the +consideration of hydrogen bonds within the same functional group, or to perform +analysis on strictly intermolecular hydrogen bonding. + +Hydrogen bonds are defined on the basis of geometric criteria; a +Hydrogen-Acceptor distance of less then **dist_crit** and a +Donor-Hydrogen-Acceptor angle of greater than **angle_crit**. + +The length of trajectory to analyse in ps, **sample_time**, is used to choose +what length to analyse. + +Multiple passes, controlled by the keyword **nruns**, through the trajectory +are performed and an average calculated. For each pass, **nsamples** number +of points along the run are calculated. + + +Output +------ + +All results of the analysis are available through the *solution* attribute. +This is a dictionary with the following keys + +- *results* The raw results of the time autocorrelation function. +- *time* Time axis, in ps, for the results. +- *fit* Results of the exponential curve fitting procedure. For the + *continuous* lifetime these are (A1, tau1, tau2), for the + *intermittent* lifetime these are (A1, A2, tau1, tau2, tau3). +- *tau* Calculated time constant from the fit. +- *estimate* Estimated values generated by the calculated fit. + +The *results* and *time* values are only filled after the :meth:`run` method, +*fit*, *tau* and *estimate* are filled after the :meth:`solve` method has been +used. + + +Worked Example for Polyamide +---------------------------- + +This example finds the continuous hydrogen bond lifetime between N-H..O in a +polyamide system. This will use the default geometric definition for hydrogen +bonds of length 3.0 Å and angle of 130 degrees. +It will observe a window of 2.0 ps (`sample_time`) and try to gather 1000 +sample point within this time window (this relies upon the trajectory being +sampled frequently enough). This process is repeated for 20 different start +points to build a better average. + +:: + + import MDAnalysis as mda + from MDAnalysis.analysis import hbonds + from MDAnalysis.tests.datafiles import TRZ_psf, TRZ + import matplotlib.pyplot as plt + # load system + u = mda.Universe(TRZ_psf, TRZ) + # select atoms of interest into AtomGroups + H = u.select_atoms('name Hn') + N = u.select_atoms('name N') + O = u.select_atoms('name O') + # create analysis object + hb_ac = hbonds.HydrogenBondAutoCorrel(u, + acceptors=O, hydrogens=H, donors=N, + bond_type='continuous', + sample_time=2.0, nsamples=1000, nruns=20) + # call run to gather results + hb_ac.run() + # attempt to fit results to exponential equation + hb_ac.solve() + # grab results from inside object + tau = hb_ac.solution['tau'] + time = hb_ac.solution['time'] + results = hb_ac.solution['results'] + estimate = hb_ac.solution['estimate'] + # plot to check! + plt.plot(time, results, 'ro') + plt.plot(time, estimate) + plt.show() + + +Classes +------- + +.. autofunction:: find_hydrogen_donors + +.. autoclass:: HydrogenBondAutoCorrel + :members: + + +""" +import numpy as np +import scipy.optimize import warnings +from MDAnalysis.lib.log import ProgressBar +from MDAnalysis.lib.distances import capped_distance, calc_angles, calc_bonds +from MDAnalysis.core.groups import requires + +from MDAnalysis.due import due, Doi +due.cite(Doi("10.1063/1.4922445"), + description="Hydrogen bonding autocorrelation time", + path='MDAnalysis.analysis.hbonds.hbond_autocorrel', +) +del Doi + with warnings.catch_warnings(): warnings.simplefilter('always', DeprecationWarning) - wmsg = ("This module has been moved to " - "MDAnalysis.analysis.hydrogenbonds.hbond_autocorrel. " - "The module will be removed from here in MDAnalysis 2.1.0.") + wmsg = ("This module will be moved to " + "MDAnalysis.analysis.hydrogenbonds.hbond_autocorrel " + "in MDAnalysis 2.1.0.") warnings.warn(wmsg, category=DeprecationWarning) -from MDAnalysis.analysis.hydrogenbonds.hbond_autocorrel import * +@requires('bonds') +def find_hydrogen_donors(hydrogens): + """Returns the donor atom for each hydrogen + + Parameters + ---------- + hydrogens : AtomGroup + the hydrogens that will form hydrogen bonds + + Returns + ------- + donors : AtomGroup + the donor atom for each hydrogen, found via bond information + + + .. versionadded:: 0.20.0 + .. deprecated:: 2.0.0 + This method will be moved to analysis.hydrogenbonds.hbond_autocorrel + in MDAnalysis 2.1.0 + """ + return sum(h.bonded_atoms[0] for h in hydrogens) + + +class HydrogenBondAutoCorrel(object): + """Perform a time autocorrelation of the hydrogen bonds in the system. + + Parameters + ---------- + universe : Universe + MDAnalysis Universe that all selections belong to + hydrogens : AtomGroup + AtomGroup of Hydrogens which can form hydrogen bonds + acceptors : AtomGroup + AtomGroup of all Acceptor atoms + donors : AtomGroup + The atoms which are connected to the hydrogens. This group + must be identical in length to the hydrogen group and matched, + ie hydrogens[0] is bonded to donors[0]. + For water, this will mean a donor appears twice in this + group, once for each hydrogen. + bond_type : str + Which definition of hydrogen bond lifetime to consider, either + 'continuous' or 'intermittent'. + exclusions : ndarray, optional + Indices of Hydrogen-Acceptor pairs to be excluded. + With nH and nA Hydrogens and Acceptors, a (nH x nA) array of distances + is calculated, *exclusions* is used as a mask on this array to exclude + some pairs. + angle_crit : float, optional + The angle (in degrees) which all bonds must be greater than [130.0] + dist_crit : float, optional + The maximum distance (in Angstroms) for a hydrogen bond [3.0] + sample_time : float, optional + The amount of time, in ps, that you wish to observe hydrogen + bonds for [100] + nruns : int, optional + The number of different start points within the trajectory + to use [1] + nsamples : int, optional + Within each run, the number of frames to analyse [50] + pbc : bool, optional + Whether to consider periodic boundaries in calculations [``True``] + + ..versionchanged: 1.0.0 + ``save_results()`` method was removed. You can instead use ``np.savez()`` + on :attr:`HydrogenBondAutoCorrel.solution['time']` and + :attr:`HydrogenBondAutoCorrel.solution['results']` instead. + .. deprecated:: 2.0.0 + This class will be moved to analysis.hydrogenbonds.hbond_autocorrel + in MDAnalysis 2.1.0 + """ + + def __init__(self, universe, + hydrogens=None, acceptors=None, donors=None, + bond_type=None, + exclusions=None, + angle_crit=130.0, dist_crit=3.0, # geometric criteria + sample_time=100, # expected length of the decay in ps + time_cut=None, # cutoff time for intermittent hbonds + nruns=1, # number of times to iterate through the trajectory + nsamples=50, # number of different points to sample in a run + pbc=True): + + #warnings.warn("This class is deprecated, use analysis.hbonds.HydrogenBondAnalysis " + # "which has .autocorrelation function", + # category=DeprecationWarning) + + self.u = universe + # check that slicing is possible + try: + self.u.trajectory[0] + except Exception: + raise ValueError("Trajectory must support slicing") from None + + self.h = hydrogens + self.a = acceptors + self.d = donors + if not len(self.h) == len(self.d): + raise ValueError("Donors and Hydrogen groups must be identical " + "length. Try using `find_hydrogen_donors`.") + + self.exclusions = exclusions + if self.exclusions: + if not len(self.exclusions[0]) == len(self.exclusions[1]): + raise ValueError( + "'exclusion' must be two arrays of identical length") + + self.bond_type = bond_type + if self.bond_type not in ['continuous', 'intermittent']: + raise ValueError( + "bond_type must be either 'continuous' or 'intermittent'") + + self.a_crit = np.deg2rad(angle_crit) + self.d_crit = dist_crit + self.pbc = pbc + self.sample_time = sample_time + self.nruns = nruns + self.nsamples = nsamples + self._slice_traj(sample_time) + self.time_cut = time_cut + + self.solution = { + 'results': None, # Raw results + 'time': None, # Time axis of raw results + 'fit': None, # coefficients for fit + 'tau': None, # integral of exponential fit + 'estimate': None # y values of fit against time + } + + def _slice_traj(self, sample_time): + """Set up start and end points in the trajectory for the + different passes + """ + dt = self.u.trajectory.dt # frame step size in time + req_frames = int(sample_time / dt) # the number of frames required + + n_frames = len(self.u.trajectory) + if req_frames > n_frames: + warnings.warn("Number of required frames ({}) greater than the" + " number of frames in trajectory ({})" + .format(req_frames, n_frames), RuntimeWarning) + + numruns = self.nruns + if numruns > n_frames: + numruns = n_frames + warnings.warn("Number of runs ({}) greater than the number of" + " frames in trajectory ({})" + .format(self.nruns, n_frames), RuntimeWarning) + + self._starts = np.arange(0, n_frames, n_frames / numruns, dtype=int) + # limit stop points using clip + self._stops = np.clip(self._starts + req_frames, 0, n_frames) + + self._skip = req_frames // self.nsamples + if self._skip == 0: # If nsamples > req_frames + warnings.warn("Desired number of sample points too high, using {0}" + .format(req_frames), RuntimeWarning) + self._skip = 1 + + def run(self, force=False): + """Run all the required passes + + Parameters + ---------- + force : bool, optional + Will overwrite previous results if they exist + """ + # if results exist, don't waste any time + if self.solution['results'] is not None and not force: + return + + main_results = np.zeros_like(np.arange(self._starts[0], + self._stops[0], + self._skip), + dtype=np.float32) + # for normalising later + counter = np.zeros_like(main_results, dtype=np.float32) + + for i, (start, stop) in ProgressBar(enumerate(zip(self._starts, + self._stops)), total=self.nruns, + desc="Performing run"): + + # needed else trj seek thinks a np.int64 isn't an int? + results = self._single_run(int(start), int(stop)) + + nresults = len(results) + if nresults == len(main_results): + main_results += results + counter += 1.0 + else: + main_results[:nresults] += results + counter[:nresults] += 1.0 + + main_results /= counter + + self.solution['time'] = np.arange( + len(main_results), + dtype=np.float32) * self.u.trajectory.dt * self._skip + self.solution['results'] = main_results + + def _single_run(self, start, stop): + """Perform a single pass of the trajectory""" + self.u.trajectory[start] + + # Calculate partners at t=0 + box = self.u.dimensions if self.pbc else None + + # 2d array of all distances + pair, d = capped_distance(self.h.positions, self.a.positions, max_cutoff=self.d_crit, box=box) + if self.exclusions: + # set to above dist crit to exclude + exclude = np.column_stack((self.exclusions[0], self.exclusions[1])) + pair = np.delete(pair, np.where(pair==exclude), 0) + + hidx, aidx = np.transpose(pair) + + + a = calc_angles(self.d.positions[hidx], self.h.positions[hidx], + self.a.positions[aidx], box=box) + # from amongst those, who also satisfiess angle crit + idx2 = np.where(a > self.a_crit) + hidx = hidx[idx2] + aidx = aidx[idx2] + + nbonds = len(hidx) # number of hbonds at t=0 + results = np.zeros_like(np.arange(start, stop, self._skip), + dtype=np.float32) + + if self.time_cut: + # counter for time criteria + count = np.zeros(nbonds, dtype=np.float64) + + for i, ts in enumerate(self.u.trajectory[start:stop:self._skip]): + box = self.u.dimensions if self.pbc else None + + d = calc_bonds(self.h.positions[hidx], self.a.positions[aidx], + box=box) + a = calc_angles(self.d.positions[hidx], self.h.positions[hidx], + self.a.positions[aidx], box=box) + + winners = (d < self.d_crit) & (a > self.a_crit) + results[i] = winners.sum() + + if self.bond_type == 'continuous': + # Remove losers for continuous definition + hidx = hidx[np.where(winners)] + aidx = aidx[np.where(winners)] + elif self.bond_type == 'intermittent': + if self.time_cut: + # Add to counter of where losers are + count[~ winners] += self._skip * self.u.trajectory.dt + count[winners] = 0 # Reset timer for winners + + # Remove if you've lost too many times + # New arrays contain everything but removals + hidx = hidx[count < self.time_cut] + aidx = aidx[count < self.time_cut] + count = count[count < self.time_cut] + else: + pass + + if len(hidx) == 0: # Once everyone has lost, the fun stops + break + + results /= nbonds + + return results + + def solve(self, p_guess=None): + """Fit results to an multi exponential decay and integrate to find + characteristic time + + Parameters + ---------- + p_guess : tuple of floats, optional + Initial guess for the leastsq fit, must match the shape of the + expected coefficients + + + Continuous defition results are fitted to a double exponential with + :func:`scipy.optimize.leastsq`, intermittent definition are fit to a + triple exponential. + + The results of this fitting procedure are saved into the *fit*, + *tau* and *estimate* keywords in the solution dict. + + - *fit* contains the coefficients, (A1, tau1, tau2) or + (A1, A2, tau1, tau2, tau3) + - *tau* contains the calculated lifetime in ps for the hydrogen + bonding + - *estimate* contains the estimate provided by the fit of the time + autocorrelation function + + In addition, the output of the :func:`~scipy.optimize.leastsq` function + is saved into the solution dict + + - *infodict* + - *mesg* + - *ier* + + """ + + if self.solution['results'] is None: + raise ValueError( + "Results have not been generated use, the run method first") + + # Prevents an odd bug with leastsq where it expects + # double precision data sometimes... + time = self.solution['time'].astype(np.float64) + results = self.solution['results'].astype(np.float64) + + def within_bounds(p): + """Returns True/False if boundary conditions are met or not. + Uses length of p to detect whether it's handling continuous / + intermittent + + Boundary conditions are: + 0 < A_x < 1 + sum(A_x) < 1 + 0 < tau_x + """ + if len(p) == 3: + A1, tau1, tau2 = p + return (A1 > 0.0) & (A1 < 1.0) & \ + (tau1 > 0.0) & (tau2 > 0.0) + elif len(p) == 5: + A1, A2, tau1, tau2, tau3 = p + return (A1 > 0.0) & (A1 < 1.0) & (A2 > 0.0) & \ + (A2 < 1.0) & ((A1 + A2) < 1.0) & \ + (tau1 > 0.0) & (tau2 > 0.0) & (tau3 > 0.0) + + def err(p, x, y): + """Custom residual function, returns real residual if all + boundaries are met, else returns a large number to trick the + leastsq algorithm + """ + if within_bounds(p): + return y - self._my_solve(x, *p) + else: + return np.full_like(y, 100000) + + def double(x, A1, tau1, tau2): + """ Sum of two exponential functions """ + A2 = 1 - A1 + return A1 * np.exp(-x / tau1) + A2 * np.exp(-x / tau2) + + def triple(x, A1, A2, tau1, tau2, tau3): + """ Sum of three exponential functions """ + A3 = 1 - (A1 + A2) + return A1 * np.exp(-x / tau1) + A2 * np.exp(-x / tau2) + A3 * np.exp(-x / tau3) + + if self.bond_type == 'continuous': + self._my_solve = double + + if p_guess is None: + p_guess = (0.5, 10 * self.sample_time, self.sample_time) + + p, cov, infodict, mesg, ier = scipy.optimize.leastsq( + err, p_guess, args=(time, results), full_output=True) + self.solution['fit'] = p + A1, tau1, tau2 = p + A2 = 1 - A1 + self.solution['tau'] = A1 * tau1 + A2 * tau2 + else: + self._my_solve = triple + + if p_guess is None: + p_guess = (0.33, 0.33, 10 * self.sample_time, + self.sample_time, 0.1 * self.sample_time) + + p, cov, infodict, mesg, ier = scipy.optimize.leastsq( + err, p_guess, args=(time, results), full_output=True) + self.solution['fit'] = p + A1, A2, tau1, tau2, tau3 = p + A3 = 1 - A1 - A2 + self.solution['tau'] = A1 * tau1 + A2 * tau2 + A3 * tau3 + + self.solution['infodict'] = infodict + self.solution['mesg'] = mesg + self.solution['ier'] = ier + + if ier in [1, 2, 3, 4]: # solution found if ier is one of these values + self.solution['estimate'] = self._my_solve( + self.solution['time'], *p) + else: + warnings.warn("Solution to results not found", RuntimeWarning) + + def __repr__(self): + return ("" + "".format(btype=self.bond_type, n=len(self.h))) diff --git a/package/MDAnalysis/analysis/hydrogenbonds/__init__.py b/package/MDAnalysis/analysis/hydrogenbonds/__init__.py index 9476d064138..5660d823546 100644 --- a/package/MDAnalysis/analysis/hydrogenbonds/__init__.py +++ b/package/MDAnalysis/analysis/hydrogenbonds/__init__.py @@ -29,4 +29,3 @@ from .hbond_analysis import HydrogenBondAnalysis from .wbridge_analysis import WaterBridgeAnalysis -from .hbond_autocorrel import HydrogenBondAutoCorrel, find_hydrogen_donors diff --git a/package/MDAnalysis/analysis/hydrogenbonds/hbond_autocorrel.py b/package/MDAnalysis/analysis/hydrogenbonds/hbond_autocorrel.py deleted file mode 100644 index ae993d47c09..00000000000 --- a/package/MDAnalysis/analysis/hydrogenbonds/hbond_autocorrel.py +++ /dev/null @@ -1,608 +0,0 @@ -# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- -# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 -# -# MDAnalysis --- https://www.mdanalysis.org -# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors -# (see the file AUTHORS for the full list of names) -# -# Released under the GNU Public Licence, v2 or any higher version -# -# Please cite your use of MDAnalysis in published work: -# -# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, -# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. -# MDAnalysis: A Python package for the rapid analysis of molecular dynamics -# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th -# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. -# doi: 10.25080/majora-629e541a-00e -# -# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. -# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. -# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 -# -""" -Hydrogen bond autocorrelation --- :mod:`MDAnalysis.analysis.hbonds.hbond_autocorrel` -==================================================================================== - -:Author: Richard J. Gowers -:Year: 2014 -:Copyright: GNU Public License v3 - -.. versionadded:: 0.9.0 - -Description ------------ - -Calculates the time autocorrelation function, :math:`C_x(t)`, for the hydrogen -bonds in the selections passed to it. The population of hydrogen bonds at a -given startpoint, :math:`t_0`, is evaluated based on geometric criteria and -then the lifetime of these bonds is monitored over time. Multiple passes -through the trajectory are used to build an average of the behaviour. - -.. math:: - C_x(t) = \\left \\langle \\frac{h_{ij}(t_0) h_{ij}(t_0 + t)}{h_{ij}(t_0)^2} \\right\\rangle - -The subscript :math:`x` refers to the definition of lifetime being used, either -continuous or intermittent. The continuous definition measures the time that -a particular hydrogen bond remains continuously attached, whilst the -intermittent definition allows a bond to break and then subsequently reform and -be counted again. The relevent lifetime, :math:`\\tau_x`, can then be found -via integration of this function - -.. math:: - \\tau_x = \\int_0^\\infty C_x(t) dt` - -For this, the observed behaviour is fitted to a multi exponential function, -using 2 exponents for the continuous lifetime and 3 for the intermittent -lifetime. - - :math:`C_x(t) = A_1 \\exp( - t / \\tau_1) - + A_2 \\exp( - t / \\tau_2) - [+ A_3 \\exp( - t / \\tau_3)]` - -Where the final pre expoential factor :math:`A_n` is subject to the condition: - - :math:`A_n = 1 - \\sum\\limits_{i=1}^{n-1} A_i` - -For further details see [Gowers2015]_. - -.. rubric:: References - -.. [Gowers2015] Richard J. Gowers and Paola Carbone, - A multiscale approach to model hydrogen bonding: The case of polyamide - The Journal of Chemical Physics, 142, 224907 (2015), - DOI:http://dx.doi.org/10.1063/1.4922445 - -Input ------ - -Three AtomGroup selections representing the **hydrogens**, **donors** and -**acceptors** that you wish to analyse. Note that the **hydrogens** and -**donors** selections must be aligned, that is **hydrogens[0]** and -**donors[0]** must represent a bonded pair. For systems such as water, -this will mean that each oxygen appears twice in the **donors** AtomGroup. -The function :func:`find_hydrogen_donors` can be used to construct the donor -AtomGroup -:: - - import MDAnalysis as mda - from MDAnalysis.analysis import hbonds - from MDAnalysis.tests.datafiles import waterPSF, waterDCD - u = mda.Universe(waterPSF, waterDCD) - hydrogens = u.select_atoms('name H*') - donors = hbonds.find_hydrogen_donors(hydrogens) - - -Note that this requires the Universe to have bond information. If this isn't -present in the topology file, the -:meth:`MDAnalysis.core.groups.AtomGroup.guess_bonds` method can be used -as so -:: - - import MDAnalysis as mda - from MDAnalysis.analysis import hbonds - from MDAnalysis.tests.datafiles import GRO - # we could load the Universe with guess_bonds=True - # but this would guess **all** bonds - u = mda.Universe(GRO) - water = u.select_atoms('resname SOL and not type DUMMY') - # guess bonds only within our water atoms - # this adds the bond information directly to the Universe - water.guess_bonds() - hydrogens = water.select_atoms('type H') - # this is now possible as we guessed the bonds - donors = hbonds.find_hydrogen_donors(hydrogens) - - -The keyword **exclusions** allows a tuple of array addresses to be provided, -(Hidx, Aidx),these pairs of hydrogen-acceptor are then not permitted to be -counted as part of the analysis. This could be used to exclude the -consideration of hydrogen bonds within the same functional group, or to perform -analysis on strictly intermolecular hydrogen bonding. - -Hydrogen bonds are defined on the basis of geometric criteria; a -Hydrogen-Acceptor distance of less then **dist_crit** and a -Donor-Hydrogen-Acceptor angle of greater than **angle_crit**. - -The length of trajectory to analyse in ps, **sample_time**, is used to choose -what length to analyse. - -Multiple passes, controlled by the keyword **nruns**, through the trajectory -are performed and an average calculated. For each pass, **nsamples** number -of points along the run are calculated. - - -Output ------- - -All results of the analysis are available through the *solution* attribute. -This is a dictionary with the following keys - -- *results* The raw results of the time autocorrelation function. -- *time* Time axis, in ps, for the results. -- *fit* Results of the exponential curve fitting procedure. For the - *continuous* lifetime these are (A1, tau1, tau2), for the - *intermittent* lifetime these are (A1, A2, tau1, tau2, tau3). -- *tau* Calculated time constant from the fit. -- *estimate* Estimated values generated by the calculated fit. - -The *results* and *time* values are only filled after the :meth:`run` method, -*fit*, *tau* and *estimate* are filled after the :meth:`solve` method has been -used. - - -Worked Example for Polyamide ----------------------------- - -This example finds the continuous hydrogen bond lifetime between N-H..O in a -polyamide system. This will use the default geometric definition for hydrogen -bonds of length 3.0 Å and angle of 130 degrees. -It will observe a window of 2.0 ps (`sample_time`) and try to gather 1000 -sample point within this time window (this relies upon the trajectory being -sampled frequently enough). This process is repeated for 20 different start -points to build a better average. - -:: - - import MDAnalysis as mda - from MDAnalysis.analysis import hbonds - from MDAnalysis.tests.datafiles import TRZ_psf, TRZ - import matplotlib.pyplot as plt - # load system - u = mda.Universe(TRZ_psf, TRZ) - # select atoms of interest into AtomGroups - H = u.select_atoms('name Hn') - N = u.select_atoms('name N') - O = u.select_atoms('name O') - # create analysis object - hb_ac = hbonds.HydrogenBondAutoCorrel(u, - acceptors=O, hydrogens=H, donors=N, - bond_type='continuous', - sample_time=2.0, nsamples=1000, nruns=20) - # call run to gather results - hb_ac.run() - # attempt to fit results to exponential equation - hb_ac.solve() - # grab results from inside object - tau = hb_ac.solution['tau'] - time = hb_ac.solution['time'] - results = hb_ac.solution['results'] - estimate = hb_ac.solution['estimate'] - # plot to check! - plt.plot(time, results, 'ro') - plt.plot(time, estimate) - plt.show() - - -Classes -------- - -.. autofunction:: find_hydrogen_donors - -.. autoclass:: HydrogenBondAutoCorrel - :members: - - -""" -import numpy as np -import scipy.optimize - -import warnings - -from MDAnalysis.lib.log import ProgressBar -from MDAnalysis.lib.distances import capped_distance, calc_angles, calc_bonds -from MDAnalysis.core.groups import requires - -from MDAnalysis.due import due, Doi -due.cite(Doi("10.1063/1.4922445"), - description="Hydrogen bonding autocorrelation time", - path='MDAnalysis.analysis.hbonds.hbond_autocorrel', -) -del Doi - - -@requires('bonds') -def find_hydrogen_donors(hydrogens): - """Returns the donor atom for each hydrogen - - Parameters - ---------- - hydrogens : AtomGroup - the hydrogens that will form hydrogen bonds - - Returns - ------- - donors : AtomGroup - the donor atom for each hydrogen, found via bond information - - - .. versionadded:: 0.20.0 - """ - return sum(h.bonded_atoms[0] for h in hydrogens) - - -class HydrogenBondAutoCorrel(object): - """Perform a time autocorrelation of the hydrogen bonds in the system. - - Parameters - ---------- - universe : Universe - MDAnalysis Universe that all selections belong to - hydrogens : AtomGroup - AtomGroup of Hydrogens which can form hydrogen bonds - acceptors : AtomGroup - AtomGroup of all Acceptor atoms - donors : AtomGroup - The atoms which are connected to the hydrogens. This group - must be identical in length to the hydrogen group and matched, - ie hydrogens[0] is bonded to donors[0]. - For water, this will mean a donor appears twice in this - group, once for each hydrogen. - bond_type : str - Which definition of hydrogen bond lifetime to consider, either - 'continuous' or 'intermittent'. - exclusions : ndarray, optional - Indices of Hydrogen-Acceptor pairs to be excluded. - With nH and nA Hydrogens and Acceptors, a (nH x nA) array of distances - is calculated, *exclusions* is used as a mask on this array to exclude - some pairs. - angle_crit : float, optional - The angle (in degrees) which all bonds must be greater than [130.0] - dist_crit : float, optional - The maximum distance (in Angstroms) for a hydrogen bond [3.0] - sample_time : float, optional - The amount of time, in ps, that you wish to observe hydrogen - bonds for [100] - nruns : int, optional - The number of different start points within the trajectory - to use [1] - nsamples : int, optional - Within each run, the number of frames to analyse [50] - pbc : bool, optional - Whether to consider periodic boundaries in calculations [``True``] - - ..versionchanged: 1.0.0 - ``save_results()`` method was removed. You can instead use ``np.savez()`` - on :attr:`HydrogenBondAutoCorrel.solution['time']` and - :attr:`HydrogenBondAutoCorrel.solution['results']` instead. - """ - - def __init__(self, universe, - hydrogens=None, acceptors=None, donors=None, - bond_type=None, - exclusions=None, - angle_crit=130.0, dist_crit=3.0, # geometric criteria - sample_time=100, # expected length of the decay in ps - time_cut=None, # cutoff time for intermittent hbonds - nruns=1, # number of times to iterate through the trajectory - nsamples=50, # number of different points to sample in a run - pbc=True): - - #warnings.warn("This class is deprecated, use analysis.hbonds.HydrogenBondAnalysis " - # "which has .autocorrelation function", - # category=DeprecationWarning) - - self.u = universe - # check that slicing is possible - try: - self.u.trajectory[0] - except Exception: - raise ValueError("Trajectory must support slicing") from None - - self.h = hydrogens - self.a = acceptors - self.d = donors - if not len(self.h) == len(self.d): - raise ValueError("Donors and Hydrogen groups must be identical " - "length. Try using `find_hydrogen_donors`.") - - self.exclusions = exclusions - if self.exclusions: - if not len(self.exclusions[0]) == len(self.exclusions[1]): - raise ValueError( - "'exclusion' must be two arrays of identical length") - - self.bond_type = bond_type - if self.bond_type not in ['continuous', 'intermittent']: - raise ValueError( - "bond_type must be either 'continuous' or 'intermittent'") - - self.a_crit = np.deg2rad(angle_crit) - self.d_crit = dist_crit - self.pbc = pbc - self.sample_time = sample_time - self.nruns = nruns - self.nsamples = nsamples - self._slice_traj(sample_time) - self.time_cut = time_cut - - self.solution = { - 'results': None, # Raw results - 'time': None, # Time axis of raw results - 'fit': None, # coefficients for fit - 'tau': None, # integral of exponential fit - 'estimate': None # y values of fit against time - } - - def _slice_traj(self, sample_time): - """Set up start and end points in the trajectory for the - different passes - """ - dt = self.u.trajectory.dt # frame step size in time - req_frames = int(sample_time / dt) # the number of frames required - - n_frames = len(self.u.trajectory) - if req_frames > n_frames: - warnings.warn("Number of required frames ({}) greater than the" - " number of frames in trajectory ({})" - .format(req_frames, n_frames), RuntimeWarning) - - numruns = self.nruns - if numruns > n_frames: - numruns = n_frames - warnings.warn("Number of runs ({}) greater than the number of" - " frames in trajectory ({})" - .format(self.nruns, n_frames), RuntimeWarning) - - self._starts = np.arange(0, n_frames, n_frames / numruns, dtype=int) - # limit stop points using clip - self._stops = np.clip(self._starts + req_frames, 0, n_frames) - - self._skip = req_frames // self.nsamples - if self._skip == 0: # If nsamples > req_frames - warnings.warn("Desired number of sample points too high, using {0}" - .format(req_frames), RuntimeWarning) - self._skip = 1 - - def run(self, force=False): - """Run all the required passes - - Parameters - ---------- - force : bool, optional - Will overwrite previous results if they exist - """ - # if results exist, don't waste any time - if self.solution['results'] is not None and not force: - return - - main_results = np.zeros_like(np.arange(self._starts[0], - self._stops[0], - self._skip), - dtype=np.float32) - # for normalising later - counter = np.zeros_like(main_results, dtype=np.float32) - - for i, (start, stop) in ProgressBar(enumerate(zip(self._starts, - self._stops)), total=self.nruns, - desc="Performing run"): - - # needed else trj seek thinks a np.int64 isn't an int? - results = self._single_run(int(start), int(stop)) - - nresults = len(results) - if nresults == len(main_results): - main_results += results - counter += 1.0 - else: - main_results[:nresults] += results - counter[:nresults] += 1.0 - - main_results /= counter - - self.solution['time'] = np.arange( - len(main_results), - dtype=np.float32) * self.u.trajectory.dt * self._skip - self.solution['results'] = main_results - - def _single_run(self, start, stop): - """Perform a single pass of the trajectory""" - self.u.trajectory[start] - - # Calculate partners at t=0 - box = self.u.dimensions if self.pbc else None - - # 2d array of all distances - pair, d = capped_distance(self.h.positions, self.a.positions, max_cutoff=self.d_crit, box=box) - if self.exclusions: - # set to above dist crit to exclude - exclude = np.column_stack((self.exclusions[0], self.exclusions[1])) - pair = np.delete(pair, np.where(pair==exclude), 0) - - hidx, aidx = np.transpose(pair) - - - a = calc_angles(self.d.positions[hidx], self.h.positions[hidx], - self.a.positions[aidx], box=box) - # from amongst those, who also satisfiess angle crit - idx2 = np.where(a > self.a_crit) - hidx = hidx[idx2] - aidx = aidx[idx2] - - nbonds = len(hidx) # number of hbonds at t=0 - results = np.zeros_like(np.arange(start, stop, self._skip), - dtype=np.float32) - - if self.time_cut: - # counter for time criteria - count = np.zeros(nbonds, dtype=np.float64) - - for i, ts in enumerate(self.u.trajectory[start:stop:self._skip]): - box = self.u.dimensions if self.pbc else None - - d = calc_bonds(self.h.positions[hidx], self.a.positions[aidx], - box=box) - a = calc_angles(self.d.positions[hidx], self.h.positions[hidx], - self.a.positions[aidx], box=box) - - winners = (d < self.d_crit) & (a > self.a_crit) - results[i] = winners.sum() - - if self.bond_type == 'continuous': - # Remove losers for continuous definition - hidx = hidx[np.where(winners)] - aidx = aidx[np.where(winners)] - elif self.bond_type == 'intermittent': - if self.time_cut: - # Add to counter of where losers are - count[~ winners] += self._skip * self.u.trajectory.dt - count[winners] = 0 # Reset timer for winners - - # Remove if you've lost too many times - # New arrays contain everything but removals - hidx = hidx[count < self.time_cut] - aidx = aidx[count < self.time_cut] - count = count[count < self.time_cut] - else: - pass - - if len(hidx) == 0: # Once everyone has lost, the fun stops - break - - results /= nbonds - - return results - - def solve(self, p_guess=None): - """Fit results to an multi exponential decay and integrate to find - characteristic time - - Parameters - ---------- - p_guess : tuple of floats, optional - Initial guess for the leastsq fit, must match the shape of the - expected coefficients - - - Continuous defition results are fitted to a double exponential with - :func:`scipy.optimize.leastsq`, intermittent definition are fit to a - triple exponential. - - The results of this fitting procedure are saved into the *fit*, - *tau* and *estimate* keywords in the solution dict. - - - *fit* contains the coefficients, (A1, tau1, tau2) or - (A1, A2, tau1, tau2, tau3) - - *tau* contains the calculated lifetime in ps for the hydrogen - bonding - - *estimate* contains the estimate provided by the fit of the time - autocorrelation function - - In addition, the output of the :func:`~scipy.optimize.leastsq` function - is saved into the solution dict - - - *infodict* - - *mesg* - - *ier* - - """ - - if self.solution['results'] is None: - raise ValueError( - "Results have not been generated use, the run method first") - - # Prevents an odd bug with leastsq where it expects - # double precision data sometimes... - time = self.solution['time'].astype(np.float64) - results = self.solution['results'].astype(np.float64) - - def within_bounds(p): - """Returns True/False if boundary conditions are met or not. - Uses length of p to detect whether it's handling continuous / - intermittent - - Boundary conditions are: - 0 < A_x < 1 - sum(A_x) < 1 - 0 < tau_x - """ - if len(p) == 3: - A1, tau1, tau2 = p - return (A1 > 0.0) & (A1 < 1.0) & \ - (tau1 > 0.0) & (tau2 > 0.0) - elif len(p) == 5: - A1, A2, tau1, tau2, tau3 = p - return (A1 > 0.0) & (A1 < 1.0) & (A2 > 0.0) & \ - (A2 < 1.0) & ((A1 + A2) < 1.0) & \ - (tau1 > 0.0) & (tau2 > 0.0) & (tau3 > 0.0) - - def err(p, x, y): - """Custom residual function, returns real residual if all - boundaries are met, else returns a large number to trick the - leastsq algorithm - """ - if within_bounds(p): - return y - self._my_solve(x, *p) - else: - return np.full_like(y, 100000) - - def double(x, A1, tau1, tau2): - """ Sum of two exponential functions """ - A2 = 1 - A1 - return A1 * np.exp(-x / tau1) + A2 * np.exp(-x / tau2) - - def triple(x, A1, A2, tau1, tau2, tau3): - """ Sum of three exponential functions """ - A3 = 1 - (A1 + A2) - return A1 * np.exp(-x / tau1) + A2 * np.exp(-x / tau2) + A3 * np.exp(-x / tau3) - - if self.bond_type == 'continuous': - self._my_solve = double - - if p_guess is None: - p_guess = (0.5, 10 * self.sample_time, self.sample_time) - - p, cov, infodict, mesg, ier = scipy.optimize.leastsq( - err, p_guess, args=(time, results), full_output=True) - self.solution['fit'] = p - A1, tau1, tau2 = p - A2 = 1 - A1 - self.solution['tau'] = A1 * tau1 + A2 * tau2 - else: - self._my_solve = triple - - if p_guess is None: - p_guess = (0.33, 0.33, 10 * self.sample_time, - self.sample_time, 0.1 * self.sample_time) - - p, cov, infodict, mesg, ier = scipy.optimize.leastsq( - err, p_guess, args=(time, results), full_output=True) - self.solution['fit'] = p - A1, A2, tau1, tau2, tau3 = p - A3 = 1 - A1 - A2 - self.solution['tau'] = A1 * tau1 + A2 * tau2 + A3 * tau3 - - self.solution['infodict'] = infodict - self.solution['mesg'] = mesg - self.solution['ier'] = ier - - if ier in [1, 2, 3, 4]: # solution found if ier is one of these values - self.solution['estimate'] = self._my_solve( - self.solution['time'], *p) - else: - warnings.warn("Solution to results not found", RuntimeWarning) - - def __repr__(self): - return ("" - "".format(btype=self.bond_type, n=len(self.h))) diff --git a/testsuite/MDAnalysisTests/analysis/test_hydrogenbondautocorrel.py b/testsuite/MDAnalysisTests/analysis/test_hydrogenbondautocorrel.py index 4968ba363bb..d46ab54ef35 100644 --- a/testsuite/MDAnalysisTests/analysis/test_hydrogenbondautocorrel.py +++ b/testsuite/MDAnalysisTests/analysis/test_hydrogenbondautocorrel.py @@ -31,10 +31,11 @@ import numpy as np from unittest import mock import os +from importlib import reload import MDAnalysis as mda -from MDAnalysis.analysis import hydrogenbonds -from MDAnalysis.analysis.hydrogenbonds import HydrogenBondAutoCorrel as HBAC +from MDAnalysis.analysis import hbonds +from MDAnalysis.analysis.hbonds import HydrogenBondAutoCorrel as HBAC class TestHydrogenBondAutocorrel(object): @@ -281,7 +282,7 @@ def test_find_donors(): H = u.select_atoms('name H*') - D = hydrogenbonds.find_hydrogen_donors(H) + D = hbonds.find_hydrogen_donors(H) assert len(H) == len(D) # check each O is bonded to the corresponding H @@ -293,11 +294,11 @@ def test_donors_nobonds(): u = mda.Universe(XYZ_mini) with pytest.raises(mda.NoDataError): - hydrogenbonds.find_hydrogen_donors(u.atoms) + hbonds.find_hydrogen_donors(u.atoms) def test_moved_module_warning(): - wmsg = ("This module has been moved to " + wmsg = ("This module will be moved to " "MDAnalysis.analysis.hydrogenbonds.hbond_autocorrel") with pytest.warns(DeprecationWarning, match=wmsg): - import MDAnalysis.analysis.hbonds.hbond_autocorrel + reload(hbonds.hbond_autocorrel) From 2ad6b56cfbf4f13c59919048624c675a7b5239da Mon Sep 17 00:00:00 2001 From: IAlibay Date: Sun, 2 May 2021 21:37:03 +0100 Subject: [PATCH 3/7] Fix docs --- .../source/documentation_pages/analysis/hbond_analysis.rst | 1 - .../doc/sphinx/source/documentation_pages/analysis_modules.rst | 1 - 2 files changed, 2 deletions(-) delete mode 100644 package/doc/sphinx/source/documentation_pages/analysis/hbond_analysis.rst diff --git a/package/doc/sphinx/source/documentation_pages/analysis/hbond_analysis.rst b/package/doc/sphinx/source/documentation_pages/analysis/hbond_analysis.rst deleted file mode 100644 index c44a4a8e424..00000000000 --- a/package/doc/sphinx/source/documentation_pages/analysis/hbond_analysis.rst +++ /dev/null @@ -1 +0,0 @@ -.. automodule:: MDAnalysis.analysis.hbonds.hbond_analysis diff --git a/package/doc/sphinx/source/documentation_pages/analysis_modules.rst b/package/doc/sphinx/source/documentation_pages/analysis_modules.rst index 95605bace5b..8209d096d8a 100644 --- a/package/doc/sphinx/source/documentation_pages/analysis_modules.rst +++ b/package/doc/sphinx/source/documentation_pages/analysis_modules.rst @@ -68,7 +68,6 @@ Hydrogen bonding :maxdepth: 1 analysis/hydrogenbonds - analysis/hbond_analysis analysis/hbond_autocorrel analysis/wbridge_analysis From 12bbf4e979bc056ffd28e1c72895f2577bd32137 Mon Sep 17 00:00:00 2001 From: Oliver Beckstein Date: Wed, 5 May 2021 14:58:43 -0700 Subject: [PATCH 4/7] add output to test_api/submodule test --- testsuite/MDAnalysisTests/test_api.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/testsuite/MDAnalysisTests/test_api.py b/testsuite/MDAnalysisTests/test_api.py index c60a175b3a7..08f1b052196 100644 --- a/testsuite/MDAnalysisTests/test_api.py +++ b/testsuite/MDAnalysisTests/test_api.py @@ -79,4 +79,5 @@ def test_all_import(submodule): and name not in [os.path.splitext(f)[0] for f in os.listdir(module_path)]] assert_equal(missing, [], err_msg="{}".format(submodule) + - "has errors in __all__ list.") + " has errors in __all__ list: " + + "missing = {}".format(missing)) From eb3b6378596c5bd101eaf05aeccc6c7cccae773b Mon Sep 17 00:00:00 2001 From: Oliver Beckstein Date: Wed, 5 May 2021 16:40:54 -0700 Subject: [PATCH 5/7] moved hbonds.hbond_autocorrel to hydrogenbonds.hbond_autocorrel - deprecated hbonds.hbond_autocorrel (2.0.0) and stub will be removed in 3.0.0 - hbonds.hbond_autocorrel module raises a deprecation warning, HydrogenBondAutoCorrel and find_hydrogen_donors raise deprecation warnings - tests (note: tests were copied and we run full tests on the deprecated and the moved code, mainly to make sure that they can be called as expected; deprecation warnings are also tested) --- .../analysis/hbonds/hbond_autocorrel.py | 593 +---------------- .../analysis/hydrogenbonds/__init__.py | 1 + .../hydrogenbonds/hbond_autocorrel.py | 613 ++++++++++++++++++ .../analysis/test_hydrogenbondautocorrel.py | 16 +- .../test_hydrogenbondautocorrel_deprecated.py | 331 ++++++++++ 5 files changed, 971 insertions(+), 583 deletions(-) create mode 100644 package/MDAnalysis/analysis/hydrogenbonds/hbond_autocorrel.py create mode 100644 testsuite/MDAnalysisTests/analysis/test_hydrogenbondautocorrel_deprecated.py diff --git a/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py b/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py index 55c75ec72bd..f8329f4a2fa 100644 --- a/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py +++ b/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py @@ -20,8 +20,7 @@ # MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. # J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 # -""" -Hydrogen bond autocorrelation --- :mod:`MDAnalysis.analysis.hbonds.hbond_autocorrel` +"""Hydrogen bond autocorrelation --- :mod:`MDAnalysis.analysis.hbonds.hbond_autocorrel` ==================================================================================== :Author: Richard J. Gowers @@ -30,591 +29,43 @@ .. versionadded:: 0.9.0 -Description ------------ - -Calculates the time autocorrelation function, :math:`C_x(t)`, for the hydrogen -bonds in the selections passed to it. The population of hydrogen bonds at a -given startpoint, :math:`t_0`, is evaluated based on geometric criteria and -then the lifetime of these bonds is monitored over time. Multiple passes -through the trajectory are used to build an average of the behaviour. - -.. math:: - C_x(t) = \\left \\langle \\frac{h_{ij}(t_0) h_{ij}(t_0 + t)}{h_{ij}(t_0)^2} \\right\\rangle - -The subscript :math:`x` refers to the definition of lifetime being used, either -continuous or intermittent. The continuous definition measures the time that -a particular hydrogen bond remains continuously attached, whilst the -intermittent definition allows a bond to break and then subsequently reform and -be counted again. The relevent lifetime, :math:`\\tau_x`, can then be found -via integration of this function - -.. math:: - \\tau_x = \\int_0^\\infty C_x(t) dt` - -For this, the observed behaviour is fitted to a multi exponential function, -using 2 exponents for the continuous lifetime and 3 for the intermittent -lifetime. - - :math:`C_x(t) = A_1 \\exp( - t / \\tau_1) - + A_2 \\exp( - t / \\tau_2) - [+ A_3 \\exp( - t / \\tau_3)]` - -Where the final pre expoential factor :math:`A_n` is subject to the condition: - - :math:`A_n = 1 - \\sum\\limits_{i=1}^{n-1} A_i` - -For further details see [Gowers2015]_. - -.. rubric:: References - -.. [Gowers2015] Richard J. Gowers and Paola Carbone, - A multiscale approach to model hydrogen bonding: The case of polyamide - The Journal of Chemical Physics, 142, 224907 (2015), - DOI:http://dx.doi.org/10.1063/1.4922445 - -Input ------ - -Three AtomGroup selections representing the **hydrogens**, **donors** and -**acceptors** that you wish to analyse. Note that the **hydrogens** and -**donors** selections must be aligned, that is **hydrogens[0]** and -**donors[0]** must represent a bonded pair. For systems such as water, -this will mean that each oxygen appears twice in the **donors** AtomGroup. -The function :func:`find_hydrogen_donors` can be used to construct the donor -AtomGroup -:: - - import MDAnalysis as mda - from MDAnalysis.analysis import hbonds - from MDAnalysis.tests.datafiles import waterPSF, waterDCD - u = mda.Universe(waterPSF, waterDCD) - hydrogens = u.select_atoms('name H*') - donors = hbonds.find_hydrogen_donors(hydrogens) - - -Note that this requires the Universe to have bond information. If this isn't -present in the topology file, the -:meth:`MDAnalysis.core.groups.AtomGroup.guess_bonds` method can be used -as so -:: - - import MDAnalysis as mda - from MDAnalysis.analysis import hbonds - from MDAnalysis.tests.datafiles import GRO - # we could load the Universe with guess_bonds=True - # but this would guess **all** bonds - u = mda.Universe(GRO) - water = u.select_atoms('resname SOL and not type DUMMY') - # guess bonds only within our water atoms - # this adds the bond information directly to the Universe - water.guess_bonds() - hydrogens = water.select_atoms('type H') - # this is now possible as we guessed the bonds - donors = hbonds.find_hydrogen_donors(hydrogens) - - -The keyword **exclusions** allows a tuple of array addresses to be provided, -(Hidx, Aidx),these pairs of hydrogen-acceptor are then not permitted to be -counted as part of the analysis. This could be used to exclude the -consideration of hydrogen bonds within the same functional group, or to perform -analysis on strictly intermolecular hydrogen bonding. - -Hydrogen bonds are defined on the basis of geometric criteria; a -Hydrogen-Acceptor distance of less then **dist_crit** and a -Donor-Hydrogen-Acceptor angle of greater than **angle_crit**. - -The length of trajectory to analyse in ps, **sample_time**, is used to choose -what length to analyse. - -Multiple passes, controlled by the keyword **nruns**, through the trajectory -are performed and an average calculated. For each pass, **nsamples** number -of points along the run are calculated. - - -Output ------- +.. deprecated:: 2.0.0 -All results of the analysis are available through the *solution* attribute. -This is a dictionary with the following keys + This module was moved to + :mod:`MDAnalysis.analysis.hydrogenbonds.hbond_autocorrel` and access to this + module through :mod:`MDAnalysis.analysis.hbonds.hbond_autocorrel` will be + removed in 3.0.0. -- *results* The raw results of the time autocorrelation function. -- *time* Time axis, in ps, for the results. -- *fit* Results of the exponential curve fitting procedure. For the - *continuous* lifetime these are (A1, tau1, tau2), for the - *intermittent* lifetime these are (A1, A2, tau1, tau2, tau3). -- *tau* Calculated time constant from the fit. -- *estimate* Estimated values generated by the calculated fit. - -The *results* and *time* values are only filled after the :meth:`run` method, -*fit*, *tau* and *estimate* are filled after the :meth:`solve` method has been -used. - - -Worked Example for Polyamide ----------------------------- - -This example finds the continuous hydrogen bond lifetime between N-H..O in a -polyamide system. This will use the default geometric definition for hydrogen -bonds of length 3.0 Å and angle of 130 degrees. -It will observe a window of 2.0 ps (`sample_time`) and try to gather 1000 -sample point within this time window (this relies upon the trajectory being -sampled frequently enough). This process is repeated for 20 different start -points to build a better average. - -:: - - import MDAnalysis as mda - from MDAnalysis.analysis import hbonds - from MDAnalysis.tests.datafiles import TRZ_psf, TRZ - import matplotlib.pyplot as plt - # load system - u = mda.Universe(TRZ_psf, TRZ) - # select atoms of interest into AtomGroups - H = u.select_atoms('name Hn') - N = u.select_atoms('name N') - O = u.select_atoms('name O') - # create analysis object - hb_ac = hbonds.HydrogenBondAutoCorrel(u, - acceptors=O, hydrogens=H, donors=N, - bond_type='continuous', - sample_time=2.0, nsamples=1000, nruns=20) - # call run to gather results - hb_ac.run() - # attempt to fit results to exponential equation - hb_ac.solve() - # grab results from inside object - tau = hb_ac.solution['tau'] - time = hb_ac.solution['time'] - results = hb_ac.solution['results'] - estimate = hb_ac.solution['estimate'] - # plot to check! - plt.plot(time, results, 'ro') - plt.plot(time, estimate) - plt.show() - - -Classes -------- - -.. autofunction:: find_hydrogen_donors - -.. autoclass:: HydrogenBondAutoCorrel - :members: +See Also +-------- +:mod:`MDAnalysis.analysis.hydrogenbonds.hbond_autocorrel` """ -import numpy as np -import scipy.optimize - import warnings -from MDAnalysis.lib.log import ProgressBar -from MDAnalysis.lib.distances import capped_distance, calc_angles, calc_bonds -from MDAnalysis.core.groups import requires - -from MDAnalysis.due import due, Doi -due.cite(Doi("10.1063/1.4922445"), - description="Hydrogen bonding autocorrelation time", - path='MDAnalysis.analysis.hbonds.hbond_autocorrel', -) -del Doi - with warnings.catch_warnings(): warnings.simplefilter('always', DeprecationWarning) - wmsg = ("This module will be moved to " - "MDAnalysis.analysis.hydrogenbonds.hbond_autocorrel " - "in MDAnalysis 2.1.0.") + wmsg = ("This module was moved to " + "MDAnalysis.analysis.hydrogenbonds.hbond_autocorrel; " + "hbonds.hbond_autocorrel will be removed in 3.0.0.") warnings.warn(wmsg, category=DeprecationWarning) -@requires('bonds') -def find_hydrogen_donors(hydrogens): - """Returns the donor atom for each hydrogen - - Parameters - ---------- - hydrogens : AtomGroup - the hydrogens that will form hydrogen bonds - - Returns - ------- - donors : AtomGroup - the donor atom for each hydrogen, found via bond information - - - .. versionadded:: 0.20.0 - .. deprecated:: 2.0.0 - This method will be moved to analysis.hydrogenbonds.hbond_autocorrel - in MDAnalysis 2.1.0 - """ - return sum(h.bonded_atoms[0] for h in hydrogens) - - -class HydrogenBondAutoCorrel(object): - """Perform a time autocorrelation of the hydrogen bonds in the system. - - Parameters - ---------- - universe : Universe - MDAnalysis Universe that all selections belong to - hydrogens : AtomGroup - AtomGroup of Hydrogens which can form hydrogen bonds - acceptors : AtomGroup - AtomGroup of all Acceptor atoms - donors : AtomGroup - The atoms which are connected to the hydrogens. This group - must be identical in length to the hydrogen group and matched, - ie hydrogens[0] is bonded to donors[0]. - For water, this will mean a donor appears twice in this - group, once for each hydrogen. - bond_type : str - Which definition of hydrogen bond lifetime to consider, either - 'continuous' or 'intermittent'. - exclusions : ndarray, optional - Indices of Hydrogen-Acceptor pairs to be excluded. - With nH and nA Hydrogens and Acceptors, a (nH x nA) array of distances - is calculated, *exclusions* is used as a mask on this array to exclude - some pairs. - angle_crit : float, optional - The angle (in degrees) which all bonds must be greater than [130.0] - dist_crit : float, optional - The maximum distance (in Angstroms) for a hydrogen bond [3.0] - sample_time : float, optional - The amount of time, in ps, that you wish to observe hydrogen - bonds for [100] - nruns : int, optional - The number of different start points within the trajectory - to use [1] - nsamples : int, optional - Within each run, the number of frames to analyse [50] - pbc : bool, optional - Whether to consider periodic boundaries in calculations [``True``] - - ..versionchanged: 1.0.0 - ``save_results()`` method was removed. You can instead use ``np.savez()`` - on :attr:`HydrogenBondAutoCorrel.solution['time']` and - :attr:`HydrogenBondAutoCorrel.solution['results']` instead. - .. deprecated:: 2.0.0 - This class will be moved to analysis.hydrogenbonds.hbond_autocorrel - in MDAnalysis 2.1.0 - """ - - def __init__(self, universe, - hydrogens=None, acceptors=None, donors=None, - bond_type=None, - exclusions=None, - angle_crit=130.0, dist_crit=3.0, # geometric criteria - sample_time=100, # expected length of the decay in ps - time_cut=None, # cutoff time for intermittent hbonds - nruns=1, # number of times to iterate through the trajectory - nsamples=50, # number of different points to sample in a run - pbc=True): - - #warnings.warn("This class is deprecated, use analysis.hbonds.HydrogenBondAnalysis " - # "which has .autocorrelation function", - # category=DeprecationWarning) - - self.u = universe - # check that slicing is possible - try: - self.u.trajectory[0] - except Exception: - raise ValueError("Trajectory must support slicing") from None - - self.h = hydrogens - self.a = acceptors - self.d = donors - if not len(self.h) == len(self.d): - raise ValueError("Donors and Hydrogen groups must be identical " - "length. Try using `find_hydrogen_donors`.") - - self.exclusions = exclusions - if self.exclusions: - if not len(self.exclusions[0]) == len(self.exclusions[1]): - raise ValueError( - "'exclusion' must be two arrays of identical length") - - self.bond_type = bond_type - if self.bond_type not in ['continuous', 'intermittent']: - raise ValueError( - "bond_type must be either 'continuous' or 'intermittent'") - - self.a_crit = np.deg2rad(angle_crit) - self.d_crit = dist_crit - self.pbc = pbc - self.sample_time = sample_time - self.nruns = nruns - self.nsamples = nsamples - self._slice_traj(sample_time) - self.time_cut = time_cut - - self.solution = { - 'results': None, # Raw results - 'time': None, # Time axis of raw results - 'fit': None, # coefficients for fit - 'tau': None, # integral of exponential fit - 'estimate': None # y values of fit against time - } - - def _slice_traj(self, sample_time): - """Set up start and end points in the trajectory for the - different passes - """ - dt = self.u.trajectory.dt # frame step size in time - req_frames = int(sample_time / dt) # the number of frames required - - n_frames = len(self.u.trajectory) - if req_frames > n_frames: - warnings.warn("Number of required frames ({}) greater than the" - " number of frames in trajectory ({})" - .format(req_frames, n_frames), RuntimeWarning) - - numruns = self.nruns - if numruns > n_frames: - numruns = n_frames - warnings.warn("Number of runs ({}) greater than the number of" - " frames in trajectory ({})" - .format(self.nruns, n_frames), RuntimeWarning) - - self._starts = np.arange(0, n_frames, n_frames / numruns, dtype=int) - # limit stop points using clip - self._stops = np.clip(self._starts + req_frames, 0, n_frames) - - self._skip = req_frames // self.nsamples - if self._skip == 0: # If nsamples > req_frames - warnings.warn("Desired number of sample points too high, using {0}" - .format(req_frames), RuntimeWarning) - self._skip = 1 - - def run(self, force=False): - """Run all the required passes - - Parameters - ---------- - force : bool, optional - Will overwrite previous results if they exist - """ - # if results exist, don't waste any time - if self.solution['results'] is not None and not force: - return - - main_results = np.zeros_like(np.arange(self._starts[0], - self._stops[0], - self._skip), - dtype=np.float32) - # for normalising later - counter = np.zeros_like(main_results, dtype=np.float32) - - for i, (start, stop) in ProgressBar(enumerate(zip(self._starts, - self._stops)), total=self.nruns, - desc="Performing run"): - - # needed else trj seek thinks a np.int64 isn't an int? - results = self._single_run(int(start), int(stop)) - - nresults = len(results) - if nresults == len(main_results): - main_results += results - counter += 1.0 - else: - main_results[:nresults] += results - counter[:nresults] += 1.0 - - main_results /= counter - - self.solution['time'] = np.arange( - len(main_results), - dtype=np.float32) * self.u.trajectory.dt * self._skip - self.solution['results'] = main_results - - def _single_run(self, start, stop): - """Perform a single pass of the trajectory""" - self.u.trajectory[start] - - # Calculate partners at t=0 - box = self.u.dimensions if self.pbc else None - - # 2d array of all distances - pair, d = capped_distance(self.h.positions, self.a.positions, max_cutoff=self.d_crit, box=box) - if self.exclusions: - # set to above dist crit to exclude - exclude = np.column_stack((self.exclusions[0], self.exclusions[1])) - pair = np.delete(pair, np.where(pair==exclude), 0) - - hidx, aidx = np.transpose(pair) - - - a = calc_angles(self.d.positions[hidx], self.h.positions[hidx], - self.a.positions[aidx], box=box) - # from amongst those, who also satisfiess angle crit - idx2 = np.where(a > self.a_crit) - hidx = hidx[idx2] - aidx = aidx[idx2] - - nbonds = len(hidx) # number of hbonds at t=0 - results = np.zeros_like(np.arange(start, stop, self._skip), - dtype=np.float32) - - if self.time_cut: - # counter for time criteria - count = np.zeros(nbonds, dtype=np.float64) - - for i, ts in enumerate(self.u.trajectory[start:stop:self._skip]): - box = self.u.dimensions if self.pbc else None - - d = calc_bonds(self.h.positions[hidx], self.a.positions[aidx], - box=box) - a = calc_angles(self.d.positions[hidx], self.h.positions[hidx], - self.a.positions[aidx], box=box) - - winners = (d < self.d_crit) & (a > self.a_crit) - results[i] = winners.sum() - - if self.bond_type == 'continuous': - # Remove losers for continuous definition - hidx = hidx[np.where(winners)] - aidx = aidx[np.where(winners)] - elif self.bond_type == 'intermittent': - if self.time_cut: - # Add to counter of where losers are - count[~ winners] += self._skip * self.u.trajectory.dt - count[winners] = 0 # Reset timer for winners - - # Remove if you've lost too many times - # New arrays contain everything but removals - hidx = hidx[count < self.time_cut] - aidx = aidx[count < self.time_cut] - count = count[count < self.time_cut] - else: - pass - - if len(hidx) == 0: # Once everyone has lost, the fun stops - break - - results /= nbonds - - return results - - def solve(self, p_guess=None): - """Fit results to an multi exponential decay and integrate to find - characteristic time - - Parameters - ---------- - p_guess : tuple of floats, optional - Initial guess for the leastsq fit, must match the shape of the - expected coefficients - - - Continuous defition results are fitted to a double exponential with - :func:`scipy.optimize.leastsq`, intermittent definition are fit to a - triple exponential. - - The results of this fitting procedure are saved into the *fit*, - *tau* and *estimate* keywords in the solution dict. - - - *fit* contains the coefficients, (A1, tau1, tau2) or - (A1, A2, tau1, tau2, tau3) - - *tau* contains the calculated lifetime in ps for the hydrogen - bonding - - *estimate* contains the estimate provided by the fit of the time - autocorrelation function - - In addition, the output of the :func:`~scipy.optimize.leastsq` function - is saved into the solution dict - - - *infodict* - - *mesg* - - *ier* - - """ - - if self.solution['results'] is None: - raise ValueError( - "Results have not been generated use, the run method first") - - # Prevents an odd bug with leastsq where it expects - # double precision data sometimes... - time = self.solution['time'].astype(np.float64) - results = self.solution['results'].astype(np.float64) - - def within_bounds(p): - """Returns True/False if boundary conditions are met or not. - Uses length of p to detect whether it's handling continuous / - intermittent - - Boundary conditions are: - 0 < A_x < 1 - sum(A_x) < 1 - 0 < tau_x - """ - if len(p) == 3: - A1, tau1, tau2 = p - return (A1 > 0.0) & (A1 < 1.0) & \ - (tau1 > 0.0) & (tau2 > 0.0) - elif len(p) == 5: - A1, A2, tau1, tau2, tau3 = p - return (A1 > 0.0) & (A1 < 1.0) & (A2 > 0.0) & \ - (A2 < 1.0) & ((A1 + A2) < 1.0) & \ - (tau1 > 0.0) & (tau2 > 0.0) & (tau3 > 0.0) - - def err(p, x, y): - """Custom residual function, returns real residual if all - boundaries are met, else returns a large number to trick the - leastsq algorithm - """ - if within_bounds(p): - return y - self._my_solve(x, *p) - else: - return np.full_like(y, 100000) - - def double(x, A1, tau1, tau2): - """ Sum of two exponential functions """ - A2 = 1 - A1 - return A1 * np.exp(-x / tau1) + A2 * np.exp(-x / tau2) - - def triple(x, A1, A2, tau1, tau2, tau3): - """ Sum of three exponential functions """ - A3 = 1 - (A1 + A2) - return A1 * np.exp(-x / tau1) + A2 * np.exp(-x / tau2) + A3 * np.exp(-x / tau3) +from MDAnalysis.lib.util import deprecate - if self.bond_type == 'continuous': - self._my_solve = double +from ..hydrogenbonds import hbond_autocorrel - if p_guess is None: - p_guess = (0.5, 10 * self.sample_time, self.sample_time) - p, cov, infodict, mesg, ier = scipy.optimize.leastsq( - err, p_guess, args=(time, results), full_output=True) - self.solution['fit'] = p - A1, tau1, tau2 = p - A2 = 1 - A1 - self.solution['tau'] = A1 * tau1 + A2 * tau2 - else: - self._my_solve = triple +find_hydrogen_donors = deprecate(hbond_autocorrel.find_hydrogen_donors, + release="2.0.0", remove="3.0.0", + message="The function was moved to " + "MDAnalysis.analysis.hbonds.hbond_autocorrel.") - if p_guess is None: - p_guess = (0.33, 0.33, 10 * self.sample_time, - self.sample_time, 0.1 * self.sample_time) +HydrogenBondAutoCorrel = deprecate(hbond_autocorrel.HydrogenBondAutoCorrel, + release="2.0.0", remove="3.0.0", + message="The class was moved to " + "MDAnalysis.analysis.hbonds.hbond_autocorrel.") - p, cov, infodict, mesg, ier = scipy.optimize.leastsq( - err, p_guess, args=(time, results), full_output=True) - self.solution['fit'] = p - A1, A2, tau1, tau2, tau3 = p - A3 = 1 - A1 - A2 - self.solution['tau'] = A1 * tau1 + A2 * tau2 + A3 * tau3 - self.solution['infodict'] = infodict - self.solution['mesg'] = mesg - self.solution['ier'] = ier - if ier in [1, 2, 3, 4]: # solution found if ier is one of these values - self.solution['estimate'] = self._my_solve( - self.solution['time'], *p) - else: - warnings.warn("Solution to results not found", RuntimeWarning) - def __repr__(self): - return ("" - "".format(btype=self.bond_type, n=len(self.h))) diff --git a/package/MDAnalysis/analysis/hydrogenbonds/__init__.py b/package/MDAnalysis/analysis/hydrogenbonds/__init__.py index 5660d823546..9476d064138 100644 --- a/package/MDAnalysis/analysis/hydrogenbonds/__init__.py +++ b/package/MDAnalysis/analysis/hydrogenbonds/__init__.py @@ -29,3 +29,4 @@ from .hbond_analysis import HydrogenBondAnalysis from .wbridge_analysis import WaterBridgeAnalysis +from .hbond_autocorrel import HydrogenBondAutoCorrel, find_hydrogen_donors diff --git a/package/MDAnalysis/analysis/hydrogenbonds/hbond_autocorrel.py b/package/MDAnalysis/analysis/hydrogenbonds/hbond_autocorrel.py new file mode 100644 index 00000000000..c47df1e1e34 --- /dev/null +++ b/package/MDAnalysis/analysis/hydrogenbonds/hbond_autocorrel.py @@ -0,0 +1,613 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 +# +# MDAnalysis --- https://www.mdanalysis.org +# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, +# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. +# MDAnalysis: A Python package for the rapid analysis of molecular dynamics +# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th +# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. +# doi: 10.25080/majora-629e541a-00e +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 +# +"""\ +Hydrogen bond autocorrelation --- :mod:`MDAnalysis.analysis.hydrogenbonds.hbond_autocorrel` +=========================================================================================== + +:Author: Richard J. Gowers +:Year: 2014 +:Copyright: GNU Public License v3 + +.. versionadded:: 0.9.0 + +.. versionchanged:: 2.0.0 + + Module moved from :mod:`MDAnalysis.analysis.hbonds.hbond_autocorrel` to + :mod:`MDAnalysis.analysis.hydrogenbonds.hbond_autocorrel`. + + +Description +----------- + +Calculates the time autocorrelation function, :math:`C_x(t)`, for the hydrogen +bonds in the selections passed to it. The population of hydrogen bonds at a +given startpoint, :math:`t_0`, is evaluated based on geometric criteria and +then the lifetime of these bonds is monitored over time. Multiple passes +through the trajectory are used to build an average of the behaviour. + +.. math:: + C_x(t) = \\left \\langle \\frac{h_{ij}(t_0) h_{ij}(t_0 + t)}{h_{ij}(t_0)^2} \\right\\rangle + +The subscript :math:`x` refers to the definition of lifetime being used, either +continuous or intermittent. The continuous definition measures the time that +a particular hydrogen bond remains continuously attached, whilst the +intermittent definition allows a bond to break and then subsequently reform and +be counted again. The relevent lifetime, :math:`\\tau_x`, can then be found +via integration of this function + +.. math:: + \\tau_x = \\int_0^\\infty C_x(t) dt` + +For this, the observed behaviour is fitted to a multi exponential function, +using 2 exponents for the continuous lifetime and 3 for the intermittent +lifetime. + + :math:`C_x(t) = A_1 \\exp( - t / \\tau_1) + + A_2 \\exp( - t / \\tau_2) + [+ A_3 \\exp( - t / \\tau_3)]` + +Where the final pre expoential factor :math:`A_n` is subject to the condition: + + :math:`A_n = 1 - \\sum\\limits_{i=1}^{n-1} A_i` + +For further details see [Gowers2015]_. + +.. rubric:: References + +.. [Gowers2015] Richard J. Gowers and Paola Carbone, + A multiscale approach to model hydrogen bonding: The case of polyamide + The Journal of Chemical Physics, 142, 224907 (2015), + DOI:http://dx.doi.org/10.1063/1.4922445 + +Input +----- + +Three AtomGroup selections representing the **hydrogens**, **donors** and +**acceptors** that you wish to analyse. Note that the **hydrogens** and +**donors** selections must be aligned, that is **hydrogens[0]** and +**donors[0]** must represent a bonded pair. For systems such as water, +this will mean that each oxygen appears twice in the **donors** AtomGroup. +The function :func:`find_hydrogen_donors` can be used to construct the donor +AtomGroup +:: + + import MDAnalysis as mda + from MDAnalysis.analysis import hbonds + from MDAnalysis.tests.datafiles import waterPSF, waterDCD + u = mda.Universe(waterPSF, waterDCD) + hydrogens = u.select_atoms('name H*') + donors = hbonds.find_hydrogen_donors(hydrogens) + + +Note that this requires the Universe to have bond information. If this isn't +present in the topology file, the +:meth:`MDAnalysis.core.groups.AtomGroup.guess_bonds` method can be used +as so +:: + + import MDAnalysis as mda + from MDAnalysis.analysis import hbonds + from MDAnalysis.tests.datafiles import GRO + # we could load the Universe with guess_bonds=True + # but this would guess **all** bonds + u = mda.Universe(GRO) + water = u.select_atoms('resname SOL and not type DUMMY') + # guess bonds only within our water atoms + # this adds the bond information directly to the Universe + water.guess_bonds() + hydrogens = water.select_atoms('type H') + # this is now possible as we guessed the bonds + donors = hbonds.find_hydrogen_donors(hydrogens) + + +The keyword **exclusions** allows a tuple of array addresses to be provided, +(Hidx, Aidx),these pairs of hydrogen-acceptor are then not permitted to be +counted as part of the analysis. This could be used to exclude the +consideration of hydrogen bonds within the same functional group, or to perform +analysis on strictly intermolecular hydrogen bonding. + +Hydrogen bonds are defined on the basis of geometric criteria; a +Hydrogen-Acceptor distance of less then **dist_crit** and a +Donor-Hydrogen-Acceptor angle of greater than **angle_crit**. + +The length of trajectory to analyse in ps, **sample_time**, is used to choose +what length to analyse. + +Multiple passes, controlled by the keyword **nruns**, through the trajectory +are performed and an average calculated. For each pass, **nsamples** number +of points along the run are calculated. + + +Output +------ + +All results of the analysis are available through the *solution* attribute. +This is a dictionary with the following keys + +- *results* The raw results of the time autocorrelation function. +- *time* Time axis, in ps, for the results. +- *fit* Results of the exponential curve fitting procedure. For the + *continuous* lifetime these are (A1, tau1, tau2), for the + *intermittent* lifetime these are (A1, A2, tau1, tau2, tau3). +- *tau* Calculated time constant from the fit. +- *estimate* Estimated values generated by the calculated fit. + +The *results* and *time* values are only filled after the :meth:`run` method, +*fit*, *tau* and *estimate* are filled after the :meth:`solve` method has been +used. + + +Worked Example for Polyamide +---------------------------- + +This example finds the continuous hydrogen bond lifetime between N-H..O in a +polyamide system. This will use the default geometric definition for hydrogen +bonds of length 3.0 Å and angle of 130 degrees. +It will observe a window of 2.0 ps (`sample_time`) and try to gather 1000 +sample point within this time window (this relies upon the trajectory being +sampled frequently enough). This process is repeated for 20 different start +points to build a better average. + +:: + + import MDAnalysis as mda + from MDAnalysis.analysis import hbonds + from MDAnalysis.tests.datafiles import TRZ_psf, TRZ + import matplotlib.pyplot as plt + # load system + u = mda.Universe(TRZ_psf, TRZ) + # select atoms of interest into AtomGroups + H = u.select_atoms('name Hn') + N = u.select_atoms('name N') + O = u.select_atoms('name O') + # create analysis object + hb_ac = hbonds.HydrogenBondAutoCorrel(u, + acceptors=O, hydrogens=H, donors=N, + bond_type='continuous', + sample_time=2.0, nsamples=1000, nruns=20) + # call run to gather results + hb_ac.run() + # attempt to fit results to exponential equation + hb_ac.solve() + # grab results from inside object + tau = hb_ac.solution['tau'] + time = hb_ac.solution['time'] + results = hb_ac.solution['results'] + estimate = hb_ac.solution['estimate'] + # plot to check! + plt.plot(time, results, 'ro') + plt.plot(time, estimate) + plt.show() + + +Functions and Classes +--------------------- + +.. autofunction:: find_hydrogen_donors + +.. autoclass:: HydrogenBondAutoCorrel + :members: + +""" +import numpy as np +import scipy.optimize + +import warnings + +from MDAnalysis.lib.log import ProgressBar +from MDAnalysis.lib.distances import capped_distance, calc_angles, calc_bonds +from MDAnalysis.core.groups import requires + +from MDAnalysis.due import due, Doi +due.cite(Doi("10.1063/1.4922445"), + description="Hydrogen bonding autocorrelation time", + path='MDAnalysis.analysis.hbonds.hbond_autocorrel', +) +del Doi + + +@requires('bonds') +def find_hydrogen_donors(hydrogens): + """Returns the donor atom for each hydrogen + + Parameters + ---------- + hydrogens : AtomGroup + the hydrogens that will form hydrogen bonds + + Returns + ------- + donors : AtomGroup + the donor atom for each hydrogen, found via bond information + + + .. versionadded:: 0.20.0 + """ + return sum(h.bonded_atoms[0] for h in hydrogens) + + +class HydrogenBondAutoCorrel(object): + """Perform a time autocorrelation of the hydrogen bonds in the system. + + Parameters + ---------- + universe : Universe + MDAnalysis Universe that all selections belong to + hydrogens : AtomGroup + AtomGroup of Hydrogens which can form hydrogen bonds + acceptors : AtomGroup + AtomGroup of all Acceptor atoms + donors : AtomGroup + The atoms which are connected to the hydrogens. This group + must be identical in length to the hydrogen group and matched, + ie hydrogens[0] is bonded to donors[0]. + For water, this will mean a donor appears twice in this + group, once for each hydrogen. + bond_type : str + Which definition of hydrogen bond lifetime to consider, either + 'continuous' or 'intermittent'. + exclusions : ndarray, optional + Indices of Hydrogen-Acceptor pairs to be excluded. + With nH and nA Hydrogens and Acceptors, a (nH x nA) array of distances + is calculated, *exclusions* is used as a mask on this array to exclude + some pairs. + angle_crit : float, optional + The angle (in degrees) which all bonds must be greater than [130.0] + dist_crit : float, optional + The maximum distance (in Angstroms) for a hydrogen bond [3.0] + sample_time : float, optional + The amount of time, in ps, that you wish to observe hydrogen + bonds for [100] + nruns : int, optional + The number of different start points within the trajectory + to use [1] + nsamples : int, optional + Within each run, the number of frames to analyse [50] + pbc : bool, optional + Whether to consider periodic boundaries in calculations [``True``] + + ..versionchanged: 1.0.0 + ``save_results()`` method was removed. You can instead use ``np.savez()`` + on :attr:`HydrogenBondAutoCorrel.solution['time']` and + :attr:`HydrogenBondAutoCorrel.solution['results']` instead. + """ + + def __init__(self, universe, + hydrogens=None, acceptors=None, donors=None, + bond_type=None, + exclusions=None, + angle_crit=130.0, dist_crit=3.0, # geometric criteria + sample_time=100, # expected length of the decay in ps + time_cut=None, # cutoff time for intermittent hbonds + nruns=1, # number of times to iterate through the trajectory + nsamples=50, # number of different points to sample in a run + pbc=True): + + #warnings.warn("This class is deprecated, use analysis.hbonds.HydrogenBondAnalysis " + # "which has .autocorrelation function", + # category=DeprecationWarning) + + self.u = universe + # check that slicing is possible + try: + self.u.trajectory[0] + except Exception: + raise ValueError("Trajectory must support slicing") from None + + self.h = hydrogens + self.a = acceptors + self.d = donors + if not len(self.h) == len(self.d): + raise ValueError("Donors and Hydrogen groups must be identical " + "length. Try using `find_hydrogen_donors`.") + + self.exclusions = exclusions + if self.exclusions: + if not len(self.exclusions[0]) == len(self.exclusions[1]): + raise ValueError( + "'exclusion' must be two arrays of identical length") + + self.bond_type = bond_type + if self.bond_type not in ['continuous', 'intermittent']: + raise ValueError( + "bond_type must be either 'continuous' or 'intermittent'") + + self.a_crit = np.deg2rad(angle_crit) + self.d_crit = dist_crit + self.pbc = pbc + self.sample_time = sample_time + self.nruns = nruns + self.nsamples = nsamples + self._slice_traj(sample_time) + self.time_cut = time_cut + + self.solution = { + 'results': None, # Raw results + 'time': None, # Time axis of raw results + 'fit': None, # coefficients for fit + 'tau': None, # integral of exponential fit + 'estimate': None # y values of fit against time + } + + def _slice_traj(self, sample_time): + """Set up start and end points in the trajectory for the + different passes + """ + dt = self.u.trajectory.dt # frame step size in time + req_frames = int(sample_time / dt) # the number of frames required + + n_frames = len(self.u.trajectory) + if req_frames > n_frames: + warnings.warn("Number of required frames ({}) greater than the" + " number of frames in trajectory ({})" + .format(req_frames, n_frames), RuntimeWarning) + + numruns = self.nruns + if numruns > n_frames: + numruns = n_frames + warnings.warn("Number of runs ({}) greater than the number of" + " frames in trajectory ({})" + .format(self.nruns, n_frames), RuntimeWarning) + + self._starts = np.arange(0, n_frames, n_frames / numruns, dtype=int) + # limit stop points using clip + self._stops = np.clip(self._starts + req_frames, 0, n_frames) + + self._skip = req_frames // self.nsamples + if self._skip == 0: # If nsamples > req_frames + warnings.warn("Desired number of sample points too high, using {0}" + .format(req_frames), RuntimeWarning) + self._skip = 1 + + def run(self, force=False): + """Run all the required passes + + Parameters + ---------- + force : bool, optional + Will overwrite previous results if they exist + """ + # if results exist, don't waste any time + if self.solution['results'] is not None and not force: + return + + main_results = np.zeros_like(np.arange(self._starts[0], + self._stops[0], + self._skip), + dtype=np.float32) + # for normalising later + counter = np.zeros_like(main_results, dtype=np.float32) + + for i, (start, stop) in ProgressBar(enumerate(zip(self._starts, + self._stops)), total=self.nruns, + desc="Performing run"): + + # needed else trj seek thinks a np.int64 isn't an int? + results = self._single_run(int(start), int(stop)) + + nresults = len(results) + if nresults == len(main_results): + main_results += results + counter += 1.0 + else: + main_results[:nresults] += results + counter[:nresults] += 1.0 + + main_results /= counter + + self.solution['time'] = np.arange( + len(main_results), + dtype=np.float32) * self.u.trajectory.dt * self._skip + self.solution['results'] = main_results + + def _single_run(self, start, stop): + """Perform a single pass of the trajectory""" + self.u.trajectory[start] + + # Calculate partners at t=0 + box = self.u.dimensions if self.pbc else None + + # 2d array of all distances + pair, d = capped_distance(self.h.positions, self.a.positions, max_cutoff=self.d_crit, box=box) + if self.exclusions: + # set to above dist crit to exclude + exclude = np.column_stack((self.exclusions[0], self.exclusions[1])) + pair = np.delete(pair, np.where(pair==exclude), 0) + + hidx, aidx = np.transpose(pair) + + + a = calc_angles(self.d.positions[hidx], self.h.positions[hidx], + self.a.positions[aidx], box=box) + # from amongst those, who also satisfiess angle crit + idx2 = np.where(a > self.a_crit) + hidx = hidx[idx2] + aidx = aidx[idx2] + + nbonds = len(hidx) # number of hbonds at t=0 + results = np.zeros_like(np.arange(start, stop, self._skip), + dtype=np.float32) + + if self.time_cut: + # counter for time criteria + count = np.zeros(nbonds, dtype=np.float64) + + for i, ts in enumerate(self.u.trajectory[start:stop:self._skip]): + box = self.u.dimensions if self.pbc else None + + d = calc_bonds(self.h.positions[hidx], self.a.positions[aidx], + box=box) + a = calc_angles(self.d.positions[hidx], self.h.positions[hidx], + self.a.positions[aidx], box=box) + + winners = (d < self.d_crit) & (a > self.a_crit) + results[i] = winners.sum() + + if self.bond_type == 'continuous': + # Remove losers for continuous definition + hidx = hidx[np.where(winners)] + aidx = aidx[np.where(winners)] + elif self.bond_type == 'intermittent': + if self.time_cut: + # Add to counter of where losers are + count[~ winners] += self._skip * self.u.trajectory.dt + count[winners] = 0 # Reset timer for winners + + # Remove if you've lost too many times + # New arrays contain everything but removals + hidx = hidx[count < self.time_cut] + aidx = aidx[count < self.time_cut] + count = count[count < self.time_cut] + else: + pass + + if len(hidx) == 0: # Once everyone has lost, the fun stops + break + + results /= nbonds + + return results + + def solve(self, p_guess=None): + """Fit results to an multi exponential decay and integrate to find + characteristic time + + Parameters + ---------- + p_guess : tuple of floats, optional + Initial guess for the leastsq fit, must match the shape of the + expected coefficients + + + Continuous defition results are fitted to a double exponential with + :func:`scipy.optimize.leastsq`, intermittent definition are fit to a + triple exponential. + + The results of this fitting procedure are saved into the *fit*, + *tau* and *estimate* keywords in the solution dict. + + - *fit* contains the coefficients, (A1, tau1, tau2) or + (A1, A2, tau1, tau2, tau3) + - *tau* contains the calculated lifetime in ps for the hydrogen + bonding + - *estimate* contains the estimate provided by the fit of the time + autocorrelation function + + In addition, the output of the :func:`~scipy.optimize.leastsq` function + is saved into the solution dict + + - *infodict* + - *mesg* + - *ier* + + """ + + if self.solution['results'] is None: + raise ValueError( + "Results have not been generated use, the run method first") + + # Prevents an odd bug with leastsq where it expects + # double precision data sometimes... + time = self.solution['time'].astype(np.float64) + results = self.solution['results'].astype(np.float64) + + def within_bounds(p): + """Returns True/False if boundary conditions are met or not. + Uses length of p to detect whether it's handling continuous / + intermittent + + Boundary conditions are: + 0 < A_x < 1 + sum(A_x) < 1 + 0 < tau_x + """ + if len(p) == 3: + A1, tau1, tau2 = p + return (A1 > 0.0) & (A1 < 1.0) & \ + (tau1 > 0.0) & (tau2 > 0.0) + elif len(p) == 5: + A1, A2, tau1, tau2, tau3 = p + return (A1 > 0.0) & (A1 < 1.0) & (A2 > 0.0) & \ + (A2 < 1.0) & ((A1 + A2) < 1.0) & \ + (tau1 > 0.0) & (tau2 > 0.0) & (tau3 > 0.0) + + def err(p, x, y): + """Custom residual function, returns real residual if all + boundaries are met, else returns a large number to trick the + leastsq algorithm + """ + if within_bounds(p): + return y - self._my_solve(x, *p) + else: + return np.full_like(y, 100000) + + def double(x, A1, tau1, tau2): + """ Sum of two exponential functions """ + A2 = 1 - A1 + return A1 * np.exp(-x / tau1) + A2 * np.exp(-x / tau2) + + def triple(x, A1, A2, tau1, tau2, tau3): + """ Sum of three exponential functions """ + A3 = 1 - (A1 + A2) + return A1 * np.exp(-x / tau1) + A2 * np.exp(-x / tau2) + A3 * np.exp(-x / tau3) + + if self.bond_type == 'continuous': + self._my_solve = double + + if p_guess is None: + p_guess = (0.5, 10 * self.sample_time, self.sample_time) + + p, cov, infodict, mesg, ier = scipy.optimize.leastsq( + err, p_guess, args=(time, results), full_output=True) + self.solution['fit'] = p + A1, tau1, tau2 = p + A2 = 1 - A1 + self.solution['tau'] = A1 * tau1 + A2 * tau2 + else: + self._my_solve = triple + + if p_guess is None: + p_guess = (0.33, 0.33, 10 * self.sample_time, + self.sample_time, 0.1 * self.sample_time) + + p, cov, infodict, mesg, ier = scipy.optimize.leastsq( + err, p_guess, args=(time, results), full_output=True) + self.solution['fit'] = p + A1, A2, tau1, tau2, tau3 = p + A3 = 1 - A1 - A2 + self.solution['tau'] = A1 * tau1 + A2 * tau2 + A3 * tau3 + + self.solution['infodict'] = infodict + self.solution['mesg'] = mesg + self.solution['ier'] = ier + + if ier in [1, 2, 3, 4]: # solution found if ier is one of these values + self.solution['estimate'] = self._my_solve( + self.solution['time'], *p) + else: + warnings.warn("Solution to results not found", RuntimeWarning) + + def __repr__(self): + return ("" + "".format(btype=self.bond_type, n=len(self.h))) diff --git a/testsuite/MDAnalysisTests/analysis/test_hydrogenbondautocorrel.py b/testsuite/MDAnalysisTests/analysis/test_hydrogenbondautocorrel.py index d46ab54ef35..de2993fde8c 100644 --- a/testsuite/MDAnalysisTests/analysis/test_hydrogenbondautocorrel.py +++ b/testsuite/MDAnalysisTests/analysis/test_hydrogenbondautocorrel.py @@ -30,12 +30,11 @@ from numpy.testing import assert_almost_equal import numpy as np from unittest import mock -import os from importlib import reload import MDAnalysis as mda -from MDAnalysis.analysis import hbonds -from MDAnalysis.analysis.hbonds import HydrogenBondAutoCorrel as HBAC +from MDAnalysis.analysis.hydrogenbonds import (HydrogenBondAutoCorrel as HBAC, + find_hydrogen_donors) class TestHydrogenBondAutocorrel(object): @@ -282,7 +281,7 @@ def test_find_donors(): H = u.select_atoms('name H*') - D = hbonds.find_hydrogen_donors(H) + D = find_hydrogen_donors(H) assert len(H) == len(D) # check each O is bonded to the corresponding H @@ -294,11 +293,4 @@ def test_donors_nobonds(): u = mda.Universe(XYZ_mini) with pytest.raises(mda.NoDataError): - hbonds.find_hydrogen_donors(u.atoms) - - -def test_moved_module_warning(): - wmsg = ("This module will be moved to " - "MDAnalysis.analysis.hydrogenbonds.hbond_autocorrel") - with pytest.warns(DeprecationWarning, match=wmsg): - reload(hbonds.hbond_autocorrel) + find_hydrogen_donors(u.atoms) diff --git a/testsuite/MDAnalysisTests/analysis/test_hydrogenbondautocorrel_deprecated.py b/testsuite/MDAnalysisTests/analysis/test_hydrogenbondautocorrel_deprecated.py new file mode 100644 index 00000000000..c51df85f319 --- /dev/null +++ b/testsuite/MDAnalysisTests/analysis/test_hydrogenbondautocorrel_deprecated.py @@ -0,0 +1,331 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 fileencoding=utf-8 +# +# MDAnalysis --- https://www.mdanalysis.org +# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, +# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. +# MDAnalysis: A Python package for the rapid analysis of molecular dynamics +# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th +# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. +# doi: 10.25080/majora-629e541a-00e +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 +# +import pytest + +from MDAnalysisTests.datafiles import ( + TRZ, TRZ_psf, + waterPSF, waterDCD, + XYZ_mini, +) +from numpy.testing import assert_almost_equal +import numpy as np +from unittest import mock +from importlib import reload + +import MDAnalysis as mda +from MDAnalysis.analysis import hbonds +from MDAnalysis.analysis.hbonds import HydrogenBondAutoCorrel as HBAC + +@pytest.fixture(scope="module") +def u_water(): + return mda.Universe(waterPSF, waterDCD) + +class TestHydrogenBondAutocorrel(object): + @pytest.fixture() + def u(self): + return mda.Universe(TRZ_psf, TRZ) + + @pytest.fixture() + def hydrogens(self, u): + return u.atoms.select_atoms('name Hn') + + @pytest.fixture() + def nitrogens(self, u): + return u.atoms.select_atoms('name N') + + @pytest.fixture() + def oxygens(self, u): + return u.atoms.select_atoms('name O') + + # regression tests for different conditions + def test_continuous(self, u, hydrogens, oxygens, nitrogens): + hbond = HBAC(u, + hydrogens=hydrogens, + acceptors=oxygens, + donors=nitrogens, + bond_type='continuous', + sample_time=0.06, + ) + hbond.run() + + assert_almost_equal( + hbond.solution['results'], + np.array([ 1. , 0.92668623, 0.83137828, + 0.74486804, 0.67741936, 0.60263932], + dtype=np.float32) + ) + + def test_continuous_excl(self, u, hydrogens, oxygens, nitrogens): + hbond = HBAC(u, + hydrogens=hydrogens, + acceptors=oxygens, + donors=nitrogens, + bond_type='continuous', + exclusions=(np.arange(len(hydrogens)), np.array( + range(len(oxygens)))), + sample_time=0.06, + ) + hbond.run() + + assert_almost_equal( + hbond.solution['results'], + np.array([ 1. , 0.92668623, 0.83137828, + 0.74486804, 0.67741936, 0.60263932], + dtype=np.float32) + ) + + + def test_intermittent(self, u, hydrogens, oxygens, nitrogens): + hbond = HBAC(u, + hydrogens=hydrogens, + acceptors=oxygens, + donors=nitrogens, + bond_type='intermittent', + sample_time=0.06, + ) + hbond.run() + + assert_almost_equal( + hbond.solution['results'], + np.array([ 1. , 0.92668623, 0.84310848, + 0.79325515, 0.76392961, 0.72287393], + dtype=np.float32) + ) + + + def test_intermittent_timecut(self, u, hydrogens, oxygens, nitrogens): + hbond = HBAC(u, + hydrogens=hydrogens, + acceptors=oxygens, + donors=nitrogens, + bond_type='intermittent', + time_cut=0.01, # time cut at traj.dt == continuous + sample_time=0.06, + ) + hbond.run() + + assert_almost_equal( + hbond.solution['results'], + np.array([ 1. , 0.92668623, 0.83137828, + 0.74486804, 0.67741936, 0.60263932], + dtype=np.float32) + ) + + def test_intermittent_excl(self, u, hydrogens, oxygens, nitrogens): + hbond = HBAC(u, + hydrogens=hydrogens, + acceptors=oxygens, + donors=nitrogens, + bond_type='intermittent', + exclusions=(np.arange(len(hydrogens)), np.array( + range(len(oxygens)))), + sample_time=0.06, + ) + hbond.run() + + assert_almost_equal( + hbond.solution['results'], + np.array([ 1. , 0.92668623, 0.84310848, + 0.79325515, 0.76392961, 0.72287393], + dtype=np.float32) + ) + + # For `solve` the test trajectories aren't long enough + # So spoof the results and check that solver finds solution + def test_solve_continuous(self, u, hydrogens, oxygens, nitrogens): + hbond = HBAC(u, + hydrogens=hydrogens, + acceptors=oxygens, + donors=nitrogens, + bond_type='continuous', + sample_time=0.06, + ) + + def actual_function_cont(t): + A1 = 0.75 + A2 = 0.25 + tau1 = 0.5 + tau2 = 0.1 + return A1 * np.exp(-t/tau1) + A2 * np.exp(-t/tau2) + hbond.solution['time'] = time = np.arange(0, 0.06, 0.001) + hbond.solution['results'] = actual_function_cont(time) + + hbond.solve() + + assert_almost_equal( + hbond.solution['fit'], + np.array([0.75, 0.5, 0.1]), + ) + + def test_solve_intermittent(self, u, hydrogens, oxygens, nitrogens): + hbond = HBAC(u, + hydrogens=hydrogens, + acceptors=oxygens, + donors=nitrogens, + bond_type='intermittent', + sample_time=0.06, + ) + + def actual_function_int(t): + A1 = 0.33 + A2 = 0.33 + A3 = 0.34 + tau1 = 5 + tau2 = 1 + tau3 = 0.1 + return A1 * np.exp(-t/tau1) + A2 * np.exp(-t/tau2) + A3 * np.exp(-t/tau3) + hbond.solution['time'] = time = np.arange(0, 6.0, 0.01) + hbond.solution['results'] = actual_function_int(time) + + hbond.solve() + + assert_almost_equal( + hbond.solution['fit'], + np.array([0.33, 0.33, 5, 1, 0.1]), + ) + + # setup errors + def test_wronglength_DA(self, u, hydrogens, oxygens, nitrogens): + with pytest.raises(ValueError): + HBAC(u, + hydrogens=hydrogens[:-1], + acceptors=oxygens, + donors=nitrogens, + bond_type='intermittent', + exclusions=(np.arange(len(hydrogens)), np.array( + range(len(oxygens)))), + sample_time=0.06, + ) + + def test_exclusions(self, u, hydrogens, oxygens, nitrogens): + excl_list = (np.array(range(len(hydrogens))), np.array( + range(len(oxygens)))) + excl_list2 = excl_list[0], excl_list[1][:-1] + with pytest.raises(ValueError): + HBAC(u, + hydrogens=hydrogens, + acceptors=oxygens, + donors=nitrogens, + bond_type='intermittent', + exclusions=excl_list2, + sample_time=0.06, + ) + + def test_bond_type_VE(self, u, hydrogens, oxygens, nitrogens): + with pytest.raises(ValueError): + HBAC(u, + hydrogens=hydrogens, + acceptors=oxygens, + donors=nitrogens, + bond_type='marzipan', + exclusions=(np.arange(len(hydrogens)), np.array(range( + len(oxygens)))), + sample_time=0.06, + ) + + def test_solve_before_run_VE(self, u, hydrogens, oxygens, nitrogens): + hbond = HBAC(u, + hydrogens=hydrogens, + acceptors=oxygens, + donors=nitrogens, + bond_type='continuous', + sample_time=0.06, + ) + with pytest.raises(ValueError): + hbond.solve() + + @mock.patch('MDAnalysis.coordinates.TRZ.TRZReader._read_frame') + def test_unslicable_traj_VE(self, mock_read, u, hydrogens, oxygens, nitrogens): + mock_read.side_effect = TypeError + + with pytest.raises(ValueError): + HBAC( + u, + hydrogens=hydrogens, + acceptors=oxygens, + donors=nitrogens, + bond_type='continuous', + sample_time=0.06 + ) + + def test_repr(self, u, hydrogens, oxygens, nitrogens): + hbond = HBAC(u, + hydrogens=hydrogens, + acceptors=oxygens, + donors=nitrogens, + bond_type='continuous', + sample_time=0.06, + ) + assert isinstance(repr(hbond), str) + + def test_deprecation_warning(self, u, hydrogens, oxygens, nitrogens): + wmsg = ('`HydrogenBondAutoCorrel` is deprecated!\n' + '`HydrogenBondAutoCorrel` will be removed in release 3.0.0.\n' + 'The class was moved to MDAnalysis.analysis.hbonds.hbond_autocorrel.') + with pytest.warns(DeprecationWarning, match=wmsg): + HBAC( + u, + hydrogens=hydrogens, + acceptors=oxygens, + donors=nitrogens, + bond_type='continuous', + sample_time=0.06 + ) + + + +def test_find_donors(u_water): + H = u_water.select_atoms('name H*') + + D = hbonds.find_hydrogen_donors(H) + + assert len(H) == len(D) + # check each O is bonded to the corresponding H + for h_atom, o_atom in zip(H, D): + assert o_atom in h_atom.bonded_atoms + + +def test_donors_nobonds(): + u = mda.Universe(XYZ_mini) + + with pytest.raises(mda.NoDataError): + hbonds.find_hydrogen_donors(u.atoms) + + +def test_find_hydrogen_donors_deprecation_warning(u_water): + H = u_water.select_atoms('name H*') + wmsg = ('`find_hydrogen_donors` is deprecated!\n' + '`find_hydrogen_donors` will be removed in release 3.0.0.\n' + 'The function was moved to MDAnalysis.analysis.hbonds.hbond_autocorrel.') + with pytest.warns(DeprecationWarning, match=wmsg): + hbonds.find_hydrogen_donors(H) + + +def test_moved_module_warning(): + wmsg = ("This module was moved to " + "MDAnalysis.analysis.hydrogenbonds.hbond_autocorrel; " + "hbonds.hbond_autocorrel will be removed in 3.0.0.") + with pytest.warns(DeprecationWarning, match=wmsg): + reload(hbonds.hbond_autocorrel) + + From 214548395bc0f2d85c387c5235921ef7d598f816 Mon Sep 17 00:00:00 2001 From: Oliver Beckstein Date: Wed, 5 May 2021 16:56:19 -0700 Subject: [PATCH 6/7] include hydrogenbonds.hbond_autocorrel in docs - make hydrogenbonds.hbond_autocorrel primary entry for docs - add entry for deprecated module at bottom of Hydrogen bonding docs - make clear in CHANGELOG that hbonds.hbond_autocorrel will be removed in 3.0.0 --- package/CHANGELOG | 11 ++++++----- .../MDAnalysis/analysis/hbonds/hbond_autocorrel.py | 4 ++-- .../documentation_pages/analysis/hbond_autocorrel.rst | 2 +- .../analysis/hbond_autocorrel_deprecated.rst | 1 + .../source/documentation_pages/analysis_modules.rst | 7 +++++++ 5 files changed, 17 insertions(+), 8 deletions(-) create mode 100644 package/doc/sphinx/source/documentation_pages/analysis/hbond_autocorrel_deprecated.rst diff --git a/package/CHANGELOG b/package/CHANGELOG index ff3184df4e8..a8531f85f3d 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -36,7 +36,7 @@ Fixes removed (Issue #3209) * Fixed 'sphzone', 'sphlayer', 'cyzone', and 'cylayer' to return empty if the zone/layer is empty, consistent with 'around' (Issue #2915) - * A Universe created from an ROMol with no atoms returns now a Universe + * A Universe created from an ROMol with no atoms returns now a Universe with 0 atoms (Issue #3142) * PDBParser will check for the presence of the chainID attribute of an atom group and use these values instead of just using the end of segid. @@ -113,10 +113,10 @@ Enhancements * Switch GNMAnalysis to AnalysisBase (Issue #3243) * Adds python 3.9 support (Issue #2974, PR #3027, #3245) * Added an MDAnalysis shields.io badge to the README (Issue #3227, PR #3229) - * Added sort method to the atomgroup (Issue #2976, PR #3188) + * Added sort method to the atomgroup (Issue #2976, PR #3188) * ITPParser now reads [ atomtypes ] sections in ITP files, used for charges and masses not defined in the [ atoms ] sections - * Add `set_dimensions` transformation class for setting constant + * Add `set_dimensions` transformation class for setting constant box dimensions for all timesteps in trajectory (Issue #2691) * Added a ValueError raised when not given a gridcenter while providing grid dimensions to DensityAnalysis, also added @@ -175,7 +175,7 @@ Enhancements Changes * Deprecated analysis.hbonds.hbond_analysis has been removed in favour of analysis.hydrogenbonds.hbond_analysis (Issues #2739, #2746) - * Fixed inaccurate docstring inside the RMSD class (Issue #2796, PR #3134) + * Fixed inaccurate docstring inside the RMSD class (Issue #2796, PR #3134) * TPRParser now loads TPR files with `tpr_resid_from_one=True` by default, which starts TPR resid indexing from 1 (instead of 0 as in 1.x) (Issue #2364, PR #3152) * Introduces encore specific C compiler arguments to allow for lowering of @@ -220,7 +220,8 @@ Changes Deprecations * The analysis.hbonds.hbond_autocorrel code has been moved to - analysis.hydrogenbonds.hbond_autocorrel (PR #3258) + analysis.hydrogenbonds.hbond_autocorrel and will be removed + from analysis.hbonds in 3.0.0 (PR #3258) * In 2.1.0 the TRZReader will default to a dt of 1.0 ps when failing to read it from the input TRZ trajectory. diff --git a/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py b/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py index f8329f4a2fa..a09f655d3d8 100644 --- a/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py +++ b/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py @@ -20,8 +20,8 @@ # MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. # J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 # -"""Hydrogen bond autocorrelation --- :mod:`MDAnalysis.analysis.hbonds.hbond_autocorrel` -==================================================================================== +"""Hydrogen bond autocorrelation --- :mod:`MDAnalysis.analysis.hbonds.hbond_autocorrel` (deprecated) +==================================================================================================== :Author: Richard J. Gowers :Year: 2014 diff --git a/package/doc/sphinx/source/documentation_pages/analysis/hbond_autocorrel.rst b/package/doc/sphinx/source/documentation_pages/analysis/hbond_autocorrel.rst index bf960e41bf4..79b79c7e364 100644 --- a/package/doc/sphinx/source/documentation_pages/analysis/hbond_autocorrel.rst +++ b/package/doc/sphinx/source/documentation_pages/analysis/hbond_autocorrel.rst @@ -1 +1 @@ -.. automodule:: MDAnalysis.analysis.hbonds.hbond_autocorrel +.. automodule:: MDAnalysis.analysis.hydrogenbonds.hbond_autocorrel diff --git a/package/doc/sphinx/source/documentation_pages/analysis/hbond_autocorrel_deprecated.rst b/package/doc/sphinx/source/documentation_pages/analysis/hbond_autocorrel_deprecated.rst new file mode 100644 index 00000000000..bf960e41bf4 --- /dev/null +++ b/package/doc/sphinx/source/documentation_pages/analysis/hbond_autocorrel_deprecated.rst @@ -0,0 +1 @@ +.. automodule:: MDAnalysis.analysis.hbonds.hbond_autocorrel diff --git a/package/doc/sphinx/source/documentation_pages/analysis_modules.rst b/package/doc/sphinx/source/documentation_pages/analysis_modules.rst index 8209d096d8a..24f732fe798 100644 --- a/package/doc/sphinx/source/documentation_pages/analysis_modules.rst +++ b/package/doc/sphinx/source/documentation_pages/analysis_modules.rst @@ -71,6 +71,13 @@ Hydrogen bonding analysis/hbond_autocorrel analysis/wbridge_analysis +Deprecated modules: + +.. toctree:: + :maxdepth: 1 + + analysis/hbond_autocorrel_deprecated + Membranes and membrane proteins =============================== From fe76676c4ea26be46c4065e4289234a7b119b8d5 Mon Sep 17 00:00:00 2001 From: Oliver Beckstein Date: Wed, 5 May 2021 17:16:04 -0700 Subject: [PATCH 7/7] fixed doc in hydrogenbonds.wbridge_analysis - fix #3259 (title of Water Bridge Analysis docs is not correct) - include the Gregoret 1991 reference (that was removed with the deprecated hbonds.hbond_analysis module) - minor formatting fixes in the docs --- .../hydrogenbonds/wbridge_analysis.py | 34 ++++++++++++------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/package/MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py b/package/MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py index 3836246481d..2ad8d94565b 100644 --- a/package/MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py +++ b/package/MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py @@ -21,10 +21,8 @@ # J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 # -# Water Bridge Analysis -r"""Water Bridge analysis --- -mod:`MDAnalysis.analysis.hydrogenbonds.WaterBridgeAnalysis` -=============================================================================== +r"""Water Bridge analysis --- :mod:`MDAnalysis.analysis.hydrogenbonds.WaterBridgeAnalysis` +========================================================================================== :Author: Zhiyi Wu :Year: 2017-2018 @@ -262,8 +260,18 @@ class WaterBridgeAnalysis_OtherFF(WaterBridgeAnalysis): ```forcefield` = "OtherFF"``. Please also consider contributing the list of heavy atom names to MDAnalysis. -How to perform WaterBridgeAnalysis ----------------------------------- + +.. rubric:: References + +.. [Gregoret1991] L.M. Gregoret, S.D. Rader, R.J. Fletterick, and + F.E. Cohen. Hydrogen bonds involving sulfur atoms in proteins. Proteins, + 9(2):99–107, 1991. `10.1002/prot.340090204`_. + +.. _`10.1002/prot.340090204`: http://dx.doi.org/10.1002/prot.340090204 + + +How to perform ``WaterBridgeAnalysis`` +-------------------------------------- All water bridges between arginine and aspartic acid can be analysed with :: @@ -272,14 +280,14 @@ class WaterBridgeAnalysis_OtherFF(WaterBridgeAnalysis): u = MDAnalysis.Universe('topology', 'trajectory') w = MDAnalysis.analysis.hbonds.WaterBridgeAnalysis(u, 'resname ARG', - 'resname ASP') + 'resname ASP') w.run() The maximum number of bridging waters detected can be changed using the order keyword. :: w = MDAnalysis.analysis.hbonds.WaterBridgeAnalysis(u, 'resname ARG', - 'resname ASP', order=3) + 'resname ASP', order=3) Thus, a maximum of three bridging waters will be detected. @@ -301,7 +309,7 @@ class WaterBridgeAnalysis_OtherFF(WaterBridgeAnalysis): # 5 3 ASP OD2 print(w.timeseries) -prints out. :: +prints out :: [ # frame 1 # A water bridge SOL2 links O from ARG1 to the carboxylic group OD1 of ASP3 @@ -319,8 +327,8 @@ class WaterBridgeAnalysis_OtherFF(WaterBridgeAnalysis): .. _wb_count_by_type: -Use count_by_type ------------------ +Use ``count_by_type`` +--------------------- We can use the :meth:`~WaterBridgeAnalysis.count_by_type` to generate the frequence of all water bridges in the simulation. :: @@ -561,8 +569,8 @@ def analysis(current, output, **kwargs): .. _wb_count_by_time: -Use count_by_time ------------------ +Use ``count_by_time`` +--------------------- :meth:`~WaterBridgeAnalysis.count_by_type` aggregates data across frames, which might be desirable in some cases but not the others.