From 42dafa1a9cf50fa1ac052ab4f86f808ac0ec45db Mon Sep 17 00:00:00 2001 From: xiki-tempula Date: Thu, 28 Jan 2021 09:55:41 +0000 Subject: [PATCH 01/14] add deprecation --- package/CHANGELOG | 2 + .../analysis/hbonds/wbridge_analysis.py | 1546 +--------------- .../analysis/hydrogenbonds/__init__.py | 1 + .../hydrogenbonds/wbridge_analysis.py | 1614 +++++++++++++++++ .../analysis/wbridge_analysis.rst | 2 +- .../MDAnalysisTests/analysis/test_wbridge.py | 16 +- 6 files changed, 1633 insertions(+), 1548 deletions(-) create mode 100644 package/MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py diff --git a/package/CHANGELOG b/package/CHANGELOG index 50dc6941c8a..13266ed8ff7 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -70,6 +70,8 @@ Deprecations only a Universe, in 2.0.0 * analysis.helanal will be removed in 2.0.0 and replaced by analysis.helix_analysis (PR #2622) + * hbonds.WaterBridgeAnalysis will be removed in 2.0.0 and + replaced with hydrogenbonds.WaterBridgeAnalysis (#3111) 06/09/20 richardjgowers, kain88-de, lilyminium, p-j-smith, bdice, joaomcteixeira, diff --git a/package/MDAnalysis/analysis/hbonds/wbridge_analysis.py b/package/MDAnalysis/analysis/hbonds/wbridge_analysis.py index 3e7453f0edf..bb133e74397 100644 --- a/package/MDAnalysis/analysis/hbonds/wbridge_analysis.py +++ b/package/MDAnalysis/analysis/hbonds/wbridge_analysis.py @@ -22,1541 +22,11 @@ # # Water Bridge Analysis -r"""Water Bridge analysis --- :mod:`MDAnalysis.analysis.hbonds.WaterBridgeAnalysis` -=============================================================================== - -:Author: Zhiyi Wu -:Year: 2017-2018 -:Copyright: GNU Public License v3 -:Maintainer: Zhiyi Wu , `@xiki-tempula`_ on GitHub - - -.. _`@xiki-tempula`: https://github.com/xiki-tempula - - -Given a :class:`~MDAnalysis.core.universe.Universe` (simulation -trajectory with 1 or more frames) measure all water bridges for each -frame between selections 1 and 2. -Water bridge is defined as a bridging water which simultaneously forms -two hydrogen bonds with atoms from both selection 1 and selection 2. - -A water bridge can form between two hydrogen bond acceptors. - -e.g. -CO\ :sub:`2`\ :sup:`-`:···H−O−H···:\ :sup:`-`\ O\ :sub:`2`\ C- - -A water bridge can also form between two hydrogen bond donors. - -e.g. -NH···:O:···HN- (where O is the oxygen of a bridging water) - -A hydrogen bond acceptor and another hydrogen bond donor can be bridged by a -water. - -e.g. -CO\ :sub:`2`\ :sup:`-`:···H−O:···HN- (where H−O is part of **H−O**\ −H) - -A higher order water bridge is defined as more than one water bridging -hydrogen bond acceptor and donor. An example of a second order water bridge: - -e.g. -CO\ :sub:`2`\ :sup:`-`:···H−O:···H−O:···HN- (where H−O is part of **H−O**\ −H) - -The following keyword arguments are important to control the behaviour of the -water bridge analysis: - - - *water_selection* (``resname SOL``): the selection string for the bridging - water - - *order* the maximum number of water bridging both ends - - donor-acceptor *distance* (Å): 3.0 - - Angle *cutoff* (degrees): 120.0 - - *forcefield* to switch between default values for different force fields - - *donors* and *acceptors* atom types (to add additional atom names) - -Theory ------- - -This module attempts to find multi-order water bridge by an approach similar -to breadth-first search, where the first solvation shell of selection 1 is -selected, followed by the selection of the second solvation shell as well as -any hydrogen bonding partner from selection 1. After that, the third solvation -shell, as well as any binding partners from selection 2, are detected. This -process is repeated until the maximum order of water bridges is reached. - -.. _wb_Analysis_Network: - -Output as Network ------------------ - -Since the waters connecting the two ends of the selections are by nature a network. -We provide a network representation of the water network. Water bridge data are -returned per frame, which is stored in :attr:`WaterBridgeAnalysis.network`. Each -frame is represented as a dictionary, where the keys are the hydrogen bonds -originating from selection 1 and the values are new dictionaries representing -the hydrogen bonds coming out of the corresponding molecules making hydrogen bonds -with selection 1. - -As for the hydrogen bonds which reach the selection 2, the values of the -corresponding keys are None. One example where selection 1 and selection 2 are -joined by one water molecule (A) which also hydrogen bond to another water (B) -which also hydrogen bond to selection 2 would be represented as :: - - # (selection 1)-O:···H-O(water 1):···H-(selection 2) - # | : - # H·············O-H(water2) - # H - {(sele1_acceptor, None, water1_donor, water1_donor_heavy, distance, angle): - {(water1_acceptor, None, sele2_donor, sele2_donor_heavy, distance, angle): None}, - {(water1_donor, water1_donor_heavy, water2_acceptor, None, distance, angle): - {(water2_acceptor, None, sele2_donor, sele2_donor_heavy, distance, angle): None} - }, - } - -The atoms are represented by atom index and if the atom is hydrogen bond donor, -it is followed by the index of the corresponding heavy atom -``(donor_proton, donor_heavy_atom)``. -If the atom is a hydrogen bond acceptor, it is followed by none. - -.. _wb_Analysis_Timeseries: - -Output as Timeseries --------------------- - -For lower order water bridges, it might be desirable to represent the connections as -:attr:`WaterBridgeAnalysis.timeseries`. The results are returned per frame and -are a list of hydrogen bonds between the selection 1 or selection 2 and the -bridging waters. Due to the complexity of the higher order water bridge and the -fact that one hydrogen bond between two waters can appear in both third and -fourth order water bridge, the hydrogen bonds in the -:attr:`WaterBridgeAnalysis.timeseries` attribute are generated in a depth-first -search manner to avoid duplication. Example code of how -:attr:`WaterBridgeAnalysis.timeseries` is generated:: - - def network2timeseries(network, timeseries): - '''Traverse the network in a depth-first fashion. - expand_timeseries will expand the compact representation to the familiar - timeseries representation.''' - - if network is None: - return - else: - for node in network: - timeseries.append(expand_timeseries(node)) - network2timeseries(network[node], timeseries) - - timeseries = [] - network2timeseries(network, timeseries) - -The list is formatted similar to the \ :attr:`HydrogenBondAnalysis.timeseries -` -except that the atom identifier is expressed as (residue name, residue number, -atom name). An example would be. :: - - results = [ - [ # frame 1 - [ , , - (, , ), - (, , ), - , ], - .... - ], - [ # frame 2 - [ ... ], [ ... ], ... - ], - ... - ] - -Using the :meth:`WaterBridgeAnalysis.generate_table` method one can reformat -the results as a flat "normalised" table that is easier to import into a -database or dataframe for further processing. - -Detection of water bridges --------------------------- -Water bridges are recorded if a bridging water simultaneously forms -hydrogen bonds with selection 1 and selection 2. - -Hydrogen bonds are detected as is described in \ -:class:`~MDAnalysis.analysis.hbonds.hbond_analysis.HydrogenBondAnalysis`, see \ -:ref:`Detection-of-hydrogen-bonds`. - -The lists of donor and acceptor names can be extended by providing lists of -atom names in the `donors` and `acceptors` keywords to -:class:`WaterBridgeAnalysis`. If the lists are entirely inappropriate -(e.g. when analysing simulations done with a force field that uses very -different atom names) then one should either use the value "other" for -`forcefield` to set no default values or derive a new class and set the -default list oneself:: - - class WaterBridgeAnalysis_OtherFF(WaterBridgeAnalysis): - DEFAULT_DONORS = {"OtherFF": tuple(set([...]))} - DEFAULT_ACCEPTORS = {"OtherFF": tuple(set([...]))} - -Then simply use the new class instead of the parent class and call it with -```forcefield` = "OtherFF"``. Please also consider contributing the list of heavy -atom names to MDAnalysis. - -How to perform WaterBridgeAnalysis ----------------------------------- - -All water bridges between arginine and aspartic acid can be analysed with :: - - import MDAnalysis - import MDAnalysis.analysis.hbonds - - u = MDAnalysis.Universe('topology', 'trajectory') - w = MDAnalysis.analysis.hbonds.WaterBridgeAnalysis(u, 'resname ARG', 'resname ASP') - w.run() - -The maximum number of bridging waters detected can be changed using the order keyword. :: - - w = MDAnalysis.analysis.hbonds.WaterBridgeAnalysis(u, 'resname ARG', 'resname ASP', - order=3) - -Thus, a maximum of three bridging waters will be detected. - -An example of using the :attr:`~WaterBridgeAnalysis` would be -detecting the percentage of time a certain water bridge exits. - -Trajectory :code:`u` has two frames, where the first frame contains a water -bridge from the oxygen of the first arginine to one of the oxygens in the carboxylic -group of aspartate (ASP3:OD1). In the second frame, the same water bridge forms but -is between the oxygen of the arginine and the other oxygen in the carboxylic -group (ASP3:OD2). :: - - # index residue id residue name atom name - # 0 1 ARG O - # 1 2 SOL OW - # 2 2 SOL HW1 - # 3 2 SOL HW2 - # 4 3 ASP OD1 - # 5 3 ASP OD2 - print(w.timeseries) - -prints out. :: - - [ # frame 1 - # A water bridge SOL2 links O from ARG1 to the carboxylic group OD1 of ASP3 - [[0,2,('ARG',1, 'O'),('SOL',2,'HW1'), 3.0,180], - [3,4,('SOL',2,'HW2'),('ASP',3,'OD1'), 3.0,180], - ], - # frame 2 - # Another water bridge SOL2 links O from ARG1 to the other oxygen of the - # carboxylic group OD2 of ASP3 - [[0,2,('ARG',1, 'O'),('SOL',2,'HW1'), 3.0,180], - [3,5,('SOL',2,'HW2'),('ASP',3,'OD2'), 3.0,180], - ], - ] - - -.. _wb_count_by_type: - -Use count_by_type ------------------ - -We can use the :meth:`~WaterBridgeAnalysis.count_by_type` to -generate the frequence of all water bridges in the simulation. :: - - w.count_by_type() - -Returns :: - - [(0, 3, 'ARG', 1, 'O', 'ASP', 3, 'OD1', 0.5), - (0, 4, 'ARG', 1, 'O', 'ASP', 3, 'OD2', 0.5),] - -You might think that the OD1 and OD2 are the same oxygen and the aspartate has just flipped -and thus, they should be counted as the same type of water bridge. The type of the water -bridge can be customised by supplying an analysis function to -:meth:`~WaterBridgeAnalysis.count_by_type`. - -The analysis function has two parameters. The current and the output. The current is a list -of hydrogen bonds from selection 1 to selection 2, formatted in the same fashion as -:attr:`WaterBridgeAnalysis.network`, and an example will be :: - - [ # sele1 acceptor idx, , water donor index, donor heavy atom idx, dist, ang. - [ 0, None, 2, 1, 3.0,180], - # water donor idx, donor heavy atom idx, sele2 acceptor idx, distance, angle. - [ 3, 1, 4, None, 3.0,180],] - -Where ``current[0]`` is the first hydrogen bond originating from selection 1 and ``current[-1]`` is -the final hydrogen bond ending in selection 2. The output sums up all the information in the -current frame and is a dictionary with a user-defined key and the value is the weight of the -corresponding key. During the analysis phase, the function analysis iterates through all the -water bridges and modify the output in-place. At the end of the analysis, the keys from -all the frames are collected and the corresponding values will be summed up and returned. :: - - def analysis(current, output, u): - r'''This function defines how the type of water bridge should be specified. - - Parameters - ---------- - current : list - The current water bridge being analysed is a list of hydrogen bonds from - selection 1 to selection 2. - output : dict - A dictionary which is modified in-place where the key is the type of - the water bridge and the value is the weight of this type of water bridge. - u : MDAnalysis.universe - The current Universe for looking up atoms.''' - - # decompose the first hydrogen bond. - sele1_index, sele1_heavy_index, atom2, heavy_atom2, dist, angle = current[0] - # decompose the last hydrogen bond. - atom1, heavy_atom1, sele2_index, sele2_heavy_index, dist, angle = current[-1] - # expand the atom index to the resname, resid, atom names - sele1 = u.atoms[sele1_index] - sele2 = u.atoms[sele2_index] - (s1_resname, s1_resid, s1_name) = (sele1.resname, sele1.resid, sele1.name) - (s2_resname, s2_resid, s2_name) = (sele2.resname, sele2.resid, sele2.name) - # if the residue name is ASP and the atom name is OD2 or OD1, - # the atom name is changed to OD - if s2_resname == 'ASP' and (s2_name == 'OD1' or s2_name == 'OD2'): - s2_name = 'OD' - # setting up the key which defines this type of water bridge. - key = (s1_resname, s1_resid, s1_name, s2_resname, s2_resid, s2_name) - # The number of this type of water bridge is incremented by 1. - output[key] += 1 - - w.count_by_type(analysis_func=analysis) - -Returns :: - - [(('ARG', 1, 'O', 'ASP', 3, 'OD'), 1.0),] - -Note that the result is arranged in the format of ``(key, the proportion of time)``. When no -custom analysis function is supplied, the key is expanded for backward compatibility. So -that when the same code is executed, the result returned will be the same as the result -given since version 0.17.0 and the same as the -:meth:`HydrogenBondAnalysis.count_by_type`. - -Some people might only interested in contacts between residues and pay no attention -to the details regarding the atom name. However, since multiple water bridges can -exist between two residues, which sometimes can give a result such that the water -bridge between two residues exists 300% of the time. Though this might be a desirable -result for some people, others might want the water bridge between two residues to be -only counted once per frame. This can also be achieved by supplying an analysis function -to :meth:`~WaterBridgeAnalysis.count_by_type`. :: - - def analysis(current, output, u): - '''This function defines how the type of water bridge should be specified. - - Parameters - ---------- - current : list - The current water bridge being analysed is a list of hydrogen bonds from - selection 1 to selection 2. - output : dict - A dictionary which is modified in-place where the key is the type of - the water bridge and the value is the weight of this type of water bridge. - u : MDAnalysis.universe - The current Universe for looking up atoms. - ''' - - # decompose the first hydrogen bond. - sele1_index, sele1_heavy_index, atom2, heavy_atom2, dist, angle = current[0] - # decompose the last hydrogen bond. - atom1, heavy_atom1, sele2_index, sele2_heavy_index, dist, angle = current[-1] - # expand the atom index to the resname, resid, atom names - sele1 = u.atoms[sele1_index] - sele2 = u.atoms[sele2_index] - (s1_resname, s1_resid, s1_name) = (sele1.resname, sele1.resid, sele1.name) - (s2_resname, s2_resid, s2_name) = (sele2.resname, sele2.resid, sele2.name) - # s1_name and s2_name are not included in the key - key = (s1_resname, s1_resid, s2_resname, s2_resid) - - # Each residue is only counted once per frame - output[key] = 1 - - w.count_by_type(analysis_func=analysis) - -Returns :: - - [(('ARG', 1, 'ASP', 3), 1.0),] - -On the other hand, other people may insist that the first order and second-order water -bridges shouldn't be mixed together, which can also be achieved by supplying an analysis -function to :meth:`~WaterBridgeAnalysis.count_by_type`. :: - - def analysis(current, output, u): - '''This function defines how the type of water bridge should be specified. - - Parameters - ---------- - current : list - The current water bridge being analysed is a list of hydrogen bonds from - selection 1 to selection 2. - output : dict - A dictionary which is modified in-place where the key is the type of - the water bridge and the value is the weight of this type of water bridge. - u : MDAnalysis.universe - The current Universe for looking up atoms. - ''' - - # decompose the first hydrogen bond. - sele1_index, sele1_heavy_index, atom2, heavy_atom2, dist, angle = current[0] - # decompose the last hydrogen bond. - atom1, heavy_atom1, sele2_index, sele2_heavy_index, dist, angle = current[-1] - # expand the atom index to the resname, resid, atom names - sele1 = u.atoms[sele1_index] - sele2 = u.atoms[sele2_index] - (s1_resname, s1_resid, s1_name) = (sele1.resname, sele1.resid, sele1.name) - (s2_resname, s2_resid, s2_name) = (sele2.resname, sele2.resid, sele2.name) - # order of the current water bridge is computed - order_of_water_bridge = len(current) - 1 - # and is included in the key - key = (s1_resname, s1_resid, s2_resname, s2_resid, order_of_water_bridge) - # The number of this type of water bridge is incremented by 1. - output[key] += 1 - - w.count_by_type(analysis_func=analysis) - -The extra number 1 precede the 1.0 indicate that this is a first order water bridge :: - - [(('ARG', 1, 'ASP', 3, 1), 1.0),] - -Some people might not be interested in the interactions related to arginine. The undesirable -interactions can be discarded by supplying an analysis function to -:meth:`~WaterBridgeAnalysis.count_by_type`. :: - - def analysis(current, output, u): - '''This function defines how the type of water bridge should be specified. - - Parameters - ---------- - current : list - The current water bridge being analysed is a list of hydrogen bonds from - selection 1 to selection 2. - output : dict - A dictionary which is modified in-place where the key is the type of - the water bridge and the value is the number of this type of water bridge. - u : MDAnalysis.universe - The current Universe for looking up atoms. - ''' - - # decompose the first hydrogen bond. - sele1_index, sele1_heavy_index, atom2, heavy_atom2, dist, angle = current[0] - # decompose the last hydrogen bond. - atom1, heavy_atom1, sele2_index, sele2_heavy_index, dist, angle = current[-1] - # expand the atom index to the resname, resid, atom names - sele1 = u.atoms[sele1_index] - sele2 = u.atoms[sele2_index] - (s1_resname, s1_resid, s1_name) = (sele1.resname, sele1.resid, sele1.name) - (s2_resname, s2_resid, s2_name) = (sele2.resname, sele2.resid, sele2.name) - if not s1_resname == 'ARG': - key = (s1_resname, s1_resid, s2_resname, s2_resid) - output[key] += 1 - - w.count_by_type(analysis_func=analysis) - -Returns nothing in this case :: - - [,] - -Additional keywords can be supplied to the analysis function by passing through -:meth:`~WaterBridgeAnalysis.count_by_type`. :: - - def analysis(current, output, **kwargs): - ... - w.count_by_type(analysis_func=analysis, **kwargs) - - -.. _wb_count_by_time: - -Use count_by_time ------------------ - -:meth:`~WaterBridgeAnalysis.count_by_type` aggregates data across frames, which -might be desirable in some cases but not the others. :meth:`~WaterBridgeAnalysis.count_by_time` -provides additional functionality for aggregating results for each frame. - -The default behaviour of :meth:`~WaterBridgeAnalysis.count_by_time` is counting the number of -water bridges from selection 1 to selection 2 for each frame. Take the previous ASP, ARG salt -bridge for example: :: - - w.count_by_time() - -As one water bridge is found in both frames, the method returns :: - - [(1.0, 1), (2.0, 1), ] - -Similar to :meth:`~WaterBridgeAnalysis.count_by_type` -The behaviour of :meth:`~WaterBridgeAnalysis.count_by_time` can also be modified by supplying -an analysis function. - -Suppose we want to count - - - the **number** of water molecules involved in bridging selection 1 to selection 2. - - only if water bridge terminates in atom name **OD1 of ASP**. - - only when water bridge is joined by less than **two** water. - -The analysis function can be written as:: - - def analysis(current, output, u, **kwargs): - '''This function defines how the counting of water bridge should be specified. - - Parameters - ---------- - current : list - The current water bridge being analysed is a list of hydrogen bonds from - selection 1 to selection 2. - output : dict - A dictionary which is modified in-place where the key is the type of - the water bridge and the value is the number of this type of water bridge. - u : MDAnalysis.universe - The current Universe for looking up atoms. - ''' - - # decompose the first hydrogen bond. - sele1_index, sele1_heavy_index, atom2, heavy_atom2, dist, angle = current[0] - # decompose the last hydrogen bond. - atom1, heavy_atom1, sele2_index, sele2_heavy_index, dist, angle = current[-1] - # expand the atom index to the resname, resid, atom names - sele1 = u.atoms[sele1_index] - sele2 = u.atoms[sele2_index] - (s1_resname, s1_resid, s1_name) = (sele1.resname, sele1.resid, sele1.name) - (s2_resname, s2_resid, s2_name) = (sele2.resname, sele2.resid, sele2.name) - - # only the residue name is ASP and the atom name is OD1, - if s2_resname == 'ASP' and s2_name == 'OD1': - # only if the order of water bridge is less than 2 - if len(current) -1 < 2: - # extract all water molecules involved in the water bridge - # extract the first water from selection 1 - s1_index, to_index, (s1_resname, s1_resid, s1_name), - (to_resname, to_resid, to_name), dist, angle = current[0] - key = (to_resname, to_resid) - output[key] = 1 - - # extract all the waters between selection 1 and selection 2 - for hbond in current[1:-1]: - # decompose the hydrogen bond. - from_index, to_index, (from_resname, from_resid, from_name), - (to_resname, to_resid, to_name), dist, angle = hbond - # add first water - key1 = (from_resname, from_resid) - output[key1] = 1 - # add second water - key2 = (to_resname, to_resid) - output[key2] = 1 - - # extract the last water to selection 2 - from_index, s2_index, (from_resname, from_resid, from_name), - (s2_resname, s2_resid, s2_name), dist, angle = current[-1] - key = (from_resname, from_resid) - output[key] = 1 - - w.count_by_time(analysis_func=analysis) - -Returns :: - - [(1.0, 1), (2.0, 0),] - -Classes -------- - -.. autoclass:: WaterBridgeAnalysis - :members: - - .. attribute:: timesteps - - List of the times of each timestep. This can be used together with - :attr:`~WaterBridgeAnalysis.timeseries` to find the specific time point - of a water bridge existence. - -""" -from __future__ import print_function, absolute_import, division - -from collections import defaultdict -import logging -import warnings -import six -import numpy as np - -from ..base import AnalysisBase -from MDAnalysis.lib.NeighborSearch import AtomNeighborSearch -from MDAnalysis.lib.distances import capped_distance, calc_angles -from MDAnalysis import NoDataError, MissingDataWarning, SelectionError -from MDAnalysis.lib import distances - -logger = logging.getLogger('MDAnalysis.analysis.WaterBridgeAnalysis') - -class WaterBridgeAnalysis(AnalysisBase): - """Perform a water bridge analysis - - The analysis of the trajectory is performed with the - :meth:`WaterBridgeAnalysis.run` method. The result is stored in - :attr:`WaterBridgeAnalysis.timeseries`. See - :meth:`~WaterBridgeAnalysis.run` for the format. - - :class:`WaterBridgeAnalysis` uses the same default atom names as - :class:`~MDAnalysis.analysis.hbonds.hbond_analysis.HydrogenBondAnalysis`, - see :ref:`Default atom names for hydrogen bonding analysis` - - - .. versionadded:: 0.17.0 - - - """ - # use tuple(set()) here so that one can just copy&paste names from the - # table; set() takes care for removing duplicates. At the end the - # DEFAULT_DONORS and DEFAULT_ACCEPTORS should simply be tuples. - - #: default heavy atom names whose hydrogens are treated as *donors* - #: (see :ref:`Default atom names for hydrogen bonding analysis`); - #: use the keyword `donors` to add a list of additional donor names. - DEFAULT_DONORS = { - 'CHARMM27': tuple(set([ - 'N', 'OH2', 'OW', 'NE', 'NH1', 'NH2', 'ND2', 'SG', 'NE2', 'ND1', - 'NZ', 'OG', 'OG1', 'NE1', 'OH'])), - 'GLYCAM06': tuple(set(['N', 'NT', 'N3', 'OH', 'OW'])), - 'other': tuple(set([]))} - - #: default atom names that are treated as hydrogen *acceptors* - #: (see :ref:`Default atom names for hydrogen bonding analysis`); - #: use the keyword `acceptors` to add a list of additional acceptor names. - DEFAULT_ACCEPTORS = { - 'CHARMM27': tuple(set([ - 'O', 'OC1', 'OC2', 'OH2', 'OW', 'OD1', 'OD2', 'SG', 'OE1', 'OE1', - 'OE2', 'ND1', 'NE2', 'SD', 'OG', 'OG1', 'OH'])), - 'GLYCAM06': tuple(set(['N', 'NT', 'O', 'O2', 'OH', 'OS', 'OW', 'OY', 'SM'])), - 'other': tuple(set([]))} - - #: A :class:`collections.defaultdict` of covalent radii of common donors - #: (used in :meth`_get_bonded_hydrogens_list` to check if a hydrogen is - #: sufficiently close to its donor heavy atom). Values are stored for - #: N, O, P, and S. Any other heavy atoms are assumed to have hydrogens - #: covalently bound at a maximum distance of 1.5 Å. - r_cov = defaultdict(lambda: 1.5, # default value - N=1.31, O=1.31, P=1.58, S=1.55) - - def __init__(self, universe, selection1='protein', - selection2='not resname SOL', water_selection='resname SOL', order=1, - selection1_type='both', update_selection=False, update_water_selection=True, - filter_first=True, distance_type='hydrogen', distance=3.0, - angle=120.0, forcefield='CHARMM27', donors=None, - acceptors=None, output_format="sele1_sele2", debug=None, verbose=False, - pbc=False, **kwargs): - """Set up the calculation of water bridges between two selections in a - universe. - - The timeseries is accessible as the attribute - :attr:`WaterBridgeAnalysis.timeseries`. - - If no hydrogen bonds are detected or if the initial check fails, look - at the log output (enable with :func:`MDAnalysis.start_logging` and set - `verbose` ``=True``). It is likely that the default names for donors - and acceptors are not suitable (especially for non-standard - ligands). In this case, either change the `forcefield` or use - customized `donors` and/or `acceptors`. - - Parameters - ---------- - universe : Universe - Universe object - selection1 : str (optional) - Selection string for first selection ['protein'] - selection2 : str (optional) - Selection string for second selection ['not resname SOL'] - This string selects everything except water where water is assumed - to have a residue name as SOL. - water_selection : str (optional) - Selection string for bridging water selection ['resname SOL'] - The default selection assumes that the water molecules have residue - name "SOL". Change it to the appropriate selection for your - specific force field. - - However, in theory, this selection can be anything which forms - a hydrogen bond with selection 1 and selection 2. - order : int (optional) - The maximum number of water bridges linking both selections. - if the order is set to 3, then all the residues linked with less than - three water molecules will be detected. [1] - - Computation of high order water bridges can be very time-consuming. - Think carefully before running the calculation, do you really want - to compute the 20th order water bridge between domain A and domain B - or you just want to know the third order water bridge between two residues. - selection1_type : {"donor", "acceptor", "both"} (optional) - Selection 1 can be 'donor', 'acceptor' or 'both'. Note that the - value for `selection1_type` automatically determines how - `selection2` handles donors and acceptors: If `selection1` contains - 'both' then `selection2` will also contain 'both'. If `selection1` - is set to 'donor' then `selection2` is 'acceptor' (and vice versa). - ['both']. - update_selection : bool (optional) - Update selection 1 and 2 at each frame. Setting to ``True`` if the - selection is not static. Selections are filtered first to speed up - performance. Thus, setting to ``True`` is recommended if contact - surface between selection 1 and selection 2 is constantly - changing. [``False``] - update_water_selection : bool (optional) - Update selection of water at each frame. Setting to ``False`` is - **only** recommended when the total amount of water molecules in the - simulation are small and when water molecules remain static across - the simulation. - - However, in normal simulations, only a tiny proportion of water is - engaged in the formation of water bridge. It is recommended to - update the water selection and set keyword `filter_first` to - ``True`` so as to filter out water not residing between the two - selections. [``True``] - filter_first : bool (optional) - Filter the water selection to only include water within 4 Å + `order` * - (2 Å + `distance`) away from `both` selection 1 and selection 2. - Selection 1 and selection 2 are both filtered to only include atoms - with the same distance away from the other selection. [``True``] - distance : float (optional) - Distance cutoff for hydrogen bonds; only interactions with a H-A - distance <= `distance` (and the appropriate D-H-A angle, see - `angle`) are recorded. (Note: `distance_type` can change this to - the D-A distance.) [3.0] - angle : float (optional) - Angle cutoff for hydrogen bonds; an ideal H-bond has an angle of - 180º. A hydrogen bond is only recorded if the D-H-A angle is - >= `angle`. The default of 120º also finds fairly non-specific - hydrogen interactions and possibly better value is 150º. [120.0] - forcefield : {"CHARMM27", "GLYCAM06", "other"} (optional) - Name of the forcefield used. Switches between different - :attr:`~HydrogenBondAnalysis.DEFAULT_DONORS` and - :attr:`~HydrogenBondAnalysis.DEFAULT_ACCEPTORS` values. - ["CHARMM27"] - donors : sequence (optional) - Extra H donor atom types (in addition to those in - :attr:`~HydrogenBondAnalysis.DEFAULT_DONORS`), must be a sequence. - acceptors : sequence (optional) - Extra H acceptor atom types (in addition to those in - :attr:`~HydrogenBondAnalysis.DEFAULT_ACCEPTORS`), must be a - sequence. - distance_type : {"hydrogen", "heavy"} (optional) - Measure hydrogen bond lengths between donor and acceptor heavy - atoms ("heavy") or between donor hydrogen and acceptor heavy - atom ("hydrogen"). If using "heavy" then one should set the - *distance* cutoff to a higher value such as 3.5 Å. ["hydrogen"] - output_format: {"sele1_sele2", "donor_acceptor"} (optional) - Setting the output format for timeseries and table. If set to - "sele1_sele2", for each hydrogen bond, the one close to selection 1 - will be placed before selection 2. If set to "donor_acceptor", the - donor will be placed before acceptor. "sele1_sele2"] - debug : bool (optional) - If set to ``True`` enables per-frame debug logging. This is disabled - by default because it generates a very large amount of output in - the log file. (Note that a logger must have been started to see - the output, e.g. using :func:`MDAnalysis.start_logging`.) - verbose : bool (optional) - Toggle progress output. (Can also be given as keyword argument to - :meth:`run`.) - - Notes - ----- - In order to speed up processing, atoms are filtered by a coarse - distance criterion before a detailed hydrogen bonding analysis is - performed (`filter_first` = ``True``). - - If selection 1 and selection 2 are very mobile during the simulation - and the contact surface is constantly changing (i.e. residues are - moving farther than 4 Å + `order` * (2 Å + `distance`)), you might - consider setting the `update_selection` keywords to ``True`` - to ensure correctness. - - .. versionchanged 0.20.0 - The :attr:`WaterBridgeAnalysis.timeseries` has been updated - see :attr:`WaterBridgeAnalysis.timeseries` for detail. - This class is now based on - :class:`~MDAnalysis.analysis.base.AnalysisBase`. - - - """ - super(WaterBridgeAnalysis, self).__init__(universe.trajectory, - **kwargs) - self.water_selection = water_selection - self.update_water_selection = update_water_selection - # per-frame debugging output? - self.debug = debug - - # set the output format - self.output_format = output_format - - self.u = universe - self.selection1 = selection1 - self.selection2 = selection2 - self.selection1_type = selection1_type - - # if the selection 1 and selection 2 are the same - if selection1 == selection2: - # eliminate the duplication - self.selection1_type = "donor" - self.update_selection = update_selection - self.filter_first = filter_first - self.distance = distance - self.distance_type = distance_type # note: everything except 'heavy' will give the default behavior - self.angle = angle - self.pbc = pbc and all(self.u.dimensions[:3]) - self.order = order - - # set up the donors/acceptors lists - if donors is None: - donors = () - if acceptors is None: - acceptors = () - self.forcefield = forcefield - self.donors = tuple(set(self.DEFAULT_DONORS[forcefield]).union(donors)) - self.acceptors = tuple(set(self.DEFAULT_ACCEPTORS[forcefield]).union(acceptors)) - - if self.selection1_type not in ('both', 'donor', 'acceptor'): - raise ValueError('HydrogenBondAnalysis: Invalid selection type {0!s}'.format( - self.selection1_type)) - - self._network = [] # final result accessed as self.network - self.timesteps = None # time for each frame - - self._log_parameters() - - def _log_parameters(self): - """Log important parameters to the logfile.""" - logger.info("WaterBridgeAnalysis: selection = %r (update: %r)", - self.selection2, self.update_selection) - logger.info("WaterBridgeAnalysis: water selection = %r (update: %r)", - self.water_selection, self.update_water_selection) - logger.info("WaterBridgeAnalysis: criterion: donor %s atom and acceptor \ - atom distance <= %.3f A", self.distance_type, - self.distance) - logger.info("WaterBridgeAnalysis: criterion: angle D-H-A >= %.3f degrees", - self.angle) - logger.info("WaterBridgeAnalysis: force field %s to guess donor and \ - acceptor names", self.forcefield) - - def _build_residue_dict(self, selection): - # Build the residue_dict where the key is the residue name - # The content is a dictionary where hydrogen bond donor heavy atom names is the key - # The content is the hydrogen bond donor hydrogen atom names - atom_group = self.u.select_atoms(selection) - for residue in atom_group.residues: - if not residue.resname in self._residue_dict: - self._residue_dict[residue.resname] = defaultdict(set) - for atom in residue.atoms: - if atom.name in self.donors: - self._residue_dict[residue.resname][atom.name].update(self._get_bonded_hydrogens(atom).names) - - def _update_donor_h(self, atom_ix, h_donors, donors_h): - atom = self.u.atoms[atom_ix] - residue = atom.residue - hydrogen_names = self._residue_dict[residue.resname][atom.name] - if hydrogen_names: - hydrogens = residue.atoms.select_atoms('name {0}'.format( - ' '.join(hydrogen_names))).ix - for atom in hydrogens: - h_donors[atom] = atom_ix - donors_h[atom_ix].append(atom) - - def _update_selection(self): - self._s1_donors = [] - self._s1_h_donors = {} - self._s1_donors_h = defaultdict(list) - self._s1_acceptors = [] - - self._s2_donors = [] - self._s2_h_donors = {} - self._s2_donors_h = defaultdict(list) - self._s2_acceptors = [] - - self._s1 = self.u.select_atoms(self.selection1).ix - self._s2 = self.u.select_atoms(self.selection2).ix - - if self.filter_first and len(self._s1): - self.logger_debug('Size of selection 1 before filtering:' - ' {} atoms'.format(len(self._s1))) - ns_selection_1 = AtomNeighborSearch(self.u.atoms[self._s1], box=self.box) - self._s1 = ns_selection_1.search(self.u.atoms[self._s2], self.selection_distance).ix - self.logger_debug("Size of selection 1: {0} atoms".format(len(self._s1))) - - if len(self._s1) == 0: - logger.warning('Selection 1 "{0}" did not select any atoms.' - .format(str(self.selection1)[:80])) - return - - if self.filter_first and len(self._s2): - self.logger_debug('Size of selection 2 before filtering:' - ' {} atoms'.format(len(self._s2))) - ns_selection_2 = AtomNeighborSearch(self.u.atoms[self._s2], box=self.box) - self._s2 = ns_selection_2.search(self.u.atoms[self._s1], self.selection_distance).ix - self.logger_debug('Size of selection 2: {0} atoms'.format(len(self._s2))) - - if len(self._s2) == 0: - logger.warning('Selection 2 "{0}" did not select any atoms.' - .format(str(self.selection2)[:80])) - return - - if self.selection1_type in ('donor', 'both'): - self._s1_donors = self.u.atoms[self._s1].select_atoms( - 'name {0}'.format(' '.join(self.donors))).ix - for atom_ix in self._s1_donors: - self._update_donor_h(atom_ix, self._s1_h_donors, self._s1_donors_h) - self.logger_debug("Selection 1 donors: {0}".format(len(self._s1_donors))) - self.logger_debug("Selection 1 donor hydrogens: {0}".format(len(self._s1_h_donors))) - if self.selection1_type in ('acceptor', 'both'): - self._s1_acceptors = self.u.atoms[self._s1].select_atoms( - 'name {0}'.format(' '.join(self.acceptors))).ix - self.logger_debug("Selection 1 acceptors: {0}".format(len(self._s1_acceptors))) - - if len(self._s2) == 0: - return None - if self.selection1_type in ('donor', 'both'): - self._s2_acceptors = self.u.atoms[self._s2].select_atoms( - 'name {0}'.format(' '.join(self.acceptors))).ix - self.logger_debug("Selection 2 acceptors: {0:d}".format(len(self._s2_acceptors))) - if self.selection1_type in ('acceptor', 'both'): - self._s2_donors = self.u.atoms[self._s2].select_atoms( - 'name {0}'.format(' '.join(self.donors))).ix - for atom_ix in self._s2_donors: - self._update_donor_h(atom_ix, self._s2_h_donors, self._s2_donors_h) - self.logger_debug("Selection 2 donors: {0:d}".format(len(self._s2_donors))) - self.logger_debug("Selection 2 donor hydrogens: {0:d}".format(len(self._s2_h_donors))) - - def _update_water_selection(self): - self._water_donors = [] - self._water_h_donors = {} - self._water_donors_h = defaultdict(list) - self._water_acceptors = [] - - self._water = self.u.select_atoms(self.water_selection).ix - self.logger_debug('Size of water selection before filtering:' - ' {} atoms'.format(len(self._water))) - if len(self._water) and self.filter_first: - filtered_s1 = AtomNeighborSearch(self.u.atoms[self._water], box=self.box).search(self.u.atoms[self._s1], - self.selection_distance) - if filtered_s1: - self._water = AtomNeighborSearch(filtered_s1, box=self.box).search(self.u.atoms[self._s2], - self.selection_distance).ix - - self.logger_debug("Size of water selection: {0} atoms".format(len(self._water))) - - if len(self._water) == 0: - logger.warning("Water selection '{0}' did not select any atoms." - .format(str(self.water_selection)[:80])) - else: - self._water_donors = self.u.atoms[self._water].select_atoms( - 'name {0}'.format(' '.join(self.donors))).ix - for atom_ix in self._water_donors: - self._update_donor_h(atom_ix, self._water_h_donors, self._water_donors_h) - self.logger_debug("Water donors: {0}".format(len(self._water_donors))) - self.logger_debug("Water donor hydrogens: {0}".format(len(self._water_h_donors))) - self._water_acceptors = self.u.atoms[self._water].select_atoms( - 'name {0}'.format(' '.join(self.acceptors))).ix - self.logger_debug("Water acceptors: {0}".format(len(self._water_acceptors))) - - def _get_bonded_hydrogens(self, atom): - """Find hydrogens bonded within cutoff to `atom`. - - Hydrogens are detected by either name ("H*", "[123]H*") or type ("H"); - this is not fool-proof as the atom type is not always a character but - the name pattern should catch most typical occurrences. - - The distance from `atom` is calculated for all hydrogens in the residue - and only those within a cutoff are kept. The cutoff depends on the - heavy atom (more precisely, on its element, which is taken as the first - letter of its name ``atom.name[0]``) and is parameterized in - :attr:`HydrogenBondAnalysis.r_cov`. If no match is found then the - default of 1.5 Å is used. - - - Parameters - ---------- - atom : groups.Atom - heavy atom - - Returns - ------- - hydrogen_atoms : AtomGroup or [] - list of hydrogens (can be a :class:`~MDAnalysis.core.groups.AtomGroup`) - or empty list ``[]`` if none were found. - """ - try: - return atom.residue.atoms.select_atoms( - "(name H* 1H* 2H* 3H* or type H) and around {0:f} name {1!s}" - "".format(self.r_cov[atom.name[0]], atom.name)) - except NoDataError: - return [] - - def logger_debug(self, *args): - if self.debug: - logger.debug(*args) - - - def _prepare(self): - # The distance for selection is defined as twice the maximum bond length of an O-H bond (2A) - # plus order of water bridge times the length of OH bond plus hydrogne bond distance - self.selection_distance = (2 * 2 + self.order * (2 + self.distance)) - - self.box = self.u.dimensions if self.pbc else None - self._residue_dict = {} - self._build_residue_dict(self.selection1) - self._build_residue_dict(self.selection2) - self._build_residue_dict(self.water_selection) - - self._update_selection() - - self.timesteps = [] - if len(self._s1) and len(self._s2): - self._update_water_selection() - else: - logger.info("WaterBridgeAnalysis: no atoms found in the selection.") - - logger.info("WaterBridgeAnalysis: initial checks passed.") - - logger.info("WaterBridgeAnalysis: starting") - logger.debug("WaterBridgeAnalysis: donors %r", self.donors) - logger.debug("WaterBridgeAnalysis: acceptors %r", self.acceptors) - logger.debug("WaterBridgeAnalysis: water bridge %r", self.water_selection) - - if self.debug: - logger.debug("Toggling debug to %r", self.debug) - else: - logger.debug("WaterBridgeAnalysis: For full step-by-step debugging output use debug=True") - - logger.info("Starting analysis (frame index start=%d stop=%d, step=%d)", - self.start, self.stop, self.step) - - def _donor2acceptor(self, donors, h_donors, acceptor): - if len(donors) == 0 or len(acceptor) == 0: - return [] - if self.distance_type != 'heavy': - donors_idx = list(h_donors.keys()) - else: - donors_idx = list(donors.keys()) - result = [] - # Code modified from p-j-smith - pairs, distances = capped_distance(self.u.atoms[donors_idx].positions, - self.u.atoms[acceptor].positions, - max_cutoff=self.distance, box=self.box, - return_distances=True) - if self.distance_type != 'heavy': - tmp_donors = [h_donors[donors_idx[idx]] for idx in pairs[:,0]] - tmp_hydrogens = [donors_idx[idx] for idx in pairs[:,0]] - tmp_acceptors = [acceptor[idx] for idx in pairs[:,1]] - else: - tmp_donors = [] - tmp_hydrogens = [] - tmp_acceptors = [] - for idx in range(len(pairs[:,0])): - for h in donors[donors_idx[pairs[idx,0]]]: - tmp_donors.append(donors_idx[pairs[idx,0]]) - tmp_hydrogens.append(h) - tmp_acceptors.append(acceptor[pairs[idx,1]]) - - angles = np.rad2deg( - calc_angles( - self.u.atoms[tmp_donors].positions, - self.u.atoms[tmp_hydrogens].positions, - self.u.atoms[tmp_acceptors].positions, - box=self.box - ) - ) - hbond_indices = np.where(angles > self.angle)[0] - for index in hbond_indices: - h = tmp_hydrogens[index] - d = tmp_donors[index] - a = tmp_acceptors[index] - result.append((h, d, a, self._expand_index(h), self._expand_index(a), - distances[index], angles[index])) - return result - - def _single_frame(self): - self.timesteps.append(self._ts.time) - self.box = self.u.dimensions if self.pbc else None - - if self.update_selection: - self._update_selection() - if len(self._s1) and len(self._s2): - if self.update_water_selection: - self._update_water_selection() - else: - self._network.append(defaultdict(dict)) - return - - selection_1 = [] - water_pool = defaultdict(list) - next_round_water = set([]) - selection_2 = [] - - if self.selection1_type in ('donor', 'both'): - # check for direct hbond from s1 to s2 - self.logger_debug("Selection 1 Donors <-> Selection 2 Acceptors") - results = self._donor2acceptor(self._s1_donors_h, self._s1_h_donors,self._s2_acceptors) - for line in results: - h_index, d_index, a_index, (h_resname, h_resid, h_name), (a_resname, a_resid, a_name), dist, angle = line - water_pool[(a_resname, a_resid)] = None - selection_1.append((h_index, d_index, a_index, None, dist, angle)) - selection_2.append((a_resname, a_resid)) - if self.order > 0: - self.logger_debug("Selection 1 Donors <-> Water Acceptors") - results = self._donor2acceptor(self._s1_donors_h, self._s1_h_donors, self._water_acceptors) - for line in results: - h_index, d_index, a_index, (h_resname, h_resid, h_name), ( - a_resname, a_resid, a_name), dist, angle = line - selection_1.append((h_index, d_index, a_index, None, dist, angle)) - - self.logger_debug("Water Donors <-> Selection 2 Acceptors") - results = self._donor2acceptor(self._water_donors_h, self._water_h_donors, self._s2_acceptors) - for line in results: - h_index, d_index, a_index, (h_resname, h_resid, h_name), ( - a_resname, a_resid, a_name), dist, angle = line - water_pool[(h_resname, h_resid)].append((h_index, d_index, a_index, None, dist, angle)) - selection_2.append((a_resname, a_resid)) - - if self.selection1_type in ('acceptor', 'both'): - self.logger_debug("Selection 2 Donors <-> Selection 1 Acceptors") - results = self._donor2acceptor(self._s2_donors_h, self._s2_h_donors, self._s1_acceptors) - for line in results: - h_index, d_index, a_index, (h_resname, h_resid, h_name), (a_resname, a_resid, a_name), dist, angle = line - water_pool[(h_resname, h_resid)] = None - selection_1.append((a_index, None, h_index, d_index, dist, angle)) - selection_2.append((h_resname, h_resid)) - - if self.order > 0: - self.logger_debug("Selection 2 Donors <-> Water Acceptors") - results = self._donor2acceptor(self._s2_donors_h, self._s2_h_donors, self._water_acceptors) - for line in results: - h_index, d_index, a_index, (h_resname, h_resid, h_name), ( - a_resname, a_resid, a_name), dist, angle = line - water_pool[(a_resname, a_resid)].append((a_index, None, h_index, d_index, dist, angle)) - selection_2.append((h_resname, h_resid)) - - self.logger_debug("Selection 1 Acceptors <-> Water Donors") - results = self._donor2acceptor(self._water_donors_h, self._water_h_donors, self._s1_acceptors) - for line in results: - h_index, d_index, a_index, (h_resname, h_resid, h_name), ( - a_resname, a_resid, a_name), dist, angle = line - selection_1.append((a_index, None, h_index, d_index, dist, angle)) - - if self.order > 1: - self.logger_debug("Water donor <-> Water Acceptors") - results = self._donor2acceptor(self._water_donors_h, self._water_h_donors, self._water_acceptors) - for line in results: - h_index, d_index, a_index, (h_resname, h_resid, h_name), ( - a_resname, a_resid, a_name), dist, angle = line - water_pool[(a_resname, a_resid)].append((a_index, None, h_index, d_index, dist, angle)) - water_pool[(h_resname, h_resid)].append((h_index, d_index, a_index, None, dist, angle)) - - # solve the connectivity network - # The following code attempt to generate a water network which is formed by the class dict. - # Suppose we have a water bridge connection ARG1 to ASP3 via the two hydrogen bonds. - # [0,1,('ARG',1,'O'), ('SOL',2,'HW1'), 3.0,180], - # [2,3,('SOL',2,'HW2'),('ASP',3,'OD1'), 3.0,180], - # The resulting network will be - #{(0,1,('ARG',1,'O'), ('SOL',2,'HW1'), 3.0,180): {(2,3,('SOL',2,'HW2'),('ASP',3,'OD1'), 3.0,180): None}} - # Where the key of the a dict will be all the hydrogen bonds starting from this nodes. - # The corresponding value of a certain key will be a dictionary whose key will be all the hydrogen bonds from - # the destination of in the key. - # If the value of a certain key is None, which means it is reaching selection 2. - - result = {'start': defaultdict(dict), 'water': defaultdict(dict)} - - def add_route(result, route): - if len(route) == 1: - result['start'][route[0]] = None - else: - # exclude the the selection which goes back to itself - if (sorted(route[0][0:3:2]) == sorted(route[-1][0:3:2])): - return - - # selection 2 to water - result['water'][route[-1]] = None - # water to water - for i in range(1, len(route) - 1): - result['water'][route[i]][route[i+1]] = result['water'][route[i+1]] - # selection 1 to water - result['start'][route[0]][route[1]] = result['water'][route[1]] - - def traverse_water_network(graph, node, end, route, maxdepth, result): - if len(route) > self.order + 1: - return - else: - if node in end: - # check if any duplication happens - if len(route) == len(set(route)): - add_route(result, route) - else: - for new_node in graph[node]: - new_route = route[:] - new_route.append(new_node) - new_node = self._expand_timeseries(new_node,'sele1_sele2')[3][:2] - traverse_water_network(graph, new_node, end, new_route, maxdepth, result) - for s1 in selection_1: - route = [s1, ] - next_mol = self._expand_timeseries(s1,'sele1_sele2')[3][:2] - traverse_water_network(water_pool, next_mol, selection_2, route[:], self.order, result) - - self._network.append(result['start']) - - def _traverse_water_network(self, graph, current, analysis_func=None, output=None, link_func=None, **kwargs): - ''' - This function recursively traverses the water network self._network and finds the hydrogen bonds which connect - the current atom to the next atom. The newly found hydrogen bond will be appended to the hydrogen bonds - connecting the selection 1 to the current atom via link_func. When selection 2 is reached, the full list of - hydrogen bonds connecting the selection 1 to selection 2 will be fed into analysis_func, which will then modify - the output in place. - :param graph: The connection network describes the connection between the atoms in the water network. - :param current: The hydrogen bonds from selection 1 until now. - :param analysis_func: The analysis function which is called to analysis the hydrogen bonds. - :param output: where the result is stored. - :param link_func: The new hydrogen bonds will be appended to current. - :param kwargs: the keywords which are passed into the analysis_func. - :return: - ''' - if link_func is None: - # If no link_func is provided, the default link_func will be used - link_func = self._full_link - - if graph is None: - # if selection 2 is reached - if not analysis_func is None: - # the result is analysed by analysis_func which will change the output - analysis_func(current, output, self.u, **kwargs) - else: - # make sure no loop can occur - if len(current) <= self.order: - for node in graph: - # the new hydrogen bond will be added to the existing bonds - new = link_func(current, node) - self._traverse_water_network(graph[node], new, analysis_func, output, link_func, **kwargs) - - def _expand_index(self, index): - ''' - Expand the index into (resname, resid, name). - ''' - atom = self.u.atoms[index] - return (atom.resname, atom.resid, atom.name) - - def _expand_timeseries(self, entry, output_format=None): - ''' - Expand the compact data format into the old timeseries form. - The old is defined as the format for release up to 0.19.2. - As is discussed in Issue #2177, the internal storage of the hydrogen - bond information has been changed to the compact format. - The function takes in the argument `output_format` to see which output format will be chosen. - if `output_format` is not specified, the value will be taken from :attr:`output_format`. - If `output_format` is 'sele1_sele2', the output will be the old water bridge analysis format:: - - # donor from selection 1 to acceptor in selection 2 - [sele1_index, sele2_index, - (sele1_resname, sele1_resid, sele1_name), - (sele2_resname, sele2_resid, sele2_name), dist, angle] - - If `output_format` is 'donor_acceptor', the output will be the old hydrogen bond analysis format:: - - # From donor to acceptor - [donor_index, acceptor_index, - (donor_resname, donor_resid, donor_name), - (acceptor_resname, acceptor_resid, acceptor_name), dist, angle] - ''' - output_format = output_format or self.output_format - # Expand the compact entry into atom1, which is the first index in the output and atom2, which is the second - # entry. - atom1, heavy_atom1, atom2, heavy_atom2, dist, angle = entry - if output_format == 'sele1_sele2': - # If the output format is the sele1_sele2, no change will be executed - atom1, atom2 = atom1, atom2 - elif output_format == 'donor_acceptor': - # If the output format is donor_acceptor, use heavy atom position to check which is donor and which is - # acceptor - if heavy_atom1 is None: - # atom1 is hydrogen bond acceptor and thus, the position of atom1 and atom2 are swapped. - atom1, atom2 = atom2, atom1 - else: - # atom1 is hydrogen bond donor, position not swapped. - atom1, atom2 = atom1, atom2 - else: - raise KeyError("Only 'sele1_sele2' or 'donor_acceptor' are allowed as output format") - - return (atom1, atom2, self._expand_index(atom1), self._expand_index(atom2), dist, angle) - - def _generate_timeseries(self, output_format=None): - r'''Time series of water bridges. - - The output is generated per frame as is explained in :ref:`wb_Analysis_Timeseries`. - The format of output can be changed via the output_format selection. - If ``output_format="sele1_sele2"``, the hydrogen bond forms a directional - link from selection 1 to selection 2. If ``output_format="donor_acceptor"``, - for each hydrogen bond, the donor is always written before the acceptor. - - Note - ---- - To find an acceptor atom in :attr:`Universe.atoms` by - *index* one would use ``u.atoms[acceptor_index]``. - - The :attr:`timeseries` is a managed attribute and it is generated - from the underlying data in :attr:`_network` every time the - attribute is accessed. It is therefore costly to call and if - :attr:`timeseries` is needed repeatedly it is recommended that you - assign to a variable:: - - w = WaterBridgeAnalysis(u) - w.run() - timeseries = w.timeseries - - .. versionchanged 0.20.0 - The :attr:`WaterBridgeAnalysis.timeseries` has been updated where - the donor and acceptor string has been changed to tuple - (resname string, resid, name_string). - - - ''' - output_format = output_format or self.output_format - def analysis(current, output, *args, **kwargs): - output = current - - timeseries = [] - for frame in self._network: - new_frame = [] - self._traverse_water_network(frame, new_frame, analysis_func=analysis, - output=new_frame, link_func=self._compact_link) - timeseries.append([self._expand_timeseries(entry, output_format) for entry in new_frame]) - return timeseries - - timeseries = property(_generate_timeseries) - - def _get_network(self): - r'''Network representation of the water network. - - The output is generated per frame as is explained in :ref:`wb_Analysis_Network`. - Each hydrogen bond has a compact representation of :: - - [sele1_acceptor_idx, None, sele2_donor_idx, donor_heavy_idx, distance, angle] - - or :: - - [sele1_donor_idx, donor_heavy_idx, sele1_acceptor_idx, None, distance, angle] - - The donor_heavy_idx is the heavy atom bonding to the proton and atoms - can be retrived from the universe:: - - atom = u.atoms[idx] - - .. versionadded:: 0.20.0 - - ''' - return self._network - - def set_network(self, network): - self._network = network - - network = property(_get_network, set_network) - - @classmethod - def _full_link(self, output, node): - ''' - A function used in _traverse_water_network to add the new hydrogen bond to the existing bonds. - :param output: The existing hydrogen bonds from selection 1 - :param node: The new hydrogen bond - :return: The hydrogen bonds from selection 1 with the new hydrogen bond added - ''' - result = output[:] - result.append(node) - return result - - @classmethod - def _compact_link(self, output, node): - ''' - A function used in _traverse_water_network to add the new hydrogen bond to the existing bonds. - In this form no new list is created and thus, one bridge will only appear once. - :param output: The existing hydrogen bonds from selection 1 - :param node: The new hydrogen bond - :return: The hydrogen bonds from selection 1 with the new hydrogen bond added - ''' - output.append(node) - return output - - def _count_by_type_analysis(self, current, output, *args, **kwargs): - ''' - Generates the key for count_by_type analysis. - :return: - ''' - - s1_index, to_index, (s1_resname, s1_resid, s1_name), (to_resname, to_resid, to_name), dist, angle = \ - self._expand_timeseries(current[0]) - from_index, s2_index, (from_resname, from_resid, from_name), (s2_resname, s2_resid, s2_name), dist, angle = \ - self._expand_timeseries(current[-1]) - key = (s1_index, s2_index, s1_resname, s1_resid, s1_name, s2_resname, s2_resid, s2_name) - output[key] += 1 - - def count_by_type(self, analysis_func=None, **kwargs): - """Counts the frequency of water bridge of a specific type. - - If one atom *A* from *selection 1* is linked to atom *B* from - *selection 2* through one or more bridging waters, an entity will be created and - the proportion of time that this linkage exists in the whole simulation - will be calculated. - - The identification of a specific type of water bridge can be modified by - supplying the analysis_func function. See :ref:`wb_count_by_type` - for detail. - - Returns - ------- - counts : list - Returns a :class:`list` containing atom indices for *A* and - *B*, residue names, residue numbers, atom names (for both A and B) and - the fraction of the total time during which the water bridge was - detected. This method returns None if method - :meth:`WaterBridgeAnalysis.run` was not executed first. - - - """ - output = None - if analysis_func is None: - analysis_func = self._count_by_type_analysis - output = 'combined' - - if self._network: - length = len(self._network) - result_dict = defaultdict(int) - for frame in self._network: - frame_dict = defaultdict(int) - self._traverse_water_network(frame, [], analysis_func=analysis_func, - output=frame_dict, - link_func=self._full_link, **kwargs) - for key, value in frame_dict.items(): - result_dict[key] += frame_dict[key] - - if output == 'combined': - result = [[i for i in key] for key in result_dict] - [result[i].append(result_dict[key]/length) for i, key in enumerate(result_dict)] - else: - result = [(key, result_dict[key]/length) for key in result_dict] - return result - else: - return None - - def _count_by_time_analysis(self, current, output, *args, **kwargs): - s1_index, to_index, (s1_resname, s1_resid, s1_name), (to_resname, to_resid, to_name), dist, angle = \ - self._expand_timeseries(current[0]) - from_index, s2_index, (from_resname, from_resid, from_name), (s2_resname, s2_resid, s2_name), dist, angle = \ - self._expand_timeseries(current[-1]) - key = (s1_index, s2_index, s1_resname, s1_resid, s1_name, s2_resname, s2_resid, s2_name) - output[key] += 1 - - def count_by_time(self, analysis_func=None, **kwargs): - """Counts the number of water bridges per timestep. - - The counting behaviour can be adjusted by supplying analysis_func. - See :ref:`wb_count_by_time` for details. - - Returns - ------- - counts : list - Returns a time series ``N(t)`` where ``N`` is the total - number of observed water bridges at time ``t``. - - """ - if analysis_func is None: - analysis_func = self._count_by_time_analysis - if self._network: - result = [] - for time, frame in zip(self.timesteps, self._network): - result_dict = defaultdict(int) - self._traverse_water_network(frame, [], analysis_func=analysis_func, - output=result_dict, - link_func=self._full_link, **kwargs) - result.append((time, sum([result_dict[key] for key in result_dict]))) - return result - else: - return None - - def _timesteps_by_type_analysis(self, current, output, *args, **kwargs): - s1_index, to_index, (s1_resname, s1_resid, s1_name), (to_resname, to_resid, to_name), dist, angle = \ - self._expand_timeseries(current[0]) - from_index, s2_index, (from_resname, from_resid, from_name), (s2_resname, s2_resid, s2_name), dist, angle = \ - self._expand_timeseries(current[-1]) - key = (s1_index, s2_index, s1_resname, s1_resid, s1_name, s2_resname, s2_resid, s2_name) - output[key].append(kwargs.pop('time')) - - def timesteps_by_type(self, analysis_func=None, **kwargs): - """Frames during which each water bridges existed, sorted by each water bridges. - - Processes :attr:`WaterBridgeAnalysis._network` and returns a - :class:`list` containing atom indices, residue names, residue - numbers (from selection 1 and selection 2) and each timestep at which the - water bridge was detected. - - Similar to :meth:`~WaterBridgeAnalysis.count_by_type` and - :meth:`~WaterBridgeAnalysis.count_by_time`, the behavior can be adjusted by - supplying an analysis_func. - - Returns - ------- - data : list - - """ - output = None - if analysis_func is None: - analysis_func = self._timesteps_by_type_analysis - output = 'combined' - - if self._network: - result = defaultdict(list) - if self.timesteps is None: - timesteps = range(len(self._network)) - else: - timesteps = self.timesteps - for time, frame in zip(timesteps, self._network): - self._traverse_water_network(frame, [], analysis_func=analysis_func, - output=result, - link_func=self._full_link, time=time, **kwargs) - - result_list = [] - for key, time_list in six.iteritems(result): - for time in time_list: - if output == 'combined': - key = list(key) - key.append(time) - result_list.append(key) - else: - result_list.append((key, time)) - return result_list - else: - return None - - def generate_table(self, output_format=None): - """Generate a normalised table of the results. - - The table is stored as a :class:`numpy.recarray` in the - attribute :attr:`~WaterBridgeAnalysis.table`. - - The output format of :attr:`~WaterBridgeAnalysis.table` can also be - changed using output_format in a fashion similar to :attr:`WaterBridgeAnalysis.timeseries` - """ - output_format = output_format or self.output_format - if self._network == []: - msg = "No data computed, do run() first." - warnings.warn(msg, category=MissingDataWarning) - logger.warning(msg) - return None - timeseries = self._generate_timeseries(output_format) - - num_records = np.sum([len(hframe) for hframe in timeseries]) - # build empty output table - if output_format == 'sele1_sele2': - dtype = [ - ("time", float), - ("sele1_index", int), ("sele2_index", int), - ("sele1_resnm", "|U4"), ("sele1_resid", int), ("sele1_atom", "|U4"), - ("sele2_resnm", "|U4"), ("sele2_resid", int), ("sele2_atom", "|U4"), - ("distance", float), ("angle", float)] - elif output_format == 'donor_acceptor': - dtype = [ - ("time", float), - ("donor_index", int), ("acceptor_index", int), - ("donor_resnm", "|U4"), ("donor_resid", int), ("donor_atom", "|U4"), - ("acceptor_resnm", "|U4"), ("acceptor_resid", int), ("acceptor_atom", "|U4"), - ("distance", float), ("angle", float)] - - # according to Lukas' notes below, using a recarray at this stage is ineffective - # and speedups of ~x10 can be achieved by filling a standard array, like this: - out = np.empty((num_records,), dtype=dtype) - cursor = 0 # current row - for t, hframe in zip(self.timesteps, timeseries): - for (donor_index, acceptor_index, donor, - acceptor, distance, angle) in hframe: - # donor|acceptor = (resname, resid, atomid) - out[cursor] = (t, donor_index, acceptor_index) + \ - donor + acceptor + (distance, angle) - cursor += 1 - assert cursor == num_records, "Internal Error: Not all wb records stored" - table = out.view(np.recarray) - logger.debug("WBridge: Stored results as table with %(num_records)d entries.", vars()) - self.table = table +with warnings.catch_warnings(): + warnings.simplefilter('always', DeprecationWarning) + warnings.warn(("This module has been moved to " + "MDAnalysis.analysis.hydrogenbonds.wbridge_analysis " + "It will be removed in release 2.0"), + category=DeprecationWarning) + +from ..hydrogenbonds.wbridge_analysis import WaterBridgeAnalysis diff --git a/package/MDAnalysis/analysis/hydrogenbonds/__init__.py b/package/MDAnalysis/analysis/hydrogenbonds/__init__.py index e77893e5d2e..a39a0f15acc 100644 --- a/package/MDAnalysis/analysis/hydrogenbonds/__init__.py +++ b/package/MDAnalysis/analysis/hydrogenbonds/__init__.py @@ -27,3 +27,4 @@ ] from .hbond_analysis import HydrogenBondAnalysis +from .wbridge_analysis import WaterBridgeAnalysis diff --git a/package/MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py b/package/MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py new file mode 100644 index 00000000000..074955a1f17 --- /dev/null +++ b/package/MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py @@ -0,0 +1,1614 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 +# +# MDAnalysis --- https://www.mdanalysis.org +# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, +# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. +# MDAnalysis: A Python package for the rapid analysis of molecular dynamics +# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th +# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. +# doi: 10.25080/majora-629e541a-00e +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 +# + +# Water Bridge Analysis +r"""Water Bridge analysis --- +mod:`MDAnalysis.analysis.hydrogenbonds.WaterBridgeAnalysis` +=============================================================================== +:Author: Zhiyi Wu +:Year: 2017-2018 +:Copyright: GNU Public License v3 +:Maintainer: Zhiyi Wu , `@xiki-tempula`_ on GitHub +.. _`@xiki-tempula`: https://github.com/xiki-tempula +Given a :class:`~MDAnalysis.core.universe.Universe` (simulation +trajectory with 1 or more frames) measure all water bridges for each +frame between selections 1 and 2. +Water bridge is defined as a bridging water which simultaneously forms +two hydrogen bonds with atoms from both selection 1 and selection 2. +A water bridge can form between two hydrogen bond acceptors. +e.g. -CO\ :sub:`2`\ :sup:`-`:···H−O−H···:\ :sup:`-`\ O\ :sub:`2`\ C- +A water bridge can also form between two hydrogen bond donors. +e.g. -NH···:O:···HN- (where O is the oxygen of a bridging water) +A hydrogen bond acceptor and another hydrogen bond donor can be bridged by a +water. +e.g. -CO\ :sub:`2`\ :sup:`-`:···H−O:···HN- (where H−O is part of **H−O**\ −H) +A higher order water bridge is defined as more than one water bridging +hydrogen bond acceptor and donor. An example of a second order water bridge: +e.g. -CO\ :sub:`2`\ :sup:`-`:···H−O:···H−O:···HN- +(where H−O is part of **H−O**\ −H) +The following keyword arguments are important to control the behaviour of the +water bridge analysis: + - *water_selection* (``resname SOL``): the selection string for the bridging + water + - *order* the maximum number of water bridging both ends + - donor-acceptor *distance* (Å): 3.0 + - Angle *cutoff* (degrees): 120.0 + - *forcefield* to switch between default values for different force fields + - *donors* and *acceptors* atom types (to add additional atom names) +Theory +------ +This module attempts to find multi-order water bridge by an approach similar +to breadth-first search, where the first solvation shell of selection 1 is +selected, followed by the selection of the second solvation shell as well as +any hydrogen bonding partner from selection 1. After that, the third solvation +shell, as well as any binding partners from selection 2, are detected. This +process is repeated until the maximum order of water bridges is reached. +.. _wb_Analysis_Network: +Output as Network +----------------- +Since the waters connecting the two ends of the selections are by nature a +network. We provide a network representation of the water network. Water bridge +data are returned per frame, which is stored in +:attr:`WaterBridgeAnalysis.network`. Each frame is represented as a dictionary, +where the keys are the hydrogen bonds originating from selection 1 and the +values are new dictionaries representing the hydrogen bonds coming out of the +corresponding molecules making hydrogen bonds with selection 1. +As for the hydrogen bonds which reach the selection 2, the values of the +corresponding keys are None. One example where selection 1 and selection 2 are +joined by one water molecule (A) which also hydrogen bond to another water (B) +which also hydrogen bond to selection 2 would be represented as :: + # (selection 1)-O:···H-O(water 1):···H-(selection 2) + # | : + # H·············O-H(water2) + # H + {(sele1_acceptor, None, water1_donor, water1_donor_heavy, distance, angle): + {(water1_acceptor, None, sele2_donor, sele2_donor_heavy, + distance, angle): None}, + {(water1_donor, water1_donor_heavy, water2_acceptor, None, + distance, angle): + {(water2_acceptor, None, sele2_donor, sele2_donor_heavy, + distance, angle): None} + }, + } +The atoms are represented by atom index and if the atom is hydrogen bond donor, +it is followed by the index of the corresponding heavy atom +``(donor_proton, donor_heavy_atom)``. +If the atom is a hydrogen bond acceptor, it is followed by none. +.. _wb_Analysis_Timeseries: +Output as Timeseries +-------------------- +For lower order water bridges, it might be desirable to represent the +connections as :attr:`WaterBridgeAnalysis.timeseries`. The results are returned +per frame and are a list of hydrogen bonds between the selection 1 or selection +2 and the bridging waters. Due to the complexity of the higher order water +bridge and the fact that one hydrogen bond between two waters can appear in +both third and fourth order water bridge, the hydrogen bonds in the +:attr:`WaterBridgeAnalysis.timeseries` attribute are generated in a depth-first +search manner to avoid duplication. Example code of how +:attr:`WaterBridgeAnalysis.timeseries` is generated:: + def network2timeseries(network, timeseries): + '''Traverse the network in a depth-first fashion. + expand_timeseries will expand the compact representation to the + familiar timeseries representation.''' + if network is None: + return + else: + for node in network: + timeseries.append(expand_timeseries(node)) + network2timeseries(network[node], timeseries) + timeseries = [] + network2timeseries(network, timeseries) +An example would be. :: + results = [ + [ # frame 1 + [ , , + (, , ), + (, , + ), + , ], + .... + ], + [ # frame 2 + [ ... ], [ ... ], ... + ], + ... + ] +Using the :meth:`WaterBridgeAnalysis.generate_table` method one can reformat +the results as a flat "normalised" table that is easier to import into a +database or dataframe for further processing. +Detection of water bridges +-------------------------- +Water bridges are recorded if a bridging water simultaneously forms +hydrogen bonds with selection 1 and selection 2. +Hydrogen bonds are detected based on a geometric criterion: +1. The distance between acceptor and hydrogen is less than or equal to + `distance` (default is 3 Å). +2. The angle between donor-hydrogen-acceptor is greater than or equal to + `angle` (default is 120º). +The cut-off values `angle` and `distance` can be set as keywords to +:class:`WaterBridgeAnalysis`. +Donor and acceptor heavy atoms are detected from atom names. The current +defaults are appropriate for the CHARMM27 and GLYCAM06 force fields as defined +in Table `Default atom names for water bridge analysis`_. +Hydrogen atoms bonded to a donor are searched based on its distance to the +donor. The algorithm searches for all hydrogens +(name "H*" or name "[123]H" or type "H") in the same residue as the donor atom +within a cut-off distance of 1.2 Å. +.. _Default atom names for water bridge analysis: +.. table:: Default heavy atom names for CHARMM27 force field. + =========== ============== =========== ==================================== + group donor acceptor comments + =========== ============== =========== ==================================== + main chain N O, OC1, OC2 OC1, OC2 from amber99sb-ildn + (Gromacs) + water OH2, OW OH2, OW SPC, TIP3P, TIP4P (CHARMM27,Gromacs) + ARG NE, NH1, NH2 + ASN ND2 OD1 + ASP OD1, OD2 + CYS SG + CYH SG possible false positives for CYS + GLN NE2 OE1 + GLU OE1, OE2 + HIS ND1, NE2 ND1, NE2 presence of H determines if donor + HSD ND1 NE2 + HSE NE2 ND1 + HSP ND1, NE2 + LYS NZ + MET SD see e.g. [Gregoret1991]_ + SER OG OG + THR OG1 OG1 + TRP NE1 + TYR OH OH + =========== ============== =========== ==================================== +.. table:: Heavy atom types for GLYCAM06 force field. + =========== =========== ================== + element donor acceptor + =========== =========== ================== + N N,NT,N3 N,NT + O OH,OW O,O2,OH,OS,OW,OY + S SM + =========== =========== ================== +Donor and acceptor names for the CHARMM27 force field will also work for e.g. +OPLS/AA or amber (tested in Gromacs). Residue names in the table are for +information only and are not taken into account when determining acceptors and +donors. This can potentially lead to some ambiguity in the assignment of +donors/acceptors for residues such as histidine or cytosine. +For more information about the naming convention in GLYCAM06 have a look at the +`Carbohydrate Naming Convention in Glycam`_. +.. _`Carbohydrate Naming Convention in Glycam`: + http://glycam.ccrc.uga.edu/documents/FutureNomenclature.htm +The lists of donor and acceptor names can be extended by providing lists of +atom names in the `donors` and `acceptors` keywords to +:class:`WaterBridgeAnalysis`. If the lists are entirely inappropriate +(e.g. when analysing simulations done with a force field that uses very +different atom names) then one should either use the value "other" for +`forcefield` to set no default values or derive a new class and set the +default list oneself:: + class WaterBridgeAnalysis_OtherFF(WaterBridgeAnalysis): + DEFAULT_DONORS = {"OtherFF": tuple(set([...]))} + DEFAULT_ACCEPTORS = {"OtherFF": tuple(set([...]))} +Then simply use the new class instead of the parent class and call it with +```forcefield` = "OtherFF"``. Please also consider contributing the list of +heavy atom names to MDAnalysis. +How to perform WaterBridgeAnalysis +---------------------------------- +All water bridges between arginine and aspartic acid can be analysed with :: + import MDAnalysis + import MDAnalysis.analysis.hbonds + u = MDAnalysis.Universe('topology', 'trajectory') + w = MDAnalysis.analysis.hbonds.WaterBridgeAnalysis(u, 'resname ARG', + 'resname ASP') + w.run() +The maximum number of bridging waters detected can be changed using the order +keyword. :: + w = MDAnalysis.analysis.hbonds.WaterBridgeAnalysis(u, 'resname ARG', + 'resname ASP', order=3) +Thus, a maximum of three bridging waters will be detected. +An example of using the :attr:`~WaterBridgeAnalysis` would be +detecting the percentage of time a certain water bridge exits. +Trajectory :code:`u` has two frames, where the first frame contains a water +bridge from the oxygen of the first arginine to one of the oxygens in the +carboxylic group of aspartate (ASP3:OD1). In the second frame, the same water +bridge forms but is between the oxygen of the arginine and the other oxygen in +the carboxylic group (ASP3:OD2). :: + # index residue id residue name atom name + # 0 1 ARG O + # 1 2 SOL OW + # 2 2 SOL HW1 + # 3 2 SOL HW2 + # 4 3 ASP OD1 + # 5 3 ASP OD2 + print(w.timeseries) +prints out. :: + [ # frame 1 + # A water bridge SOL2 links O from ARG1 to the carboxylic group OD1 of ASP3 + [[0,2,('ARG',1, 'O'),('SOL',2,'HW1'), 3.0,180], + [3,4,('SOL',2,'HW2'),('ASP',3,'OD1'), 3.0,180], + ], + # frame 2 + # Another water bridge SOL2 links O from ARG1 to the other oxygen of the + # carboxylic group OD2 of ASP3 + [[0,2,('ARG',1, 'O'),('SOL',2,'HW1'), 3.0,180], + [3,5,('SOL',2,'HW2'),('ASP',3,'OD2'), 3.0,180], + ], + ] +.. _wb_count_by_type: +Use count_by_type +----------------- +We can use the :meth:`~WaterBridgeAnalysis.count_by_type` to +generate the frequence of all water bridges in the simulation. :: + w.count_by_type() +Returns :: + [(0, 3, 'ARG', 1, 'O', 'ASP', 3, 'OD1', 0.5), + (0, 4, 'ARG', 1, 'O', 'ASP', 3, 'OD2', 0.5),] +You might think that the OD1 and OD2 are the same oxygen and the aspartate has +just flipped and thus, they should be counted as the same type of water bridge. +The type of the water bridge can be customised by supplying an analysis +function to :meth:`~WaterBridgeAnalysis.count_by_type`. +The analysis function has two parameters. The current and the output. The +current is a list of hydrogen bonds from selection 1 to selection 2, formatted +in the same fashion as :attr:`WaterBridgeAnalysis.network`, and an example will +be :: + [ + # sele1 acceptor idx, , water donor index, donor heavy atom idx, dist, ang. + [ 0, None, 2, 1, 3.0,180], + # water donor idx, donor heavy atom idx, sele2 acceptor idx, distance, angle. + [ 3, 1, 4, None, 3.0,180],] +Where ``current[0]`` is the first hydrogen bond originating from selection 1 +and ``current[-1]`` is the final hydrogen bond ending in selection 2. The +output sums up all the information in the current frame and is a dictionary +with a user-defined key and the value is the weight of the corresponding key. +During the analysis phase, the function analysis iterates through all the water +bridges and modify the output in-place. At the end of the analysis, the keys +from all the frames are collected and the corresponding values will be summed +up and returned. :: + def analysis(current, output, u): + r'''This function defines how the type of water bridge should be + specified. + Parameters + ---------- + current : list + The current water bridge being analysed is a list of hydrogen bonds + from selection 1 to selection 2. + output : dict + A dictionary which is modified in-place where the key is the type + of the water bridge and the value is the weight of this type of + water bridge. + u : MDAnalysis.universe + The current Universe for looking up atoms.''' + # decompose the first hydrogen bond. + sele1_index, sele1_heavy_index, atom2, heavy_atom2, dist, angle = + current[0] + # decompose the last hydrogen bond. + atom1, heavy_atom1, sele2_index, sele2_heavy_index, dist, angle = + current[-1] + # expand the atom index to the resname, resid, atom names + sele1 = u.atoms[sele1_index] + sele2 = u.atoms[sele2_index] + (s1_resname, s1_resid, s1_name) = (sele1.resname, sele1.resid, + sele1.name) + (s2_resname, s2_resid, s2_name) = (sele2.resname, sele2.resid, + sele2.name) + # if the residue name is ASP and the atom name is OD2 or OD1, + # the atom name is changed to OD + if s2_resname == 'ASP' and (s2_name == 'OD1' or s2_name == 'OD2'): + s2_name = 'OD' + # setting up the key which defines this type of water bridge. + key = (s1_resname, s1_resid, s1_name, s2_resname, s2_resid, s2_name) + # The number of this type of water bridge is incremented by 1. + output[key] += 1 + w.count_by_type(analysis_func=analysis) +Returns :: + [(('ARG', 1, 'O', 'ASP', 3, 'OD'), 1.0),] +Note that the result is arranged in the format of +``(key, the proportion of time)``. When no custom analysis function is supplied +, the key is expanded and is formatted as :: + [('ARG', 1, 'O', 'ASP', 3, 'OD', 1.0),] +Some people might only interested in contacts between residues and pay no +attention to the details regarding the atom name. However, since multiple water +bridges can exist between two residues, which sometimes can give a result such +that the water bridge between two residues exists 300% of the time. Though this +might be a desirable result for some people, others might want the water bridge +between two residues to be only counted once per frame. This can also be +achieved by supplying an analysis function to +:meth:`~WaterBridgeAnalysis.count_by_type`. :: + def analysis(current, output, u): + '''This function defines how the type of water bridge should be specified + . + Parameters + ---------- + current : list + The current water bridge being analysed is a list of hydrogen bonds + from selection 1 to selection 2. + output : dict + A dictionary which is modified in-place where the key is the type + of the water bridge and the value is the weight of this type of + water bridge. + u : MDAnalysis.universe + The current Universe for looking up atoms. + ''' + # decompose the first hydrogen bond. + sele1_index, sele1_heavy_index, atom2, heavy_atom2, dist, angle = + current[0] + # decompose the last hydrogen bond. + atom1, heavy_atom1, sele2_index, sele2_heavy_index, dist, angle = + current[-1] + # expand the atom index to the resname, resid, atom names + sele1 = u.atoms[sele1_index] + sele2 = u.atoms[sele2_index] + (s1_resname, s1_resid, s1_name) = (sele1.resname, sele1.resid, + sele1.name) + (s2_resname, s2_resid, s2_name) = (sele2.resname, sele2.resid, + sele2.name) + # s1_name and s2_name are not included in the key + key = (s1_resname, s1_resid, s2_resname, s2_resid) + # Each residue is only counted once per frame + output[key] = 1 + w.count_by_type(analysis_func=analysis) +Returns :: + [(('ARG', 1, 'ASP', 3), 1.0),] +On the other hand, other people may insist that the first order and +second-order water bridges shouldn't be mixed together, which can also be +achieved by supplying an analysis function to +:meth:`~WaterBridgeAnalysis.count_by_type`. :: + def analysis(current, output, u): + '''This function defines how the type of water bridge should be specified + . + Parameters + ---------- + current : list + The current water bridge being analysed is a list of hydrogen bonds + from selection 1 to selection 2. + output : dict + A dictionary which is modified in-place where the key is the type + of the water bridge and the value is the weight of this type of + water bridge. + u : MDAnalysis.universe + The current Universe for looking up atoms. + ''' + # decompose the first hydrogen bond. + sele1_index, sele1_heavy_index, atom2, heavy_atom2, dist, angle = + current[0] + # decompose the last hydrogen bond. + atom1, heavy_atom1, sele2_index, sele2_heavy_index, dist, angle = + current[-1] + # expand the atom index to the resname, resid, atom names + sele1 = u.atoms[sele1_index] + sele2 = u.atoms[sele2_index] + (s1_resname, s1_resid, s1_name) = (sele1.resname, sele1.resid, + sele1.name) + (s2_resname, s2_resid, s2_name) = (sele2.resname, sele2.resid, + sele2.name) + # order of the current water bridge is computed + order_of_water_bridge = len(current) - 1 + # and is included in the key + key = (s1_resname, s1_resid, s2_resname, s2_resid, order_of_water_bridge) + # The number of this type of water bridge is incremented by 1. + output[key] += 1 + w.count_by_type(analysis_func=analysis) +The extra number 1 precede the 1.0 indicate that this is a first order water +bridge :: + [(('ARG', 1, 'ASP', 3, 1), 1.0),] +Some people might not be interested in the interactions related to arginine. +The undesirable interactions can be discarded by supplying an analysis function +to :meth:`~WaterBridgeAnalysis.count_by_type`. :: + def analysis(current, output, u): + '''This function defines how the type of water bridge should be + specified. + Parameters + ---------- + current : list + The current water bridge being analysed is a list of hydrogen bonds + from selection 1 to selection 2. + output : dict + A dictionary which is modified in-place where the key is the type + of the water bridge and the value is the number of this type of + water bridge. + u : MDAnalysis.universe + The current Universe for looking up atoms. + ''' + # decompose the first hydrogen bond. + sele1_index, sele1_heavy_index, atom2, heavy_atom2, dist, angle = + current[0] + # decompose the last hydrogen bond. + atom1, heavy_atom1, sele2_index, sele2_heavy_index, dist, angle = + current[-1] + # expand the atom index to the resname, resid, atom names + sele1 = u.atoms[sele1_index] + sele2 = u.atoms[sele2_index] + (s1_resname, s1_resid, s1_name) = (sele1.resname, sele1.resid, + sele1.name) + (s2_resname, s2_resid, s2_name) = (sele2.resname, sele2.resid, + sele2.name) + if not s1_resname == 'ARG': + key = (s1_resname, s1_resid, s2_resname, s2_resid) + output[key] += 1 + w.count_by_type(analysis_func=analysis) +Returns nothing in this case :: + [,] +Additional keywords can be supplied to the analysis function by passing through +:meth:`~WaterBridgeAnalysis.count_by_type`. :: + def analysis(current, output, **kwargs): + ... + w.count_by_type(analysis_func=analysis, **kwargs) +.. _wb_count_by_time: +Use count_by_time +----------------- +:meth:`~WaterBridgeAnalysis.count_by_type` aggregates data across frames, which +might be desirable in some cases but not the others. +:meth:`~WaterBridgeAnalysis.count_by_time` provides additional functionality +for aggregating results for each frame. +The default behaviour of :meth:`~WaterBridgeAnalysis.count_by_time` is counting +the number of water bridges from selection 1 to selection 2 for each frame. +Take the previous ASP, ARG salt bridge for example: :: + w.count_by_time() +As one water bridge is found in both frames, the method returns :: + [(1.0, 1), (2.0, 1), ] +Similar to :meth:`~WaterBridgeAnalysis.count_by_type` +The behaviour of :meth:`~WaterBridgeAnalysis.count_by_time` can also be +modified by supplying an analysis function. +Suppose we want to count + - the **number** of water molecules involved in bridging selection 1 to + selection 2. + - only if water bridge terminates in atom name **OD1 of ASP**. + - only when water bridge is joined by less than **two** water. +The analysis function can be written as:: + def analysis(current, output, u, **kwargs): + '''This function defines how the counting of water bridge should be + specified. + Parameters + ---------- + current : list + The current water bridge being analysed is a list of hydrogen bonds + from selection 1 to selection 2. + output : dict + A dictionary which is modified in-place where the key is the type + of the water bridge and the value is the number of this type of + water bridge. + u : MDAnalysis.universe + The current Universe for looking up atoms. + ''' + # decompose the first hydrogen bond. + sele1_index, sele1_heavy_index, atom2, heavy_atom2, dist, angle = + current[0] + # decompose the last hydrogen bond. + atom1, heavy_atom1, sele2_index, sele2_heavy_index, dist, angle = + current[-1] + # expand the atom index to the resname, resid, atom names + sele1 = u.atoms[sele1_index] + sele2 = u.atoms[sele2_index] + (s1_resname, s1_resid, s1_name) = + (sele1.resname, sele1.resid, sele1.name) + (s2_resname, s2_resid, s2_name) = + (sele2.resname, sele2.resid, sele2.name) + # only the residue name is ASP and the atom name is OD1, + if s2_resname == 'ASP' and s2_name == 'OD1': + # only if the order of water bridge is less than 2 + if len(current) -1 < 2: + # extract all water molecules involved in the water bridge + # extract the first water from selection 1 + s1_index, to_index, (s1_resname, s1_resid, s1_name), + (to_resname, to_resid, to_name), dist, angle = current[0] + key = (to_resname, to_resid) + output[key] = 1 + # extract all the waters between selection 1 and selection 2 + for hbond in current[1:-1]: + # decompose the hydrogen bond. + from_index, to_index, (from_resname, from_resid, from_name), + (to_resname, to_resid, to_name), dist, angle = hbond + # add first water + key1 = (from_resname, from_resid) + output[key1] = 1 + # add second water + key2 = (to_resname, to_resid) + output[key2] = 1 + # extract the last water to selection 2 + from_index, s2_index, (from_resname, from_resid, from_name), + (s2_resname, s2_resid, s2_name), dist, angle = current[-1] + key = (from_resname, from_resid) + output[key] = 1 + w.count_by_time(analysis_func=analysis) +Returns :: + [(1.0, 1), (2.0, 0),] +Classes +------- +.. autoclass:: WaterBridgeAnalysis + :members: + .. attribute:: timesteps + List of the times of each timestep. This can be used together with + :attr:`~WaterBridgeAnalysis.timeseries` to find the specific time point + of a water bridge existence. +""" +from collections import defaultdict +import logging +import warnings +import numpy as np + +from ..base import AnalysisBase +from MDAnalysis.lib.NeighborSearch import AtomNeighborSearch +from MDAnalysis.lib.distances import capped_distance, calc_angles +from MDAnalysis import NoDataError, MissingDataWarning, SelectionError +from MDAnalysis.lib import distances + +logger = logging.getLogger('MDAnalysis.analysis.WaterBridgeAnalysis') + + +class WaterBridgeAnalysis(AnalysisBase): + """Perform a water bridge analysis + The analysis of the trajectory is performed with the + :meth:`WaterBridgeAnalysis.run` method. The result is stored in + :attr:`WaterBridgeAnalysis.timeseries`. See + :meth:`~WaterBridgeAnalysis.run` for the format. + .. versionadded:: 0.17.0 + """ + # use tuple(set()) here so that one can just copy&paste names from the + # table; set() takes care for removing duplicates. At the end the + # DEFAULT_DONORS and DEFAULT_ACCEPTORS should simply be tuples. + + #: default heavy atom names whose hydrogens are treated as *donors* + #: (see :ref:`Default atom names for water bridge analysis`); + #: use the keyword `donors` to add a list of additional donor names. + DEFAULT_DONORS = { + 'CHARMM27': tuple( + {'N', 'OH2', 'OW', 'NE', 'NH1', 'NH2', 'ND2', 'SG', 'NE2', 'ND1', + 'NZ', 'OG', 'OG1', 'NE1', 'OH'}), + 'GLYCAM06': tuple({'N', 'NT', 'N3', 'OH', 'OW'}), + 'other': tuple(set([]))} + + #: default atom names that are treated as hydrogen *acceptors* + #: (see :ref:`Default atom names for water bridge analysis`); + #: use the keyword `acceptors` to add a list of additional acceptor names. + DEFAULT_ACCEPTORS = { + 'CHARMM27': tuple( + {'O', 'OC1', 'OC2', 'OH2', 'OW', 'OD1', 'OD2', 'SG', 'OE1', 'OE1', + 'OE2', 'ND1', 'NE2', 'SD', 'OG', 'OG1', 'OH'}), + 'GLYCAM06': + tuple({'N', 'NT', 'O', 'O2', 'OH', 'OS', 'OW', 'OY', 'SM'}), + 'other': tuple(set([]))} + + #: A :class:`collections.defaultdict` of covalent radii of common donors + #: (used in :meth`_get_bonded_hydrogens_list` to check if a hydrogen is + #: sufficiently close to its donor heavy atom). Values are stored for + #: N, O, P, and S. Any other heavy atoms are assumed to have hydrogens + #: covalently bound at a maximum distance of 1.5 Å. + r_cov = defaultdict(lambda: 1.5, # default value + N=1.31, O=1.31, P=1.58, S=1.55) + + def __init__(self, universe, selection1='protein', + selection2='not resname SOL', water_selection='resname SOL', + order=1, selection1_type='both', update_selection=False, + update_water_selection=True, filter_first=True, + distance_type='hydrogen', distance=3.0, angle=120.0, + forcefield='CHARMM27', donors=None, acceptors=None, + output_format="sele1_sele2", debug=None, + pbc=False, **kwargs): + """Set up the calculation of water bridges between two selections in a + universe. + The timeseries is accessible as the attribute + :attr:`WaterBridgeAnalysis.timeseries`. + If no hydrogen bonds are detected or if the initial check fails, look + at the log output (enable with :func:`MDAnalysis.start_logging` and set + `verbose` ``=True``). It is likely that the default names for donors + and acceptors are not suitable (especially for non-standard + ligands). In this case, either change the `forcefield` or use + customized `donors` and/or `acceptors`. + Parameters + ---------- + universe : Universe + Universe object + selection1 : str (optional) + Selection string for first selection ['protein'] + selection2 : str (optional) + Selection string for second selection ['not resname SOL'] + This string selects everything except water where water is assumed + to have a residue name as SOL. + water_selection : str (optional) + Selection string for bridging water selection ['resname SOL'] + The default selection assumes that the water molecules have residue + name "SOL". Change it to the appropriate selection for your + specific force field. + However, in theory, this selection can be anything which forms + a hydrogen bond with selection 1 and selection 2. + order : int (optional) + The maximum number of water bridges linking both selections. + if the order is set to 3, then all the residues linked with less + than three water molecules will be detected. [1] + Computation of high order water bridges can be very time-consuming. + Think carefully before running the calculation, do you really want + to compute the 20th order water bridge between domain A and domain + B or you just want to know the third order water bridge between two + residues. + selection1_type : {"donor", "acceptor", "both"} (optional) + Selection 1 can be 'donor', 'acceptor' or 'both'. Note that the + value for `selection1_type` automatically determines how + `selection2` handles donors and acceptors: If `selection1` contains + 'both' then `selection2` will also contain 'both'. If `selection1` + is set to 'donor' then `selection2` is 'acceptor' (and vice versa). + ['both']. + update_selection : bool (optional) + Update selection 1 and 2 at each frame. Setting to ``True`` if the + selection is not static. Selections are filtered first to speed up + performance. Thus, setting to ``True`` is recommended if contact + surface between selection 1 and selection 2 is constantly + changing. [``False``] + update_water_selection : bool (optional) + Update selection of water at each frame. Setting to ``False`` is + **only** recommended when the total amount of water molecules in + the simulation are small and when water molecules remain static + across the simulation. + However, in normal simulations, only a tiny proportion of water is + engaged in the formation of water bridge. It is recommended to + update the water selection and set keyword `filter_first` to + ``True`` so as to filter out water not residing between the two + selections. [``True``] + filter_first : bool (optional) + Filter the water selection to only include water within 4 Å + + `order` * (2 Å + `distance`) away from `both` selection 1 and + selection 2. + Selection 1 and selection 2 are both filtered to only include atoms + with the same distance away from the other selection. [``True``] + distance : float (optional) + Distance cutoff for hydrogen bonds; only interactions with a H-A + distance <= `distance` (and the appropriate D-H-A angle, see + `angle`) are recorded. (Note: `distance_type` can change this to + the D-A distance.) [3.0] + angle : float (optional) + Angle cutoff for hydrogen bonds; an ideal H-bond has an angle of + 180º. A hydrogen bond is only recorded if the D-H-A angle is + >= `angle`. The default of 120º also finds fairly non-specific + hydrogen interactions and possibly better value is 150º. [120.0] + forcefield : {"CHARMM27", "GLYCAM06", "other"} (optional) + Name of the forcefield used. Switches between different + :attr:`~DEFAULT_DONORS` and + :attr:`~DEFAULT_ACCEPTORS` values. + ["CHARMM27"] + donors : sequence (optional) + Extra H donor atom types (in addition to those in + :attr:`~DEFAULT_DONORS`), must be a sequence. + acceptors : sequence (optional) + Extra H acceptor atom types (in addition to those in + :attr:`~DEFAULT_ACCEPTORS`), must be a + sequence. + distance_type : {"hydrogen", "heavy"} (optional) + Measure hydrogen bond lengths between donor and acceptor heavy + atoms ("heavy") or between donor hydrogen and acceptor heavy + atom ("hydrogen"). If using "heavy" then one should set the + *distance* cutoff to a higher value such as 3.5 Å. ["hydrogen"] + output_format: {"sele1_sele2", "donor_acceptor"} (optional) + Setting the output format for timeseries and table. If set to + "sele1_sele2", for each hydrogen bond, the one close to selection 1 + will be placed before selection 2. If set to "donor_acceptor", the + donor will be placed before acceptor. "sele1_sele2"] + debug : bool (optional) + If set to ``True`` enables per-frame debug logging. This is + disabled by default because it generates a very large amount of + output in the log file. (Note that a logger must have been started + to see the output, e.g. using :func:`MDAnalysis.start_logging`.) + verbose : bool (optional) + Toggle progress output. (Can also be given as keyword argument to + :meth:`run`.) + Notes + ----- + In order to speed up processing, atoms are filtered by a coarse + distance criterion before a detailed hydrogen bonding analysis is + performed (`filter_first` = ``True``). + If selection 1 and selection 2 are very mobile during the simulation + and the contact surface is constantly changing (i.e. residues are + moving farther than 4 Å + `order` * (2 Å + `distance`)), you might + consider setting the `update_selection` keywords to ``True`` + to ensure correctness. + .. versionchanged 0.20.0 + The :attr:`WaterBridgeAnalysis.timeseries` has been updated + see :attr:`WaterBridgeAnalysis.timeseries` for detail. + This class is now based on + :class:`~MDAnalysis.analysis.base.AnalysisBase`. + """ + super(WaterBridgeAnalysis, self).__init__(universe.trajectory, + **kwargs) + self.water_selection = water_selection + self.update_water_selection = update_water_selection + # per-frame debugging output? + self.debug = debug + + # set the output format + self.output_format = output_format + + self.u = universe + self.selection1 = selection1 + self.selection2 = selection2 + self.selection1_type = selection1_type + + # if the selection 1 and selection 2 are the same + if selection1 == selection2: + # eliminate the duplication + self.selection1_type = "donor" + self.update_selection = update_selection + self.filter_first = filter_first + self.distance = distance + self.distance_type = distance_type # note: everything except 'heavy' + # will give the default behavior + self.angle = angle + self.pbc = pbc and all(self.u.dimensions[:3]) + self.order = order + + # set up the donors/acceptors lists + if donors is None: + donors = () + if acceptors is None: + acceptors = () + self.forcefield = forcefield + self.donors = tuple(set(self.DEFAULT_DONORS[forcefield]).union(donors)) + self.acceptors = tuple(set(self.DEFAULT_ACCEPTORS[forcefield]).union( + acceptors)) + + if self.selection1_type not in ('both', 'donor', 'acceptor'): + raise ValueError('WaterBridgeAnalysis: ' + 'Invalid selection type {0!s}'.format( + self.selection1_type)) + + self._network = [] # final result accessed as self.network + self.timesteps = None # time for each frame + + self._log_parameters() + + def _log_parameters(self): + """Log important parameters to the logfile.""" + logger.info("WaterBridgeAnalysis: selection = %r (update: %r)", + self.selection2, self.update_selection) + logger.info("WaterBridgeAnalysis: water selection = %r (update: %r)", + self.water_selection, self.update_water_selection) + logger.info("WaterBridgeAnalysis: criterion: donor %s atom and " + "acceptor atom distance <= %.3f A", self.distance_type, + self.distance) + logger.info("WaterBridgeAnalysis: criterion: " + "angle D-H-A >= %.3f degrees", + self.angle) + logger.info("WaterBridgeAnalysis: force field %s to guess donor and \ + acceptor names", self.forcefield) + + def _build_residue_dict(self, selection): + # Build the residue_dict where the key is the residue name + # The content is a dictionary where hydrogen bond donor heavy atom + # names is the key + # The content is the hydrogen bond donor hydrogen atom names + atom_group = self.u.select_atoms(selection) + for residue in atom_group.residues: + if residue.resname not in self._residue_dict: + self._residue_dict[residue.resname] = defaultdict(set) + for atom in residue.atoms: + if atom.name in self.donors: + self._residue_dict[residue.resname][atom.name].update( + self._get_bonded_hydrogens(atom).names) + + def _update_donor_h(self, atom_ix, h_donors, donors_h): + atom = self.u.atoms[atom_ix] + residue = atom.residue + hydrogen_names = self._residue_dict[residue.resname][atom.name] + if hydrogen_names: + hydrogens = residue.atoms.select_atoms('name {0}'.format( + ' '.join(hydrogen_names))).ix + for atom in hydrogens: + h_donors[atom] = atom_ix + donors_h[atom_ix].append(atom) + + def _update_selection(self): + self._s1_donors = [] + self._s1_h_donors = {} + self._s1_donors_h = defaultdict(list) + self._s1_acceptors = [] + + self._s2_donors = [] + self._s2_h_donors = {} + self._s2_donors_h = defaultdict(list) + self._s2_acceptors = [] + + self._s1 = self.u.select_atoms(self.selection1).ix + self._s2 = self.u.select_atoms(self.selection2).ix + + if self.filter_first and len(self._s1): + self.logger_debug('Size of selection 1 before filtering:' + ' {} atoms'.format(len(self._s1))) + ns_selection_1 = AtomNeighborSearch(self.u.atoms[self._s1], + box=self.box) + self._s1 = ns_selection_1.search(self.u.atoms[self._s2], + self.selection_distance).ix + self.logger_debug("Size of selection 1: {0} atoms".format( + len(self._s1))) + + if len(self._s1) == 0: + logger.warning('Selection 1 "{0}" did not select any atoms.' + .format(str(self.selection1)[:80])) + return + + if self.filter_first and len(self._s2): + self.logger_debug('Size of selection 2 before filtering:' + ' {} atoms'.format(len(self._s2))) + ns_selection_2 = AtomNeighborSearch(self.u.atoms[self._s2], + box=self.box) + self._s2 = ns_selection_2.search(self.u.atoms[self._s1], + self.selection_distance).ix + self.logger_debug('Size of selection 2: {0} atoms'.format( + len(self._s2))) + + if len(self._s2) == 0: + logger.warning('Selection 2 "{0}" did not select any atoms.' + .format(str(self.selection2)[:80])) + return + + if self.selection1_type in ('donor', 'both'): + self._s1_donors = self.u.atoms[self._s1].select_atoms( + 'name {0}'.format(' '.join(self.donors))).ix + for atom_ix in self._s1_donors: + self._update_donor_h(atom_ix, self._s1_h_donors, + self._s1_donors_h) + self.logger_debug("Selection 1 donors: {0}".format( + len(self._s1_donors))) + self.logger_debug("Selection 1 donor hydrogens: {0}".format( + len(self._s1_h_donors))) + if self.selection1_type in ('acceptor', 'both'): + self._s1_acceptors = self.u.atoms[self._s1].select_atoms( + 'name {0}'.format(' '.join(self.acceptors))).ix + self.logger_debug("Selection 1 acceptors: {0}".format( + len(self._s1_acceptors))) + + if len(self._s2) == 0: + return None + if self.selection1_type in ('donor', 'both'): + self._s2_acceptors = self.u.atoms[self._s2].select_atoms( + 'name {0}'.format(' '.join(self.acceptors))).ix + self.logger_debug("Selection 2 acceptors: {0:d}".format( + len(self._s2_acceptors))) + if self.selection1_type in ('acceptor', 'both'): + self._s2_donors = self.u.atoms[self._s2].select_atoms( + 'name {0}'.format(' '.join(self.donors))).ix + for atom_ix in self._s2_donors: + self._update_donor_h(atom_ix, self._s2_h_donors, + self._s2_donors_h) + self.logger_debug("Selection 2 donors: {0:d}".format( + len(self._s2_donors))) + self.logger_debug("Selection 2 donor hydrogens: {0:d}".format( + len(self._s2_h_donors))) + + def _update_water_selection(self): + self._water_donors = [] + self._water_h_donors = {} + self._water_donors_h = defaultdict(list) + self._water_acceptors = [] + + self._water = self.u.select_atoms(self.water_selection).ix + self.logger_debug('Size of water selection before filtering:' + ' {} atoms'.format(len(self._water))) + if len(self._water) and self.filter_first: + filtered_s1 = AtomNeighborSearch(self.u.atoms[self._water], + box=self.box).search( + self.u.atoms[self._s1], self.selection_distance) + if filtered_s1: + self._water = AtomNeighborSearch(filtered_s1, + box=self.box).search( + self.u.atoms[self._s2], self.selection_distance).ix + + self.logger_debug("Size of water selection: {0} atoms".format( + len(self._water))) + + if len(self._water) == 0: + logger.warning("Water selection '{0}' did not select any atoms." + .format(str(self.water_selection)[:80])) + else: + self._water_donors = self.u.atoms[self._water].select_atoms( + 'name {0}'.format(' '.join(self.donors))).ix + for atom_ix in self._water_donors: + self._update_donor_h(atom_ix, self._water_h_donors, + self._water_donors_h) + self.logger_debug("Water donors: {0}".format( + len(self._water_donors))) + self.logger_debug("Water donor hydrogens: {0}".format( + len(self._water_h_donors))) + self._water_acceptors = self.u.atoms[self._water].select_atoms( + 'name {0}'.format(' '.join(self.acceptors))).ix + self.logger_debug("Water acceptors: {0}".format( + len(self._water_acceptors))) + + def _get_bonded_hydrogens(self, atom): + """Find hydrogens bonded within cutoff to `atom`. + Hydrogens are detected by either name ("H*", "[123]H*") or type ("H"); + this is not fool-proof as the atom type is not always a character but + the name pattern should catch most typical occurrences. + The distance from `atom` is calculated for all hydrogens in the residue + and only those within a cutoff are kept. The cutoff depends on the + heavy atom (more precisely, on its element, which is taken as the first + letter of its name ``atom.name[0]``) and is parameterized in + :attr:`WaterBridgeAnalysis.r_cov`. If no match is found then the + default of 1.5 Å is used. + Parameters + ---------- + atom : groups.Atom + heavy atom + Returns + ------- + hydrogen_atoms : AtomGroup or [] + list of hydrogens (can be a + :class:`~MDAnalysis.core.groups.AtomGroup`) + or empty list ``[]`` if none were found. + """ + try: + return atom.residue.atoms.select_atoms( + "(name H* 1H* 2H* 3H* or type H) and around {0:f} name {1!s}" + "".format(self.r_cov[atom.name[0]], atom.name)) + except NoDataError: + return [] + + def logger_debug(self, *args): + if self.debug: + logger.debug(*args) + + def _prepare(self): + # The distance for selection is defined as twice the maximum bond + # length of an O-H bond (2A) plus order of water bridge times the + # length of OH bond plus hydrogne bond distance + self.selection_distance = (2 * 2 + self.order * (2 + self.distance)) + + self.box = self.u.dimensions if self.pbc else None + self._residue_dict = {} + self._build_residue_dict(self.selection1) + self._build_residue_dict(self.selection2) + self._build_residue_dict(self.water_selection) + + self._update_selection() + + self.timesteps = [] + if len(self._s1) and len(self._s2): + self._update_water_selection() + else: + logger.info("WaterBridgeAnalysis: " + "no atoms found in the selection.") + + logger.info("WaterBridgeAnalysis: initial checks passed.") + + logger.info("WaterBridgeAnalysis: starting") + logger.debug("WaterBridgeAnalysis: donors %r", self.donors) + logger.debug("WaterBridgeAnalysis: acceptors %r", self.acceptors) + logger.debug("WaterBridgeAnalysis: water bridge %r", + self.water_selection) + + if self.debug: + logger.debug("Toggling debug to %r", self.debug) + else: + logger.debug("WaterBridgeAnalysis: For full step-by-step " + "debugging output use debug=True") + + logger.info("Starting analysis " + "(frame index start=%d stop=%d, step=%d)", + self.start, self.stop, self.step) + + def _donor2acceptor(self, donors, h_donors, acceptor): + if len(donors) == 0 or len(acceptor) == 0: + return [] + if self.distance_type != 'heavy': + donors_idx = list(h_donors.keys()) + else: + donors_idx = list(donors.keys()) + result = [] + # Code modified from p-j-smith + pairs, distances = capped_distance(self.u.atoms[donors_idx].positions, + self.u.atoms[acceptor].positions, + max_cutoff=self.distance, + box=self.box, + return_distances=True) + if self.distance_type != 'heavy': + tmp_donors = [h_donors[donors_idx[idx]] for idx in pairs[:, 0]] + tmp_hydrogens = [donors_idx[idx] for idx in pairs[:, 0]] + tmp_acceptors = [acceptor[idx] for idx in pairs[:, 1]] + else: + tmp_donors = [] + tmp_hydrogens = [] + tmp_acceptors = [] + for idx in range(len(pairs[:, 0])): + for h in donors[donors_idx[pairs[idx, 0]]]: + tmp_donors.append(donors_idx[pairs[idx, 0]]) + tmp_hydrogens.append(h) + tmp_acceptors.append(acceptor[pairs[idx, 1]]) + + angles = np.rad2deg( + calc_angles( + self.u.atoms[tmp_donors].positions, + self.u.atoms[tmp_hydrogens].positions, + self.u.atoms[tmp_acceptors].positions, + box=self.box + ) + ) + hbond_indices = np.where(angles > self.angle)[0] + for index in hbond_indices: + h = tmp_hydrogens[index] + d = tmp_donors[index] + a = tmp_acceptors[index] + result.append((h, d, a, self._expand_index(h), + self._expand_index(a), + distances[index], angles[index])) + return result + + def _single_frame(self): + self.timesteps.append(self._ts.time) + self.box = self.u.dimensions if self.pbc else None + + if self.update_selection: + self._update_selection() + if len(self._s1) and len(self._s2): + if self.update_water_selection: + self._update_water_selection() + else: + self._network.append(defaultdict(dict)) + return + + selection_1 = [] + water_pool = defaultdict(list) + next_round_water = set([]) + selection_2 = [] + + if self.selection1_type in ('donor', 'both'): + # check for direct hbond from s1 to s2 + self.logger_debug("Selection 1 Donors <-> Selection 2 Acceptors") + results = self._donor2acceptor( + self._s1_donors_h, self._s1_h_donors, self._s2_acceptors) + for line in results: + h_index, d_index, a_index, (h_resname, h_resid, h_name), \ + (a_resname, a_resid, a_name), dist, angle = line + water_pool[(a_resname, a_resid)] = None + selection_1.append( + (h_index, d_index, a_index, None, dist, angle)) + selection_2.append((a_resname, a_resid)) + if self.order > 0: + self.logger_debug("Selection 1 Donors <-> Water Acceptors") + results = self._donor2acceptor( + self._s1_donors_h, self._s1_h_donors, + self._water_acceptors) + for line in results: + h_index, d_index, a_index, (h_resname, h_resid, h_name), ( + a_resname, a_resid, a_name), dist, angle = line + selection_1.append( + (h_index, d_index, a_index, None, dist, angle)) + + self.logger_debug("Water Donors <-> Selection 2 Acceptors") + results = self._donor2acceptor( + self._water_donors_h, self._water_h_donors, + self._s2_acceptors) + for line in results: + h_index, d_index, a_index, (h_resname, h_resid, h_name), ( + a_resname, a_resid, a_name), dist, angle = line + water_pool[(h_resname, h_resid)].append( + (h_index, d_index, a_index, None, dist, angle)) + selection_2.append((a_resname, a_resid)) + + if self.selection1_type in ('acceptor', 'both'): + self.logger_debug("Selection 2 Donors <-> Selection 1 Acceptors") + results = self._donor2acceptor(self._s2_donors_h, + self._s2_h_donors, + self._s1_acceptors) + for line in results: + h_index, d_index, a_index, (h_resname, h_resid, h_name), \ + (a_resname, a_resid, a_name), dist, angle = line + water_pool[(h_resname, h_resid)] = None + selection_1.append( + (a_index, None, h_index, d_index, dist, angle)) + selection_2.append((h_resname, h_resid)) + + if self.order > 0: + self.logger_debug("Selection 2 Donors <-> Water Acceptors") + results = self._donor2acceptor( + self._s2_donors_h, self._s2_h_donors, + self._water_acceptors) + for line in results: + h_index, d_index, a_index, (h_resname, h_resid, h_name), ( + a_resname, a_resid, a_name), dist, angle = line + water_pool[(a_resname, a_resid)].append( + (a_index, None, h_index, d_index, dist, angle)) + selection_2.append((h_resname, h_resid)) + + self.logger_debug("Selection 1 Acceptors <-> Water Donors") + results = self._donor2acceptor( + self._water_donors_h, self._water_h_donors, + self._s1_acceptors) + for line in results: + h_index, d_index, a_index, (h_resname, h_resid, h_name), ( + a_resname, a_resid, a_name), dist, angle = line + selection_1.append( + (a_index, None, h_index, d_index, dist, angle)) + + if self.order > 1: + self.logger_debug("Water donor <-> Water Acceptors") + results = self._donor2acceptor(self._water_donors_h, + self._water_h_donors, + self._water_acceptors) + for line in results: + h_index, d_index, a_index, (h_resname, h_resid, h_name), ( + a_resname, a_resid, a_name), dist, angle = line + water_pool[(a_resname, a_resid)].append( + (a_index, None, h_index, d_index, dist, angle)) + water_pool[(h_resname, h_resid)].append( + (h_index, d_index, a_index, None, dist, angle)) + + # solve the connectivity network + # The following code attempt to generate a water network which is + # formed by the class dict. + # Suppose we have a water bridge connection ARG1 to ASP3 via the two + # hydrogen bonds. + # [0,1,('ARG',1,'O'), ('SOL',2,'HW1'), 3.0,180], + # [2,3,('SOL',2,'HW2'),('ASP',3,'OD1'), 3.0,180], + # The resulting network will be + # {(0,1,('ARG',1,'O'), ('SOL',2,'HW1'), 3.0,180): + # {(2,3,('SOL',2,'HW2'),('ASP',3,'OD1'), 3.0,180): None}} + # Where the key of the a dict will be all the hydrogen bonds starting + # from this nodes. + # The corresponding value of a certain key will be a dictionary whose + # key will be all the hydrogen bonds from + # the destination of in the key. + # If the value of a certain key is None, which means it is reaching + # selection 2. + + result = {'start': defaultdict(dict), 'water': defaultdict(dict)} + + def add_route(result, route): + if len(route) == 1: + result['start'][route[0]] = None + else: + # exclude the the selection which goes back to itself + if (sorted(route[0][0:3:2]) == sorted(route[-1][0:3:2])): + return + + # selection 2 to water + result['water'][route[-1]] = None + # water to water + for i in range(1, len(route) - 1): + result['water'][route[i]][route[i+1]] = \ + result['water'][route[i+1]] + # selection 1 to water + result['start'][route[0]][route[1]] = result['water'][route[1]] + + def traverse_water_network(graph, node, end, route, maxdepth, result): + if len(route) > self.order + 1: + return + else: + if node in end: + # check if any duplication happens + if len(route) == len(set(route)): + add_route(result, route) + else: + for new_node in graph[node]: + new_route = route[:] + new_route.append(new_node) + new_node = self._expand_timeseries( + new_node, 'sele1_sele2')[3][:2] + traverse_water_network(graph, new_node, end, new_route, + maxdepth, result) + for s1 in selection_1: + route = [s1, ] + next_mol = self._expand_timeseries(s1, 'sele1_sele2')[3][:2] + traverse_water_network(water_pool, next_mol, selection_2, route[:], + self.order, result) + + self._network.append(result['start']) + + def _traverse_water_network(self, graph, current, analysis_func=None, + output=None, link_func=None, **kwargs): + ''' + This function recursively traverses the water network self._network and + finds the hydrogen bonds which connect the current atom to the next + atom. The newly found hydrogen bond will be appended to the hydrogen + bonds connecting the selection 1 to the current atom via link_func. + When selection 2 is reached, the full list of hydrogen bonds + connecting the selection 1 to selection 2 will be fed into + analysis_func, which will then modify the output in place. + :param graph: The connection network describes the connection between + the atoms in the water network. + :param current: The hydrogen bonds from selection 1 until now. + :param analysis_func: The analysis function which is called to analysis + the hydrogen bonds. + :param output: where the result is stored. + :param link_func: The new hydrogen bonds will be appended to current. + :param kwargs: the keywords which are passed into the analysis_func. + :return: + ''' + if link_func is None: + # If no link_func is provided, the default link_func will be used + link_func = self._full_link + + if graph is None: + # if selection 2 is reached + if analysis_func is not None: + # the result is analysed by analysis_func which will change the + # output + analysis_func(current, output, self.u, **kwargs) + else: + # make sure no loop can occur + if len(current) <= self.order: + for node in graph: + # the new hydrogen bond will be added to the existing bonds + new = link_func(current, node) + self._traverse_water_network(graph[node], new, + analysis_func, output, + link_func, **kwargs) + + def _expand_index(self, index): + ''' + Expand the index into (resname, resid, name). + ''' + atom = self.u.atoms[index] + return (atom.resname, atom.resid, atom.name) + + def _expand_timeseries(self, entry, output_format=None): + ''' + Expand the compact data format into the old timeseries form. + The old is defined as the format for release up to 0.19.2. + As is discussed in Issue #2177, the internal storage of the hydrogen + bond information has been changed to the compact format. + The function takes in the argument `output_format` to see which output + format will be chosen. + if `output_format` is not specified, the value will be taken from + :attr:`output_format`. + If `output_format` is 'sele1_sele2', the output will be the old water + bridge analysis format:: + # donor from selection 1 to acceptor in selection 2 + [sele1_index, sele2_index, + (sele1_resname, sele1_resid, sele1_name), + (sele2_resname, sele2_resid, sele2_name), dist, angle] + If `output_format` is 'donor_acceptor', the output will be the old + hydrogen bond analysis format:: + # From donor to acceptor + [donor_index, acceptor_index, + (donor_resname, donor_resid, donor_name), + (acceptor_resname, acceptor_resid, acceptor_name), dist, angle] + ''' + output_format = output_format or self.output_format + # Expand the compact entry into atom1, which is the first index in the + # output and atom2, which is the second + # entry. + atom1, heavy_atom1, atom2, heavy_atom2, dist, angle = entry + if output_format == 'sele1_sele2': + # If the output format is the sele1_sele2, no change will be + # executed + atom1, atom2 = atom1, atom2 + elif output_format == 'donor_acceptor': + # If the output format is donor_acceptor, use heavy atom position + # to check which is donor and which is + # acceptor + if heavy_atom1 is None: + # atom1 is hydrogen bond acceptor and thus, the position of + # atom1 and atom2 are swapped. + atom1, atom2 = atom2, atom1 + else: + # atom1 is hydrogen bond donor, position not swapped. + atom1, atom2 = atom1, atom2 + else: + raise KeyError( + "Only 'sele1_sele2' or 'donor_acceptor' are allowed as output " + "format") + + return (atom1, atom2, self._expand_index(atom1), + self._expand_index(atom2), dist, angle) + + def _generate_timeseries(self, output_format=None): + r'''Time series of water bridges. + The output is generated per frame as is explained in + :ref:`wb_Analysis_Timeseries`. The format of output can be changed via + the output_format selection. If ``output_format="sele1_sele2"``, the + hydrogen bond forms a directional link from selection 1 to selection 2. + If ``output_format="donor_acceptor"``, for each hydrogen bond, the + donor is always written before the acceptor. + Note + ---- + To find an acceptor atom in :attr:`Universe.atoms` by + *index* one would use ``u.atoms[acceptor_index]``. + The :attr:`timeseries` is a managed attribute and it is generated + from the underlying data in :attr:`_network` every time the + attribute is accessed. It is therefore costly to call and if + :attr:`timeseries` is needed repeatedly it is recommended that you + assign to a variable:: + w = WaterBridgeAnalysis(u) + w.run() + timeseries = w.timeseries + .. versionchanged 0.20.0 + The :attr:`WaterBridgeAnalysis.timeseries` has been updated where + the donor and acceptor string has been changed to tuple + (resname string, resid, name_string). + ''' + output_format = output_format or self.output_format + + def analysis(current, output, *args, **kwargs): + output = current + + timeseries = [] + for frame in self._network: + new_frame = [] + self._traverse_water_network(frame, new_frame, + analysis_func=analysis, + output=new_frame, + link_func=self._compact_link) + timeseries.append([ + self._expand_timeseries(entry, output_format) + for entry in new_frame]) + return timeseries + + timeseries = property(_generate_timeseries) + + def _get_network(self): + r'''Network representation of the water network. + The output is generated per frame as is explained in + :ref:`wb_Analysis_Network`. Each hydrogen bond has a compact + representation of :: + [sele1_acceptor_idx, None, sele2_donor_idx, donor_heavy_idx, + distance, angle] + or :: + [sele1_donor_idx, donor_heavy_idx, sele1_acceptor_idx, None, + distance, angle] + The donor_heavy_idx is the heavy atom bonding to the proton and atoms + can be retrived from the universe:: + atom = u.atoms[idx] + .. versionadded:: 0.20.0 + ''' + return self._network + + def set_network(self, network): + self._network = network + + network = property(_get_network, set_network) + + @classmethod + def _full_link(self, output, node): + ''' + A function used in _traverse_water_network to add the new hydrogen + bond to the existing bonds. + :param output: The existing hydrogen bonds from selection 1 + :param node: The new hydrogen bond + :return: The hydrogen bonds from selection 1 with the new hydrogen + bond added + ''' + result = output[:] + result.append(node) + return result + + @classmethod + def _compact_link(self, output, node): + ''' + A function used in _traverse_water_network to add the new hydrogen + bond to the existing bonds. In this form no new list is created and + thus, one bridge will only appear once. + :param output: The existing hydrogen bonds from selection 1 + :param node: The new hydrogen bond + :return: The hydrogen bonds from selection 1 with the new hydrogen + bond added + ''' + output.append(node) + return output + + def _count_by_type_analysis(self, current, output, *args, **kwargs): + ''' + Generates the key for count_by_type analysis. + :return: + ''' + + s1_index, to_index, s1, to_residue, dist, angle = \ + self._expand_timeseries(current[0]) + s1_resname, s1_resid, s1_name = s1 + from_index, s2_index, from_residue, s2, dist, angle = \ + self._expand_timeseries(current[-1]) + s2_resname, s2_resid, s2_name = s2 + key = (s1_index, s2_index, + s1_resname, s1_resid, s1_name, s2_resname, s2_resid, s2_name) + output[key] += 1 + + def count_by_type(self, analysis_func=None, **kwargs): + """Counts the frequency of water bridge of a specific type. + If one atom *A* from *selection 1* is linked to atom *B* from + *selection 2* through one or more bridging waters, an entity will be + created and the proportion of time that this linkage exists in the + whole simulation will be calculated. + The identification of a specific type of water bridge can be modified + by supplying the analysis_func function. See :ref:`wb_count_by_type` + for detail. + Returns + ------- + counts : list + Returns a :class:`list` containing atom indices for *A* and + *B*, residue names, residue numbers, atom names (for both A and B) + and the fraction of the total time during which the water bridge + was detected. This method returns None if method + :meth:`WaterBridgeAnalysis.run` was not executed first. + """ + output = None + if analysis_func is None: + analysis_func = self._count_by_type_analysis + output = 'combined' + + if self._network: + length = len(self._network) + result_dict = defaultdict(int) + for frame in self._network: + frame_dict = defaultdict(int) + self._traverse_water_network(frame, [], + analysis_func=analysis_func, + output=frame_dict, + link_func=self._full_link, + **kwargs) + for key, value in frame_dict.items(): + result_dict[key] += frame_dict[key] + + if output == 'combined': + result = [[i for i in key] for key in result_dict] + [result[i].append(result_dict[key]/length) + for i, key in enumerate(result_dict)] + else: + result = [(key, + result_dict[key]/length) for key in result_dict] + return result + else: + return None + + def _count_by_time_analysis(self, current, output, *args, **kwargs): + s1_index, to_index, s1, to_residue, dist, angle = \ + self._expand_timeseries(current[0]) + s1_resname, s1_resid, s1_name = s1 + from_index, s2_index, from_residue, s2, dist, angle = \ + self._expand_timeseries(current[-1]) + s2_resname, s2_resid, s2_name = s2 + key = (s1_index, s2_index, + s1_resname, s1_resid, s1_name, s2_resname, s2_resid, s2_name) + output[key] += 1 + + def count_by_time(self, analysis_func=None, **kwargs): + """Counts the number of water bridges per timestep. + The counting behaviour can be adjusted by supplying analysis_func. + See :ref:`wb_count_by_time` for details. + Returns + ------- + counts : list + Returns a time series ``N(t)`` where ``N`` is the total + number of observed water bridges at time ``t``. + """ + if analysis_func is None: + analysis_func = self._count_by_time_analysis + if self._network: + result = [] + for time, frame in zip(self.timesteps, self._network): + result_dict = defaultdict(int) + self._traverse_water_network(frame, [], + analysis_func=analysis_func, + output=result_dict, + link_func=self._full_link, + **kwargs) + result.append((time, + sum([result_dict[key] for key in result_dict]))) + return result + else: + return None + + def _timesteps_by_type_analysis(self, current, output, *args, **kwargs): + s1_index, to_index, s1, to_residue, dist, angle = \ + self._expand_timeseries(current[0]) + s1_resname, s1_resid, s1_name = s1 + from_index, s2_index, from_residue, s2, dist, angle = \ + self._expand_timeseries(current[-1]) + s2_resname, s2_resid, s2_name = s2 + key = (s1_index, s2_index, s1_resname, s1_resid, s1_name, s2_resname, + s2_resid, s2_name) + output[key].append(kwargs.pop('time')) + + def timesteps_by_type(self, analysis_func=None, **kwargs): + """Frames during which each water bridges existed, sorted by each water + bridges. + Processes :attr:`WaterBridgeAnalysis._network` and returns a + :class:`list` containing atom indices, residue names, residue + numbers (from selection 1 and selection 2) and each timestep at which + the water bridge was detected. + Similar to :meth:`~WaterBridgeAnalysis.count_by_type` and + :meth:`~WaterBridgeAnalysis.count_by_time`, the behavior can be + adjusted by supplying an analysis_func. + Returns + ------- + data : list + """ + output = None + if analysis_func is None: + analysis_func = self._timesteps_by_type_analysis + output = 'combined' + + if self._network: + result = defaultdict(list) + if self.timesteps is None: + timesteps = range(len(self._network)) + else: + timesteps = self.timesteps + for time, frame in zip(timesteps, self._network): + self._traverse_water_network(frame, [], + analysis_func=analysis_func, + output=result, + link_func=self._full_link, + time=time, **kwargs) + + result_list = [] + for key, time_list in result.items(): + for time in time_list: + if output == 'combined': + key = list(key) + key.append(time) + result_list.append(key) + else: + result_list.append((key, time)) + return result_list + else: + return None + + def generate_table(self, output_format=None): + """Generate a normalised table of the results. + The table is stored as a :class:`numpy.recarray` in the + attribute :attr:`~WaterBridgeAnalysis.table`. + The output format of :attr:`~WaterBridgeAnalysis.table` can also be + changed using output_format in a fashion similar to + :attr:`WaterBridgeAnalysis.timeseries` + """ + output_format = output_format or self.output_format + if self._network == []: + msg = "No data computed, do run() first." + warnings.warn(msg, category=MissingDataWarning) + logger.warning(msg) + return None + timeseries = self._generate_timeseries(output_format) + + num_records = np.sum([len(hframe) for hframe in timeseries]) + # build empty output table + if output_format == 'sele1_sele2': + dtype = [ + ("time", float), + ("sele1_index", int), ("sele2_index", int), + ("sele1_resnm", "|U4"), ("sele1_resid", int), + ("sele1_atom", "|U4"), + ("sele2_resnm", "|U4"), ("sele2_resid", int), + ("sele2_atom", "|U4"), + ("distance", float), ("angle", float)] + elif output_format == 'donor_acceptor': + dtype = [ + ("time", float), + ("donor_index", int), ("acceptor_index", int), + ("donor_resnm", "|U4"), ("donor_resid", int), + ("donor_atom", "|U4"), + ("acceptor_resnm", "|U4"), ("acceptor_resid", int), + ("acceptor_atom", "|U4"), + ("distance", float), ("angle", float)] + + # according to Lukas' notes below, using a recarray at this stage is + # ineffective and speedups of ~x10 can be achieved by filling a + # standard array, like this: + out = np.empty((num_records,), dtype=dtype) + cursor = 0 # current row + for t, hframe in zip(self.timesteps, timeseries): + for (donor_index, acceptor_index, donor, + acceptor, distance, angle) in hframe: + # donor|acceptor = (resname, resid, atomid) + out[cursor] = (t, donor_index, acceptor_index) + \ + donor + acceptor + (distance, angle) + cursor += 1 + assert cursor == num_records, \ + "Internal Error: Not all wb records stored" + table = out.view(np.recarray) + logger.debug( + "WBridge: Stored results as table with %(num_records)d entries.", + vars()) + self.table = table diff --git a/package/doc/sphinx/source/documentation_pages/analysis/wbridge_analysis.rst b/package/doc/sphinx/source/documentation_pages/analysis/wbridge_analysis.rst index b022b17c4a4..ea9ab08ee18 100644 --- a/package/doc/sphinx/source/documentation_pages/analysis/wbridge_analysis.rst +++ b/package/doc/sphinx/source/documentation_pages/analysis/wbridge_analysis.rst @@ -1 +1 @@ -.. automodule:: MDAnalysis.analysis.hbonds.wbridge_analysis +.. automodule:: MDAnalysis.analysis.hydrogenbonds.wbridge_analysis diff --git a/testsuite/MDAnalysisTests/analysis/test_wbridge.py b/testsuite/MDAnalysisTests/analysis/test_wbridge.py index ec2904ac720..376d43582bd 100644 --- a/testsuite/MDAnalysisTests/analysis/test_wbridge.py +++ b/testsuite/MDAnalysisTests/analysis/test_wbridge.py @@ -1,6 +1,7 @@ from __future__ import print_function, absolute_import from six import StringIO from collections import defaultdict +from importlib import reload from numpy.testing import ( assert_equal, assert_array_equal,) @@ -8,15 +9,12 @@ import MDAnalysis import MDAnalysis.analysis.hbonds -from MDAnalysis.analysis.hbonds.wbridge_analysis import WaterBridgeAnalysis - -def test_import_from_hbonds(): - try: - from MDAnalysis.analysis.hbonds import WaterBridgeAnalysis - except ImportError: - raise AssertionError("Issue #2064 not fixed: " - "importing WaterBridgeAnalysis from " - "MDAnalysis.analysis.hbonds failed.'") +from MDAnalysis.analysis.hydrogenbonds.wbridge_analysis import WaterBridgeAnalysis + +def test_import_warning(): + wmsg = 'Please use MDAnalysis.analysis.hydrogenbonds.wbridge_analysis' + with pytest.warns(DeprecationWarning, match=wmsg): + reload(MDAnalysis.analysis.hbonds.wbridge_analysis) class TestWaterBridgeAnalysis(object): @staticmethod From a6194f6befdded406b44469853e061bafa6d51e8 Mon Sep 17 00:00:00 2001 From: xiki-tempula Date: Thu, 28 Jan 2021 09:58:36 +0000 Subject: [PATCH 02/14] PEP8 --- testsuite/MDAnalysisTests/analysis/test_wbridge.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/testsuite/MDAnalysisTests/analysis/test_wbridge.py b/testsuite/MDAnalysisTests/analysis/test_wbridge.py index 376d43582bd..ca84b8d3874 100644 --- a/testsuite/MDAnalysisTests/analysis/test_wbridge.py +++ b/testsuite/MDAnalysisTests/analysis/test_wbridge.py @@ -9,7 +9,9 @@ import MDAnalysis import MDAnalysis.analysis.hbonds -from MDAnalysis.analysis.hydrogenbonds.wbridge_analysis import WaterBridgeAnalysis +from MDAnalysis.analysis.hydrogenbonds.wbridge_analysis import ( + WaterBridgeAnalysis, ) + def test_import_warning(): wmsg = 'Please use MDAnalysis.analysis.hydrogenbonds.wbridge_analysis' From e58f6232b559f7b18567b195ef070e3171a13795 Mon Sep 17 00:00:00 2001 From: xiki-tempula Date: Thu, 28 Jan 2021 10:06:17 +0000 Subject: [PATCH 03/14] fix warning --- package/MDAnalysis/analysis/hbonds/wbridge_analysis.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/package/MDAnalysis/analysis/hbonds/wbridge_analysis.py b/package/MDAnalysis/analysis/hbonds/wbridge_analysis.py index bb133e74397..1de6b4e41db 100644 --- a/package/MDAnalysis/analysis/hbonds/wbridge_analysis.py +++ b/package/MDAnalysis/analysis/hbonds/wbridge_analysis.py @@ -22,6 +22,9 @@ # # Water Bridge Analysis +import warnings + + with warnings.catch_warnings(): warnings.simplefilter('always', DeprecationWarning) warnings.warn(("This module has been moved to " From d48985eb7477cb132e5634a2580fd426761fe3b8 Mon Sep 17 00:00:00 2001 From: xiki-tempula Date: Thu, 28 Jan 2021 10:15:53 +0000 Subject: [PATCH 04/14] fix doc --- .../hydrogenbonds/wbridge_analysis.py | 184 ++++++++++++++++++ 1 file changed, 184 insertions(+) diff --git a/package/MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py b/package/MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py index 074955a1f17..3836246481d 100644 --- a/package/MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py +++ b/package/MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py @@ -25,29 +25,44 @@ r"""Water Bridge analysis --- mod:`MDAnalysis.analysis.hydrogenbonds.WaterBridgeAnalysis` =============================================================================== + :Author: Zhiyi Wu :Year: 2017-2018 :Copyright: GNU Public License v3 :Maintainer: Zhiyi Wu , `@xiki-tempula`_ on GitHub + + .. _`@xiki-tempula`: https://github.com/xiki-tempula + + Given a :class:`~MDAnalysis.core.universe.Universe` (simulation trajectory with 1 or more frames) measure all water bridges for each frame between selections 1 and 2. Water bridge is defined as a bridging water which simultaneously forms two hydrogen bonds with atoms from both selection 1 and selection 2. + A water bridge can form between two hydrogen bond acceptors. + e.g. -CO\ :sub:`2`\ :sup:`-`:···H−O−H···:\ :sup:`-`\ O\ :sub:`2`\ C- + A water bridge can also form between two hydrogen bond donors. + e.g. -NH···:O:···HN- (where O is the oxygen of a bridging water) + A hydrogen bond acceptor and another hydrogen bond donor can be bridged by a water. + e.g. -CO\ :sub:`2`\ :sup:`-`:···H−O:···HN- (where H−O is part of **H−O**\ −H) + A higher order water bridge is defined as more than one water bridging hydrogen bond acceptor and donor. An example of a second order water bridge: + e.g. -CO\ :sub:`2`\ :sup:`-`:···H−O:···H−O:···HN- (where H−O is part of **H−O**\ −H) + The following keyword arguments are important to control the behaviour of the water bridge analysis: + - *water_selection* (``resname SOL``): the selection string for the bridging water - *order* the maximum number of water bridging both ends @@ -55,17 +70,22 @@ - Angle *cutoff* (degrees): 120.0 - *forcefield* to switch between default values for different force fields - *donors* and *acceptors* atom types (to add additional atom names) + Theory ------ + This module attempts to find multi-order water bridge by an approach similar to breadth-first search, where the first solvation shell of selection 1 is selected, followed by the selection of the second solvation shell as well as any hydrogen bonding partner from selection 1. After that, the third solvation shell, as well as any binding partners from selection 2, are detected. This process is repeated until the maximum order of water bridges is reached. + .. _wb_Analysis_Network: + Output as Network ----------------- + Since the waters connecting the two ends of the selections are by nature a network. We provide a network representation of the water network. Water bridge data are returned per frame, which is stored in @@ -73,10 +93,12 @@ where the keys are the hydrogen bonds originating from selection 1 and the values are new dictionaries representing the hydrogen bonds coming out of the corresponding molecules making hydrogen bonds with selection 1. + As for the hydrogen bonds which reach the selection 2, the values of the corresponding keys are None. One example where selection 1 and selection 2 are joined by one water molecule (A) which also hydrogen bond to another water (B) which also hydrogen bond to selection 2 would be represented as :: + # (selection 1)-O:···H-O(water 1):···H-(selection 2) # | : # H·············O-H(water2) @@ -90,13 +112,17 @@ distance, angle): None} }, } + The atoms are represented by atom index and if the atom is hydrogen bond donor, it is followed by the index of the corresponding heavy atom ``(donor_proton, donor_heavy_atom)``. If the atom is a hydrogen bond acceptor, it is followed by none. + .. _wb_Analysis_Timeseries: + Output as Timeseries -------------------- + For lower order water bridges, it might be desirable to represent the connections as :attr:`WaterBridgeAnalysis.timeseries`. The results are returned per frame and are a list of hydrogen bonds between the selection 1 or selection @@ -106,19 +132,24 @@ :attr:`WaterBridgeAnalysis.timeseries` attribute are generated in a depth-first search manner to avoid duplication. Example code of how :attr:`WaterBridgeAnalysis.timeseries` is generated:: + def network2timeseries(network, timeseries): '''Traverse the network in a depth-first fashion. expand_timeseries will expand the compact representation to the familiar timeseries representation.''' + if network is None: return else: for node in network: timeseries.append(expand_timeseries(node)) network2timeseries(network[node], timeseries) + timeseries = [] network2timeseries(network, timeseries) + An example would be. :: + results = [ [ # frame 1 [ , , @@ -133,35 +164,47 @@ def network2timeseries(network, timeseries): ], ... ] + Using the :meth:`WaterBridgeAnalysis.generate_table` method one can reformat the results as a flat "normalised" table that is easier to import into a database or dataframe for further processing. + Detection of water bridges -------------------------- Water bridges are recorded if a bridging water simultaneously forms hydrogen bonds with selection 1 and selection 2. + Hydrogen bonds are detected based on a geometric criterion: + 1. The distance between acceptor and hydrogen is less than or equal to `distance` (default is 3 Å). + 2. The angle between donor-hydrogen-acceptor is greater than or equal to `angle` (default is 120º). + The cut-off values `angle` and `distance` can be set as keywords to :class:`WaterBridgeAnalysis`. + Donor and acceptor heavy atoms are detected from atom names. The current defaults are appropriate for the CHARMM27 and GLYCAM06 force fields as defined in Table `Default atom names for water bridge analysis`_. + Hydrogen atoms bonded to a donor are searched based on its distance to the donor. The algorithm searches for all hydrogens (name "H*" or name "[123]H" or type "H") in the same residue as the donor atom within a cut-off distance of 1.2 Å. + .. _Default atom names for water bridge analysis: + .. table:: Default heavy atom names for CHARMM27 force field. + =========== ============== =========== ==================================== group donor acceptor comments =========== ============== =========== ==================================== main chain N O, OC1, OC2 OC1, OC2 from amber99sb-ildn (Gromacs) water OH2, OW OH2, OW SPC, TIP3P, TIP4P (CHARMM27,Gromacs) + ARG NE, NH1, NH2 ASN ND2 OD1 ASP OD1, OD2 @@ -180,7 +223,9 @@ def network2timeseries(network, timeseries): TRP NE1 TYR OH OH =========== ============== =========== ==================================== + .. table:: Heavy atom types for GLYCAM06 force field. + =========== =========== ================== element donor acceptor =========== =========== ================== @@ -188,15 +233,19 @@ def network2timeseries(network, timeseries): O OH,OW O,O2,OH,OS,OW,OY S SM =========== =========== ================== + Donor and acceptor names for the CHARMM27 force field will also work for e.g. OPLS/AA or amber (tested in Gromacs). Residue names in the table are for information only and are not taken into account when determining acceptors and donors. This can potentially lead to some ambiguity in the assignment of donors/acceptors for residues such as histidine or cytosine. + For more information about the naming convention in GLYCAM06 have a look at the `Carbohydrate Naming Convention in Glycam`_. + .. _`Carbohydrate Naming Convention in Glycam`: http://glycam.ccrc.uga.edu/documents/FutureNomenclature.htm + The lists of donor and acceptor names can be extended by providing lists of atom names in the `donors` and `acceptors` keywords to :class:`WaterBridgeAnalysis`. If the lists are entirely inappropriate @@ -204,33 +253,45 @@ def network2timeseries(network, timeseries): different atom names) then one should either use the value "other" for `forcefield` to set no default values or derive a new class and set the default list oneself:: + class WaterBridgeAnalysis_OtherFF(WaterBridgeAnalysis): DEFAULT_DONORS = {"OtherFF": tuple(set([...]))} DEFAULT_ACCEPTORS = {"OtherFF": tuple(set([...]))} + Then simply use the new class instead of the parent class and call it with ```forcefield` = "OtherFF"``. Please also consider contributing the list of heavy atom names to MDAnalysis. + How to perform WaterBridgeAnalysis ---------------------------------- + All water bridges between arginine and aspartic acid can be analysed with :: + import MDAnalysis import MDAnalysis.analysis.hbonds + u = MDAnalysis.Universe('topology', 'trajectory') w = MDAnalysis.analysis.hbonds.WaterBridgeAnalysis(u, 'resname ARG', 'resname ASP') w.run() + The maximum number of bridging waters detected can be changed using the order keyword. :: + w = MDAnalysis.analysis.hbonds.WaterBridgeAnalysis(u, 'resname ARG', 'resname ASP', order=3) + Thus, a maximum of three bridging waters will be detected. + An example of using the :attr:`~WaterBridgeAnalysis` would be detecting the percentage of time a certain water bridge exits. + Trajectory :code:`u` has two frames, where the first frame contains a water bridge from the oxygen of the first arginine to one of the oxygens in the carboxylic group of aspartate (ASP3:OD1). In the second frame, the same water bridge forms but is between the oxygen of the arginine and the other oxygen in the carboxylic group (ASP3:OD2). :: + # index residue id residue name atom name # 0 1 ARG O # 1 2 SOL OW @@ -239,7 +300,9 @@ class WaterBridgeAnalysis_OtherFF(WaterBridgeAnalysis): # 4 3 ASP OD1 # 5 3 ASP OD2 print(w.timeseries) + prints out. :: + [ # frame 1 # A water bridge SOL2 links O from ARG1 to the carboxylic group OD1 of ASP3 [[0,2,('ARG',1, 'O'),('SOL',2,'HW1'), 3.0,180], @@ -252,28 +315,39 @@ class WaterBridgeAnalysis_OtherFF(WaterBridgeAnalysis): [3,5,('SOL',2,'HW2'),('ASP',3,'OD2'), 3.0,180], ], ] + + .. _wb_count_by_type: + Use count_by_type ----------------- + We can use the :meth:`~WaterBridgeAnalysis.count_by_type` to generate the frequence of all water bridges in the simulation. :: + w.count_by_type() + Returns :: + [(0, 3, 'ARG', 1, 'O', 'ASP', 3, 'OD1', 0.5), (0, 4, 'ARG', 1, 'O', 'ASP', 3, 'OD2', 0.5),] + You might think that the OD1 and OD2 are the same oxygen and the aspartate has just flipped and thus, they should be counted as the same type of water bridge. The type of the water bridge can be customised by supplying an analysis function to :meth:`~WaterBridgeAnalysis.count_by_type`. + The analysis function has two parameters. The current and the output. The current is a list of hydrogen bonds from selection 1 to selection 2, formatted in the same fashion as :attr:`WaterBridgeAnalysis.network`, and an example will be :: + [ # sele1 acceptor idx, , water donor index, donor heavy atom idx, dist, ang. [ 0, None, 2, 1, 3.0,180], # water donor idx, donor heavy atom idx, sele2 acceptor idx, distance, angle. [ 3, 1, 4, None, 3.0,180],] + Where ``current[0]`` is the first hydrogen bond originating from selection 1 and ``current[-1]`` is the final hydrogen bond ending in selection 2. The output sums up all the information in the current frame and is a dictionary @@ -282,9 +356,11 @@ class WaterBridgeAnalysis_OtherFF(WaterBridgeAnalysis): bridges and modify the output in-place. At the end of the analysis, the keys from all the frames are collected and the corresponding values will be summed up and returned. :: + def analysis(current, output, u): r'''This function defines how the type of water bridge should be specified. + Parameters ---------- current : list @@ -296,6 +372,7 @@ def analysis(current, output, u): water bridge. u : MDAnalysis.universe The current Universe for looking up atoms.''' + # decompose the first hydrogen bond. sele1_index, sele1_heavy_index, atom2, heavy_atom2, dist, angle = current[0] @@ -317,13 +394,19 @@ def analysis(current, output, u): key = (s1_resname, s1_resid, s1_name, s2_resname, s2_resid, s2_name) # The number of this type of water bridge is incremented by 1. output[key] += 1 + w.count_by_type(analysis_func=analysis) + Returns :: + [(('ARG', 1, 'O', 'ASP', 3, 'OD'), 1.0),] + Note that the result is arranged in the format of ``(key, the proportion of time)``. When no custom analysis function is supplied , the key is expanded and is formatted as :: + [('ARG', 1, 'O', 'ASP', 3, 'OD', 1.0),] + Some people might only interested in contacts between residues and pay no attention to the details regarding the atom name. However, since multiple water bridges can exist between two residues, which sometimes can give a result such @@ -332,9 +415,11 @@ def analysis(current, output, u): between two residues to be only counted once per frame. This can also be achieved by supplying an analysis function to :meth:`~WaterBridgeAnalysis.count_by_type`. :: + def analysis(current, output, u): '''This function defines how the type of water bridge should be specified . + Parameters ---------- current : list @@ -347,6 +432,7 @@ def analysis(current, output, u): u : MDAnalysis.universe The current Universe for looking up atoms. ''' + # decompose the first hydrogen bond. sele1_index, sele1_heavy_index, atom2, heavy_atom2, dist, angle = current[0] @@ -362,18 +448,25 @@ def analysis(current, output, u): sele2.name) # s1_name and s2_name are not included in the key key = (s1_resname, s1_resid, s2_resname, s2_resid) + # Each residue is only counted once per frame output[key] = 1 + w.count_by_type(analysis_func=analysis) + Returns :: + [(('ARG', 1, 'ASP', 3), 1.0),] + On the other hand, other people may insist that the first order and second-order water bridges shouldn't be mixed together, which can also be achieved by supplying an analysis function to :meth:`~WaterBridgeAnalysis.count_by_type`. :: + def analysis(current, output, u): '''This function defines how the type of water bridge should be specified . + Parameters ---------- current : list @@ -386,6 +479,7 @@ def analysis(current, output, u): u : MDAnalysis.universe The current Universe for looking up atoms. ''' + # decompose the first hydrogen bond. sele1_index, sele1_heavy_index, atom2, heavy_atom2, dist, angle = current[0] @@ -405,16 +499,22 @@ def analysis(current, output, u): key = (s1_resname, s1_resid, s2_resname, s2_resid, order_of_water_bridge) # The number of this type of water bridge is incremented by 1. output[key] += 1 + w.count_by_type(analysis_func=analysis) + The extra number 1 precede the 1.0 indicate that this is a first order water bridge :: + [(('ARG', 1, 'ASP', 3, 1), 1.0),] + Some people might not be interested in the interactions related to arginine. The undesirable interactions can be discarded by supplying an analysis function to :meth:`~WaterBridgeAnalysis.count_by_type`. :: + def analysis(current, output, u): '''This function defines how the type of water bridge should be specified. + Parameters ---------- current : list @@ -427,6 +527,7 @@ def analysis(current, output, u): u : MDAnalysis.universe The current Universe for looking up atoms. ''' + # decompose the first hydrogen bond. sele1_index, sele1_heavy_index, atom2, heavy_atom2, dist, angle = current[0] @@ -443,39 +544,58 @@ def analysis(current, output, u): if not s1_resname == 'ARG': key = (s1_resname, s1_resid, s2_resname, s2_resid) output[key] += 1 + w.count_by_type(analysis_func=analysis) + Returns nothing in this case :: + [,] + Additional keywords can be supplied to the analysis function by passing through :meth:`~WaterBridgeAnalysis.count_by_type`. :: + def analysis(current, output, **kwargs): ... w.count_by_type(analysis_func=analysis, **kwargs) + + .. _wb_count_by_time: + Use count_by_time ----------------- + :meth:`~WaterBridgeAnalysis.count_by_type` aggregates data across frames, which might be desirable in some cases but not the others. :meth:`~WaterBridgeAnalysis.count_by_time` provides additional functionality for aggregating results for each frame. + The default behaviour of :meth:`~WaterBridgeAnalysis.count_by_time` is counting the number of water bridges from selection 1 to selection 2 for each frame. Take the previous ASP, ARG salt bridge for example: :: + w.count_by_time() + As one water bridge is found in both frames, the method returns :: + [(1.0, 1), (2.0, 1), ] + Similar to :meth:`~WaterBridgeAnalysis.count_by_type` The behaviour of :meth:`~WaterBridgeAnalysis.count_by_time` can also be modified by supplying an analysis function. + Suppose we want to count + - the **number** of water molecules involved in bridging selection 1 to selection 2. - only if water bridge terminates in atom name **OD1 of ASP**. - only when water bridge is joined by less than **two** water. + The analysis function can be written as:: + def analysis(current, output, u, **kwargs): '''This function defines how the counting of water bridge should be specified. + Parameters ---------- current : list @@ -488,6 +608,7 @@ def analysis(current, output, u, **kwargs): u : MDAnalysis.universe The current Universe for looking up atoms. ''' + # decompose the first hydrogen bond. sele1_index, sele1_heavy_index, atom2, heavy_atom2, dist, angle = current[0] @@ -501,6 +622,7 @@ def analysis(current, output, u, **kwargs): (sele1.resname, sele1.resid, sele1.name) (s2_resname, s2_resid, s2_name) = (sele2.resname, sele2.resid, sele2.name) + # only the residue name is ASP and the atom name is OD1, if s2_resname == 'ASP' and s2_name == 'OD1': # only if the order of water bridge is less than 2 @@ -511,6 +633,7 @@ def analysis(current, output, u, **kwargs): (to_resname, to_resid, to_name), dist, angle = current[0] key = (to_resname, to_resid) output[key] = 1 + # extract all the waters between selection 1 and selection 2 for hbond in current[1:-1]: # decompose the hydrogen bond. @@ -522,22 +645,31 @@ def analysis(current, output, u, **kwargs): # add second water key2 = (to_resname, to_resid) output[key2] = 1 + # extract the last water to selection 2 from_index, s2_index, (from_resname, from_resid, from_name), (s2_resname, s2_resid, s2_name), dist, angle = current[-1] key = (from_resname, from_resid) output[key] = 1 + w.count_by_time(analysis_func=analysis) + Returns :: + [(1.0, 1), (2.0, 0),] + Classes ------- + .. autoclass:: WaterBridgeAnalysis :members: + .. attribute:: timesteps + List of the times of each timestep. This can be used together with :attr:`~WaterBridgeAnalysis.timeseries` to find the specific time point of a water bridge existence. + """ from collections import defaultdict import logging @@ -555,11 +687,15 @@ def analysis(current, output, u, **kwargs): class WaterBridgeAnalysis(AnalysisBase): """Perform a water bridge analysis + The analysis of the trajectory is performed with the :meth:`WaterBridgeAnalysis.run` method. The result is stored in :attr:`WaterBridgeAnalysis.timeseries`. See :meth:`~WaterBridgeAnalysis.run` for the format. + + .. versionadded:: 0.17.0 + """ # use tuple(set()) here so that one can just copy&paste names from the # table; set() takes care for removing duplicates. At the end the @@ -604,14 +740,17 @@ def __init__(self, universe, selection1='protein', pbc=False, **kwargs): """Set up the calculation of water bridges between two selections in a universe. + The timeseries is accessible as the attribute :attr:`WaterBridgeAnalysis.timeseries`. + If no hydrogen bonds are detected or if the initial check fails, look at the log output (enable with :func:`MDAnalysis.start_logging` and set `verbose` ``=True``). It is likely that the default names for donors and acceptors are not suitable (especially for non-standard ligands). In this case, either change the `forcefield` or use customized `donors` and/or `acceptors`. + Parameters ---------- universe : Universe @@ -627,12 +766,14 @@ def __init__(self, universe, selection1='protein', The default selection assumes that the water molecules have residue name "SOL". Change it to the appropriate selection for your specific force field. + However, in theory, this selection can be anything which forms a hydrogen bond with selection 1 and selection 2. order : int (optional) The maximum number of water bridges linking both selections. if the order is set to 3, then all the residues linked with less than three water molecules will be detected. [1] + Computation of high order water bridges can be very time-consuming. Think carefully before running the calculation, do you really want to compute the 20th order water bridge between domain A and domain @@ -656,6 +797,7 @@ def __init__(self, universe, selection1='protein', **only** recommended when the total amount of water molecules in the simulation are small and when water molecules remain static across the simulation. + However, in normal simulations, only a tiny proportion of water is engaged in the formation of water bridge. It is recommended to update the water selection and set keyword `filter_first` to @@ -707,21 +849,26 @@ def __init__(self, universe, selection1='protein', verbose : bool (optional) Toggle progress output. (Can also be given as keyword argument to :meth:`run`.) + Notes ----- In order to speed up processing, atoms are filtered by a coarse distance criterion before a detailed hydrogen bonding analysis is performed (`filter_first` = ``True``). + If selection 1 and selection 2 are very mobile during the simulation and the contact surface is constantly changing (i.e. residues are moving farther than 4 Å + `order` * (2 Å + `distance`)), you might consider setting the `update_selection` keywords to ``True`` to ensure correctness. + .. versionchanged 0.20.0 The :attr:`WaterBridgeAnalysis.timeseries` has been updated see :attr:`WaterBridgeAnalysis.timeseries` for detail. This class is now based on :class:`~MDAnalysis.analysis.base.AnalysisBase`. + + """ super(WaterBridgeAnalysis, self).__init__(universe.trajectory, **kwargs) @@ -930,19 +1077,24 @@ def _update_water_selection(self): def _get_bonded_hydrogens(self, atom): """Find hydrogens bonded within cutoff to `atom`. + Hydrogens are detected by either name ("H*", "[123]H*") or type ("H"); this is not fool-proof as the atom type is not always a character but the name pattern should catch most typical occurrences. + The distance from `atom` is calculated for all hydrogens in the residue and only those within a cutoff are kept. The cutoff depends on the heavy atom (more precisely, on its element, which is taken as the first letter of its name ``atom.name[0]``) and is parameterized in :attr:`WaterBridgeAnalysis.r_cov`. If no match is found then the default of 1.5 Å is used. + + Parameters ---------- atom : groups.Atom heavy atom + Returns ------- hydrogen_atoms : AtomGroup or [] @@ -1266,12 +1418,15 @@ def _expand_timeseries(self, entry, output_format=None): :attr:`output_format`. If `output_format` is 'sele1_sele2', the output will be the old water bridge analysis format:: + # donor from selection 1 to acceptor in selection 2 [sele1_index, sele2_index, (sele1_resname, sele1_resid, sele1_name), (sele2_resname, sele2_resid, sele2_name), dist, angle] + If `output_format` is 'donor_acceptor', the output will be the old hydrogen bond analysis format:: + # From donor to acceptor [donor_index, acceptor_index, (donor_resname, donor_resid, donor_name), @@ -1307,28 +1462,35 @@ def _expand_timeseries(self, entry, output_format=None): def _generate_timeseries(self, output_format=None): r'''Time series of water bridges. + The output is generated per frame as is explained in :ref:`wb_Analysis_Timeseries`. The format of output can be changed via the output_format selection. If ``output_format="sele1_sele2"``, the hydrogen bond forms a directional link from selection 1 to selection 2. If ``output_format="donor_acceptor"``, for each hydrogen bond, the donor is always written before the acceptor. + Note ---- To find an acceptor atom in :attr:`Universe.atoms` by *index* one would use ``u.atoms[acceptor_index]``. + The :attr:`timeseries` is a managed attribute and it is generated from the underlying data in :attr:`_network` every time the attribute is accessed. It is therefore costly to call and if :attr:`timeseries` is needed repeatedly it is recommended that you assign to a variable:: + w = WaterBridgeAnalysis(u) w.run() timeseries = w.timeseries + .. versionchanged 0.20.0 The :attr:`WaterBridgeAnalysis.timeseries` has been updated where the donor and acceptor string has been changed to tuple (resname string, resid, name_string). + + ''' output_format = output_format or self.output_format @@ -1351,18 +1513,26 @@ def analysis(current, output, *args, **kwargs): def _get_network(self): r'''Network representation of the water network. + The output is generated per frame as is explained in :ref:`wb_Analysis_Network`. Each hydrogen bond has a compact representation of :: + [sele1_acceptor_idx, None, sele2_donor_idx, donor_heavy_idx, distance, angle] + or :: + [sele1_donor_idx, donor_heavy_idx, sele1_acceptor_idx, None, distance, angle] + The donor_heavy_idx is the heavy atom bonding to the proton and atoms can be retrived from the universe:: + atom = u.atoms[idx] + .. versionadded:: 0.20.0 + ''' return self._network @@ -1417,13 +1587,16 @@ def _count_by_type_analysis(self, current, output, *args, **kwargs): def count_by_type(self, analysis_func=None, **kwargs): """Counts the frequency of water bridge of a specific type. + If one atom *A* from *selection 1* is linked to atom *B* from *selection 2* through one or more bridging waters, an entity will be created and the proportion of time that this linkage exists in the whole simulation will be calculated. + The identification of a specific type of water bridge can be modified by supplying the analysis_func function. See :ref:`wb_count_by_type` for detail. + Returns ------- counts : list @@ -1432,6 +1605,8 @@ def count_by_type(self, analysis_func=None, **kwargs): and the fraction of the total time during which the water bridge was detected. This method returns None if method :meth:`WaterBridgeAnalysis.run` was not executed first. + + """ output = None if analysis_func is None: @@ -1475,13 +1650,16 @@ def _count_by_time_analysis(self, current, output, *args, **kwargs): def count_by_time(self, analysis_func=None, **kwargs): """Counts the number of water bridges per timestep. + The counting behaviour can be adjusted by supplying analysis_func. See :ref:`wb_count_by_time` for details. + Returns ------- counts : list Returns a time series ``N(t)`` where ``N`` is the total number of observed water bridges at time ``t``. + """ if analysis_func is None: analysis_func = self._count_by_time_analysis @@ -1514,16 +1692,20 @@ def _timesteps_by_type_analysis(self, current, output, *args, **kwargs): def timesteps_by_type(self, analysis_func=None, **kwargs): """Frames during which each water bridges existed, sorted by each water bridges. + Processes :attr:`WaterBridgeAnalysis._network` and returns a :class:`list` containing atom indices, residue names, residue numbers (from selection 1 and selection 2) and each timestep at which the water bridge was detected. + Similar to :meth:`~WaterBridgeAnalysis.count_by_type` and :meth:`~WaterBridgeAnalysis.count_by_time`, the behavior can be adjusted by supplying an analysis_func. + Returns ------- data : list + """ output = None if analysis_func is None: @@ -1558,8 +1740,10 @@ def timesteps_by_type(self, analysis_func=None, **kwargs): def generate_table(self, output_format=None): """Generate a normalised table of the results. + The table is stored as a :class:`numpy.recarray` in the attribute :attr:`~WaterBridgeAnalysis.table`. + The output format of :attr:`~WaterBridgeAnalysis.table` can also be changed using output_format in a fashion similar to :attr:`WaterBridgeAnalysis.timeseries` From 5d8521c3ef88ce3ffbd1e349b1208a67790937f2 Mon Sep 17 00:00:00 2001 From: xiki-tempula Date: Thu, 28 Jan 2021 10:29:43 +0000 Subject: [PATCH 05/14] fix test --- testsuite/MDAnalysisTests/analysis/test_wbridge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testsuite/MDAnalysisTests/analysis/test_wbridge.py b/testsuite/MDAnalysisTests/analysis/test_wbridge.py index ca84b8d3874..bbce9f510f2 100644 --- a/testsuite/MDAnalysisTests/analysis/test_wbridge.py +++ b/testsuite/MDAnalysisTests/analysis/test_wbridge.py @@ -14,7 +14,7 @@ def test_import_warning(): - wmsg = 'Please use MDAnalysis.analysis.hydrogenbonds.wbridge_analysis' + wmsg = 'This module has been moved to' with pytest.warns(DeprecationWarning, match=wmsg): reload(MDAnalysis.analysis.hbonds.wbridge_analysis) From 53783c2c54a8eebd0de2fd0cc1c189c5cd5c47eb Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sat, 13 Mar 2021 11:12:03 +0000 Subject: [PATCH 06/14] Update CHANGELOG --- package/CHANGELOG | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/package/CHANGELOG b/package/CHANGELOG index 355a0f5b8a8..e58d723f74d 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -25,6 +25,10 @@ Fixes Changes * Maximum pinned versions in setup.py removed for python 3.6+ (PR #3139) + +Deprecations + * hbonds.WaterBridgeAnalysis will be removed in 2.0.0 and + replaced with hydrogenbonds.WaterBridgeAnalysis (#3111) 01/17/21 richardjgowers, IAlibay, orbeckst, tylerjereddy, jbarnoud, yuxuanzhuang, lilyminium, VOD555, p-j-smith, bieniekmateusz, @@ -83,8 +87,6 @@ Deprecations only a Universe, in 2.0.0 * analysis.helanal will be removed in 2.0.0 and replaced by analysis.helix_analysis (PR #2622) - * hbonds.WaterBridgeAnalysis will be removed in 2.0.0 and - replaced with hydrogenbonds.WaterBridgeAnalysis (#3111) 06/09/20 richardjgowers, kain88-de, lilyminium, p-j-smith, bdice, joaomcteixeira, From f7187b3f1066a94ef56e28a731c786db5a240936 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sat, 13 Mar 2021 11:49:00 +0000 Subject: [PATCH 07/14] fix py27 issue --- testsuite/MDAnalysisTests/analysis/test_wbridge.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/testsuite/MDAnalysisTests/analysis/test_wbridge.py b/testsuite/MDAnalysisTests/analysis/test_wbridge.py index bbce9f510f2..e3ee1a78a1d 100644 --- a/testsuite/MDAnalysisTests/analysis/test_wbridge.py +++ b/testsuite/MDAnalysisTests/analysis/test_wbridge.py @@ -1,7 +1,12 @@ from __future__ import print_function, absolute_import from six import StringIO from collections import defaultdict -from importlib import reload +try: + #reload was move to importlib from py34 + from importlib import reload +except: + # reload is a builtin function in py27 + pass from numpy.testing import ( assert_equal, assert_array_equal,) From 744f277ff8edfbc18c64670d6eb9ef99eefef837 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sat, 13 Mar 2021 12:26:06 +0000 Subject: [PATCH 08/14] rearrange the test --- testsuite/MDAnalysisTests/analysis/test_wbridge.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/testsuite/MDAnalysisTests/analysis/test_wbridge.py b/testsuite/MDAnalysisTests/analysis/test_wbridge.py index e3ee1a78a1d..968db0e2eaa 100644 --- a/testsuite/MDAnalysisTests/analysis/test_wbridge.py +++ b/testsuite/MDAnalysisTests/analysis/test_wbridge.py @@ -18,11 +18,6 @@ WaterBridgeAnalysis, ) -def test_import_warning(): - wmsg = 'This module has been moved to' - with pytest.warns(DeprecationWarning, match=wmsg): - reload(MDAnalysis.analysis.hbonds.wbridge_analysis) - class TestWaterBridgeAnalysis(object): @staticmethod @pytest.fixture(scope='class') @@ -736,3 +731,8 @@ def test_timesteps_by_type(self, wb_multiframe): timesteps = sorted(wb_multiframe.timesteps_by_type()) assert_array_equal(timesteps[3], [1, 12, 'ALA', 1, 'H', 'ALA', 6, 'O', 0, 2]) + +def test_import_warning(): + wmsg = 'This module has been moved to' + with pytest.warns(DeprecationWarning, match=wmsg): + reload(MDAnalysis.analysis.hbonds.wbridge_analysis) From 20f9af03997591949e43b47c47a274e1c78158f1 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sat, 13 Mar 2021 13:23:03 +0000 Subject: [PATCH 09/14] py2 compatibkle --- testsuite/MDAnalysisTests/analysis/test_wbridge.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/testsuite/MDAnalysisTests/analysis/test_wbridge.py b/testsuite/MDAnalysisTests/analysis/test_wbridge.py index 968db0e2eaa..d4c9048d6dc 100644 --- a/testsuite/MDAnalysisTests/analysis/test_wbridge.py +++ b/testsuite/MDAnalysisTests/analysis/test_wbridge.py @@ -1,12 +1,7 @@ from __future__ import print_function, absolute_import from six import StringIO from collections import defaultdict -try: - #reload was move to importlib from py34 - from importlib import reload -except: - # reload is a builtin function in py27 - pass +from six.moves import reload_module from numpy.testing import ( assert_equal, assert_array_equal,) @@ -735,4 +730,4 @@ def test_timesteps_by_type(self, wb_multiframe): def test_import_warning(): wmsg = 'This module has been moved to' with pytest.warns(DeprecationWarning, match=wmsg): - reload(MDAnalysis.analysis.hbonds.wbridge_analysis) + reload_module(MDAnalysis.analysis.hbonds.wbridge_analysis) From 7f05f551cebbb2455f4407a33ddb89468f8a3874 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sat, 13 Mar 2021 14:02:40 +0000 Subject: [PATCH 10/14] check if it is reload related error --- testsuite/MDAnalysisTests/analysis/test_wbridge.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/testsuite/MDAnalysisTests/analysis/test_wbridge.py b/testsuite/MDAnalysisTests/analysis/test_wbridge.py index d4c9048d6dc..a357969424e 100644 --- a/testsuite/MDAnalysisTests/analysis/test_wbridge.py +++ b/testsuite/MDAnalysisTests/analysis/test_wbridge.py @@ -727,7 +727,7 @@ def test_timesteps_by_type(self, wb_multiframe): timesteps = sorted(wb_multiframe.timesteps_by_type()) assert_array_equal(timesteps[3], [1, 12, 'ALA', 1, 'H', 'ALA', 6, 'O', 0, 2]) -def test_import_warning(): - wmsg = 'This module has been moved to' - with pytest.warns(DeprecationWarning, match=wmsg): - reload_module(MDAnalysis.analysis.hbonds.wbridge_analysis) +#def test_import_warning(): +# wmsg = 'This module has been moved to' +# with pytest.warns(DeprecationWarning, match=wmsg): +# reload_module(MDAnalysis.analysis.hbonds.wbridge_analysis) From 51ed243e9700fe9c27597620b86a01ef1ec80dae Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sat, 13 Mar 2021 14:05:06 +0000 Subject: [PATCH 11/14] from future import division --- .../MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py | 1 + testsuite/MDAnalysisTests/analysis/test_wbridge.py | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/package/MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py b/package/MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py index 3836246481d..d161f8709ee 100644 --- a/package/MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py +++ b/package/MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py @@ -675,6 +675,7 @@ def analysis(current, output, u, **kwargs): import logging import warnings import numpy as np +from __future__ import division from ..base import AnalysisBase from MDAnalysis.lib.NeighborSearch import AtomNeighborSearch diff --git a/testsuite/MDAnalysisTests/analysis/test_wbridge.py b/testsuite/MDAnalysisTests/analysis/test_wbridge.py index a357969424e..d4c9048d6dc 100644 --- a/testsuite/MDAnalysisTests/analysis/test_wbridge.py +++ b/testsuite/MDAnalysisTests/analysis/test_wbridge.py @@ -727,7 +727,7 @@ def test_timesteps_by_type(self, wb_multiframe): timesteps = sorted(wb_multiframe.timesteps_by_type()) assert_array_equal(timesteps[3], [1, 12, 'ALA', 1, 'H', 'ALA', 6, 'O', 0, 2]) -#def test_import_warning(): -# wmsg = 'This module has been moved to' -# with pytest.warns(DeprecationWarning, match=wmsg): -# reload_module(MDAnalysis.analysis.hbonds.wbridge_analysis) +def test_import_warning(): + wmsg = 'This module has been moved to' + with pytest.warns(DeprecationWarning, match=wmsg): + reload_module(MDAnalysis.analysis.hbonds.wbridge_analysis) From 7c600d696175d3b87dacedf017a4811837e682c0 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sat, 13 Mar 2021 14:14:01 +0000 Subject: [PATCH 12/14] fix pep8 --- testsuite/MDAnalysisTests/analysis/test_wbridge.py | 1 + 1 file changed, 1 insertion(+) diff --git a/testsuite/MDAnalysisTests/analysis/test_wbridge.py b/testsuite/MDAnalysisTests/analysis/test_wbridge.py index d4c9048d6dc..eae3667f5fd 100644 --- a/testsuite/MDAnalysisTests/analysis/test_wbridge.py +++ b/testsuite/MDAnalysisTests/analysis/test_wbridge.py @@ -727,6 +727,7 @@ def test_timesteps_by_type(self, wb_multiframe): timesteps = sorted(wb_multiframe.timesteps_by_type()) assert_array_equal(timesteps[3], [1, 12, 'ALA', 1, 'H', 'ALA', 6, 'O', 0, 2]) + def test_import_warning(): wmsg = 'This module has been moved to' with pytest.warns(DeprecationWarning, match=wmsg): From 1a87df22d341438af685d81acbc0a188a1b67813 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sat, 13 Mar 2021 14:15:13 +0000 Subject: [PATCH 13/14] fix doc --- package/MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package/MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py b/package/MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py index d161f8709ee..203572b4f1c 100644 --- a/package/MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py +++ b/package/MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py @@ -671,11 +671,11 @@ def analysis(current, output, u, **kwargs): of a water bridge existence. """ +from __future__ import division from collections import defaultdict import logging import warnings import numpy as np -from __future__ import division from ..base import AnalysisBase from MDAnalysis.lib.NeighborSearch import AtomNeighborSearch From 1eecb1a40b009685a92f9b0cfa62bde06d42f03c Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sat, 13 Mar 2021 17:09:34 +0000 Subject: [PATCH 14/14] for backward compatibility --- package/CHANGELOG | 2 +- .../analysis/hydrogenbonds/wbridge_analysis.py | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/package/CHANGELOG b/package/CHANGELOG index e58d723f74d..ef0a8c5a631 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -13,7 +13,7 @@ The rules for this file: * release numbers follow "Semantic Versioning" http://semver.org ------------------------------------------------------------------------------ -??/??/?? richardjgowers, IAlibay, tylerjereddy +??/??/?? richardjgowers, IAlibay, tylerjereddy, xiki-tempula, * 1.0.2 diff --git a/package/MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py b/package/MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py index 203572b4f1c..43d45aa054f 100644 --- a/package/MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py +++ b/package/MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py @@ -29,7 +29,7 @@ :Author: Zhiyi Wu :Year: 2017-2018 :Copyright: GNU Public License v3 -:Maintainer: Zhiyi Wu , `@xiki-tempula`_ on GitHub +:Maintainer: Zhiyi Wu , `@xiki-tempula`_ on GitHub .. _`@xiki-tempula`: https://github.com/xiki-tempula @@ -671,10 +671,13 @@ def analysis(current, output, u, **kwargs): of a water bridge existence. """ -from __future__ import division +from __future__ import print_function, absolute_import, division + from collections import defaultdict import logging import warnings +import six +from six.moves import range, zip import numpy as np from ..base import AnalysisBase @@ -1727,7 +1730,7 @@ def timesteps_by_type(self, analysis_func=None, **kwargs): time=time, **kwargs) result_list = [] - for key, time_list in result.items(): + for key, time_list in six.iteritems(result): for time in time_list: if output == 'combined': key = list(key)