diff --git a/.gitignore b/.gitignore index 18682c0c635..67f2bf4528e 100644 --- a/.gitignore +++ b/.gitignore @@ -36,3 +36,5 @@ package/doc/html/ MDAnalysis.log # Ignore the authors.py files as they are generated files authors.py +# Ignore the .DS_Store file in the osx file system +*.DS_Store diff --git a/package/CHANGELOG b/package/CHANGELOG index 704597c61b1..195c34b0efc 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -16,7 +16,7 @@ The rules for this file: mm/dd/yy micaela-matta, xiki-tempula, zemanj, mattwthompson, orbeckst, aliehlen, dpadula85, jbarnoud, manuel.nuno.melo, richardjgowers, mattwthompson, ayushsuhane, picocentauri, NinadBhat, bieniekmateusz, p-j-smith, Lp0lp, - IAlibay, tyler.je.reddy, aakognole, RMeli + IAlibay, tyler.je.reddy, aakognole, RMeli * 0.20.0 @@ -62,6 +62,7 @@ Enhancements * added unwrap keyword to center_of_mass (PR #2286) * added unwrap keyword to moment_of_inertia (PR #2287) * added unwrap keyword to asphericity (PR #2290) + * add high order water bridge support to water bridge analysis. (PR #2087) Changes * added official support for Python 3.7 (PR #1963) @@ -72,6 +73,7 @@ Changes PR #2201) * changed fudge_factor in guess_bonds to deal with new vdw radii (#2138, PR #2142) * bump minimum numpy version to 1.13.3 + * changed the water bridge analysis output format (PR #2087) Fixes * fixes ProgressMeter issues with older Jupyter Lab versions (Issue #2078) @@ -125,6 +127,9 @@ Fixes Deprecations * analysis.polymer.PersistenceLength no longer requires the perform_fit() method to be used, this is done automatically in run() + +Testsuite + * Add tests for the new water bridge analysis (PR #2087) 11/06/18 richardjgowers * 0.19.2 diff --git a/package/MDAnalysis/analysis/hbonds/wbridge_analysis.py b/package/MDAnalysis/analysis/hbonds/wbridge_analysis.py index 27cd574ec5d..7229a521690 100644 --- a/package/MDAnalysis/analysis/hbonds/wbridge_analysis.py +++ b/package/MDAnalysis/analysis/hbonds/wbridge_analysis.py @@ -26,7 +26,7 @@ =============================================================================== :Author: Zhiyi Wu -:Year: 2017 +:Year: 2017-2018 :Copyright: GNU Public License v3 :Maintainer: Zhiyi Wu , `@xiki-tempula`_ on GitHub @@ -53,76 +53,107 @@ e.g. -CO\ :sub:`2`\ :sup:`-`:···H−O:···HN- (where H−O is part of **H−O**\ −H) -The :class:`WaterBridgeAnalysis` class is modeled after the \ -:class:`~MDAnalysis.analysis.hbonds.hbond_analysis.HydrogenBondAnalysis`. +A higher order water bridge is defined as more than one water bridging +hydrogen bond acceptor and donor. An example of a second order water bridge: -The following keyword arguments are important to control the behavior of the +e.g. -CO\ :sub:`2`\ :sup:`-`:···H−O:···H−O:···HN- (where H−O is part of **H−O**\ −H) + +The following keyword arguments are important to control the behaviour of the water bridge analysis: - *water_selection* (``resname SOL``): the selection string for the bridging water + - *order* the maximum number of water bridging both ends - donor-acceptor *distance* (Å): 3.0 - Angle *cutoff* (degrees): 120.0 - *forcefield* to switch between default values for different force fields - *donors* and *acceptors* atom types (to add additional atom names) -.. _wb_Analysis_Output: - -Output +Theory ------ -The results are a list of hydrogen bonds between the selection 1 or selection 2 -and the bridging water. - -Each list is formated similar to the \ :attr:`HydrogenBondAnalysis.timeseries -` -and contains +This module attempts to find multi-order water bridge by an approach similar +to breadth-first search, where the first solvation shell of selection 1 is +selected, followed by the selection of the second solvation shell as well as +any hydrogen bonding partner from selection 1. After that, the third solvation +shell, as well as any binding partners from selection 2, are detected. This +process is repeated until the maximum order of water bridges is reached. + +.. _wb_Analysis_Network: + +Output as Network +----------------- + +Since the waters connecting the two ends of the selections are by nature a network. +We provide a network representation of the water network. Water bridge data are +returned per frame, which is stored in :attr:`WaterBridgeAnalysis.network`. Each +frame is represented as a dictionary, where the keys are the hydrogen bonds +originating from selection 1 and the values are new dictionaries representing +the hydrogen bonds coming out of the corresponding molecules making hydrogen bonds +with selection 1. + +As for the hydrogen bonds which reach the selection 2, the values of the +corresponding keys are None. One example where selection 1 and selection 2 are +joined by one water molecule (A) which also hydrogen bond to another water (B) +which also hydrogen bond to selection 2 would be represented as :: + + # (selection 1)-O:···H-O(water 1):···H-(selection 2) + # | : + # H·············O-H(water2) + # H + {(sele1_acceptor, None, water1_donor, water1_donor_heavy, distance, angle): + {(water1_acceptor, None, sele2_donor, sele2_donor_heavy, distance, angle): None}, + {(water1_donor, water1_donor_heavy, water2_acceptor, None, distance, angle): + {(water2_acceptor, None, sele2_donor, sele2_donor_heavy, distance, angle): None} + }, + } + +The atoms are represented by atom index and if the atom is hydrogen bond donor, +it is followed by the index of the corresponding heavy atom +``(donor_proton, donor_heavy_atom)``. +If the atom is a hydrogen bond acceptor, it is followed by none. + +.. _wb_Analysis_Timeseries: + +Output as Timeseries +-------------------- + +For lower order water bridges, it might be desirable to represent the connections as +:attr:`WaterBridgeAnalysis.timeseries`. The results are returned per frame and +are a list of hydrogen bonds between the selection 1 or selection 2 and the +bridging waters. Due to the complexity of the higher order water bridge and the +fact that one hydrogen bond between two waters can appear in both third and +fourth order water bridge, the hydrogen bonds in the +:attr:`WaterBridgeAnalysis.timeseries` attribute are generated in a depth-first +search manner to avoid duplication. Example code of how +:attr:`WaterBridgeAnalysis.timeseries` is generated:: + + def network2timeseries(network, timeseries): + '''Traverse the network in a depth-first fashion. + expand_timeseries will expand the compact representation to the familiar + timeseries representation.''' + + if network is None: + return + else: + for node in network: + timeseries.append(expand_timeseries(node)) + network2timeseries(network[node], timeseries) - - the **identities** of donor and acceptor heavy-atoms, - - the **distance** between the heavy atom acceptor atom and the hydrogen atom - - the **angle** donor-hydrogen-acceptor angle (180º is linear). + timeseries = [] + network2timeseries(network, timeseries) -Water bridge data are returned per frame, which is stored in \ -:attr:`WaterBridgeAnalysis.timeseries` (In the following description, ``#`` -indicates comments that are not part of the output.):: +The list is formatted similar to the \ :attr:`HydrogenBondAnalysis.timeseries +` +except that the atom identifier is expressed as (residue name, residue number, +atom name). An example would be. :: results = [ [ # frame 1 - # hbonds linking the selection 1 and selection 2 to the bridging - # water 1 - [ # hbond 1 from selection 1 to the bridging water 1 - , - , , , - , - ], - [ # hbond 2 from selection 1 to the bridging water 1 - , - , , , - , - ], - [ # hbond 1 from selection 2 to the bridging water 1 - , - , , , - , - ], - [ # hbond 2 from selection 2 to the bridging water 1 - , - , , , - , - ], - - # hbonds linking the selection 1 and selection 2 to the bridging - # water 2 - [ # hbond 1 from selection 1 to the bridging water 2 - , - , , , - , - ], - [ # hbond 1 from selection 2 to the bridging water 2 - , - , , , - , - ], + [ , , + (, , ), + (, , ), + , ], .... ], [ # frame 2 @@ -134,12 +165,10 @@ Using the :meth:`WaterBridgeAnalysis.generate_table` method one can reformat the results as a flat "normalised" table that is easier to import into a database or dataframe for further processing. -:meth:`WaterBridgeAnalysis.save_table` saves the table to a pickled file. The -table itself is a :class:`numpy.recarray`. Detection of water bridges ---------------------------- -Water bridges are recorded if a bridging water simultaneously forms two +-------------------------- +Water bridges are recorded if a bridging water simultaneously forms hydrogen bonds with selection 1 and selection 2. Hydrogen bonds are detected as is described in \ @@ -151,7 +180,7 @@ :class:`WaterBridgeAnalysis`. If the lists are entirely inappropriate (e.g. when analysing simulations done with a force field that uses very different atom names) then one should either use the value "other" for -`forcefield` to set no default values, or derive a new class and set the +`forcefield` to set no default values or derive a new class and set the default list oneself:: class WaterBridgeAnalysis_OtherFF(WaterBridgeAnalysis): @@ -159,11 +188,11 @@ class WaterBridgeAnalysis_OtherFF(WaterBridgeAnalysis): DEFAULT_ACCEPTORS = {"OtherFF": tuple(set([...]))} Then simply use the new class instead of the parent class and call it with -`forcefield` = "OtherFF". Please also consider contributing the list of heavy +```forcefield` = "OtherFF"``. Please also consider contributing the list of heavy atom names to MDAnalysis. How to perform WaterBridgeAnalysis ------------------------------------ +---------------------------------- All water bridges between arginine and aspartic acid can be analysed with :: @@ -174,132 +203,378 @@ class WaterBridgeAnalysis_OtherFF(WaterBridgeAnalysis): w = MDAnalysis.analysis.hbonds.WaterBridgeAnalysis(u, 'resname ARG', 'resname ASP') w.run() -The results are stored as the attribute -:attr:`WaterBridgeAnalysis.timeseries`; see :ref:`wb_Analysis_Output` for the -format. +The maximum number of bridging waters detected can be changed using the order keyword. :: -An example of using the :attr:`~WaterBridgeAnalysis.timeseries` would be + w = MDAnalysis.analysis.hbonds.WaterBridgeAnalysis(u, 'resname ARG', 'resname ASP', + order=3) + +Thus, a maximum of three bridging waters will be detected. + +An example of using the :attr:`~WaterBridgeAnalysis` would be detecting the percentage of time a certain water bridge exits. Trajectory :code:`u` has two frames, where the first frame contains a water -bridge from the oxygen of the first arginine to the oxygen of the third -aspartate. No water bridge is detected in the second frame. :: - +bridge from the oxygen of the first arginine to one of the oxygens in the carboxylic +group of aspartate (ASP3:OD1). In the second frame, the same water bridge forms but +is between the oxygen of the arginine and the other oxygen in the carboxylic +group (ASP3:OD2). :: + + # index residue id residue name atom name + # 0 1 ARG O + # 1 2 SOL OW + # 2 2 SOL HW1 + # 3 2 SOL HW2 + # 4 3 ASP OD1 + # 5 3 ASP OD2 print(w.timeseries) -prints out (the comments are not part of the data structure but are added here -for clarity): :: +prints out. :: [ # frame 1 - # A water bridge SOL2 links O from ARG1 and ASP3 - [[0,1,'ARG1:O', 'SOL2:HW1',3.0,180], - [2,3,'SOL2:HW2','ASP3:O', 3.0,180], + # A water bridge SOL2 links O from ARG1 to the carboxylic group OD1 of ASP3 + [[0,2,('ARG',1, 'O'),('SOL',2,'HW1'), 3.0,180], + [3,4,('SOL',2,'HW2'),('ASP',3,'OD1'), 3.0,180], ], # frame 2 - # No water bridge detected - [] + # Another water bridge SOL2 links O from ARG1 to the other oxygen of the + # carboxylic group OD2 of ASP3 + [[0,2,('ARG',1, 'O'),('SOL',2,'HW1'), 3.0,180], + [3,5,('SOL',2,'HW2'),('ASP',3,'OD2'), 3.0,180], + ], ] -To calculate the percentage, we can iterate through :code:`w.timeseries`. :: - water_bridge_presence = [] - for frame in w.timeseries: - if frame: - water_bridge_presence.append(True) - else: - water_bridge_presence.append(False) - p_bridge = float(sum(water_bridge_presence))/len(water_bridge_presence) - print("Fraction of time with water bridge present: {}".format(p_bridge)) +.. _wb_count_by_type: -In the example above, :code:`p_bridge` would become 0.5, i.e., for 50% of the -trajectory a water bridge was detected between the selected residues. +Use count_by_type +----------------- -Alternatively, :meth:`~WaterBridgeAnalysis.count_by_type` can also be used to +We can use the :meth:`~WaterBridgeAnalysis.count_by_type` to generate the frequence of all water bridges in the simulation. :: w.count_by_type() Returns :: - [(0, 3, 'ARG', 1, 'O', 'ASP', 3, 'O', 0.5)] + [(0, 3, 'ARG', 1, 'O', 'ASP', 3, 'OD1', 0.5), + (0, 4, 'ARG', 1, 'O', 'ASP', 3, 'OD2', 0.5),] -For further data analysis, it is convenient to process the -:attr:`~WaterBridgeAnalysis.timeseries` data into a normalized table with the -:meth:`~WaterBridgeAnalysis.generate_table` method, which creates a new data -structure :attr:`WaterBridgeAnalysis.table` that contains one row for each -observation of a hydrogen bond:: +You might think that the OD1 and OD2 are the same oxygen and the aspartate has just flipped +and thus, they should be counted as the same type of water bridge. The type of the water +bridge can be customised by supplying an analysis function to +:meth:`~WaterBridgeAnalysis.count_by_type`. - w.generate_table() +The analysis function has two parameters. The current and the output. The current is a list +of hydrogen bonds from selection 1 to selection 2, formatted in the same fashion as +:attr:`WaterBridgeAnalysis.network`, and an example will be :: -This table can then be easily turned into, e.g., a `pandas.DataFrame`_, and -further analyzed:: + [ # sele1 acceptor idx, , water donor index, donor heavy atom idx, dist, ang. + [ 0, None, 2, 1, 3.0,180], + # water donor idx, donor heavy atom idx, sele2 acceptor idx, distance, angle. + [ 3, 1, 4, None, 3.0,180],] - import pandas as pd - df = pd.DataFrame.from_records(w.table) +Where ``current[0]`` is the first hydrogen bond originating from selection 1 and ``current[-1]`` is +the final hydrogen bond ending in selection 2. The output sums up all the information in the +current frame and is a dictionary with a user-defined key and the value is the weight of the +corresponding key. During the analysis phase, the function analysis iterates through all the +water bridges and modify the output in-place. At the end of the analysis, the keys from +all the frames are collected and the corresponding values will be summed up and returned. :: + def analysis(current, output, u): + r'''This function defines how the type of water bridge should be specified. -.. _pandas.DataFrame: http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.html + Parameters + ---------- + current : list + The current water bridge being analysed is a list of hydrogen bonds from + selection 1 to selection 2. + output : dict + A dictionary which is modified in-place where the key is the type of + the water bridge and the value is the weight of this type of water bridge. + u : MDAnalysis.universe + The current Universe for looking up atoms.''' + + # decompose the first hydrogen bond. + sele1_index, sele1_heavy_index, atom2, heavy_atom2, dist, angle = current[0] + # decompose the last hydrogen bond. + atom1, heavy_atom1, sele2_index, sele2_heavy_index, dist, angle = current[-1] + # expand the atom index to the resname, resid, atom names + sele1 = u.atoms[sele1_index] + sele2 = u.atoms[sele2_index] + (s1_resname, s1_resid, s1_name) = (sele1.resname, sele1.resid, sele1.name) + (s2_resname, s2_resid, s2_name) = (sele2.resname, sele2.resid, sele2.name) + # if the residue name is ASP and the atom name is OD2 or OD1, + # the atom name is changed to OD + if s2_resname == 'ASP' and (s2_name == 'OD1' or s2_name == 'OD2'): + s2_name = 'OD' + # setting up the key which defines this type of water bridge. + key = (s1_resname, s1_resid, s1_name, s2_resname, s2_resid, s2_name) + # The number of this type of water bridge is incremented by 1. + output[key] += 1 + + w.count_by_type(analysis_func=analysis) -Classes -------- +Returns :: -.. autoclass:: WaterBridgeAnalysis - :members: + [(('ARG', 1, 'O', 'ASP', 3, 'OD'), 1.0),] - .. attribute:: timesteps +Note that the result is arranged in the format of ``(key, the proportion of time)``. When no +custom analysis function is supplied, the key is expanded for backward compatibility. So +that when the same code is executed, the result returned will be the same as the result +given since version 0.17.0 and the same as the +:meth:`HydrogenBondAnalysis.count_by_type`. - List of the times of each timestep. This can be used together with - :attr:`~WaterBridgeAnalysis.timeseries` to find the specific time point - of a water bridge existence, or see :attr:`~WaterBridgeAnalysis.table`. +Some people might only interested in contacts between residues and pay no attention +to the details regarding the atom name. However, since multiple water bridges can +exist between two residues, which sometimes can give a result such that the water +bridge between two residues exists 300% of the time. Though this might be a desirable +result for some people, others might want the water bridge between two residues to be +only counted once per frame. This can also be achieved by supplying an analysis function +to :meth:`~WaterBridgeAnalysis.count_by_type`. :: + + def analysis(current, output, u): + '''This function defines how the type of water bridge should be specified. + + Parameters + ---------- + current : list + The current water bridge being analysed is a list of hydrogen bonds from + selection 1 to selection 2. + output : dict + A dictionary which is modified in-place where the key is the type of + the water bridge and the value is the weight of this type of water bridge. + u : MDAnalysis.universe + The current Universe for looking up atoms. + ''' + + # decompose the first hydrogen bond. + sele1_index, sele1_heavy_index, atom2, heavy_atom2, dist, angle = current[0] + # decompose the last hydrogen bond. + atom1, heavy_atom1, sele2_index, sele2_heavy_index, dist, angle = current[-1] + # expand the atom index to the resname, resid, atom names + sele1 = u.atoms[sele1_index] + sele2 = u.atoms[sele2_index] + (s1_resname, s1_resid, s1_name) = (sele1.resname, sele1.resid, sele1.name) + (s2_resname, s2_resid, s2_name) = (sele2.resname, sele2.resid, sele2.name) + # s1_name and s2_name are not included in the key + key = (s1_resname, s1_resid, s2_resname, s2_resid) + + # Each residue is only counted once per frame + output[key] = 1 + + w.count_by_type(analysis_func=analysis) - .. attribute:: table +Returns :: + + [(('ARG', 1, 'ASP', 3), 1.0),] + +On the other hand, other people may insist that the first order and second-order water +bridges shouldn't be mixed together, which can also be achieved by supplying an analysis +function to :meth:`~WaterBridgeAnalysis.count_by_type`. :: + + def analysis(current, output, u): + '''This function defines how the type of water bridge should be specified. + + Parameters + ---------- + current : list + The current water bridge being analysed is a list of hydrogen bonds from + selection 1 to selection 2. + output : dict + A dictionary which is modified in-place where the key is the type of + the water bridge and the value is the weight of this type of water bridge. + u : MDAnalysis.universe + The current Universe for looking up atoms. + ''' + + # decompose the first hydrogen bond. + sele1_index, sele1_heavy_index, atom2, heavy_atom2, dist, angle = current[0] + # decompose the last hydrogen bond. + atom1, heavy_atom1, sele2_index, sele2_heavy_index, dist, angle = current[-1] + # expand the atom index to the resname, resid, atom names + sele1 = u.atoms[sele1_index] + sele2 = u.atoms[sele2_index] + (s1_resname, s1_resid, s1_name) = (sele1.resname, sele1.resid, sele1.name) + (s2_resname, s2_resid, s2_name) = (sele2.resname, sele2.resid, sele2.name) + # order of the current water bridge is computed + order_of_water_bridge = len(current) - 1 + # and is included in the key + key = (s1_resname, s1_resid, s2_resname, s2_resid, order_of_water_bridge) + # The number of this type of water bridge is incremented by 1. + output[key] += 1 + + w.count_by_type(analysis_func=analysis) + +The extra number 1 precede the 1.0 indicate that this is a first order water bridge :: + + [(('ARG', 1, 'ASP', 3, 1), 1.0),] + +Some people might not be interested in the interactions related to arginine. The undesirable +interactions can be discarded by supplying an analysis function to +:meth:`~WaterBridgeAnalysis.count_by_type`. :: + + def analysis(current, output, u): + '''This function defines how the type of water bridge should be specified. + + Parameters + ---------- + current : list + The current water bridge being analysed is a list of hydrogen bonds from + selection 1 to selection 2. + output : dict + A dictionary which is modified in-place where the key is the type of + the water bridge and the value is the number of this type of water bridge. + u : MDAnalysis.universe + The current Universe for looking up atoms. + ''' + + # decompose the first hydrogen bond. + sele1_index, sele1_heavy_index, atom2, heavy_atom2, dist, angle = current[0] + # decompose the last hydrogen bond. + atom1, heavy_atom1, sele2_index, sele2_heavy_index, dist, angle = current[-1] + # expand the atom index to the resname, resid, atom names + sele1 = u.atoms[sele1_index] + sele2 = u.atoms[sele2_index] + (s1_resname, s1_resid, s1_name) = (sele1.resname, sele1.resid, sele1.name) + (s2_resname, s2_resid, s2_name) = (sele2.resname, sele2.resid, sele2.name) + if not s1_resname == 'ARG': + key = (s1_resname, s1_resid, s2_resname, s2_resid) + output[key] += 1 + + w.count_by_type(analysis_func=analysis) + +Returns nothing in this case :: - A normalised table of the data in - :attr:`WaterBridgeAnalysis.timeseries`, generated by - :meth:`WaterBridgeAnalysis.generate_table`. It is a - :class:`numpy.recarray` with the following columns: + [,] - 0. "time" - 1. "donor_index" - 2. "acceptor_index" - 3. "donor_resnm" - 4. "donor_resid" - 5. "donor_atom" - 6. "acceptor_resnm" - 7. "acceptor_resid" - 8. "acceptor_atom" - 9. "distance" - 10. "angle" +Additional keywords can be supplied to the analysis function by passing through +:meth:`~WaterBridgeAnalysis.count_by_type`. :: - It takes up more space than :attr:`~WaterBridgeAnalysis.timeseries` but - it is easier to analyze and to import into databases or dataframes. + def analysis(current, output, **kwargs): + ... + w.count_by_type(analysis_func=analysis, **kwargs) - .. rubric:: Example +.. _wb_count_by_time: - For example, to create a `pandas.DataFrame`_ from ``h.table``:: +Use count_by_time +----------------- + +:meth:`~WaterBridgeAnalysis.count_by_type` aggregates data across frames, which +might be desirable in some cases but not the others. :meth:`~WaterBridgeAnalysis.count_by_time` +provides additional functionality for aggregating results for each frame. + +The default behaviour of :meth:`~WaterBridgeAnalysis.count_by_time` is counting the number of +water bridges from selection 1 to selection 2 for each frame. Take the previous ASP, ARG salt +bridge for example: :: + + w.count_by_time() + +As one water bridge is found in both frames, the method returns :: + + [(1.0, 1), (2.0, 1), ] + +Similar to :meth:`~WaterBridgeAnalysis.count_by_type` +The behaviour of :meth:`~WaterBridgeAnalysis.count_by_time` can also be modified by supplying +an analysis function. + +Suppose we want to count + + - the **number** of water molecules involved in bridging selection 1 to selection 2. + - only if water bridge terminates in atom name **OD1 of ASP**. + - only when water bridge is joined by less than **two** water. + +The analysis function can be written as:: + + def analysis(current, output, u, **kwargs): + '''This function defines how the counting of water bridge should be specified. + + Parameters + ---------- + current : list + The current water bridge being analysed is a list of hydrogen bonds from + selection 1 to selection 2. + output : dict + A dictionary which is modified in-place where the key is the type of + the water bridge and the value is the number of this type of water bridge. + u : MDAnalysis.universe + The current Universe for looking up atoms. + ''' + + # decompose the first hydrogen bond. + sele1_index, sele1_heavy_index, atom2, heavy_atom2, dist, angle = current[0] + # decompose the last hydrogen bond. + atom1, heavy_atom1, sele2_index, sele2_heavy_index, dist, angle = current[-1] + # expand the atom index to the resname, resid, atom names + sele1 = u.atoms[sele1_index] + sele2 = u.atoms[sele2_index] + (s1_resname, s1_resid, s1_name) = (sele1.resname, sele1.resid, sele1.name) + (s2_resname, s2_resid, s2_name) = (sele2.resname, sele2.resid, sele2.name) + + # only the residue name is ASP and the atom name is OD1, + if s2_resname == 'ASP' and s2_name == 'OD1': + # only if the order of water bridge is less than 2 + if len(current) -1 < 2: + # extract all water molecules involved in the water bridge + # extract the first water from selection 1 + s1_index, to_index, (s1_resname, s1_resid, s1_name), + (to_resname, to_resid, to_name), dist, angle = current[0] + key = (to_resname, to_resid) + output[key] = 1 + + # extract all the waters between selection 1 and selection 2 + for hbond in current[1:-1]: + # decompose the hydrogen bond. + from_index, to_index, (from_resname, from_resid, from_name), + (to_resname, to_resid, to_name), dist, angle = hbond + # add first water + key1 = (from_resname, from_resid) + output[key1] = 1 + # add second water + key2 = (to_resname, to_resid) + output[key2] = 1 + + # extract the last water to selection 2 + from_index, s2_index, (from_resname, from_resid, from_name), + (s2_resname, s2_resid, s2_name), dist, angle = current[-1] + key = (from_resname, from_resid) + output[key] = 1 + + w.count_by_time(analysis_func=analysis) + +Returns :: + + [(1.0, 1), (2.0, 0),] + +Classes +------- + +.. autoclass:: WaterBridgeAnalysis + :members: + + .. attribute:: timesteps + + List of the times of each timestep. This can be used together with + :attr:`~WaterBridgeAnalysis.timeseries` to find the specific time point + of a water bridge existence. - import pandas as pd - df = pd.DataFrame.from_records(w.table) """ -from __future__ import absolute_import, division -import six +from __future__ import print_function, absolute_import, division from collections import defaultdict import logging import warnings - +import six import numpy as np -from .hbond_analysis import HydrogenBondAnalysis +from ..base import AnalysisBase from MDAnalysis.lib.NeighborSearch import AtomNeighborSearch -from MDAnalysis.lib.log import ProgressMeter +from MDAnalysis import NoDataError, MissingDataWarning, SelectionError from MDAnalysis.lib import distances -from MDAnalysis import SelectionWarning -logger = logging.getLogger('MDAnalysis.analysis.wbridges') +logger = logging.getLogger('MDAnalysis.analysis.WaterBridgeAnalysis') -class WaterBridgeAnalysis(HydrogenBondAnalysis): +class WaterBridgeAnalysis(AnalysisBase): """Perform a water bridge analysis The analysis of the trajectory is performed with the @@ -313,14 +588,48 @@ class WaterBridgeAnalysis(HydrogenBondAnalysis): .. versionadded:: 0.17.0 + + """ + # use tuple(set()) here so that one can just copy&paste names from the + # table; set() takes care for removing duplicates. At the end the + # DEFAULT_DONORS and DEFAULT_ACCEPTORS should simply be tuples. + + #: default heavy atom names whose hydrogens are treated as *donors* + #: (see :ref:`Default atom names for hydrogen bonding analysis`); + #: use the keyword `donors` to add a list of additional donor names. + DEFAULT_DONORS = { + 'CHARMM27': tuple(set([ + 'N', 'OH2', 'OW', 'NE', 'NH1', 'NH2', 'ND2', 'SG', 'NE2', 'ND1', + 'NZ', 'OG', 'OG1', 'NE1', 'OH'])), + 'GLYCAM06': tuple(set(['N', 'NT', 'N3', 'OH', 'OW'])), + 'other': tuple(set([]))} + + #: default atom names that are treated as hydrogen *acceptors* + #: (see :ref:`Default atom names for hydrogen bonding analysis`); + #: use the keyword `acceptors` to add a list of additional acceptor names. + DEFAULT_ACCEPTORS = { + 'CHARMM27': tuple(set([ + 'O', 'OC1', 'OC2', 'OH2', 'OW', 'OD1', 'OD2', 'SG', 'OE1', 'OE1', + 'OE2', 'ND1', 'NE2', 'SD', 'OG', 'OG1', 'OH'])), + 'GLYCAM06': tuple(set(['N', 'NT', 'O', 'O2', 'OH', 'OS', 'OW', 'OY', 'SM'])), + 'other': tuple(set([]))} + + #: A :class:`collections.defaultdict` of covalent radii of common donors + #: (used in :meth`_get_bonded_hydrogens_list` to check if a hydrogen is + #: sufficiently close to its donor heavy atom). Values are stored for + #: N, O, P, and S. Any other heavy atoms are assumed to have hydrogens + #: covalently bound at a maximum distance of 1.5 Å. + r_cov = defaultdict(lambda: 1.5, # default value + N=1.31, O=1.31, P=1.58, S=1.55) + def __init__(self, universe, selection1='protein', - selection2='not resname SOL', water_selection='resname SOL', - selection1_type='both', update_selection1=False, - update_selection2=False, update_water_selection=True, + selection2='not resname SOL', water_selection='resname SOL', order=1, + selection1_type='both', update_selection=False, update_water_selection=True, filter_first=True, distance_type='hydrogen', distance=3.0, angle=120.0, forcefield='CHARMM27', donors=None, - acceptors=None, debug=None, verbose=False, **kwargs): + acceptors=None, output_format="sele1_sele2", debug=None, verbose=False, + pbc=False, **kwargs): """Set up the calculation of water bridges between two selections in a universe. @@ -350,8 +659,17 @@ def __init__(self, universe, selection1='protein', name "SOL". Change it to the appropriate selection for your specific force field. - However, in theory this selection can be anything which forms - hydrogen bond with selection 1 and selection 2. + However, in theory, this selection can be anything which forms + a hydrogen bond with selection 1 and selection 2. + order : int (optional) + The maximum number of water bridges linking both selections. + if the order is set to 3, then all the residues linked with less than + three water molecules will be detected. [1] + + Computation of high order water bridges can be very time-consuming. + Think carefully before running the calculation, do you really want + to compute the 20th order water bridge between domain A and domain B + or you just want to know the third order water bridge between two residues. selection1_type : {"donor", "acceptor", "both"} (optional) Selection 1 can be 'donor', 'acceptor' or 'both'. Note that the value for `selection1_type` automatically determines how @@ -359,15 +677,12 @@ def __init__(self, universe, selection1='protein', 'both' then `selection2` will also contain 'both'. If `selection1` is set to 'donor' then `selection2` is 'acceptor' (and vice versa). ['both']. - update_selection1 : bool (optional) - Update selection 1 at each frame. Setting to ``True`` if the - selection is not static. Selection 1 is filtered first to speed up + update_selection : bool (optional) + Update selection 1 and 2 at each frame. Setting to ``True`` if the + selection is not static. Selections are filtered first to speed up performance. Thus, setting to ``True`` is recommended if contact surface between selection 1 and selection 2 is constantly changing. [``False``] - update_selection2 : bool (optional) - Similiar to *update_selection1* but is acted upon selection 2. - [``False``] update_water_selection : bool (optional) Update selection of water at each frame. Setting to ``False`` is **only** recommended when the total amount of water molecules in the @@ -380,10 +695,10 @@ def __init__(self, universe, selection1='protein', ``True`` so as to filter out water not residing between the two selections. [``True``] filter_first : bool (optional) - Filter the water selection to only include water within 3 * - `distance` away from `both` selection 1 and selection 2. + Filter the water selection to only include water within 4 Å + `order` * + (2 Å + `distance`) away from `both` selection 1 and selection 2. Selection 1 and selection 2 are both filtered to only include atoms - 3 * `distance` away from the other selection. [``True``] + with the same distance away from the other selection. [``True``] distance : float (optional) Distance cutoff for hydrogen bonds; only interactions with a H-A distance <= `distance` (and the appropriate D-H-A angle, see @@ -393,7 +708,7 @@ def __init__(self, universe, selection1='protein', Angle cutoff for hydrogen bonds; an ideal H-bond has an angle of 180º. A hydrogen bond is only recorded if the D-H-A angle is >= `angle`. The default of 120º also finds fairly non-specific - hydrogen interactions and a possibly better value is 150º. [120.0] + hydrogen interactions and possibly better value is 150º. [120.0] forcefield : {"CHARMM27", "GLYCAM06", "other"} (optional) Name of the forcefield used. Switches between different :attr:`~HydrogenBondAnalysis.DEFAULT_DONORS` and @@ -408,9 +723,14 @@ def __init__(self, universe, selection1='protein', sequence. distance_type : {"hydrogen", "heavy"} (optional) Measure hydrogen bond lengths between donor and acceptor heavy - attoms ("heavy") or between donor hydrogen and acceptor heavy + atoms ("heavy") or between donor hydrogen and acceptor heavy atom ("hydrogen"). If using "heavy" then one should set the *distance* cutoff to a higher value such as 3.5 Å. ["hydrogen"] + output_format: {"sele1_sele2", "donor_acceptor"} (optional) + Setting the output format for timeseries and table. If set to + "sele1_sele2", for each hydrogen bond, the one close to selection 1 + will be placed before selection 2. If set to "donor_acceptor", the + donor will be placed before acceptor. "sele1_sele2"] debug : bool (optional) If set to ``True`` enables per-frame debug logging. This is disabled by default because it generates a very large amount of output in @@ -428,58 +748,111 @@ def __init__(self, universe, selection1='protein', If selection 1 and selection 2 are very mobile during the simulation and the contact surface is constantly changing (i.e. residues are - moving farther than 3 x `distance`), you might consider setting the - `update_selection1` and `update_selection2` keywords to ``True`` to - ensure correctness. + moving farther than 4 Å + `order` * (2 Å + `distance`)), you might + consider setting the `update_selection` keywords to ``True`` + to ensure correctness. + + .. versionchanged 0.20.0 + The :attr:`WaterBridgeAnalysis.timeseries` has been updated + see :attr:`WaterBridgeAnalysis.timeseries` for detail. + This class is now based on + :class:`~MDAnalysis.analysis.base.AnalysisBase`. + + """ + super(WaterBridgeAnalysis, self).__init__(universe.trajectory, + **kwargs) self.water_selection = water_selection self.update_water_selection = update_water_selection - super(WaterBridgeAnalysis, self).__init__( - universe=universe, selection1=selection1, selection2=selection2, - selection1_type=selection1_type, - update_selection1=update_selection1, - update_selection2=update_selection2, filter_first=filter_first, - distance_type=distance_type, distance=distance, angle=angle, - forcefield=forcefield, donors=donors, acceptors=acceptors, - debug=debug, verbose=verbose, **kwargs) - self._update_water_selection() + # per-frame debugging output? + self.debug = debug + + # set the output format + self.output_format = output_format + + self.u = universe + self.selection1 = selection1 + self.selection2 = selection2 + self.selection1_type = selection1_type + + # if the selection 1 and selection 2 are the same + if selection1 == selection2: + # eliminate the duplication + self.selection1_type = "donor" + self.update_selection = update_selection + self.filter_first = filter_first + self.distance = distance + self.distance_type = distance_type # note: everything except 'heavy' will give the default behavior + self.angle = angle + self.pbc = pbc and all(self.u.dimensions[:3]) + self.order = order + + # set up the donors/acceptors lists + if donors is None: + donors = [] + if acceptors is None: + acceptors = [] + self.forcefield = forcefield + self.donors = tuple(set(self.DEFAULT_DONORS[forcefield]).union(donors)) + self.acceptors = tuple(set(self.DEFAULT_ACCEPTORS[forcefield]).union(acceptors)) + + if self.selection1_type not in ('both', 'donor', 'acceptor'): + raise ValueError('HydrogenBondAnalysis: Invalid selection type {0!s}'.format( + self.selection1_type)) + + self._network = [] # final result accessed as self.network + self.timesteps = None # time for each frame + + self._log_parameters() def _log_parameters(self): """Log important parameters to the logfile.""" - logger.info("WBridge analysis: selection1 = %r (update: %r)", - self.selection1, self.update_selection1) - logger.info("WBridge analysis: selection2 = %r (update: %r)", - self.selection2, self.update_selection2) - logger.info("WBridge analysis: water selection = %r (update: %r)", + logger.info("WaterBridgeAnalysis: selection = %r (update: %r)", + self.selection2, self.update_selection) + logger.info("WaterBridgeAnalysis: water selection = %r (update: %r)", self.water_selection, self.update_water_selection) - logger.info("WBridge analysis: criterion: donor %s atom and acceptor \ + logger.info("WaterBridgeAnalysis: criterion: donor %s atom and acceptor \ atom distance <= %.3f A", self.distance_type, self.distance) - logger.info("WBridge analysis: criterion: angle D-H-A >= %.3f degrees", + logger.info("WaterBridgeAnalysis: criterion: angle D-H-A >= %.3f degrees", self.angle) - logger.info("WBridge analysis: force field %s to guess donor and \ + logger.info("WaterBridgeAnalysis: force field %s to guess donor and \ acceptor names", self.forcefield) - logger.info("WBridge analysis: bonded hydrogen detection algorithm: \ - %r", self.detect_hydrogens) - def _update_selection_1(self): + def _update_selection(self): self._s1 = self.u.select_atoms(self.selection1) self._s2 = self.u.select_atoms(self.selection2) + if self.filter_first and self._s1: self.logger_debug('Size of selection 1 before filtering:' ' {} atoms'.format(len(self._s1))) - ns_selection_1 = AtomNeighborSearch(self._s1) - self._s1 = ns_selection_1.search(self._s2, 3. * self.distance) + ns_selection_1 = AtomNeighborSearch(self._s1, box=self.box) + self._s1 = ns_selection_1.search(self._s2, self.selection_distance) self.logger_debug("Size of selection 1: {0} atoms".format(len(self._s1))) - self._s1_donors = {} - self._s1_donors_h = {} - self._s1_acceptors = {} + + if not self._s1: + logger.warning('Selection 1 "{0}" did not select any atoms.' + .format(str(self.selection1)[:80])) + return + + if self.filter_first and self._s2: + self.logger_debug('Size of selection 2 before filtering:' + ' {} atoms'.format(len(self._s2))) + ns_selection_2 = AtomNeighborSearch(self._s2, box=self.box) + self._s2 = ns_selection_2.search(self._s1, self.selection_distance) + self.logger_debug('Size of selection 2: {0} atoms'.format(len(self._s2))) + + if not self._s2: + logger.warning('Selection 2 "{0}" did not select any atoms.' + .format(str(self.selection2)[:80])) + return + if self.selection1_type in ('donor', 'both'): self._s1_donors = self._s1.select_atoms( 'name {0}'.format(' '.join(self.donors))) self._s1_donors_h = {} - for i, d in enumerate(self._s1_donors): - tmp = self._get_bonded_hydrogens(d) + for i, atom in enumerate(self._s1_donors): + tmp = self._get_bonded_hydrogens(atom) if tmp: self._s1_donors_h[i] = tmp self.logger_debug("Selection 1 donors: {0}".format(len(self._s1_donors))) @@ -489,37 +862,36 @@ def _update_selection_1(self): 'name {0}'.format(' '.join(self.acceptors))) self.logger_debug("Selection 1 acceptors: {0}".format(len(self._s1_acceptors))) - def _sanity_check(self, selection, htype): - """sanity check the selections 1 and 2 - - *selection* is 1 or 2, *htype* is "donors" or "acceptors" - """ - assert selection in (1, 2) - assert htype in ("donors", "acceptors") - atoms = getattr(self, "_s{0}_{1}".format(selection, htype)) - update = getattr(self, "update_selection{0}".format(selection)) - if not atoms: - errmsg = "No {1} found in selection {0}. " \ - "You might have to specify a custom '{1}' keyword.".format( - selection, htype) - warnings.warn(errmsg, category=SelectionWarning) - logger.warning(errmsg) + if not self._s2: + return None + if self.selection1_type in ('donor', 'both'): + self._s2_acceptors = self._s2.select_atoms( + 'name {0}'.format(' '.join(self.acceptors))) + self.logger_debug("Selection 2 acceptors: {0:d}".format(len(self._s2_acceptors))) + if self.selection1_type in ('acceptor', 'both'): + self._s2_donors = self._s2.select_atoms( + 'name {0}'.format(' '.join(self.donors))) + self._s2_donors_h = {} + for i, d in enumerate(self._s2_donors): + tmp = self._get_bonded_hydrogens(d) + if tmp: + self._s2_donors_h[i] = tmp + self.logger_debug("Selection 2 donors: {0:d}".format(len(self._s2_donors))) + self.logger_debug("Selection 2 donor hydrogens: {0:d}".format(len(self._s2_donors_h))) def _update_water_selection(self): self._water = self.u.select_atoms(self.water_selection) self.logger_debug('Size of water selection before filtering:' ' {} atoms'.format(len(self._water))) - if self.filter_first: - ns_water_selection = AtomNeighborSearch(self._water) - self._water = ns_water_selection.search(self._s1, - 3. * self.distance) - self._water = ns_water_selection.search(self._s2, - 3. * self.distance) + if self._water and self.filter_first: + filtered_s1 = AtomNeighborSearch(self._water, box=self.box).search(self._s1, + self.selection_distance) + if filtered_s1: + self._water = AtomNeighborSearch(filtered_s1, box=self.box).search(self._s2, + self.selection_distance) self.logger_debug("Size of water selection: {0} atoms".format(len(self._water))) - self._water_donors = {} - self._water_donors_h = {} - self._water_acceptors = {} + if not self._water: logger.warning("Water selection '{0}' did not select any atoms." .format(str(self.water_selection)[:80])) @@ -537,275 +909,370 @@ def _update_water_selection(self): 'name {0}'.format(' '.join(self.acceptors))) self.logger_debug("Water acceptors: {0}".format(len(self._water_acceptors))) - def run(self, start=None, stop=None, step=None, verbose=None, debug=None): - """Analyze trajectory and produce timeseries. + def _get_bonded_hydrogens(self, atom): + """Find hydrogens bonded within cutoff to `atom`. + + Hydrogens are detected by either name ("H*", "[123]H*") or type ("H"); + this is not fool-proof as the atom type is not always a character but + the name pattern should catch most typical occurrences. + + The distance from `atom` is calculated for all hydrogens in the residue + and only those within a cutoff are kept. The cutoff depends on the + heavy atom (more precisely, on its element, which is taken as the first + letter of its name ``atom.name[0]``) and is parameterized in + :attr:`HydrogenBondAnalysis.r_cov`. If no match is found then the + default of 1.5 Å is used. - Stores the water bridge data per frame as - :attr:`WaterBridgeAnalysis.timeseries` (see there for output - format). Parameters ---------- - start : int (optional) - starting frame-index for analysis, ``None`` is the first one, 0. - `start` and `stop` are 0-based frame indices and are used to slice - the trajectory (if supported) [``None``] - stop : int (optional) - last trajectory frame for analysis, ``None`` is the last one - [``None``] - step : int (optional) - read every `step` between `start` (included) and `stop` (excluded), - ``None`` selects 1. [``None``] - verbose : bool (optional) - toggle progress meter output - :class:`~MDAnalysis.lib.log.ProgressMeter` [``True``] - debug : bool (optional) - enable detailed logging of debugging information; this can create - *very big* log files so it is disabled (``False``) by default; - setting `debug` toggles the debug status for - :class:`WaterBridgeAnalysis`, namely the value of - :attr:`WaterBridgeAnalysis.debug`. - - See Also - -------- - :meth:`WaterBridgeAnalysis.generate_table` : - processing the data into a different format. + atom : groups.Atom + heavy atom + + Returns + ------- + hydrogen_atoms : AtomGroup or [] + list of hydrogens (can be a :class:`~MDAnalysis.core.groups.AtomGroup`) + or empty list ``[]`` if none were found. """ - self._setup_frames(self.u.trajectory, start, stop, step) + try: + return atom.residue.atoms.select_atoms( + "(name H* 1H* 2H* 3H* or type H) and around {0:f} name {1!s}" + "".format(self.r_cov[atom.name[0]], atom.name)) + except NoDataError: + return [] - logger.info("WBridge analysis: starting") - logger.debug("WBridge analysis: donors %r", self.donors) - logger.debug("WBridge analysis: acceptors %r", self.acceptors) - logger.debug("WBridge analysis: water bridge %r", self.water_selection) + def logger_debug(self, *args): + if self.debug: + logger.debug(*args) - if debug is not None and debug != self.debug: - self.debug = debug - logger.debug("Toggling debug to %r", self.debug) - if not self.debug: - logger.debug("WBridge analysis: For full step-by-step debugging output use debug=True") - self._timeseries = [] + def _prepare(self): + # The distance for selection is defined as twice the maximum bond length of an O-H bond (2A) + # plus order of water bridge times the length of OH bond plus hydrogne bond distance + self.selection_distance = (2 * 2 + self.order * (2 + self.distance)) + + self._s1_donors = {} + self._s1_donors_h = {} + self._s1_acceptors = {} + + self._s2_donors = {} + self._s2_donors_h = {} + self._s2_acceptors = {} + + self.box = self.u.dimensions if self.pbc else None + self._update_selection() + + self._water_donors = {} + self._water_donors_h = {} + self._water_acceptors = {} + self.timesteps = [] - self._water_network = [] + if self._s1 and self._s2: + self._update_water_selection() + logger.info("WaterBridgeAnalysis: no atoms found in the selection.") + + logger.info("WaterBridgeAnalysis: initial checks passed.") + + logger.info("WaterBridgeAnalysis: starting") + logger.debug("WaterBridgeAnalysis: donors %r", self.donors) + logger.debug("WaterBridgeAnalysis: acceptors %r", self.acceptors) + logger.debug("WaterBridgeAnalysis: water bridge %r", self.water_selection) - if verbose is None: - verbose = self._verbose - pm = ProgressMeter(self.n_frames, - format="WBridge frame {current_step:5d}: {step:5d}/{numsteps} [{percentage:5.1f}%]\r", - verbose=verbose) + if self.debug: + logger.debug("Toggling debug to %r", self.debug) + else: + logger.debug("WaterBridgeAnalysis: For full step-by-step debugging output use debug=True") logger.info("Starting analysis (frame index start=%d stop=%d, step=%d)", self.start, self.stop, self.step) - for progress, ts in enumerate(self.u.trajectory[self.start:self.stop:self.step]): - # all bonds for this timestep - - # dict of tuples (atom.index, atom.index) for quick check if - # we already have the bond (to avoid duplicates) - - frame = ts.frame - timestep = ts.time - self.timesteps.append(timestep) - pm.echo(progress, current_step=frame) - self.logger_debug("Analyzing frame %(frame)d, timestep %(timestep)f ps", vars()) - if self.update_selection1: - self._update_selection_1() - if self.update_selection2: - self._update_selection_2() + def _donor2acceptor(self, donors, donor_hs, acceptor): + result = [] + ns_acceptors = AtomNeighborSearch(acceptor, self.box) + for i, donor_h_set in donor_hs.items(): + d = donors[i] + for h in donor_h_set: + res = ns_acceptors.search(h, self.distance) + for a in res: + donor_atom = h if self.distance_type != 'heavy' else d + dist = distances.calc_bonds(donor_atom.position, + a.position, box=self.box) + if dist <= self.distance: + angle = distances.calc_angles(d.position, h.position, + a.position, box=self.box) + angle = np.rad2deg(angle) + if angle >= self.angle: + self.logger_debug( + "D: {0!s} <-> A: {1!s} {2:f} A, {3:f} DEG" \ + .format(h.index, a.index, dist, angle)) + result.append((h.index, d.index, a.index, + (h.resname, h.resid, h.name), + (a.resname, a.resid, a.name), + dist, angle)) + return result + + def _single_frame(self): + self.timesteps.append(self._ts.time) + self.box = self.u.dimensions if self.pbc else None + + if self.update_selection: + self._update_selection() + if self._s1 and self._s2: if self.update_water_selection: self._update_water_selection() + else: + self._network.append(defaultdict(dict)) + return - s1_frame_results_dict = defaultdict(list) - if (self.selection1_type in ('donor', 'both') and - self._water_acceptors): + selection_1 = [] + water_pool = defaultdict(list) + next_round_water = set([]) + selection_2 = [] + if self.selection1_type in ('donor', 'both'): + # check for direct hbond from s1 to s2 + if self._s2_acceptors: + self.logger_debug("Selection 1 Donors <-> Selection 2 Acceptors") + results = self._donor2acceptor(self._s1_donors, self._s1_donors_h, self._s2_acceptors) + for line in results: + h_index, d_index, a_index, (h_resname, h_resid, h_name), (a_resname, a_resid, a_name), dist, angle = line + water_pool[(a_resname, a_resid)] = None + selection_1.append((h_index, d_index, a_index, None, dist, angle)) + selection_2.append((a_resname, a_resid)) + if self.order > 0 and self._water_acceptors: self.logger_debug("Selection 1 Donors <-> Water Acceptors") - ns_acceptors = AtomNeighborSearch(self._water_acceptors) - for i, donor_h_set in self._s1_donors_h.items(): - d = self._s1_donors[i] - for h in donor_h_set: - res = ns_acceptors.search(h, self.distance) - for a in res: - donor_atom = h if self.distance_type != 'heavy' else d - dist = distances.calc_bonds(donor_atom.position, - a.position) - if dist <= self.distance: - angle = distances.calc_angles(d.position, h.position, - a.position) - angle = np.rad2deg(angle) - if angle >= self.angle: - self.logger_debug( - "S1-D: {0!s} <-> W-A: {1!s} {2:f} A, {3:f} DEG"\ - .format(h.index, a.index, dist, angle)) - s1_frame_results_dict[(a.resname, a.resid)].append( - (h.index, a.index, - (h.resname, h.resid, h.name), - (a.resname, a.resid, a.name), - dist, angle)) - - if (self.selection1_type in ('acceptor', 'both') and - self._s1_acceptors): + results = self._donor2acceptor(self._s1_donors, self._s1_donors_h, self._water_acceptors) + for line in results: + h_index, d_index, a_index, (h_resname, h_resid, h_name), (a_resname, a_resid, a_name), dist, angle = line + next_round_water.add((a_resname, a_resid)) + selection_1.append((h_index, d_index, a_index, None, dist, angle)) + if (self.selection1_type in ('acceptor', 'both') and + self._s1_acceptors): + self.logger_debug("Selection 2 Donors <-> Selection 1 Acceptors") + results = self._donor2acceptor(self._s2_donors, self._s2_donors_h, self._s1_acceptors) + for line in results: + h_index, d_index, a_index, (h_resname, h_resid, h_name), (a_resname, a_resid, a_name), dist, angle = line + water_pool[(h_resname, h_resid)] = None + selection_1.append((a_index, None, h_index, d_index, dist, angle)) + selection_2.append((h_resname, h_resid)) + + if self.order > 0: self.logger_debug("Selection 1 Acceptors <-> Water Donors") - ns_acceptors = AtomNeighborSearch(self._s1_acceptors) - for i, donor_h_set in self._water_donors_h.items(): - d = self._water_donors[i] - for h in donor_h_set: - res = ns_acceptors.search(h, self.distance) - for a in res: - donor_atom = h if self.distance_type != 'heavy' else d - dist = distances.calc_bonds(donor_atom.position, - a.position) - if dist <= self.distance: - angle = distances.calc_angles(d.position, h.position, - a.position) - angle = np.rad2deg(angle) - if angle >= self.angle: - self.logger_debug( - "S1-A: {0!s} <-> W-D: {1!s} {2:f} A, {3:f} DEG"\ - .format(a.index, h.index, dist, angle)) - s1_frame_results_dict[(h.resname, h.resid)].append( - (h.index, a.index, - (h.resname, h.resid, h.name), - (a.resname, a.resid, a.name), - dist, angle)) + results = self._donor2acceptor(self._water_donors, self._water_donors_h, self._s1_acceptors) + for line in results: + h_index, d_index, a_index, (h_resname, h_resid, h_name), (a_resname, a_resid, a_name), dist, angle = line + next_round_water.add((h_resname, h_resid)) + selection_1.append((a_index, None, h_index, d_index, dist, angle)) + for i in range(self.order): # Narrow down the water selection - selection_resn_id = list(s1_frame_results_dict.keys()) - if not selection_resn_id: - self._timeseries.append([]) - continue + selection_resn_id = list(next_round_water) + if (not selection_resn_id) or (not self._water): + logger.warning("No water forming hydrogen bonding with selection 1.") + break selection_resn_id = ['(resname {} and resid {})'.format( resname, resid) for resname, resid in selection_resn_id] water_bridges = self._water.select_atoms(' or '.join(selection_resn_id)) - self.logger_debug("Size of water bridge selection: {0} atoms".format(len(water_bridges))) - if not water_bridges: - logger.warning("No water forming hydrogen bonding with selection 1.") + self.logger_debug("Size of water bridge selection for the {} order of water bridge: {} atoms".format(i+1, + len(water_bridges))) + water_bridges_donors = water_bridges.select_atoms( 'name {0}'.format(' '.join(self.donors))) water_bridges_donors_h = {} - for i, d in enumerate(water_bridges_donors): + for j, d in enumerate(water_bridges_donors): tmp = self._get_bonded_hydrogens(d) if tmp: - water_bridges_donors_h[i] = tmp + water_bridges_donors_h[j] = tmp self.logger_debug("water bridge donors: {0}".format(len(water_bridges_donors))) self.logger_debug("water bridge donor hydrogens: {0}".format(len(water_bridges_donors_h))) water_bridges_acceptors = water_bridges.select_atoms( 'name {0}'.format(' '.join(self.acceptors))) - self.logger_debug("water bridge: {0}".format(len(water_bridges_acceptors))) - - # Finding the hydrogen bonds between water bridge and selection 2 - s2_frame_results_dict = defaultdict(list) - if self._s2_acceptors: - self.logger_debug("Water bridge Donors <-> Selection 2 Acceptors") - ns_acceptors = AtomNeighborSearch(self._s2_acceptors) - for i, donor_h_set in water_bridges_donors_h.items(): - d = water_bridges_donors[i] - for h in donor_h_set: - res = ns_acceptors.search(h, self.distance) - for a in res: - donor_atom = h if self.distance_type != 'heavy' else d - dist = distances.calc_bonds(donor_atom.position, - a.position) - if dist <= self.distance: - angle = distances.calc_angles(d.position, h.position, - a.position) - angle = np.rad2deg(angle) - if angle >= self.angle: - self.logger_debug( - "WB-D: {0!s} <-> S2-A: {1!s} {2:f} A, {3:f} DEG"\ - .format(h.index, a.index, dist, angle)) - s2_frame_results_dict[(h.resname, h.resid)].append( - (h.index, a.index, - (h.resname, h.resid, h.name), - (a.resname, a.resid, a.name), - dist, angle)) - - if water_bridges_acceptors: - self.logger_debug("Selection 2 Donors <-> Selection 2 Acceptors") - ns_acceptors = AtomNeighborSearch(water_bridges_acceptors) - for i, donor_h_set in self._s2_donors_h.items(): - d = self._s2_donors[i] - for h in donor_h_set: - res = ns_acceptors.search(h, self.distance) - for a in res: - donor_atom = h if self.distance_type != 'heavy' else d - dist = distances.calc_bonds(donor_atom.position, - a.position) - if dist <= self.distance: - angle = distances.calc_angles(d.position, h.position, - a.position) - angle = np.rad2deg(angle) - if angle >= self.angle: - self.logger_debug( - "WB-A: {0!s} <-> S2-D: {1!s} {2:f} A, {3:f} DEG"\ - .format(a.index, h.index, dist, angle)) - s2_frame_results_dict[(a.resname, a.resid)].append( - (h.index, a.index, - (h.resname, h.resid, h.name), - (a.resname, a.resid, a.name), - dist, angle)) - - # Generate the water network - water_network = {} - for key in s2_frame_results_dict: - s1_frame_results = set(s1_frame_results_dict[key]) - s2_frame_results = set(s2_frame_results_dict[key]) - if len(s1_frame_results.union(s2_frame_results)) > 1: - # Thus if selection 1 and selection 2 are the same and both - # only form a single hydrogen bond with a water, this entry - # won't be included. - water_network[key] = [s1_frame_results, - s2_frame_results.difference(s1_frame_results)] - # Generate frame_results - frame_results = [] - for s1_frame_results, s2_frame_results in water_network.values(): - frame_results.extend(list(s1_frame_results)) - frame_results.extend(list(s2_frame_results)) - - self._timeseries.append(frame_results) - self._water_network.append(water_network) - - - logger.info("WBridge analysis: complete; timeseries %s.timeseries", - self.__class__.__name__) - @staticmethod - def _reformat_hb(hb, atomformat="{0[0]!s}{0[1]!s}:{0[2]!s}"): - """ - .. deprecated:: 1.0 - This is a compatibility layer so the water bridge - _timeseries to timeserie conversion can perform as it does in the - hydrogen bond analysis. - - For more information about the function of _reformat_hb in hydrogen - bond analysis, please refer to the _reformat_hb in the hydrogen - bond analysis module. + self.logger_debug("water bridge acceptors: {0}".format(len(water_bridges_acceptors))) + + if i < self.order: + next_round_water = set([]) + # first check if the water can touch the other end + # Finding the hydrogen bonds between water bridge and selection 2 + if self._s2_acceptors: + self.logger_debug("Order {} water donor <-> Selection 2 Acceptors".format(i + 1)) + results = self._donor2acceptor(water_bridges_donors, water_bridges_donors_h, self._s2_acceptors) + for line in results: + h_index, d_index, a_index, (h_resname, h_resid, h_name), (a_resname, a_resid, a_name), dist, angle = line + water_pool[(h_resname, h_resid)].append((h_index, d_index, a_index, None, dist, angle)) + selection_2.append((a_resname, a_resid)) + + # Finding the hydrogen bonds between water bridge and selection 2 + if water_bridges_acceptors: + self.logger_debug("Selection 2 Donors <-> Order {} water".format(i + 1)) + results = self._donor2acceptor(self._s2_donors, self._s2_donors_h, water_bridges_acceptors) + for line in results: + h_index, d_index, a_index, (h_resname, h_resid, h_name), (a_resname, a_resid, a_name), dist, angle = line + water_pool[(a_resname, a_resid)].append((a_index, None, h_index, d_index, dist, angle)) + selection_2.append((h_resname, h_resid)) + + # find the water water hydrogen bond + if water_bridges_acceptors: + self.logger_debug("Order {} water acceptor <-> Order {} water donor".format(i + 1, i + 2)) + results = self._donor2acceptor(self._water_donors, self._water_donors_h, water_bridges_acceptors) + for line in results: + h_index, d_index, a_index, (h_resname, h_resid, h_name), (a_resname, a_resid, a_name), dist, angle = line + water_pool[(a_resname, a_resid)].append((a_index, None, h_index, d_index, dist, angle)) + next_round_water.add((h_resname, h_resid)) + + if water_bridges_donors_h: + self.logger_debug("Order {} water donor <-> Order {} water acceptor".format(i + 1, i + 2)) + results = self._donor2acceptor(water_bridges_donors, water_bridges_donors_h, self._water_acceptors) + for line in results: + h_index, d_index, a_index, (h_resname, h_resid, h_name), (a_resname, a_resid, a_name), dist, angle = line + water_pool[(h_resname, h_resid)].append((h_index, d_index, a_index, None, dist, angle)) + next_round_water.add((a_resname, a_resid)) + + # eliminate the water which has been tested + next_round_water = next_round_water.difference(set(water_pool.keys())) + + # solve the connectivity network + # The following code attempt to generate a water network which is formed by the class dict. + # Suppose we have a water bridge connection ARG1 to ASP3 via the two hydrogen bonds. + # [0,1,('ARG',1,'O'), ('SOL',2,'HW1'), 3.0,180], + # [2,3,('SOL',2,'HW2'),('ASP',3,'OD1'), 3.0,180], + # The resulting network will be + #{(0,1,('ARG',1,'O'), ('SOL',2,'HW1'), 3.0,180): {(2,3,('SOL',2,'HW2'),('ASP',3,'OD1'), 3.0,180): None}} + # Where the key of the a dict will be all the hydrogen bonds starting from this nodes. + # The corresponding value of a certain key will be a dictionary whose key will be all the hydrogen bonds from + # the destination of in the key. + # If the value of a certain key is None, which means it is reaching selection 2. + + result = {'start': defaultdict(dict), 'water': defaultdict(dict)} + + def add_route(result, route): + if len(route) == 1: + result['start'][route[0]] = None + else: + # exclude the the selection which goes back to itself + if (sorted(route[0][0:3:2]) == sorted(route[-1][0:3:2])): + return + + # selection 2 to water + result['water'][route[-1]] = None + # water to water + for i in range(1, len(route) - 1): + result['water'][route[i]][route[i+1]] = result['water'][route[i+1]] + # selection 1 to water + result['start'][route[0]][route[1]] = result['water'][route[1]] + + def traverse_water_network(graph, node, end, route, maxdepth, result): + if len(route) > self.order + 1: + return + else: + if node in end: + # check if any duplication happens + if len(route) == len(set(route)): + add_route(result, route) + else: + for new_node in graph[node]: + new_route = route[:] + new_route.append(new_node) + new_node = self._expand_timeseries(new_node,'sele1_sele2')[3][:2] + traverse_water_network(graph, new_node, end, new_route, maxdepth, result) + for s1 in selection_1: + route = [s1, ] + next_mol = self._expand_timeseries(s1,'sele1_sele2')[3][:2] + traverse_water_network(water_pool, next_mol, selection_2, route[:], self.order, result) + + self._network.append(result['start']) + + def _traverse_water_network(self, graph, current, analysis_func=None, output=None, link_func=None, **kwargs): + ''' + This function recursively traverses the water network self._network and finds the hydrogen bonds which connect + the current atom to the next atom. The newly found hydrogen bond will be appended to the hydrogen bonds + connecting the selection 1 to the current atom via link_func. When selection 2 is reached, the full list of + hydrogen bonds connecting the selection 1 to selection 2 will be fed into analysis_func, which will then modify + the output in place. + :param graph: The connection network describes the connection between the atoms in the water network. + :param current: The hydrogen bonds from selection 1 until now. + :param analysis_func: The analysis function which is called to analysis the hydrogen bonds. + :param output: where the result is stored. + :param link_func: The new hydrogen bonds will be appended to current. + :param kwargs: the keywords which are passed into the analysis_func. + :return: + ''' + if link_func is None: + # If no link_func is provided, the default link_func will be used + link_func = self._full_link + + if graph is None: + # if selection 2 is reached + if not analysis_func is None: + # the result is analysed by analysis_func which will change the output + analysis_func(current, output, self.u, **kwargs) + else: + # make sure no loop can occur + if len(current) <= self.order: + for node in graph: + # the new hydrogen bond will be added to the existing bonds + new = link_func(current, node) + self._traverse_water_network(graph[node], new, analysis_func, output, link_func, **kwargs) + + def _expand_index(self, index): + ''' + Expand the index into (resname, resid, name). + ''' + atom = self.u.atoms[index] + return (atom.resname, atom.resid, atom.name) - As the _timeseries to timeserie conversion will be deprecated in 1.0 - this function will automatically lose its value. - """ + def _expand_timeseries(self, entry, output_format=None): + ''' + Expand the compact data format into the old timeseries form. + The old is defined as the format for release up to 0.19.2. + As is discussed in Issue #2177, the internal storage of the hydrogen + bond information has been changed to the compact format. + The function takes in the argument `output_format` to see which output format will be chosen. + if `output_format` is not specified, the value will be taken from :attr:`output_format`. + If `output_format` is 'sele1_sele2', the output will be the old water bridge analysis format:: + + # donor from selection 1 to acceptor in selection 2 + [sele1_index, sele2_index, + (sele1_resname, sele1_resid, sele1_name), + (sele2_resname, sele2_resid, sele2_name), dist, angle] + + If `output_format` is 'donor_acceptor', the output will be the old hydrogen bond analysis format:: + + # From donor to acceptor + [donor_index, acceptor_index, + (donor_resname, donor_resid, donor_name), + (acceptor_resname, acceptor_resid, acceptor_name), dist, angle] + ''' + output_format = output_format or self.output_format + # Expand the compact entry into atom1, which is the first index in the output and atom2, which is the second + # entry. + atom1, heavy_atom1, atom2, heavy_atom2, dist, angle = entry + if output_format == 'sele1_sele2': + # If the output format is the sele1_sele2, no change will be executed + atom1, atom2 = atom1, atom2 + elif output_format == 'donor_acceptor': + # If the output format is donor_acceptor, use heavy atom position to check which is donor and which is + # acceptor + if heavy_atom1 is None: + # atom1 is hydrogen bond acceptor and thus, the position of atom1 and atom2 are swapped. + atom1, atom2 = atom2, atom1 + else: + # atom1 is hydrogen bond donor, position not swapped. + atom1, atom2 = atom1, atom2 + else: + raise KeyError("Only 'sele1_sele2' or 'donor_acceptor' are allowed as output format") - return (list(hb[:2]) - + [atomformat.format(hb[2]), atomformat.format(hb[3])] - + list(hb[4:])) + return (atom1, atom2, self._expand_index(atom1), self._expand_index(atom2), dist, angle) - @property - def timeseries(self): + def _generate_timeseries(self, output_format=None): r'''Time series of water bridges. - Due to the intrinsic complexity of water bridge problem, where several - atoms :math:`n` can be linked by the same water. A simple ``atom 1 - - water bridge - atom 2`` mapping will create a very large list - :math:`n!/(2(n-2)!)` due to the rule of combination. - - Thus, the output is arranged based on each individual bridging water - allowing the later reconstruction of the water network. The hydrogen - bonds from selection 1 to the **first** bridging water is followed by - the hydrogen bonds from selection 2 to the **same** bridging water. - After that, hydrogen bonds from selection 1 to the **second** bridging - water is succeeded by hydrogen bonds from selection 2 to the **same - second** bridging water. An example of the output is given in - :ref:`wb_Analysis_Output`. + The output is generated per frame as is explained in :ref:`wb_Analysis_Timeseries`. + The format of output can be changed via the output_format selection. + If ``output_format="sele1_sele2"``, the hydrogen bond forms a directional + link from selection 1 to selection 2. If ``output_format="donor_acceptor"``, + for each hydrogen bond, the donor is always written before the acceptor. Note ---- @@ -813,7 +1280,7 @@ def timeseries(self): *index* one would use ``u.atoms[acceptor_index]``. The :attr:`timeseries` is a managed attribute and it is generated - from the underlying data in :attr:`_timeseries` every time the + from the underlying data in :attr:`_network` every time the attribute is accessed. It is therefore costly to call and if :attr:`timeseries` is needed repeatedly it is recommended that you assign to a variable:: @@ -822,94 +1289,274 @@ def timeseries(self): w.run() timeseries = w.timeseries - See Also - -------- - :attr:`table` : structured array of the data + .. versionchanged 0.20.0 + The :attr:`WaterBridgeAnalysis.timeseries` has been updated where + the donor and acceptor string has been changed to tuple + (resname string, resid, name_string). + + ''' - return super(WaterBridgeAnalysis, self).timeseries + output_format = output_format or self.output_format + def analysis(current, output, *args, **kwargs): + output = current - def generate_table(self): - """Generate a normalised table of the results. + timeseries = [] + for frame in self._network: + new_frame = [] + self._traverse_water_network(frame, new_frame, analysis_func=analysis, + output=new_frame, link_func=self._compact_link) + timeseries.append([self._expand_timeseries(entry, output_format) for entry in new_frame]) + return timeseries - The table is stored as a :class:`numpy.recarray` in the - attribute :attr:`~WaterBridgeAnalysis.table`. + timeseries = property(_generate_timeseries) - See Also - -------- - WaterBridgeAnalysis.table - """ - super(WaterBridgeAnalysis, self).generate_table() + def _get_network(self): + r'''Network representation of the water network. - def count_by_type(self): + The output is generated per frame as is explained in :ref:`wb_Analysis_Network`. + Each hydrogen bond has a compact representation of :: + + [sele1_acceptor_idx, None, sele2_donor_idx, donor_heavy_idx, distance, angle] + + or :: + + [sele1_donor_idx, donor_heavy_idx, sele1_acceptor_idx, None, distance, angle] + + The donor_heavy_idx is the heavy atom bonding to the proton and atoms + can be retrived from the universe:: + + atom = u.atoms[idx] + + .. versionadded:: 0.20.0 + + ''' + return self._network + + def set_network(self, network): + self._network = network + + network = property(_get_network, set_network) + + @classmethod + def _full_link(self, output, node): + ''' + A function used in _traverse_water_network to add the new hydrogen bond to the existing bonds. + :param output: The existing hydrogen bonds from selection 1 + :param node: The new hydrogen bond + :return: The hydrogen bonds from selection 1 with the new hydrogen bond added + ''' + result = output[:] + result.append(node) + return result + + @classmethod + def _compact_link(self, output, node): + ''' + A function used in _traverse_water_network to add the new hydrogen bond to the existing bonds. + In this form no new list is created and thus, one bridge will only appear once. + :param output: The existing hydrogen bonds from selection 1 + :param node: The new hydrogen bond + :return: The hydrogen bonds from selection 1 with the new hydrogen bond added + ''' + output.append(node) + return output + + def _count_by_type_analysis(self, current, output, *args, **kwargs): + ''' + Generates the key for count_by_type analysis. + :return: + ''' + + s1_index, to_index, (s1_resname, s1_resid, s1_name), (to_resname, to_resid, to_name), dist, angle = \ + self._expand_timeseries(current[0]) + from_index, s2_index, (from_resname, from_resid, from_name), (s2_resname, s2_resid, s2_name), dist, angle = \ + self._expand_timeseries(current[-1]) + key = (s1_index, s2_index, s1_resname, s1_resid, s1_name, s2_resname, s2_resid, s2_name) + output[key] += 1 + + def count_by_type(self, analysis_func=None, **kwargs): """Counts the frequency of water bridge of a specific type. If one atom *A* from *selection 1* is linked to atom *B* from - *selection 2* through a bridging water, an entity will be created and + *selection 2* through one or more bridging waters, an entity will be created and the proportion of time that this linkage exists in the whole simulation will be calculated. - Returns a :class:`numpy.recarray` containing atom indices for *A* and - *B*, residue names, residue numbers, atom names (for both A and B) and - the fraction of the total time during which the water bridge was - detected. This method returns None if method - :meth:`WaterBridgeAnalysis.run` was not executed first. + The identification of a specific type of water bridge can be modified by + supplying the analysis_func function. See :ref:`wb_count_by_type` + for detail. Returns ------- - counts : numpy.recarray - Each row of the array contains data to define a unique water - bridge together with the frequency (fraction of the total time) - that it has been observed. + counts : list + Returns a :class:`list` containing atom indices for *A* and + *B*, residue names, residue numbers, atom names (for both A and B) and + the fraction of the total time during which the water bridge was + detected. This method returns None if method + :meth:`WaterBridgeAnalysis.run` was not executed first. + + """ - if not self._has_timeseries(): - return + output = None + if analysis_func is None: + analysis_func = self._count_by_type_analysis + output = 'combined' + + if self._network: + length = len(self._network) + result_dict = defaultdict(int) + for frame in self._network: + frame_dict = defaultdict(int) + self._traverse_water_network(frame, [], analysis_func=analysis_func, + output=frame_dict, + link_func=self._full_link, **kwargs) + for key, value in frame_dict.items(): + result_dict[key] += frame_dict[key] + + if output is 'combined': + result = [[i for i in key] for key in result_dict] + [result[i].append(result_dict[key]/length) for i, key in enumerate(result_dict)] + else: + result = [(key, result_dict[key]/length) for key in result_dict] + return result + else: + return None - wbridges = defaultdict(int) - for wframe in self._water_network: - pairs = set([]) - for water, (selection1, selection2) in wframe.items(): - for (donor_index, acceptor_index, donor, acceptor, distance, - angle) in selection1: - if donor[:2] == water: - sele1 = acceptor - sele1_index = acceptor_index + def _count_by_time_analysis(self, current, output, *args, **kwargs): + s1_index, to_index, (s1_resname, s1_resid, s1_name), (to_resname, to_resid, to_name), dist, angle = \ + self._expand_timeseries(current[0]) + from_index, s2_index, (from_resname, from_resid, from_name), (s2_resname, s2_resid, s2_name), dist, angle = \ + self._expand_timeseries(current[-1]) + key = (s1_index, s2_index, s1_resname, s1_resid, s1_name, s2_resname, s2_resid, s2_name) + output[key] += 1 + + def count_by_time(self, analysis_func=None, **kwargs): + """Counts the number of water bridges per timestep. + + The counting behaviour can be adjusted by supplying analysis_func. + See :ref:`wb_count_by_time` for details. + + Returns + ------- + counts : list + Returns a time series ``N(t)`` where ``N`` is the total + number of observed water bridges at time ``t``. + + """ + if analysis_func is None: + analysis_func = self._count_by_time_analysis + if self._network: + result = [] + for time, frame in zip(self.timesteps, self._network): + result_dict = defaultdict(int) + self._traverse_water_network(frame, [], analysis_func=analysis_func, + output=result_dict, + link_func=self._full_link, **kwargs) + result.append((time, sum([result_dict[key] for key in result_dict]))) + return result + else: + return None + + def _timesteps_by_type_analysis(self, current, output, *args, **kwargs): + s1_index, to_index, (s1_resname, s1_resid, s1_name), (to_resname, to_resid, to_name), dist, angle = \ + self._expand_timeseries(current[0]) + from_index, s2_index, (from_resname, from_resid, from_name), (s2_resname, s2_resid, s2_name), dist, angle = \ + self._expand_timeseries(current[-1]) + key = (s1_index, s2_index, s1_resname, s1_resid, s1_name, s2_resname, s2_resid, s2_name) + output[key].append(kwargs.pop('time')) + + def timesteps_by_type(self, analysis_func=None, **kwargs): + """Frames during which each water bridges existed, sorted by each water bridges. + + Processes :attr:`WaterBridgeAnalysis._network` and returns a + :class:`list` containing atom indices, residue names, residue + numbers (from selection 1 and selection 2) and each timestep at which the + water bridge was detected. + + Similar to :meth:`~WaterBridgeAnalysis.count_by_type` and + :meth:`~WaterBridgeAnalysis.count_by_time`, the behavior can be adjusted by + supplying an analysis_func. + + Returns + ------- + data : list + + """ + output = None + if analysis_func is None: + analysis_func = self._timesteps_by_type_analysis + output = 'combined' + + if self._network: + result = defaultdict(list) + if self.timesteps is None: + timesteps = range(len(self._network)) + else: + timesteps = self.timesteps + for time, frame in zip(timesteps, self._network): + self._traverse_water_network(frame, [], analysis_func=analysis_func, + output=result, + link_func=self._full_link, time=time, **kwargs) + + result_list = [] + for key, time_list in six.iteritems(result): + for time in time_list: + if output is 'combined': + key = list(key) + key.append(time) + result_list.append(key) else: - sele1 = donor - sele1_index = donor_index - sele1_resnm, sele1_resid, sele1_atom = sele1 - - for (donor_index, acceptor_index, donor, acceptor, - distance, angle) in selection2: - if donor[:2] == water: - sele2 = acceptor - sele2_index = acceptor_index - else: - sele2 = donor - sele2_index = donor_index - sele2_resnm, sele2_resid, sele2_atom = sele2 - - key = (sele1_index, sele2_index) - if key in pairs: - continue - else: - pairs.add(key) - wb_key = (sele1_index, sele2_index, sele1_resnm, - sele1_resid, sele1_atom, sele2_resnm, sele2_resid, - sele2_atom) - wbridges[wb_key] += 1 + result_list.append((key, time)) + return result_list + else: + return None + def generate_table(self, output_format=None): + """Generate a normalised table of the results. + + The table is stored as a :class:`numpy.recarray` in the + attribute :attr:`~WaterBridgeAnalysis.table`. + + The output format of :attr:`~WaterBridgeAnalysis.table` can also be + changed using output_format in a fashion similar to :attr:`WaterBridgeAnalysis.timeseries` + """ + output_format = output_format or self.output_format + if self._network == []: + msg = "No data computed, do run() first." + warnings.warn(msg, category=MissingDataWarning) + logger.warning(msg) + return None + timeseries = self._generate_timeseries(output_format) + + num_records = np.sum([len(hframe) for hframe in timeseries]) # build empty output table - dtype = [ - ("sele1_index", int), ("sele2_index", int), ('sele1_resnm', 'U4'), - ('sele1_resid', int), ('sele1_atom', 'U4'), ('sele2_resnm', 'U4'), - ('sele2_resid', int), ('sele2_atom', 'U4'), - ('frequency', float) - ] - out = np.empty((len(wbridges),), dtype=dtype) - - tsteps = float(len(self.timesteps)) - for cursor, (key, count) in enumerate(six.iteritems(wbridges)): - out[cursor] = key + (count / tsteps,) - - r = out.view(np.recarray) - return r + if output_format == 'sele1_sele2': + dtype = [ + ("time", float), + ("sele1_index", int), ("sele2_index", int), + ("sele1_resnm", "|U4"), ("sele1_resid", int), ("sele1_atom", "|U4"), + ("sele2_resnm", "|U4"), ("sele2_resid", int), ("sele2_atom", "|U4"), + ("distance", float), ("angle", float)] + elif output_format == 'donor_acceptor': + dtype = [ + ("time", float), + ("donor_index", int), ("acceptor_index", int), + ("donor_resnm", "|U4"), ("donor_resid", int), ("donor_atom", "|U4"), + ("acceptor_resnm", "|U4"), ("acceptor_resid", int), ("acceptor_atom", "|U4"), + ("distance", float), ("angle", float)] + + # according to Lukas' notes below, using a recarray at this stage is ineffective + # and speedups of ~x10 can be achieved by filling a standard array, like this: + out = np.empty((num_records,), dtype=dtype) + cursor = 0 # current row + for t, hframe in zip(self.timesteps, timeseries): + for (donor_index, acceptor_index, donor, + acceptor, distance, angle) in hframe: + # donor|acceptor = (resname, resid, atomid) + out[cursor] = (t, donor_index, acceptor_index) + \ + donor + acceptor + (distance, angle) + cursor += 1 + assert cursor == num_records, "Internal Error: Not all wb records stored" + table = out.view(np.recarray) + logger.debug("WBridge: Stored results as table with %(num_records)d entries.", vars()) + self.table = table diff --git a/testsuite/MDAnalysisTests/analysis/test_wbridge.py b/testsuite/MDAnalysisTests/analysis/test_wbridge.py index eae136fc3d7..c613105b53c 100644 --- a/testsuite/MDAnalysisTests/analysis/test_wbridge.py +++ b/testsuite/MDAnalysisTests/analysis/test_wbridge.py @@ -1,9 +1,13 @@ from __future__ import print_function, absolute_import from six import StringIO +from collections import defaultdict -from numpy.testing import assert_equal +from numpy.testing import ( + assert_equal, assert_array_equal,) +import pytest import MDAnalysis +import MDAnalysis.analysis.hbonds from MDAnalysis.analysis.hbonds.wbridge_analysis import WaterBridgeAnalysis def test_import_from_hbonds(): @@ -15,27 +19,84 @@ def test_import_from_hbonds(): "MDAnalysis.analysis.hbonds failed.'") class TestWaterBridgeAnalysis(object): - def test_acceptor_water_accepter(self): - '''Test case where the hydrogen bond acceptor from selection 1 form - water bridge with hydrogen bond acceptor from selection 2''' + @staticmethod + @pytest.fixture(scope='class') + def universe_empty(): + '''A universe with no hydrogen bonds''' grofile = '''Test gro file 5 + 1ALA N 1 0.000 0.000 0.000 + 1ALA H 2 0.100 0.000 0.000 + 2SOL OW 3 3.000 0.000 0.000 + 4ALA H 4 0.500 0.000 0.000 + 4ALA N 5 0.600 0.000 0.000 + 1.0 1.0 1.0''' + u = MDAnalysis.Universe(StringIO(grofile), format='gro') + return u + + @staticmethod + @pytest.fixture(scope='class') + def universe_DA(): + '''A universe with one hydrogen bond acceptor bonding to a hydrogen bond + donor''' + grofile = '''Test gro file +3 + 1ALA N 1 0.000 0.000 0.000 + 1ALA H 2 0.100 0.000 0.000 + 4ALA O 3 0.300 0.000 0.000 + 1.0 1.0 1.0''' + u = MDAnalysis.Universe(StringIO(grofile), format='gro') + return u + + @staticmethod + @pytest.fixture(scope='class') + def universe_DA_PBC(): + '''A universe with one hydrogen bond acceptor bonding to a hydrogen bond + donor but in a PBC condition''' + grofile = '''Test gro file +3 + 1ALA N 1 0.800 0.000 0.000 + 1ALA H 2 0.900 0.000 0.000 + 4ALA O 3 0.100 0.000 0.000 + 1.0 1.0 1.0''' + u = MDAnalysis.Universe(StringIO(grofile), format='gro') + return u + + @staticmethod + @pytest.fixture(scope='class') + def universe_AD(): + '''A universe with one hydrogen bond donor bonding to a hydrogen bond + acceptor''' + grofile = '''Test gro file +3 1ALA O 1 0.000 0.000 0.000 - 2SOL OW 2 0.300 0.000 0.000 - 2SOL HW1 3 0.200 0.000 0.000 - 2SOL HW2 4 0.400 0.000 0.000 + 4ALA H 2 0.200 0.000 0.000 + 4ALA N 3 0.300 0.000 0.000 + 1.0 1.0 1.0''' + u = MDAnalysis.Universe(StringIO(grofile), format='gro') + return u + + @staticmethod + @pytest.fixture(scope='class') + def universe_loop(): + '''A universe with one hydrogen bond acceptor bonding to a water which + bonds back to the first hydrogen bond acceptor and thus form a loop''' + grofile = '''Test gro file +5 + 1ALA O 1 0.000 0.001 0.000 + 2SOL OW 2 0.300 0.001 0.000 + 2SOL HW1 3 0.200 0.002 0.000 + 2SOL HW2 4 0.200 0.000 0.000 4ALA O 5 0.600 0.000 0.000 1.0 1.0 1.0''' u = MDAnalysis.Universe(StringIO(grofile), format='gro') - wb = WaterBridgeAnalysis(u, 'protein and (resid 1)', 'protein and (resid 4)') - wb.run(verbose=False) - timeseries = wb._timeseries - assert_equal(timeseries[0][0][:4], (2, 0, ('SOL', 2, 'HW1'), ('ALA', 1, 'O'))) - assert_equal(timeseries[0][1][:4], (3, 4, ('SOL', 2, 'HW2'), ('ALA', 4, 'O'))) + return u - def test_donor_water_accepter(self): - '''Test case where the hydrogen bond donor from selection 1 form - water bridge with hydrogen bond acceptor from selection 2''' + @staticmethod + @pytest.fixture(scope='class') + def universe_DWA(): + '''A universe with one hydrogen bond donor bonding to a hydrogen bond + acceptor through a water''' grofile = '''Test gro file 5 1ALA N 1 0.000 0.000 0.000 @@ -45,191 +106,620 @@ def test_donor_water_accepter(self): 4ALA O 5 0.600 0.000 0.000 1.0 1.0 1.0''' u = MDAnalysis.Universe(StringIO(grofile), format='gro') - wb = WaterBridgeAnalysis(u, 'protein and (resid 1)', 'protein and (resid 4)') - wb.run(verbose=False) - timeseries = wb._timeseries - assert_equal(timeseries[0][0][:4], (1, 2, ('ALA', 1, 'H'), ('SOL', 2, 'OW'))) - assert_equal(timeseries[0][1][:4], (3, 4, ('SOL', 2, 'HW2'), ('ALA', 4, 'O'))) + return u - def test_acceptor_water_donor(self): - '''Test case where the hydrogen bond acceptor from selection 1 form - water bridge with hydrogen bond donor from selection 2''' + @staticmethod + @pytest.fixture(scope='class') + def universe_DWD(): + '''A universe with one hydrogen bond donor bonding to a hydrogen bond + donor through a water''' grofile = '''Test gro file 5 - 1ALA O 1 0.000 0.000 0.000 - 2SOL OW 2 0.300 0.000 0.000 - 2SOL HW1 3 0.200 0.000 0.000 + 1ALA N 1 0.000 0.000 0.000 + 1ALA H 2 0.100 0.000 0.000 + 2SOL OW 3 0.300 0.000 0.000 4ALA H 4 0.500 0.000 0.000 4ALA N 5 0.600 0.000 0.000 1.0 1.0 1.0''' u = MDAnalysis.Universe(StringIO(grofile), format='gro') - wb = WaterBridgeAnalysis(u, 'protein and (resid 1)', 'protein and (resid 4)') - wb.run(verbose=False) - timeseries = wb._timeseries - assert_equal(timeseries[0][0][:4], (2, 0, ('SOL', 2, 'HW1'), ('ALA', 1, 'O'))) - assert_equal(timeseries[0][1][:4], (3, 1, ('ALA', 4, 'H'), ('SOL', 2, 'OW'))) + return u - def test_donor_water_donor(self): - '''Test case where the hydrogen bond donor from selection 1 form - water bridge with hydrogen bond donor from selection 2''' + @staticmethod + @pytest.fixture(scope='class') + def universe_AWA(): + '''A universe with two hydrogen bond acceptor are joined by a water''' grofile = '''Test gro file 5 - 1ALA N 1 0.000 0.000 0.000 - 1ALA H 2 0.100 0.000 0.000 - 2SOL OW 3 0.300 0.000 0.000 - 4ALA H 4 0.500 0.000 0.000 - 4ALA N 5 0.600 0.000 0.000 + 1ALA O 1 0.000 0.000 0.000 + 2SOL OW 2 0.300 0.000 0.000 + 2SOL HW1 3 0.200 0.000 0.000 + 2SOL HW2 4 0.400 0.000 0.000 + 4ALA O 5 0.600 0.000 0.000 1.0 1.0 1.0''' u = MDAnalysis.Universe(StringIO(grofile), format='gro') - wb = WaterBridgeAnalysis(u, 'protein and (resid 1)', 'protein and (resid 4)') - wb.run(verbose=False) - timeseries = wb._timeseries - assert_equal(timeseries[0][0][:4], (1, 2, ('ALA', 1, 'H'), ('SOL', 2, 'OW'))) - assert_equal(timeseries[0][1][:4], (3, 2, ('ALA', 4, 'H'), ('SOL', 2, 'OW'))) + return u - def test_empty(self): - '''Test case where no water bridge exists''' + @staticmethod + @pytest.fixture(scope='class') + def universe_AWD(): + '''A universe with one hydrogen bond acceptor bonding to a hydrogen + bond donor through a water''' grofile = '''Test gro file 5 - 1ALA N 1 0.000 0.000 0.000 - 1ALA H 2 0.100 0.000 0.000 - 2SOL OW 3 3.000 0.000 0.000 + 1ALA O 1 0.000 0.000 0.000 + 2SOL OW 2 0.300 0.000 0.000 + 2SOL HW1 3 0.200 0.000 0.000 4ALA H 4 0.500 0.000 0.000 4ALA N 5 0.600 0.000 0.000 1.0 1.0 1.0''' u = MDAnalysis.Universe(StringIO(grofile), format='gro') - wb = WaterBridgeAnalysis(u, 'protein', 'protein') + return u + + @staticmethod + @pytest.fixture(scope='class') + def universe_AWWA(): + '''A universe with one hydrogen bond acceptor bonding to a hydrogen bond + acceptor through two waters''' + grofile = '''Test gro file +7 + 1ALA O 1 0.000 0.000 0.000 + 2SOL OW 2 0.300 0.000 0.000 + 2SOL HW1 3 0.200 0.000 0.000 + 2SOL HW2 4 0.400 0.000 0.000 + 3SOL OW 5 0.600 0.000 0.000 + 3SOL HW1 6 0.700 0.000 0.000 + 4ALA O 7 0.900 0.000 0.000 + 1.0 1.0 1.0''' + u = MDAnalysis.Universe(StringIO(grofile), format='gro') + return u + + @staticmethod + @pytest.fixture(scope='class') + def universe_AWWWA(): + '''A universe with one hydrogen bond acceptor bonding to a hydrogen bond + acceptor through three waters''' + grofile = '''Test gro file +9 + 1ALA O 1 0.000 0.000 0.000 + 2SOL OW 2 0.300 0.000 0.000 + 2SOL HW1 3 0.200 0.000 0.000 + 2SOL HW2 4 0.400 0.000 0.000 + 3SOL OW 5 0.600 0.000 0.000 + 3SOL HW1 6 0.700 0.000 0.000 + 4SOL OW 7 0.900 0.000 0.000 + 4SOL HW1 8 1.000 0.000 0.000 + 5ALA O 9 1.200 0.000 0.000 + 10.0 10.0 10.0''' + u = MDAnalysis.Universe(StringIO(grofile), format='gro') + return u + + @staticmethod + @pytest.fixture(scope='class') + def universe_AWWWWA(): + '''A universe with one hydrogen bond acceptor bonding to a hydrogen bond + acceptor through three waters''' + grofile = '''Test gro file +11 + 1ALA O 1 0.000 0.000 0.000 + 2SOL OW 2 0.300 0.000 0.000 + 2SOL HW1 3 0.200 0.000 0.000 + 2SOL HW2 4 0.400 0.000 0.000 + 3SOL OW 5 0.600 0.000 0.000 + 3SOL HW1 6 0.700 0.000 0.000 + 4SOL OW 7 0.900 0.000 0.000 + 4SOL HW1 8 1.000 0.000 0.000 + 5SOL OW 9 1.200 0.000 0.000 + 5SOL HW1 10 1.300 0.000 0.000 + 6ALA O 11 1.400 0.000 0.000 + 10.0 10.0 10.0''' + u = MDAnalysis.Universe(StringIO(grofile), format='gro') + return u + + @staticmethod + @pytest.fixture(scope='class') + def universe_branch(): + '''A universe with one hydrogen bond acceptor bonding to two hydrogen + bond acceptor in selection 2''' + grofile = '''Test gro file +9 + 1ALA O 1 0.000 0.000 0.000 + 2SOL OW 2 0.300 0.000 0.000 + 2SOL HW1 3 0.200 0.000 0.000 + 2SOL HW2 4 0.400 0.000 0.000 + 3SOL OW 5 0.600 0.000 0.000 + 3SOL HW1 6 0.700 0.000 0.000 + 3SOL HW2 7 0.600 0.100 0.000 + 4ALA O 8 0.900 0.000 0.000 + 5ALA O 9 0.600 0.300 0.000 + 1.0 1.0 1.0''' + u = MDAnalysis.Universe(StringIO(grofile), format='gro') + return u + + @staticmethod + @pytest.fixture(scope='class') + def universe_AWA_AWWA(): + '''A universe with one hydrogen bond acceptors are bonded through one or + two water''' + grofile = '''Test gro file +12 + 1ALA O 1 0.000 0.000 0.000 + 2SOL OW 2 0.300 0.000 0.000 + 2SOL HW1 3 0.200 0.000 0.000 + 2SOL HW2 4 0.400 0.000 0.000 + 4ALA O 5 0.600 0.000 0.000 + 5ALA O 6 0.000 1.000 0.000 + 6SOL OW 7 0.300 1.000 0.000 + 6SOL HW1 8 0.200 1.000 0.000 + 6SOL HW2 9 0.400 1.000 0.000 + 7SOL OW 10 0.600 1.000 0.000 + 7SOL HW1 11 0.700 1.000 0.000 + 8ALA O 12 0.900 1.000 0.000 + 1.0 1.0 1.0''' + u = MDAnalysis.Universe(StringIO(grofile), format='gro') + return u + + @staticmethod + @pytest.fixture(scope='class') + def wb_multiframe(): + '''A water bridge object with multipley frames''' + grofile = '''Test gro file + 13 + 1ALA O 1 0.000 0.000 0.000 + 1ALA H 2 0.000 0.000 0.000 + 2SOL OW 3 0.300 0.000 0.000 + 2SOL HW1 4 0.200 0.000 0.000 + 2SOL HW2 5 0.400 0.000 0.000 + 3SOL OW 6 0.600 0.000 0.000 + 3SOL HW1 7 0.700 0.000 0.000 + 4SOL OW 8 0.900 0.000 0.000 + 4SOL HW1 9 1.000 0.000 0.000 + 5SOL OW 10 1.200 0.000 0.000 + 5SOL HW1 11 1.300 0.000 0.000 + 6ALA H 12 1.400 0.000 0.000 + 6ALA O 13 1.400 0.000 0.000 + 10.0 10.0 10.0''' + u = MDAnalysis.Universe(StringIO(grofile), format='gro') + wb = WaterBridgeAnalysis(u, 'protein and (resid 1)', 'protein and (resid 4)', + order=4) + # Build an dummy WaterBridgeAnalysis object for testing + wb._network = [] + wb._network.append({(1, 0, 12, None, 2.0, 180.0): None}) + wb._network.append({(0, None, 12, 13, 2.0, 180.0): None}) + wb._network.append({(1, 0, 3, None, 2.0, 180.0): + {(4, 2, 12, None, 2.0, 180.0): None}}) + wb._network.append({(0, None, 3, 2, 2.0, 180.0): + {(4, 2, 5, None, 2.0, 180.0): + {(5, None, 11, 12, 2.0, 180.0): None}}}) + wb.timesteps = range(len(wb._network)) + return wb + + def test_nodata(self, universe_DA): + '''Test if the funtions can run when there is no data. + This is achieved by not runing the run() first.''' + wb = WaterBridgeAnalysis(universe_DA, 'protein and (resid 1)', + 'protein and (resid 4)', order=0) + wb.generate_table() + assert_equal(wb.timesteps_by_type(), None) + assert_equal(wb.count_by_time(), None) + assert_equal(wb.count_by_type(), None) + + def test_selection_type_error(self, universe_DA): + '''Test the case when the wrong selection1_type is given''' + try: + wb = WaterBridgeAnalysis(universe_DA, 'protein and (resid 1)', + 'protein and (resid 4)', order=0, selection1_type='aaa') + except ValueError: + pass + else: + raise pytest.fail("selection_type aaa should rasie error") + + def test_empty_selection(self, universe_DA): + '''Test the case when selection yields empty result''' + wb = WaterBridgeAnalysis(universe_DA, 'protein and (resid 9)', + 'protein and (resid 10)', order=0) + wb.run() + assert wb._network == [{}] + + def test_loop(self, universe_loop): + '''Test if loop can be handled correctly''' + wb = WaterBridgeAnalysis(universe_loop, 'protein and (resid 1)', + 'protein and (resid 1 or resid 4)') + wb.run() + assert_equal(len(wb._network[0].keys()), 2) + + def test_donor_accepter(self, universe_DA): + '''Test zeroth order donor to acceptor hydrogen bonding''' + wb = WaterBridgeAnalysis(universe_DA, 'protein and (resid 1)', + 'protein and (resid 4)', order=0, update_selection=True, debug=True) + wb.run(verbose=False) + network = wb._network[0] + assert_equal(list(network.keys())[0][:4], (1, 0, 2, None)) + + def test_donor_accepter_pbc(self, universe_DA_PBC): + '''Test zeroth order donor to acceptor hydrogen bonding in PBC conditions''' + wb = WaterBridgeAnalysis(universe_DA_PBC, 'protein and (resid 1)', + 'protein and (resid 4)', order=0, pbc=True) wb.run(verbose=False) - assert_equal(wb._timeseries, [[]]) + network = wb._network[0] + assert_equal(list(network.keys())[0][:4], (1, 0, 2, None)) - def test_same_selection(self): + def test_accepter_donor(self, universe_AD): + '''Test zeroth order acceptor to donor hydrogen bonding''' + wb = WaterBridgeAnalysis(universe_AD, 'protein and (resid 1)', + 'protein and (resid 4)', order=0) + wb.run(verbose=False) + network = wb._network[0] + assert_equal(list(network.keys())[0][:4], (0, None, 1, 2)) + + def test_acceptor_water_accepter(self, universe_AWA): + '''Test case where the hydrogen bond acceptor from selection 1 form + water bridge with hydrogen bond acceptor from selection 2''' + wb = WaterBridgeAnalysis(universe_AWA, 'protein and (resid 1)', + 'protein and (resid 4)') + wb.run(verbose=False) + network = wb._network[0] + assert_equal(list(network.keys())[0][:4], (0, None, 2, 1)) + second = network[list(network.keys())[0]] + assert_equal(list(second.keys())[0][:4], (3, 1, 4, None)) + assert_equal(second[list(second.keys())[0]], None) + + def test_donor_water_accepter(self, universe_DWA): + '''Test case where the hydrogen bond donor from selection 1 form + water bridge with hydrogen bond acceptor from selection 2''' + wb = WaterBridgeAnalysis(universe_DWA, 'protein and (resid 1)', + 'protein and (resid 4)') + wb.run(verbose=False) + network = wb._network[0] + assert_equal(list(network.keys())[0][:4], (1, 0, 2, None)) + second = network[list(network.keys())[0]] + assert_equal(list(second.keys())[0][:4], (3, 2, 4, None)) + assert_equal(second[list(second.keys())[0]], None) + + def test_acceptor_water_donor(self, universe_AWD): + '''Test case where the hydrogen bond acceptor from selection 1 form + water bridge with hydrogen bond donor from selection 2''' + wb = WaterBridgeAnalysis(universe_AWD, 'protein and (resid 1)', + 'protein and (resid 4)') + wb.run(verbose=False) + network = wb._network[0] + assert_equal(list(network.keys())[0][:4], (0, None, 2, 1)) + second = network[list(network.keys())[0]] + assert_equal(list(second.keys())[0][:4], (1, None, 3, 4)) + assert_equal(second[list(second.keys())[0]], None) + + def test_donor_water_donor(self, universe_DWD): + '''Test case where the hydrogen bond donor from selection 1 form + water bridge with hydrogen bond donor from selection 2''' + wb = WaterBridgeAnalysis(universe_DWD, 'protein and (resid 1)', + 'protein and (resid 4)') + wb.run(verbose=False) + network = wb._network[0] + assert_equal(list(network.keys())[0][:4], (1, 0, 2, None)) + second = network[list(network.keys())[0]] + assert_equal(list(second.keys())[0][:4], (2, None, 3, 4)) + assert_equal(second[list(second.keys())[0]], None) + + def test_empty(self, universe_empty): + '''Test case where no water bridge exists''' + wb = WaterBridgeAnalysis(universe_empty, 'protein', 'protein') + wb.run(verbose=False) + assert_equal(wb._network[0], defaultdict(dict)) + + def test_same_selection(self, universe_DWA): ''' This test tests that if the selection 1 and selection 2 are both protein. However, the protein only forms one hydrogen bond with the water. This entry won't be included. - :return: ''' - grofile = '''Test gro file -3 - 1ALA N 1 0.000 0.000 0.000 - 1ALA H 2 0.100 0.000 0.000 - 2SOL OW 3 0.300 0.000 0.000 - 1.0 1.0 1.0''' - u = MDAnalysis.Universe(StringIO(grofile), format='gro') - wb = WaterBridgeAnalysis(u, 'protein', 'protein') + wb = WaterBridgeAnalysis(universe_DWA, 'protein and resid 1', + 'protein and resid 1') + wb.run(verbose=False) + assert_equal(wb._network[0], defaultdict(dict)) + + def test_acceptor_2water_accepter(self, universe_AWWA): + '''Test case where the hydrogen bond acceptor from selection 1 form second order + water bridge with hydrogen bond acceptor from selection 2''' + # test first order + wb = WaterBridgeAnalysis(universe_AWWA, 'protein and (resid 1)', + 'protein and (resid 4)') + wb.run(verbose=False) + assert_equal(wb._network[0], defaultdict(dict)) + # test second order + wb = WaterBridgeAnalysis(universe_AWWA, 'protein and (resid 1)', + 'protein and (resid 4)', order=2) + wb.run(verbose=False) + network = wb._network[0] + assert_equal(list(network.keys())[0][:4], (0, None, 2, 1)) + second = network[list(network.keys())[0]] + assert_equal(list(second.keys())[0][:4], (3, 1, 4, None)) + third = second[list(second.keys())[0]] + assert_equal(list(third.keys())[0][:4], (5, 4, 6, None)) + assert_equal(third[list(third.keys())[0]], None) + # test third order + wb = WaterBridgeAnalysis(universe_AWWA, 'protein and (resid 1)', + 'protein and (resid 4)', order=3) + wb.run(verbose=False) + network = wb._network[0] + assert_equal(list(network.keys())[0][:4], (0, None, 2, 1)) + second = network[list(network.keys())[0]] + assert_equal(list(second.keys())[0][:4], (3, 1, 4, None)) + third = second[list(second.keys())[0]] + assert_equal(list(third.keys())[0][:4], (5, 4, 6, None)) + assert_equal(third[list(third.keys())[0]], None) + + def test_acceptor_3water_accepter(self, universe_AWWWA): + '''Test case where the hydrogen bond acceptor from selection 1 form third order + water bridge with hydrogen bond acceptor from selection 2''' + wb = WaterBridgeAnalysis(universe_AWWWA, 'protein and (resid 1)', + 'protein and (resid 5)', order=2) + wb.run(verbose=False) + assert_equal(wb._network[0], defaultdict(dict)) + + wb = WaterBridgeAnalysis(universe_AWWWA, 'protein and (resid 1)', + 'protein and (resid 5)', order=3) + wb.run(verbose=False) + network = wb._network[0] + assert_equal(list(network.keys())[0][:4], (0, None, 2, 1)) + second = network[list(network.keys())[0]] + assert_equal(list(second.keys())[0][:4], (3, 1, 4, None)) + third = second[list(second.keys())[0]] + assert_equal(list(third.keys())[0][:4], (5, 4, 6, None)) + fourth = third[list(third.keys())[0]] + assert_equal(list(fourth.keys())[0][:4], (7, 6, 8, None)) + assert_equal(fourth[list(fourth.keys())[0]], None) + + wb = WaterBridgeAnalysis(universe_AWWWA, 'protein and (resid 1)', + 'protein and (resid 5)', order=4) + wb.run(verbose=False) + network = wb._network[0] + assert_equal(list(network.keys())[0][:4], (0, None, 2, 1)) + second = network[list(network.keys())[0]] + assert_equal(list(second.keys())[0][:4], (3, 1, 4, None)) + third = second[list(second.keys())[0]] + assert_equal(list(third.keys())[0][:4], (5, 4, 6, None)) + fourth = third[list(third.keys())[0]] + assert_equal(list(fourth.keys())[0][:4], (7, 6, 8, None)) + assert_equal(fourth[list(fourth.keys())[0]], None) + + def test_acceptor_4water_accepter(self, universe_AWWWWA): + '''Test case where the hydrogen bond acceptor from selection 1 form fourth order + water bridge with hydrogen bond acceptor from selection 2''' + wb = WaterBridgeAnalysis(universe_AWWWWA, 'protein and (resid 1)', + 'protein and (resid 6)', order=3) + wb.run(verbose=False) + assert_equal(wb._network[0], defaultdict(dict)) + + wb = WaterBridgeAnalysis(universe_AWWWWA, 'protein and (resid 1)', + 'protein and (resid 6)', order=4) wb.run(verbose=False) - assert_equal(wb._timeseries, [[]]) + network = wb._network[0] + assert_equal(list(network.keys())[0][:4], (0, None, 2, 1)) + second = network[list(network.keys())[0]] + assert_equal(list(second.keys())[0][:4], (3, 1, 4, None)) + third = second[list(second.keys())[0]] + assert_equal(list(third.keys())[0][:4], (5, 4, 6, None)) + fourth = third[list(third.keys())[0]] + assert_equal(list(fourth.keys())[0][:4], (7, 6, 8, None)) + fifth = fourth[list(fourth.keys())[0]] + assert_equal(list(fifth.keys())[0][:4], (9, 8, 10, None)) + assert_equal(fifth[list(fifth.keys())[0]], None) - def test_water_network(self): + wb = WaterBridgeAnalysis(universe_AWWWWA, 'protein and (resid 1)', + 'protein and (resid 6)', order=5) + wb.run(verbose=False) + network = wb._network[0] + assert_equal(list(network.keys())[0][:4], (0, None, 2, 1)) + second = network[list(network.keys())[0]] + assert_equal(list(second.keys())[0][:4], (3, 1, 4, None)) + third = second[list(second.keys())[0]] + assert_equal(list(third.keys())[0][:4], (5, 4, 6, None)) + fourth = third[list(third.keys())[0]] + assert_equal(list(fourth.keys())[0][:4], (7, 6, 8, None)) + fifth = fourth[list(fourth.keys())[0]] + assert_equal(list(fifth.keys())[0][:4], (9, 8, 10, None)) + assert_equal(fifth[list(fifth.keys())[0]], None) + + def test_acceptor_22water_accepter(self, universe_branch): + '''Test case where the hydrogen bond acceptor from selection 1 form a second order + water bridge with hydrogen bond acceptor from selection 2 + and the last water is linked to two residues in selection 2''' + wb = WaterBridgeAnalysis(universe_branch, 'protein and (resid 1)', + 'protein and (resid 4 or resid 5)', order=2) + wb.run(verbose=False) + network = wb._network[0] + assert_equal(list(network.keys())[0][:4], (0, None, 2, 1)) + second = network[list(network.keys())[0]] + assert_equal(list(second.keys())[0][:4], (3, 1, 4, None)) + third = second[list(second.keys())[0]] + assert_equal([(5, 4, 7, None), (6, 4, 8, None)], + sorted([key[:4] for key in list(third.keys())])) + + def test_timeseries_wba(self, universe_branch): + '''Test if the time series data is correctly generated in water bridge analysis format''' + wb = WaterBridgeAnalysis(universe_branch, 'protein and (resid 1)', + 'protein and (resid 4 or resid 5)', order=2) + wb.output_format = 'sele1_sele2' + wb.run(verbose=False) + timeseries = sorted(wb.timeseries[0]) + + assert_equal(timeseries[0][:4], (0, 2, ('ALA', 1, 'O'), ('SOL', 2, 'HW1'))) + assert_equal(timeseries[1][:4], (3, 4, ('SOL', 2, 'HW2'), ('SOL', 3, 'OW'))) + assert_equal(timeseries[2][:4], (5, 7, ('SOL', 3, 'HW1'), ('ALA', 4, 'O'))) + assert_equal(timeseries[3][:4], (6, 8, ('SOL', 3, 'HW2'), ('ALA', 5, 'O'))) + + def test_timeseries_hba(self, universe_branch): + '''Test if the time series data is correctly generated in hydrogen bond analysis format''' + wb = WaterBridgeAnalysis(universe_branch, 'protein and (resid 1)', + 'protein and (resid 4 or resid 5)', order=2) + wb.output_format = 'donor_acceptor' + wb.run(verbose=False) + timeseries = sorted(wb.timeseries[0]) + + assert_equal(timeseries[0][:4], (2, 0, ('SOL', 2, 'HW1'), ('ALA', 1, 'O'))) + assert_equal(timeseries[1][:4], (3, 4, ('SOL', 2, 'HW2'), ('SOL', 3, 'OW'))) + assert_equal(timeseries[2][:4], (5, 7, ('SOL', 3, 'HW1'), ('ALA', 4, 'O'))) + assert_equal(timeseries[3][:4], (6, 8, ('SOL', 3, 'HW2'), ('ALA', 5, 'O'))) + + def test_acceptor_12water_accepter(self, universe_AWA_AWWA): + '''Test of independent first order and second can be recognised correctely''' + wb = WaterBridgeAnalysis(universe_AWA_AWWA, 'protein and (resid 1 or resid 5)', + 'protein and (resid 4 or resid 8)', order=1) + wb.run(verbose=False) + network = wb._network[0] + assert_equal(list(network.keys())[0][:4], (0, None, 2, 1)) + second = network[list(network.keys())[0]] + assert_equal(list(second.keys())[0][:4], (3, 1, 4, None)) + assert_equal(second[list(second.keys())[0]], None) + network = wb._network[0] + wb = WaterBridgeAnalysis(universe_AWA_AWWA, 'protein and (resid 1 or resid 5)', + 'protein and (resid 4 or resid 8)', order=2) + wb.run(verbose=False) + network = wb._network[0] + assert_equal([(0, None, 2, 1), (5, None, 7, 6)], + sorted([key[:4] for key in list(network.keys())])) + + def test_count_by_type_single_link(self, universe_DWA): ''' - This test tests if the internal water object is generated correctly. - :return: + This test tests the simplest water bridge to see if count_by_type() works. ''' - grofile = '''Test gro file -5 - 1ALA N 1 0.000 0.000 0.000 - 1ALA H 2 0.100 0.000 0.000 - 2SOL OW 3 0.300 0.000 0.000 - 2SOL HW2 4 0.400 0.000 0.000 - 4ALA O 5 0.600 0.000 0.000 - 1.0 1.0 1.0''' - u = MDAnalysis.Universe(StringIO(grofile), format='gro') - wb = WaterBridgeAnalysis(u, 'protein and (resid 1)', 'protein and (resid 4)') + wb = WaterBridgeAnalysis(universe_DWA, 'protein and (resid 1)', + 'protein and (resid 4)') wb.run(verbose=False) - water_network = wb._water_network[0] - assert_equal(list(water_network.keys()), [('SOL', 2)]) - values = list(water_network.values()) - assert_equal(list(values[0][0])[0][:4], (1, 2, ('ALA', 1, 'H'), ('SOL', 2, 'OW'))) - assert_equal(list(values[0][1])[0][:4], (3, 4, ('SOL', 2, 'HW2'), ('ALA', 4, 'O'))) + assert_equal(wb.count_by_type(), [(1, 4, 'ALA', 1, 'H', 'ALA', 4, 'O', 1.)]) - def test_count_by_type_single_link(self): + def test_count_by_type_multiple_link(self, universe_AWA_AWWA): ''' - This test tests the simplest water bridge to see if count_by_type() works. - :return: + This test tests if count_by_type() can give the correct result for more than 1 links. ''' - grofile = '''Test gro file -5 - 1ALA N 1 0.000 0.000 0.000 - 1ALA H 2 0.100 0.000 0.000 - 2SOL OW 3 0.300 0.000 0.000 - 2SOL HW2 4 0.400 0.000 0.000 - 4ALA O 5 0.600 0.000 0.000 - 1.0 1.0 1.0''' - u = MDAnalysis.Universe(StringIO(grofile), format='gro') - wb = WaterBridgeAnalysis(u, 'protein and (resid 1)', 'protein and (resid 4)') + wb = WaterBridgeAnalysis(universe_AWA_AWWA, 'protein and (resid 1 or resid 5)', + 'protein and (resid 4 or resid 8)', order=2) wb.run(verbose=False) - assert_equal(wb.count_by_type().tolist(), [(1, 4, 'ALA', 1, 'H', 'ALA', 4, 'O', 1.)]) + assert_equal(sorted(wb.count_by_type()), + [[0, 4, 'ALA', 1, 'O', 'ALA', 4, 'O', 1.0], + [5, 11, 'ALA', 5, 'O', 'ALA', 8, 'O', 1.0]]) - def test_count_by_type_multiple_link(self): + + def test_count_by_type_multiple_frame(self, wb_multiframe): ''' - This test tests if count_by_type() can assemble linkage from water network. + This test tests if count_by_type() works in multiply situations. :return: ''' - grofile = '''Test gro file -5 - 1ALA N 1 0.000 0.000 0.000 - 1ALA H 2 0.100 0.000 0.000 - 2SOL OW 3 0.300 0.000 0.000 - 2SOL HW2 4 0.400 0.000 0.000 - 4ALA O 5 0.600 0.000 0.000 - 1.0 1.0 1.0''' - u = MDAnalysis.Universe(StringIO(grofile), format='gro') - wb = WaterBridgeAnalysis(u, 'protein and (resid 1)', 'protein and (resid 4)') - # Build an dummy WaterBridgeAnalysis object for testing - wb._timeseries = True - wb.timesteps = [0] - wb._water_network = [{('SOL', 2): [{(2, 0, ('SOL', 2, 'HW1'), ('ALA', 1, 'O'), 2.0, 179.99998), - (1, 2, ('ALA', 1, 'H'), ('SOL', 2, 'OW'), 2.0, 179.99998)}, - {(3, 4, ('SOL', 2, 'HW2'), ('ALA', 4, 'O'), 2.0, 179.99998), - (3, 2, ('ALA', 4, 'H'), ('SOL', 2, 'OW'), 2.0, 179.99998)}]}] - result = [(0, 3, 'ALA', 1, 'O', 'ALA', 4, 'H', 1.0), - (0, 4, 'ALA', 1, 'O', 'ALA', 4, 'O', 1.0), - (1, 3, 'ALA', 1, 'H', 'ALA', 4, 'H', 1.0), - (1, 4, 'ALA', 1, 'H', 'ALA', 4, 'O', 1.0)] - assert_equal(sorted(wb.count_by_type().tolist()), result) - - def test_count_by_type_multiple_frame(self): + result = [[0, 11, 'ALA', 1, 'O', 'ALA', 6, 'H', 0.25], + [0, 12, 'ALA', 1, 'O', 'ALA', 6, 'O', 0.25], + [1, 12, 'ALA', 1, 'H', 'ALA', 6, 'O', 0.5]] + + assert_equal(sorted(wb_multiframe.count_by_type()), result) + + def test_count_by_type_filter(self, wb_multiframe): ''' - This test tests if count_by_type() works in multiply situations. + This test tests if modifying analysis_func + allows some results to be filtered out in count_by_type(). :return: ''' - grofile = '''Test gro file -5 - 1ALA N 1 0.000 0.000 0.000 - 1ALA H 2 0.100 0.000 0.000 - 2SOL OW 3 0.300 0.000 0.000 - 2SOL HW2 4 0.400 0.000 0.000 - 4ALA O 5 0.600 0.000 0.000 - 1.0 1.0 1.0''' - u = MDAnalysis.Universe(StringIO(grofile), format='gro') - wb = WaterBridgeAnalysis(u, 'protein and (resid 1)', 'protein and (resid 4)') - # Build an dummy WaterBridgeAnalysis object for testing - wb._timeseries = True - wb.timesteps = [0, 1, 2, 3, 4, 5] - wb._water_network = [# a 2 * 2 water netwrok consists of all four links - {('SOL', 2): [{(2, 0, ('SOL', 2, 'HW1'), ('ALA', 1, 'O'), 2.0, 179.99998), - (1, 2, ('ALA', 1, 'H'), ('SOL', 2, 'OW'), 2.0, 179.99998)}, - {(3, 4, ('SOL', 2, 'HW2'), ('ALA', 4, 'O'), 2.0, 179.99998), - (3, 2, ('ALA', 4, 'H'), ('SOL', 2, 'OW'), 2.0, 179.99998)}]}, - # a repeat - {('SOL', 2): [{(2, 0, ('SOL', 2, 'HW1'), ('ALA', 1, 'O'), 2.0, 179.99998), - (1, 2, ('ALA', 1, 'H'), ('SOL', 2, 'OW'), 2.0, 179.99998)}, - {(3, 4, ('SOL', 2, 'HW2'), ('ALA', 4, 'O'), 2.0, 179.99998), - (3, 2, ('ALA', 4, 'H'), ('SOL', 2, 'OW'), 2.0, 179.99998)}]}, - # single link 1 - {('SOL', 2): [{(2, 0, ('SOL', 2, 'HW1'), ('ALA', 1, 'O'), 2.0, 179.99998)}, - {(3, 4, ('SOL', 2, 'HW2'), ('ALA', 4, 'O'), 2.0, 179.99998)}]}, - # single link 2 - {('SOL', 2): [{(2, 0, ('SOL', 2, 'HW1'), ('ALA', 1, 'O'), 2.0, 179.99998)}, - {(3, 1, ('ALA', 4, 'H'), ('SOL', 2, 'OW'), 2.0, 179.99998)}]}, - # single link 3 - {('SOL', 2): [{(1, 2, ('ALA', 1, 'H'), ('SOL', 2, 'OW'), 2.0, 179.99998)}, - {(3, 4, ('SOL', 2, 'HW2'), ('ALA', 4, 'O'), 2.0, 179.99998)}]}, - # single link 4 - {('SOL', 2): [{(1, 2, ('ALA', 1, 'H'), ('SOL', 2, 'OW'), 2.0, 179.99998)}, - {(3, 2, ('ALA', 4, 'H'), ('SOL', 2, 'OW'), 2.0, 179.99998)}]}] - result = [(0, 3, 'ALA', 1, 'O', 'ALA', 4, 'H', 0.5), - (0, 4, 'ALA', 1, 'O', 'ALA', 4, 'O', 0.5), - (1, 3, 'ALA', 1, 'H', 'ALA', 4, 'H', 0.5), - (1, 4, 'ALA', 1, 'H', 'ALA', 4, 'O', 0.5)] - assert_equal(sorted(wb.count_by_type().tolist()), result) + def analysis(current, output, u): + sele1_index, sele1_heavy_index, atom2, heavy_atom2, dist, angle = current[0] + atom1, heavy_atom1, sele2_index, sele2_heavy_index, dist, angle = current[-1] + sele1 = u.atoms[sele1_index] + sele2 = u.atoms[sele2_index] + (s1_resname, s1_resid, s1_name) = (sele1.resname, sele1.resid, sele1.name) + (s2_resname, s2_resid, s2_name) = (sele2.resname, sele2.resid, sele2.name) + + key = (sele1_index, sele2_index, s1_resname, s1_resid, s1_name, s2_resname, s2_resid, s2_name) + if s2_name == 'H': + output[key] += 1 + result = [((0, 11, 'ALA', 1, 'O', 'ALA', 6, 'H'), 0.25)] + assert_equal(sorted(wb_multiframe.count_by_type(analysis_func=analysis)), result) + + def test_count_by_type_merge(self, wb_multiframe): + ''' + This test tests if modifying analysis_func + allows some same residue to be merged in count_by_type(). + ''' + def analysis(current, output, u): + sele1_index, sele1_heavy_index, atom2, heavy_atom2, dist, angle = current[0] + atom1, heavy_atom1, sele2_index, sele2_heavy_index, dist, angle = current[-1] + sele1 = u.atoms[sele1_index] + sele2 = u.atoms[sele2_index] + (s1_resname, s1_resid, s1_name) = (sele1.resname, sele1.resid, sele1.name) + (s2_resname, s2_resid, s2_name) = (sele2.resname, sele2.resid, sele2.name) + + key = (s1_resname, s1_resid, s2_resname, s2_resid) + output[key] = 1 + result = [(('ALA', 1, 'ALA', 6), 1.0)] + assert_equal(sorted(wb_multiframe.count_by_type(analysis_func=analysis)), result) + + def test_count_by_type_order(self, wb_multiframe): + ''' + This test tests if modifying analysis_func + allows the order of water bridge to be separated in count_by_type(). + :return: + ''' + def analysis(current, output, u): + sele1_index, sele1_heavy_index, atom2, heavy_atom2, dist, angle = current[0] + atom1, heavy_atom1, sele2_index, sele2_heavy_index, dist, angle = current[-1] + sele1 = u.atoms[sele1_index] + sele2 = u.atoms[sele2_index] + (s1_resname, s1_resid, s1_name) = (sele1.resname, sele1.resid, sele1.name) + (s2_resname, s2_resid, s2_name) = (sele2.resname, sele2.resid, sele2.name) + key = (s1_resname, s1_resid, s2_resname, s2_resid, len(current)-1) + output[key] = 1 + result = [(('ALA', 1, 'ALA', 6, 0), 0.5), + (('ALA', 1, 'ALA', 6, 1), 0.25), + (('ALA', 1, 'ALA', 6, 2), 0.25)] + assert_equal(sorted(wb_multiframe.count_by_type(analysis_func=analysis)), result) + + def test_count_by_time(self, wb_multiframe): + ''' + This test tests if count_by_times() works. + :return: + ''' + assert_equal(wb_multiframe.count_by_time(), [(0, 1), (1, 1), (2, 1), (3, 1)]) + + + def test_count_by_time_weight(self, universe_AWA_AWWA): + ''' + This test tests if modyfing the analysis_func allows the weight to be changed + in count_by_type(). + :return: + ''' + wb = WaterBridgeAnalysis(universe_AWA_AWWA, 'protein and (resid 1 or resid 5)', + 'protein and (resid 4 or resid 8)', order=2) + wb.run(verbose=False) + def analysis(current, output, u): + sele1_index, sele1_heavy_index, atom2, heavy_atom2, dist, angle = current[0] + atom1, heavy_atom1, sele2_index, sele2_heavy_index, dist, angle = current[-1] + sele1 = u.atoms[sele1_index] + sele2 = u.atoms[sele2_index] + (s1_resname, s1_resid, s1_name) = (sele1.resname, sele1.resid, sele1.name) + (s2_resname, s2_resid, s2_name) = (sele2.resname, sele2.resid, sele2.name) + key = (s1_resname, s1_resid, s2_resname, s2_resid) + output[key] += len(current)-1 + assert_equal(wb.count_by_time(analysis_func=analysis), [(0,3), ]) + + def test_count_by_time_empty(self, universe_AWA_AWWA): + ''' + See if count_by_time() can handle zero well. + :return: + ''' + wb = WaterBridgeAnalysis(universe_AWA_AWWA, 'protein and (resid 1 or resid 5)', + 'protein and (resid 4 or resid 8)', order=2) + wb.run(verbose=False) + def analysis(current, output, u): + pass + assert_equal(wb.count_by_time(analysis_func=analysis), [(0,0), ]) + + def test_generate_table_hba(self, wb_multiframe): + '''Test generate table using hydrogen bond analysis format''' + wb_multiframe.generate_table(output_format='donor_acceptor') + assert_array_equal(sorted(wb_multiframe.table.donor_resid), [1, 1, 2, 2, 2, 6, 6]) + + def test_generate_table_s1s2(self, wb_multiframe): + '''Test generate table using hydrogen bond analysis format''' + wb_multiframe.generate_table(output_format='sele1_sele2') + assert_array_equal(sorted(wb_multiframe.table.sele1_resid), [1, 1, 1, 1, 2, 2, 3]) + + def test_timesteps_by_type(self, wb_multiframe): + '''Test the timesteps_by_type function''' + + timesteps = sorted(wb_multiframe.timesteps_by_type()) + assert_array_equal(timesteps[3], [1, 12, 'ALA', 1, 'H', 'ALA', 6, 'O', 0, 2]) diff --git a/testsuite/MDAnalysisTests/topology/test_gsd.py b/testsuite/MDAnalysisTests/topology/test_gsd.py index c11fa87aa9e..3c25462698c 100644 --- a/testsuite/MDAnalysisTests/topology/test_gsd.py +++ b/testsuite/MDAnalysisTests/topology/test_gsd.py @@ -49,7 +49,6 @@ def test_attr_size(self, top): assert len(top.resids) == top.n_residues assert len(top.resnames) == top.n_residues - @pytest.mark.skipif(os.name == 'nt', reason="gsd not windows compatible") class TestGSDParserBonds(ParserBase):