From eb2dfea54c6832c8906926d6a294aad16882e8b2 Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Sat, 26 Oct 2019 15:01:23 +0100 Subject: [PATCH 1/6] New implementation of the HydrogenBondAnalysis class. The class itself and the run() method have both been simplified in comparison to the original implementation. The detection of hydrogen bonds has been optimised using the MDAnalysis.lib.distances.capped_distance() method. For loops have been replaced by vectorised numpy code. The data from run() is stored in a numpy.ndarray in the 'tidy data' format suggested in Issue MDAnalysis#2177. --- package/MDAnalysis/analysis/__init__.py | 1 + .../analysis/hydrogenbonds/__init__.py | 29 + .../analysis/hydrogenbonds/hbond_analysis.py | 559 ++++++++++++++++++ 3 files changed, 589 insertions(+) create mode 100644 package/MDAnalysis/analysis/hydrogenbonds/__init__.py create mode 100644 package/MDAnalysis/analysis/hydrogenbonds/hbond_analysis.py diff --git a/package/MDAnalysis/analysis/__init__.py b/package/MDAnalysis/analysis/__init__.py index 72255569765..1a0883882ea 100644 --- a/package/MDAnalysis/analysis/__init__.py +++ b/package/MDAnalysis/analysis/__init__.py @@ -118,6 +118,7 @@ 'distances', 'gnm', 'hbonds', + 'hydrogenbonds', 'helanal', 'hole', 'leaflet', diff --git a/package/MDAnalysis/analysis/hydrogenbonds/__init__.py b/package/MDAnalysis/analysis/hydrogenbonds/__init__.py new file mode 100644 index 00000000000..e77893e5d2e --- /dev/null +++ b/package/MDAnalysis/analysis/hydrogenbonds/__init__.py @@ -0,0 +1,29 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 +# +# MDAnalysis --- https://www.mdanalysis.org +# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, +# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. +# MDAnalysis: A Python package for the rapid analysis of molecular dynamics +# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th +# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. +# doi: 10.25080/majora-629e541a-00e +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 +# +from __future__ import absolute_import + +__all__ = [ + 'HydrogenBondAnalysis' +] + +from .hbond_analysis import HydrogenBondAnalysis diff --git a/package/MDAnalysis/analysis/hydrogenbonds/hbond_analysis.py b/package/MDAnalysis/analysis/hydrogenbonds/hbond_analysis.py new file mode 100644 index 00000000000..2e5afe4d622 --- /dev/null +++ b/package/MDAnalysis/analysis/hydrogenbonds/hbond_analysis.py @@ -0,0 +1,559 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 +# +# MDAnalysis --- https://www.mdanalysis.org +# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, +# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. +# MDAnalysis: A Python package for the rapid analysis of molecular dynamics +# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th +# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. +# doi: 10.25080/majora-629e541a-00e +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 +# + +"""Hydrogen Bond Analysis --- :mod:`MDAnalysis.analysis.hydrogenbonds.hbond_analysis` +===================================================================================== + +:Author: Paul Smith +:Year: 2019 +:Copyright: GNU Public License v3 + +This module provides methods to find and analyse hydrogen bonds in a Universe. + +The :class:`HydrogenBondAnalysis` class is a new version of the original +:class:`MDAnalysis.analysis.hbonds.HydrogenBondAnalysis` class from the module +:mod:`MDAnalysis.analysis.hbonds.hbond_analysis`, which itself was modeled after the `VMD +HBONDS plugin`_. + +.. _`VMD HBONDS plugin`: http://www.ks.uiuc.edu/Research/vmd/plugins/hbonds/ + + +Input +------ + +Required: + - *universe* : an MDAnalysis Universe object + +Options: + - *donors_sel* [None] : Atom selection for donors. If `None`, then will be identified via the topology. + - *hydrogens_sel* [None] : Atom selection for hydrogens. If `None`, then will be identified via charge and mass. + - *acceptors_sel* [None] : Atom selection for acceptors. If `None`, then will be identified via charge. + - *d_h_cutoff* (Å) [1.2] : Distance cutoff used for finding donor-hydrogen pairs + - *d_a_cutoff* (Å) [3.0] : Distance cutoff for hydrogen bonds. This cutoff refers to the D-A distance. + - *d_h_a_angle_cutoff* (degrees) [150] : D-H-A angle cutoff for hydrogen bonds. + - *update_selections* [True] : If true, will update atom selections at each frame. + + +Output +------ + + - *frame* : frame at which a hydrogen bond was found + - *donor id* : atom id of the hydrogen bond donor atom + - *hydrogen id* : atom id of the hydrogen bond hydrogen atom + - *acceptor id* : atom id of the hydrogen bond acceptor atom + - *distance* (Å): length of the hydrogen bond + - *angle* (degrees): angle of the hydrogen bond + +Hydrogen bond data are returned in a :class:`numpy.ndarray` on a "one line, one observation" basis +and can be accessed via :attr:`HydrogenBondAnalysis.hbonds`:: + + results = [ + [ + , + , + , + , + , + + ], + ... + ] + +Example use of :class:`HydrogenBondAnalysis` +-------------------------------------------- + +The simplest use case is to allow :class:`HydrogenBondAnalysis` to guess the acceptor and hydrogen atoms, and to +identify donor-hydrogen pairs via the bonding information in the topology:: + + import MDAnalysis + from MDAnalysis.analysis.hydrogenbonds.hbond_analysis import HydrogenBondAnalysis as HBA + + u = MDAnalysis.Universe(psf, trajectory) + + hbonds = HBA(universe=u) + hbonds.run() + +It is also possible to specify which hydrogens and acceptors to use in the analysis. For example, to find all hydrogen +bonds in water:: + + import MDAnalysis + from MDAnalysis.analysis.hydrogenbonds.hbond_analysis import HydrogenBondAnalysis as HBA + + u = MDAnalysis.Universe(psf, trajectory) + + hbonds = HBA(universe=u, hydrogens_sel='resname TIP3 and name H1 H2', acceptors_sel='resname TIP3 and name OH2') + hbonds.run() + +Alternatively, :attr:`hydrogens_sel` and :attr:`acceptors_sel` may be generated via the :attr:`guess_hydrogens` and +:attr:`guess_acceptors`. This selection strings may then be modified prior to calling :attr:`run`, or a subset of +the universe may be used to guess the atoms. For example, find hydrogens and acceptors belonging to a protein:: + + import MDAnalysis + from MDAnalysis.analysis.hydrogenbonds.hbond_analysis import HydrogenBondAnalysis as HBA + + u = MDAnalysis.Universe(psf, trajectory) + + hbonds = HBA(universe=u) + hbonds.hydrogens_sel = hbonds.guess_hydrogens("protein") + hbonds.acceptors_sel = hbonds.guess_acceptors("protein") + hbonds.run() + +Slightly more complex selection strings are also possible. For example, to find hydrogen bonds involving a protein and +any water molecules within 10 Å of the protein (which may be useful for subsequently finding the lifetime of +protein-water hydrogen bonds or finding water-bridging hydrogen bond paths):: + + import MDAnalysis + from MDAnalysis.analysis.hydrogenbonds.hbond_analysis import HydrogenBondAnalysis as HBA + + u = MDAnalysis.Universe(psf, trajectory) + + hbonds = HBA(universe=u) + + protein_hydrogens_sel = hbonds.guess_hydrogens("protein") + protein_acceptors_sel = hbonds.guess_acceptors("protein") + + water_hydrogens_sel = "resname TIP3 and name H1 H2" + water_acceptors_sel = "resname TIP3 and name OH2" + + hbonds.hydrogens_sel = f"({protein_hydrogens_sel}) or ({water_hydrogens_sel} and around 10 not resname TIP3})" + hbonds.acceptors_sel = f"({protein_acceptors_sel}) or ({water_acceptors_sel} and around 10 not resname TIP3})" + hbonds.run() + +It is highly recommended that a topology with bonding information is used to generate the universe, e.g `PSF`, `TPR`, or +`PRMTOP` files. This is the only method by which it can be guaranteed that donor-hydrogen pairs are correctly identified. +However, if, for example, a `PDB` file is used instead, a :attr:`donors_sel` may be provided along with a +:attr:`hydrogens_sel` and the donor-hydrogen pairs will be identified via a distance cutoff, :attr:`d_h_cutoff`:: + + import MDAnalysis + from MDAnalysis.analysis.hydrogenbonds.hbond_analysis import HydrogenBondAnalysis as HBA + + u = MDAnalysis.Universe(pdb, trajectory) + + hbonds = HBA( + universe=u, + donors_sel='resname TIP3 and name OH2', + hydrogens_sel='resname TIP3 and name H1 H2', + acceptors_sel='resname TIP3 and name OH2', + d_h_cutoff=1.2 + ) + hbonds.run() + +The class and its methods +------------------------- + +.. autoclass:: HydrogenBondAnalysis + :members: + +""" +from __future__ import absolute_import, division + +import numpy as np + +from .. import base +from MDAnalysis.lib.distances import capped_distance, calc_angles + +from ...due import due, Doi + +due.cite(Doi("10.1039/C9CP01532A"), + description="Hydrogen bond analysis implementation", + path="MDAnalysis.analysis.hydrogenbonds.hbond_analysis", + cite_module=True) + +del Doi + + +class HydrogenBondAnalysis(base.AnalysisBase): + """ + Perform an analysis of hydrogen bonds in a Universe. + """ + + def __init__(self, universe, donors_sel=None, hydrogens_sel=None, acceptors_sel=None, + d_h_cutoff=1.2, d_a_cutoff=3.0, d_h_a_angle_cutoff=150, update_selections=True): + """Set up atom selections and geometric criteria for finding hydrogen bonds in a Universe. + + Parameters + ---------- + universe : Universe + MDAnalysis Universe object + donors_sel : str + Selection string for the hydrogen bond donor atoms. If the universe topology contains bonding information, + leave :attr:`donors_sel` as `None` so that donor-hydrogen pairs can be correctly identified. + hydrogens_sel : str + Selection string for the hydrogen bond hydrogen atoms. Leave as `None` to guess which hydrogens to use in + the analysis using :attr:`guess_hydrogens`. If :attr:`hydrogens_sel` is left as `None`, also leave + :attr:`donors_sel` as None so that donor-hydrogen pairs can be correctly identified. + acceptors_sel : str + Selection string for the hydrogen bond acceptor atoms. Leave as `None` to guess which atoms to use in the + analysis using :attr:`guess_acceptors` + d_h_cutoff : float (optional) + Distance cutoff used for finding donor-hydrogen pairs [1.2]. Only used to find donor-hydrogen pairs if the + universe topology does not contain bonding information + d_a_cutoff : float (optional) + Distance cutoff for hydrogen bonds. This cutoff refers to the D-A distance. [3.0] + d_h_a_angle_cutoff: float (optional) + D-H-A angle cutoff for hydrogen bonds, in degrees. [150] + update_selections: bool (optional) + Whether or not to update the acceptor, donor and hydrogen lists at each frame. [True] + + Note + ---- + + It is highly recommended that a universe topology with bonding information is used, as this is the only way + that guarantees the correct identification of donor-hydrogen pairs. + """ + + self.u = universe + self._trajectory = self.u.trajectory + self.donors_sel = donors_sel + self.hydrogens_sel = hydrogens_sel + self.acceptors_sel = acceptors_sel + self.d_h_cutoff = d_h_cutoff + self.d_a_cutoff = d_a_cutoff + self.d_h_a_angle = d_h_a_angle_cutoff + self.update_selections = update_selections + + def guess_hydrogens(self, selection='all', max_mass=1.1, min_charge=0.3): + """Guesses which hydrogen atoms should be used in the analysis. + + Parameters + ---------- + selection: str (optional) + Selection string for atom group from which hydrogens will be identified. + max_mass: float (optional) + Maximum allowed mass of a hydrogen atom. + min_charge: float (optional) + Minimum allowed charge of a hydrogen atom. + + Returns + ------- + potential_hydrogens: str + String containing the :attr:`resname` and :attr:`name` of all hydrogen atoms potentially capable of forming + hydrogen bonds. + + Notes + ----- + This function makes use of atomic masses and atomic charges to identify which atoms are hydrogen atoms that are + capable of participating in hydrogen bonding. If an atom has a mass less than :attr:`max_mass` and an atomic + charge greater than :attr:`min_charge` then it is considered capable of participating in hydrogen bonds. + + If :attr:`hydrogens_sel` is `None`, this function is called to guess the selection. + + Alternatively, this function may be used to quickly generate a :class:`str` of potential hydrogen atoms involved + in hydrogen bonding. This str may then be modified before being used to set the attribute + :attr:`hydrogens_sel`. + """ + + ag = self.u.select_atoms(selection) + hydrogens_ag = ag[ + np.logical_and( + ag.masses < max_mass, + ag.charges > min_charge + ) + ] + + hydrogens_list = np.unique( + [ + '(resname {} and name {})'.format(r, p) for r, p in zip(hydrogens_ag.resnames, hydrogens_ag.names) + ] + ) + + return " or ".join(hydrogens_list) + + def guess_donors(self, selection='all', max_charge=-0.5): + """Guesses which atoms could be considered donors in the analysis. Only use if the universe topology does not + contain bonding information, otherwise donor-hydrogen pairs may be incorrectly assigned. + + Parameters + ---------- + selection: str (optional) + Selection string for atom group from which donors will be identified. + max_charge: float (optional) + Maximum allowed charge of a donor atom. + + Returns + ------- + potential_donors: str + String containing the :attr:`resname` and :attr:`name` of all atoms that potentially capable of forming + hydrogen bonds. + + Notes + ----- + This function makes use of and atomic charges to identify which atoms could be considered donor atoms in the + hydrogen bond analysis. If an atom has an atomic charge less than :attr:`max_charge`, and it is within + :attr:`d_h_cutoff` of a hydrogen atom, then it is considered capable of participating in hydrogen bonds. + + If :attr:`donors_sel` is `None`, and the universe topology does not have bonding information, this function is + called to guess the selection. + + Alternatively, this function may be used to quickly generate a :class:`str` of potential donor atoms involved + in hydrogen bonding. This :class:`str` may then be modified before being used to set the attribute + :attr:`donors_sel`. + + """ + + # We need to know `hydrogens_sel` before we can find donors + # Use a new variable `hydrogens_sel` so that we do not set `self.hydrogens_sel` if it is currently `None` + if not self.hydrogens_sel: + hydrogens_sel = self.guess_hydrogens() + else: + hydrogens_sel = self.hydrogens_sel + hydrogens_ag = self.u.select_atoms(hydrogens_sel) + + ag = hydrogens_ag.residues.atoms.select_atoms( + "({donors_sel}) and around {d_h_cutoff} {hydrogens_sel}".format( + donors_sel=selection, + d_h_cutoff=self.d_h_cutoff, + hydrogens_sel=hydrogens_sel + ) + ) + donors_ag = ag[ag.charges < max_charge] + donors_list = np.unique( + [ + '(resname {} and name {})'.format(r, p) for r, p in zip(donors_ag.resnames, donors_ag.names) + ] + ) + + return " or ".join(donors_list) + + def guess_acceptors(self, selection='all', max_charge=-0.5): + """Guesses which atoms could be considered acceptors in the analysis. + + Parameters + ---------- + selection: str (optional) + Selection string for atom group from which acceptors will be identified. + max_charge: float (optional) + Maximum allowed charge of an acceptor atom. + + Returns + ------- + potential_acceptors: str + String containing the :attr:`resname` and :attr:`name` of all atoms that potentially capable of forming + hydrogen bonds. + + Notes + ----- + This function makes use of and atomic charges to identify which atoms could be considered acceptor atoms in the + hydrogen bond analysis. If an atom has an atomic charge less than :attr:`max_charge` then it is considered + capable of participating in hydrogen bonds. + + If :attr:`acceptors_sel` is `None`, this function is called to guess the selection. + + Alternatively, this function may be used to quickly generate a :class:`str` of potential acceptor atoms involved + in hydrogen bonding. This :class:`str` may then be modified before being used to set the attribute + :attr:`acceptors_sel`. + """ + + ag = self.u.select_atoms(selection) + acceptors_ag = ag[ag.charges < max_charge] + acceptors_list = np.unique( + [ + '(resname {} and name {})'.format(r, p) for r, p in zip(acceptors_ag.resnames, acceptors_ag.names) + ] + ) + + return " or ".join(acceptors_list) + + def _get_dh_pairs(self): + """Finds donor-hydrogen pairs. + + Returns + ------- + donors, hydrogens: AtomGroup, AtomGroup + AtomGroups corresponding to all donors and all hydrogens. AtomGroups are ordered such that, if zipped, will + produce a list of donor-hydrogen pairs. + """ + + # If donors_sel is not provided, use topology to find d-h pairs + if not self.donors_sel: + + if len(self.u.bonds) == 0: + raise Exception('Cannot assign donor-hydrogen pairs via topology as no bonded information is present. ' + 'Please either: load a topology file with bonded information; use the guess_bonds() ' + 'topology guesser; or set HydrogenBondAnalysis.donors_sel so that a distance cutoff ' + 'can be used.') + + hydrogens = self.u.select_atoms(self.hydrogens_sel) + donors = sum(h.bonded_atoms[0] for h in hydrogens) + + # Otherwise, use d_h_cutoff as a cutoff distance + else: + + hydrogens = self.u.select_atoms(self.hydrogens_sel) + donors = self.u.select_atoms(self.donors_sel) + donors_indices, hydrogen_indices = capped_distance( + donors.positions, + hydrogens.positions, + max_cutoff=self.d_h_cutoff, + box=self.u.dimensions, + return_distances=False + ).T + + donors = donors[donors_indices] + hydrogens = hydrogens[hydrogen_indices] + + return donors, hydrogens + + def _prepare(self): + + self.hbonds = [[], [], [], [], [], []] + self.frames = np.arange(self.start, self.stop, self.step) + self.timesteps = (self.frames * self.u.trajectory.dt) + self.u.trajectory[0].time + + # Set atom selections if they have not been provided + if not self.acceptors_sel: + self.acceptors_sel = self.guess_acceptors() + if not self.hydrogens_sel: + self.hydrogens_sel = self.guess_hydrogens() + + # Select atom groups + self._acceptors = self.u.select_atoms(self.acceptors_sel, updating=self.update_selections) + self._donors, self._hydrogens = self._get_dh_pairs() + + def _single_frame(self): + + box = self._ts.dimensions + + # Update donor-hydrogen pairs if necessary + if self.update_selections: + self._donors, self._hydrogens = self._get_dh_pairs() + + # find D and A within cutoff distance of one another + # min_cutoff = 1.0 as an atom cannot form a hydrogen bond with itself + d_a_indices, d_a_distances = capped_distance( + self._donors.positions, + self._acceptors.positions, + max_cutoff=self.d_a_cutoff, + min_cutoff=1.0, + box=box, + return_distances=True, + ) + + # Remove D-A pairs more than d_a_cutoff away from one another + tmp_donors = self._donors[d_a_indices.T[0]] + tmp_hydrogens = self._hydrogens[d_a_indices.T[0]] + tmp_acceptors = self._acceptors[d_a_indices.T[1]] + + # Find D-H-A angles greater than d_h_a_angle_cutoff + d_h_a_angles = np.rad2deg( + calc_angles( + tmp_donors.positions, + tmp_hydrogens.positions, + tmp_acceptors.positions, + box=box + ) + ) + hbond_indices = np.where(d_h_a_angles > self.d_h_a_angle)[0] + + # Retrieve atoms, distances and angles of hydrogen bonds + hbond_donors = tmp_donors[hbond_indices] + hbond_hydrogens = tmp_hydrogens[hbond_indices] + hbond_acceptors = tmp_acceptors[hbond_indices] + hbond_distances = d_a_distances[hbond_indices] + hbond_angles = d_h_a_angles[hbond_indices] + + # Store data on hydrogen bonds found at this frame + self.hbonds[0].extend(np.full_like(hbond_donors, self._ts.frame)) + self.hbonds[1].extend(hbond_donors.ids) + self.hbonds[2].extend(hbond_hydrogens.ids) + self.hbonds[3].extend(hbond_acceptors.ids) + self.hbonds[4].extend(hbond_distances) + self.hbonds[5].extend(hbond_angles) + + def _conclude(self): + + self.hbonds = np.asarray(self.hbonds).T + + def count_by_time(self): + """Counts the number of hydrogen bonds per timestep. + + Returns + ------- + counts : numpy.ndarray + Contains the total number of hydrogen bonds found at each timestep. + Can be used along with :attr:`HydrogenBondAnalysis.timesteps` to plot + the number of hydrogen bonds over time. + """ + + indices, tmp_counts = np.unique(self.hbonds[:, 0], axis=0, return_counts=True) + + indices -= self.start + indices /= self.step + + counts = np.zeros_like(self.frames) + counts[indices.astype(np.int)] = tmp_counts + return counts + + def count_by_type(self): + """Counts the total number of each unique type of hydrogen bond. + + Returns + ------- + counts : numpy.ndarray + Each row of the array contains the donor resname, donor atom type, acceptor resname, acceptor atom type and + the total number of times the hydrogen bond was found. + + Note + ---- + Unique hydrogen bonds are determined through a consideration of the resname and atom type of the donor and + acceptor atoms in a hydrogen bond. + """ + + d = self.u.atoms[self.hbonds[:, 1].astype(np.int)] + a = self.u.atoms[self.hbonds[:, 3].astype(np.int)] + + tmp_hbonds = np.array([d.resnames, d.types, a.resnames, a.types], dtype=np.str).T + hbond_type, type_counts = np.unique(tmp_hbonds, axis=0, return_counts=True) + hbond_type_list = [] + for hb_type, hb_count in zip(hbond_type, type_counts): + hbond_type_list.append([":".join(hb_type[:2]), ":".join(hb_type[2:4]), hb_count]) + + return np.array(hbond_type_list) + + def count_by_ids(self): + """Counts the total number hydrogen bonds formed by unique combinations of donor, hydrogen and acceptor atoms. + + Returns + ------- + counts : numpy.ndarray + Each row of the array contains the donor atom id, hydrogen atom id, acceptor atom id and the total number + of times the hydrogen bond was observed. The array is sorted by frequency of occurrence. + + Note + ---- + Unique hydrogen bonds are determined through a consideration of the hydrogen atom id and acceptor atom id + in a hydrogen bond. + """ + + d = self.u.atoms[self.hbonds[:, 1].astype(np.int)] + h = self.u.atoms[self.hbonds[:, 2].astype(np.int)] + a = self.u.atoms[self.hbonds[:, 3].astype(np.int)] + + tmp_hbonds = np.array([d.ids, h.ids, a.ids]).T + hbond_ids, ids_counts = np.unique(tmp_hbonds, axis=0, return_counts=True) + + # Find unique hbonds and sort rows so that most frequent observed bonds are at the top of the array + unique_hbonds = np.concatenate((hbond_ids, ids_counts[:, None]), axis=1) + unique_hbonds = unique_hbonds[unique_hbonds[:, 3].argsort()[::-1]] + + return unique_hbonds From 54872daa3017c1aba1d732567bd2f86fad40fed8 Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Sat, 26 Oct 2019 15:02:28 +0100 Subject: [PATCH 2/6] Added tests for the new HydrogenBondAnalysis class. The tests are based on those for the original implementation of the hydrogen bond analysis, which allows for direct comparison of the results to ensure the behaviour of the new class is correct. --- .../analysis/test_hydrogenbonds_analysis.py | 199 ++++++++++++++++++ 1 file changed, 199 insertions(+) create mode 100644 testsuite/MDAnalysisTests/analysis/test_hydrogenbonds_analysis.py diff --git a/testsuite/MDAnalysisTests/analysis/test_hydrogenbonds_analysis.py b/testsuite/MDAnalysisTests/analysis/test_hydrogenbonds_analysis.py new file mode 100644 index 00000000000..b4b1ecb5c2a --- /dev/null +++ b/testsuite/MDAnalysisTests/analysis/test_hydrogenbonds_analysis.py @@ -0,0 +1,199 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 fileencoding=utf-8 +# +# MDAnalysis --- https://www.mdanalysis.org +# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, +# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. +# MDAnalysis: A Python package for the rapid analysis of molecular dynamics +# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th +# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. +# doi: 10.25080/majora-629e541a-00e +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 +# + + +from __future__ import absolute_import, division + +import numpy as np +import MDAnalysis +from MDAnalysis.analysis.hydrogenbonds.hbond_analysis import HydrogenBondAnalysis + +import pytest +from numpy.testing import assert_allclose, assert_array_almost_equal, assert_array_equal +from MDAnalysisTests.datafiles import waterPSF, waterDCD + + +class TestHydrogenBondAnalysisTIP3P(object): + + @staticmethod + @pytest.fixture(scope='class') + def universe(): + return MDAnalysis.Universe(waterPSF, waterDCD) + + kwargs = { + 'donors_sel': 'name OH2', + 'hydrogens_sel': 'name H1 H2', + 'acceptors_sel': 'name OH2', + 'd_h_cutoff': 1.2, + 'd_a_cutoff': 3.0, + 'd_h_a_angle_cutoff': 120.0 + } + + @pytest.fixture(scope='class') + def h(self, universe): + h = HydrogenBondAnalysis(universe, **self.kwargs) + h.run() + return h + + def test_hbond_analysis(self, h): + + assert len(np.unique(h.hbonds[:,0])) == 10 + assert len(h.hbonds) == 32 + + reference = { + 'distance': {'mean': 2.7627309, 'std': 0.0905052}, + 'angle': {'mean': 158.9038039, 'std': 12.0362826}, + } + + assert_allclose(np.mean(h.hbonds[:, 4]), reference['distance']['mean']) + assert_allclose(np.std(h.hbonds[:, 4]), reference['distance']['std']) + assert_allclose(np.mean(h.hbonds[:, 5]), reference['angle']['mean']) + assert_allclose(np.std(h.hbonds[:, 5]), reference['angle']['std']) + + def test_count_by_time(self, h): + + ref_times = np.arange(0.02, 0.21, 0.02) + ref_counts = np.array([3, 2, 4, 4, 4, 4, 3, 2, 3, 3]) + + counts = h.count_by_time() + assert_array_almost_equal(h.timesteps, ref_times) + assert_array_equal(counts, ref_counts) + + def test_count_by_type(self, h): + + # Only one type of hydrogen bond in this system + ref_count = 32 + + counts = h.count_by_type() + assert int(counts[0, 2]) == ref_count + + def test_count_by_ids(self, h): + + ref_counts = [1.0, 1.0, 0.5, 0.4, 0.2, 0.1] + unique_hbonds = h.count_by_ids() + + # count_by_ids() returns raw counts + # convert to fraction of time that bond was observed + counts = unique_hbonds[:, 3] / len(h.timesteps) + + assert_array_equal(counts, ref_counts) + +class TestHydrogenBondAnalysisTIP3P_GuessAcceptors_GuessHydrogens_UseTopology_(TestHydrogenBondAnalysisTIP3P): + """Uses the same distance and cutoff hydrogen bond criteria as :class:`TestHydrogenBondAnalysisTIP3P`, so the + results are identical, but the hydrogens and acceptors are guessed whilst the donor-hydrogen pairs are determined + via the topology. + """ + kwargs = { + 'donors_sel': None, + 'hydrogens_sel': None, + 'acceptors_sel': None, + 'd_a_cutoff': 3.0, + 'd_h_a_angle_cutoff': 120.0 + } + +class TestHydrogenBondAnalysisTIP3P_GuessDonors_NoTopology(object): + """Guess the donor atoms involved in hydrogen bonds using the partial charges of the atoms. + """ + + @staticmethod + @pytest.fixture(scope='class') + def universe(): + return MDAnalysis.Universe(waterPSF, waterDCD) + + kwargs = { + 'donors_sel': None, + 'hydrogens_sel': None, + 'acceptors_sel': None, + 'd_h_cutoff': 1.2, + 'd_a_cutoff': 3.0, + 'd_h_a_angle_cutoff': 120.0 + } + + @pytest.fixture(scope='class') + def h(self, universe): + h = HydrogenBondAnalysis(universe, **self.kwargs) + return h + + def test_guess_donors(self, h): + + ref_donors = "(resname TIP3 and name OH2)" + donors = h.guess_donors(selection='all', max_charge=-0.5) + assert donors == ref_donors + + +class TestHydrogenBondAnalysisTIP3PStartStep(object): + """Uses the same distance and cutoff hydrogen bond criteria as :class:`TestHydrogenBondAnalysisTIP3P` but starting + with the second frame and using every other frame in the analysis. + """ + + @staticmethod + @pytest.fixture(scope='class') + def universe(): + return MDAnalysis.Universe(waterPSF, waterDCD) + + kwargs = { + 'donors_sel': 'name OH2', + 'hydrogens_sel': 'name H1 H2', + 'acceptors_sel': 'name OH2', + 'd_h_cutoff': 1.2, + 'd_a_cutoff': 3.0, + 'd_h_a_angle_cutoff': 120.0 + } + + @pytest.fixture(scope='class') + def h(self, universe): + h = HydrogenBondAnalysis(universe, **self.kwargs) + h.run(start=1, step=2) + return h + + def test_hbond_analysis(self, h): + + assert len(np.unique(h.hbonds[:,0])) == 5 + assert len(h.hbonds) == 15 + + reference = { + 'distance': {'mean': 2.73942464, 'std': 0.05867924}, + 'angle': {'mean': 157.07768079, 'std': 9.72636682}, + } + + assert_allclose(np.mean(h.hbonds[:, 4]), reference['distance']['mean']) + assert_allclose(np.std(h.hbonds[:, 4]), reference['distance']['std']) + assert_allclose(np.mean(h.hbonds[:, 5]), reference['angle']['mean']) + assert_allclose(np.std(h.hbonds[:, 5]), reference['angle']['std']) + + def test_count_by_time(self, h): + + ref_times = np.array([0.04, 0.08, 0.12, 0.16, 0.20, ]) + ref_counts = np.array([2, 4, 4, 2, 3]) + + counts = h.count_by_time() + assert_array_almost_equal(h.timesteps, ref_times) + assert_array_equal(counts, ref_counts) + + def test_count_by_type(self, h): + + # Only one type of hydrogen bond in this system + ref_count = 15 + + counts = h.count_by_type() + assert int(counts[0, 2]) == ref_count From d21ae194ab8848be726b32cb92c83ef10e401664 Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Sat, 26 Oct 2019 15:03:00 +0100 Subject: [PATCH 3/6] Added for the original implementation of the hbond analysis. New regression test added to directly compare the behaviour of the original implementation with the new one. --- .../MDAnalysisTests/analysis/test_hbonds.py | 123 ++++++++++++++++++ 1 file changed, 123 insertions(+) diff --git a/testsuite/MDAnalysisTests/analysis/test_hbonds.py b/testsuite/MDAnalysisTests/analysis/test_hbonds.py index 14223178abe..b10cce2624e 100644 --- a/testsuite/MDAnalysisTests/analysis/test_hbonds.py +++ b/testsuite/MDAnalysisTests/analysis/test_hbonds.py @@ -397,3 +397,126 @@ def test_table_atoms(self, h, normalized_timeseries, reference_table): # https://github.com/MDAnalysis/mdanalysis/issues/801) for name, ref in reference_table.items(): assert_array_equal(h.table.field(name), ref, err_msg="resname for {0} do not match (Issue #801)") + + +class TestHydrogenBondAnalysisTIP3PHeavyPBC(object): + @staticmethod + @pytest.fixture(scope='class') + def universe(): + return MDAnalysis.Universe(waterPSF, waterDCD) + + kwargs = { + 'selection1': 'all', + 'selection2': 'all', + 'detect_hydrogens': "distance", + 'distance': 3.0, + 'angle': 120.0, + 'distance_type': 'heavy', + 'pbc': True + } + + @pytest.fixture(scope='class') + def h(self, universe): + h = MDAnalysis.analysis.hbonds.HydrogenBondAnalysis(universe, **self.kwargs) + h.run(verbose=False) + h.generate_table() + return h + + @pytest.fixture(scope='class') + def normalized_timeseries(self, h): + # timeseries in normalized form: (t, d_indx1, a_indx1, d_index0, a_index0, donor, acceptor, dist, angle) + # array index: 0 1 2 3 4 5 6 7 8 + timeseries = [[t] + item + for t, hframe in zip(h.timesteps, h.timeseries) + for item in hframe] + return timeseries + + def test_timeseries(self, h, normalized_timeseries): + + assert len(h.timeseries) == 10 + assert len(normalized_timeseries) == 32 + + reference = { + 'distance': {'mean': 2.7627309, 'std': 0.0905052}, + 'angle': {'mean': 158.9038039, 'std': 12.0362826}, + } + + assert_allclose(np.mean([hbond[5] for hbond in normalized_timeseries]), reference['distance']['mean']) + assert_allclose(np.std([hbond[5] for hbond in normalized_timeseries]), reference['distance']['std']) + assert_allclose(np.mean([hbond[6] for hbond in normalized_timeseries]), reference['angle']['mean']) + assert_allclose(np.std([hbond[6] for hbond in normalized_timeseries]), reference['angle']['std']) + + def test_count_by_time(self, h): + + ref_times = np.arange(0.02, 0.21, 0.02) + ref_counts = np.array([3, 2, 4, 4, 4, 4, 3, 2, 3, 3]) + + times, counts = np.array([[count[0], count[1]] for count in h.count_by_time()]).T + + assert_array_almost_equal(times, ref_times) + assert_array_equal(counts, ref_counts) + + def test_count_by_type(self, h): + + ref_values = [1.0, 1.0, 0.5, 0.4, 0.2, 0.1] + + c = h.count_by_type() + assert_equal(np.sort(c.frequency)[::-1], ref_values) + + +class TestHydrogenBondAnalysisTIP3PHeavyPBCStartStep(object): + @staticmethod + @pytest.fixture(scope='class') + def universe(): + return MDAnalysis.Universe(waterPSF, waterDCD) + + kwargs = { + 'selection1': 'all', + 'selection2': 'all', + 'detect_hydrogens': "distance", + 'distance': 3.0, + 'angle': 120.0, + 'distance_type': 'heavy', + 'pbc': True + } + + @pytest.fixture(scope='class') + def h(self, universe): + h = MDAnalysis.analysis.hbonds.HydrogenBondAnalysis(universe, **self.kwargs) + h.run(start=1, step=2, verbose=False) + h.generate_table() + return h + + @pytest.fixture(scope='class') + def normalized_timeseries(self, h): + # timeseries in normalized form: (t, d_indx1, a_indx1, d_index0, a_index0, donor, acceptor, dist, angle) + # array index: 0 1 2 3 4 5 6 7 8 + timeseries = [[t] + item + for t, hframe in zip(h.timesteps, h.timeseries) + for item in hframe] + return timeseries + + def test_timeseries(self, h, normalized_timeseries): + + assert len(h.timeseries) == 5 + assert len(normalized_timeseries) == 15 + + reference = { + 'distance': {'mean': 2.73942464, 'std': 0.05867924}, + 'angle': {'mean': 157.07768079, 'std': 9.72636682}, + } + + assert_allclose(np.mean([hbond[5] for hbond in normalized_timeseries]), reference['distance']['mean']) + assert_allclose(np.std([hbond[5] for hbond in normalized_timeseries]), reference['distance']['std']) + assert_allclose(np.mean([hbond[6] for hbond in normalized_timeseries]), reference['angle']['mean']) + assert_allclose(np.std([hbond[6] for hbond in normalized_timeseries]), reference['angle']['std']) + + def test_count_by_time(self, h): + + ref_times = np.array([0.04, 0.08, 0.12, 0.16, 0.20]) + ref_counts = np.array([2, 4, 4, 2, 3]) + + times, counts = np.array([[count[0], count[1]] for count in h.count_by_time()]).T + + assert_array_almost_equal(times, ref_times) + assert_array_equal(counts, ref_counts) From f50305c690c18961048183d474c26575ad84dfde Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Sat, 26 Oct 2019 15:03:22 +0100 Subject: [PATCH 4/6] Added deprecation warnings for the original implementation. Added warnings to :mod:`MDAnalysis.analysis.hbonds.hbond_analysis` and :class:`MDAnalysis.analysis.hbonds.hbond_analysis.HydrogenBondAnalysis` --- .../analysis/hbonds/hbond_analysis.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/package/MDAnalysis/analysis/hbonds/hbond_analysis.py b/package/MDAnalysis/analysis/hbonds/hbond_analysis.py index b0aefe862f6..99fb6287b2c 100644 --- a/package/MDAnalysis/analysis/hbonds/hbond_analysis.py +++ b/package/MDAnalysis/analysis/hbonds/hbond_analysis.py @@ -22,13 +22,16 @@ # # Hydrogen Bonding Analysis -r"""Hydrogen Bond analysis --- :mod:`MDAnalysis.analysis.hbonds.hbond_analysis` -=========================================================================== +r"""Hydrogen Bond analysis (Deprecated) --- :mod:`MDAnalysis.analysis.hbonds.hbond_analysis` +============================================================================================ :Author: David Caplan, Lukas Grossar, Oliver Beckstein :Year: 2010-2017 :Copyright: GNU Public License v3 +..Warning: + This module will be deprecated in version 1.0. + Please use :mod:`MDAnalysis.analysis.hydrogenbonds.hbond_analysis` instead. Given a :class:`~MDAnalysis.core.universe.Universe` (simulation trajectory with 1 or more frames) measure all hydrogen bonds for each @@ -336,6 +339,11 @@ class HydrogenBondAnalysis_OtherFF(HydrogenBondAnalysis): logger = logging.getLogger('MDAnalysis.analysis.hbonds') +warnings.warn( + "This module will be deprecated in version 1.0." + "Please use MDAnalysis.analysis.hydrogenbonds.hbond_analysis instead.", + category=DeprecationWarning + ) class HydrogenBondAnalysis(base.AnalysisBase): """Perform a hydrogen bond analysis @@ -557,6 +565,13 @@ def __init__(self, universe, selection1='protein', selection2='all', selection1_ """ super(HydrogenBondAnalysis, self).__init__(universe.trajectory, **kwargs) + + warnings.warn( + "This class will be deprecated in version 1.0." + "Please use MDAnalysis.analysis.hydrogenbonds.hbond_analysis.HydrogenBondAnalysis instead.", + category=DeprecationWarning + ) + # per-frame debugging output? self.debug = debug From c77382364be576edba92ab48b0d7c08657d031d0 Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Sat, 26 Oct 2019 15:04:37 +0100 Subject: [PATCH 5/6] Updated the documentation --- .../source/documentation_pages/analysis/hydrogenbonds.rst | 1 + .../source/documentation_pages/analysis_modules.rst | 1 + .../doc/sphinx/source/documentation_pages/references.rst | 8 ++++++++ 3 files changed, 10 insertions(+) create mode 100644 package/doc/sphinx/source/documentation_pages/analysis/hydrogenbonds.rst diff --git a/package/doc/sphinx/source/documentation_pages/analysis/hydrogenbonds.rst b/package/doc/sphinx/source/documentation_pages/analysis/hydrogenbonds.rst new file mode 100644 index 00000000000..e67253d13af --- /dev/null +++ b/package/doc/sphinx/source/documentation_pages/analysis/hydrogenbonds.rst @@ -0,0 +1 @@ +.. automodule:: MDAnalysis.analysis.hydrogenbonds.hbond_analysis diff --git a/package/doc/sphinx/source/documentation_pages/analysis_modules.rst b/package/doc/sphinx/source/documentation_pages/analysis_modules.rst index cd001450042..ad145f0cec1 100644 --- a/package/doc/sphinx/source/documentation_pages/analysis_modules.rst +++ b/package/doc/sphinx/source/documentation_pages/analysis_modules.rst @@ -66,6 +66,7 @@ Hydrogen bonding .. toctree:: :maxdepth: 1 + analysis/hydrogenbonds analysis/hbond_analysis analysis/hbond_autocorrel analysis/wbridge_analysis diff --git a/package/doc/sphinx/source/documentation_pages/references.rst b/package/doc/sphinx/source/documentation_pages/references.rst index 81b4d00490f..7c735b3d1b4 100644 --- a/package/doc/sphinx/source/documentation_pages/references.rst +++ b/package/doc/sphinx/source/documentation_pages/references.rst @@ -131,6 +131,14 @@ If you use the streamline visualization in .. _`10.1039/c3fd00145h`: https://doi.org/10.1039/c3fd00145h +If you use the hydrogen bond analysis code in +:mod:`MDAnalysis.analysis.hydrogenbonds.hbond_analysis` please cite [Smith2019]_. + +.. [Smith2019] P. Smith, R. M. Ziolek, E. Gazzarrini, D. M. Owen, and C. D. Lorenz. + On the interaction of hyaluronic acid with synovial fluid lipid membranes. *PCCP* + **21** (2019), 9845-9857. doi: `10.1039/C9CP01532A`_ + +.. _`10.1039/C9CP01532A`: http://dx.doi.org/10.1039/C9CP01532A .. _citations-using-duecredit: From 21c256358a86f3e856ab677f8e2edce9eda927dd Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Sat, 26 Oct 2019 15:04:48 +0100 Subject: [PATCH 6/6] Updated the CHANGELOG --- package/CHANGELOG | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/package/CHANGELOG b/package/CHANGELOG index 6f3b2c15c9f..74124a35494 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -13,6 +13,21 @@ The rules for this file: * release numbers follow "Semantic Versioning" http://semver.org ------------------------------------------------------------------------------ +23/10/19 p-j-smith + + * 0.21.0 + +Fixes + + +Enhancements + * New analysis.hydrogenbonds.HydrogenBondAnalysis class for the analysis of + hydrogen bonds. Simpler interface, more extensible and better performance + than analysis.hbonds.HydrogenBondAnalysis + +Deprecations + * analysis.hbonds.HydrogenBondAnalysis will be deprecated in 1.0 + mm/dd/yy richardjgowers * 0.20.2