diff --git a/package/CHANGELOG b/package/CHANGELOG index abe47f11d5c..5caa76fca08 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -15,7 +15,7 @@ The rules for this file: ------------------------------------------------------------------------------ mm/dd/17 utkbansal, kain88-de, xiki-tempula, kaplajon, wouterboomsma, - richardjgowers, Shtkddud123, QuantumEntangledAndy + richardjgowers, Shtkddud123, QuantumEntangledAndy, orbeckst * 0.16.1 @@ -38,7 +38,9 @@ Fixes (PR #1325) * Fix PDBParser docs for conect (issue #1246) * Fixed bug where amber topology files would fail to load if number of atoms was - exectly divisible by number of atoms per line (issue #1331) + exactly divisible by number of atoms per line (issue #1331) + * Fixed incorrect handling of residue names with trailing numbers in + HydrogenBondAnalysis (issue #801) * Fixed AnalysisBase class provides numerical start,stop,step values (PR #1340) Changes diff --git a/package/MDAnalysis/analysis/hbonds/hbond_analysis.py b/package/MDAnalysis/analysis/hbonds/hbond_analysis.py index 3065dbfbad8..7aa82072ea3 100644 --- a/package/MDAnalysis/analysis/hbonds/hbond_analysis.py +++ b/package/MDAnalysis/analysis/hbonds/hbond_analysis.py @@ -65,12 +65,12 @@ results = [ [ # frame 1 [ # hbond 1 - , , , + , , , , , , , ], [ # hbond 2 - , , , + , , , , , , , ], @@ -84,20 +84,24 @@ .. Note:: - For historic reasons, the *donor index* and *acceptor index* are a 1-based - indices. To get the :attr:`Atom.index` (the 0-based index typically used in - MDAnalysis simply subtract 1. For instance, to find an atom in - :attr:`Universe.atoms` by *index* from the output one would use - ``u.atoms[index-1]``. + For historic reasons, the output contains 1-based indices (named *donor idx* + and *acceptor idx*) in addition to 0-based indices (named *donor index* and + *acceptor index*). To get the :attr:`Atom.index` (the 0-based index + typically used in MDAnalysis), use the *index* values (or subtract 1 from + *idx*). For instance, to find an atom in :attr:`Universe.atoms` by *index* + from the output one would use ``u.atoms[index]``. + .. deprecated:: 0.15.0 - This feature is being deprecated in favor of zero-based indices and is targeted - for removal in 0.16.0. + The 1-based indices "donor_idx" and "acceptor_idx" are being deprecated + in favor of zero-based indices "donor_index" and "acceptor_index" and are + targeted for removal in 0.17.0. Using the :meth:`HydrogenBondAnalysis.generate_table` method one can reformat the results as a flat "normalised" table that is easier to import into a -database for further processing. :meth:`HydrogenBondAnalysis.save_table` saves -the table to a pickled file. The table itself is a :class:`numpy.recarray`. +database or dataframe for further +processing. :meth:`HydrogenBondAnalysis.save_table` saves the table to a +pickled file. The table itself is a :class:`numpy.recarray`. Detection of hydrogen bonds @@ -106,12 +110,12 @@ Hydrogen bonds are recorded based on a geometric criterion: 1. The distance between acceptor and hydrogen is less than or equal to - *distance* (default is 3 Å). + `distance` (default is 3 Å). 2. The angle between donor-hydrogen-acceptor is greater than or equal to - *angle* (default is 120º). + `angle` (default is 120º). -The cut-off values *angle* and *distance* can be set as keywords to +The cut-off values `angle` and `distance` can be set as keywords to :class:`HydrogenBondAnalysis`. Donor and acceptor heavy atoms are detected from atom names. The current @@ -119,19 +123,17 @@ in Table `Default atom names for hydrogen bonding analysis`_. Hydrogen atoms bonded to a donor are searched with one of two algorithms, -selected with the *detect_hydrogens* keyword. - -*distance* +selected with the `detect_hydrogens` keyword. +"distance" Searches for all hydrogens (name "H*" or name "[123]H" or type "H") in the same residue as the donor atom within a cut-off distance of 1.2 Å. -*heuristic* - +"heuristic" Looks at the next three atoms in the list of atoms following the donor and selects any atom whose name matches (name "H*" or name "[123]H"). For -The *distance* search is more rigorous but slower and is set as the +The "distance" search is more rigorous but slower and is set as the default. Until release 0.7.6, only the heuristic search was implemented. .. versionchanged:: 0.7.6 @@ -185,16 +187,16 @@ donors/acceptors for residues such as histidine or cytosine. For more information about the naming convention in GLYCAM06 have a look at the -`Carbohydrate Naming Convention in Glycam`. +`Carbohydrate Naming Convention in Glycam`_. .. _`Carbohydrate Naming Convention in Glycam`: http://glycam.ccrc.uga.edu/documents/FutureNomenclature.htm The lists of donor and acceptor names can be extended by providing lists of -atom names in the *donors* and *acceptors* keywords to +atom names in the `donors` and `acceptors` keywords to :class:`HydrogenBondAnalysis`. If the lists are entirely inappropriate (e.g. when analysing simulations done with a force field that uses very -different atom names) then one should either use the value "other" for *forcefield* +different atom names) then one should either use the value "other" for `forcefield` to set no default values, or derive a new class and set the default list oneself:: class HydrogenBondAnalysis_OtherFF(HydrogenBondAnalysis): @@ -202,7 +204,7 @@ class HydrogenBondAnalysis_OtherFF(HydrogenBondAnalysis): DEFAULT_ACCEPTORS = {"OtherFF": tuple(set([...]))} Then simply use the new class instead of the parent class and call it with -*forcefield* = "OtherFF". Please also consider to contribute the list of heavy +`forcefield` = "OtherFF". Please also consider to contribute the list of heavy atom names to MDAnalysis. .. rubric:: References @@ -223,27 +225,55 @@ class HydrogenBondAnalysis_OtherFF(HydrogenBondAnalysis): import MDAnalysis.analysis.hbonds u = MDAnalysis.Universe('topology', 'trajectory') - h = MDAnalysis.analysis.hbonds.HydrogenBondAnalysis(u, 'protein', distance=3.0, angle=120.0) + h = MDAnalysis.analysis.hbonds.HydrogenBondAnalysis(u, 'protein', 'resname HOH', distance=3.0, angle=120.0) h.run() -The results are stored as the attribute -:attr:`HydrogenBondAnalysis.timeseries`; see :ref:`Analysis Output` -for the format and further options. - .. Note:: Due to the way :class:`HydrogenBondAnalysis` is implemented, it is - more efficient to have the second selection (*selection2*) be the + more efficient to have the second selection (`selection2`) be the *larger* group, e.g. the water when looking at water-protein H-bonds or the whole protein when looking at ligand-protein interactions. -.. Note:: - The topology supplied and the trajectory must reflect the same total number - of atoms. +The results are stored as the attribute +:attr:`HydrogenBondAnalysis.timeseries`; see :ref:`Analysis Output` for the +format and further options. + +A number of convenience functions are provided to process the +:attr:`~HydrogenBondAnalysis.timeseries` according to varying criteria: + +:meth:`~HydrogenBondAnalysis.count_by_time` + time series of the number of hydrogen bonds per time step +:meth:`~HydrogenBondAnalysis.count_by_type` + data structure with the frequency of each observed hydrogen bond +:meth:`~HydrogenBondAnalysis.timesteps_by_type` + data structure with a list of time steps for each observed hydrogen bond + +For further data analysis it is convenient to process the +:attr:`~HydrogenBondAnalysis.timeseries` data into a normalized table with the +:meth:`~HydrogenBondAnalysis.generate_table` method, which creates a new data +structure :attr:`HydrogenBondAnalysis.table` that contains one row for each +observation of a hydrogen bond:: -.. TODO: how to analyse the ouput and notes on selection updating + h.generate_table() + +This table can then be easily turned into, e.g., a `pandas.DataFrame`_, and +further analyzed:: + + import pandas as pd + df = pd.DataFrame.from_records(h.table) + +For example, plotting a histogram of the hydrogen bond angles and lengths is as +simple as :: + + df.hist(column=["angle", "distance"]) + +.. _pandas.DataFrame: http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.html + + +.. TODO: notes on selection updating Classes @@ -258,44 +288,6 @@ class HydrogenBondAnalysis_OtherFF(HydrogenBondAnalysis): :attr:`~HydrogenBondAnalysis.timeseries` to find the specific time point of a hydrogen bond existence, or see :attr:`~HydrogenBondAnalysis.table`. - .. attribute:: timeseries - - Results of the hydrogen bond analysis, stored for each frame. In - the following description, # indicates comments that are not - part of the output:: - - results = [ - [ # frame 1 - [ # hbond 1 - , , , - , , , - , - ], - [ # hbond 2 - , , , - , , , - , - ], - .... - ], - [ # frame 2 - [ ... ], [ ... ], ... - ], - ... - ] - - The time of each step is not stored with each hydrogen bond frame but in - :attr:`~HydrogenBondAnalysis.timesteps`. - - .. Note:: - - The *index* is a 1-based index. To get the :attr:`Atom.index` (the - 0-based index typically used in MDAnalysis simply subtract 1. For - instance, to find an atom in :attr:`Universe.atoms` by *index* one - would use ``u.atoms[index-1]``. - - - .. attribute:: table A normalised table of the data in @@ -317,32 +309,40 @@ class HydrogenBondAnalysis_OtherFF(HydrogenBondAnalysis): 11. "distance" 12. "angle" - It takes up more space than - :attr:`~HydrogenBondAnalysis.timeseries` but it is easier to - analyze and to import into databases (e.g. using recsql_). + It takes up more space than :attr:`~HydrogenBondAnalysis.timeseries` but + it is easier to analyze and to import into databases or dataframes. + + + .. rubric:: Example + + For example, to create a `pandas.DataFrame`_ from ``h.table``:: + + import pandas as pd + df = pd.DataFrame.from_records(h.table) .. Note:: - The *index* is a 1-based index. To get the :attr:`Atom.index` (the - 0-based index typically used in MDAnalysis simply subtract 1. For - instance, to find an atom in :attr:`Universe.atoms` by *index* one - would use ``u.atoms[idx_zero]``. The 1-based index is deprecated and - targeted for removal in 0.16.0 + The index variables *donor_idx* and *acceptor_idx* are 1-based + indices. To get the :attr:`Atom.index` (the 0-based index typically + used in MDAnalysis) simply subtract 1, or better, use the 0-based + variables named *donor_index* and *acceptor_index*. + For instance, to find an acceptor atom in :attr:`Universe.atoms` by + *index* one would use ``u.atoms[acceptor_index]``. + .. deprecated:: 0.15.0 + The 1-based donor and acceptor indices (*donor_idx* and + *acceptor_idx*) are deprecated in favor of 0-based indices. The + 0-based indices can be accessed by *donor_index* and *acceptor_index*; + removal of the 1-based indices is targeted for version 0.17.0 + .. automethod:: _get_bonded_hydrogens .. automethod:: _get_bonded_hydrogens_dist .. automethod:: _get_bonded_hydrogens_list - .. deprecated:: 0.15.0 - The donor and acceptor indices being 1-based is deprecated in favor of - a zero-based index. This can be accessed by "donor_index" or - "acceptor_index" removal of the 1-based indices is targeted - for version 0.16.0 - """ from __future__ import division, absolute_import import six @@ -354,7 +354,6 @@ class HydrogenBondAnalysis_OtherFF(HydrogenBondAnalysis): import logging from MDAnalysis import MissingDataWarning, NoDataError, SelectionError, SelectionWarning -from MDAnalysis.lib.util import parse_residue from MDAnalysis.lib.mdamath import norm, angle from MDAnalysis.lib.log import ProgressMeter, _set_verbose from MDAnalysis.lib.NeighborSearch import AtomNeighborSearch @@ -402,9 +401,9 @@ class HydrogenBondAnalysis(object): # table; set() takes care for removing duplicates. At the end the # DEFAULT_DONORS and DEFAULT_ACCEPTORS should simply be tuples. - # : default heavy atom names whose hydrogens are treated as *donors* - # : (see :ref:`Default atom names for hydrogen bonding analysis`) - #: Use the keyword *donors* to add a list of additional donor names. + #: default heavy atom names whose hydrogens are treated as *donors* + #: (see :ref:`Default atom names for hydrogen bonding analysis`); + #: use the keyword `donors` to add a list of additional donor names. DEFAULT_DONORS = { 'CHARMM27': tuple(set([ 'N', 'OH2', 'OW', 'NE', 'NH1', 'NH2', 'ND2', 'SG', 'NE2', 'ND1', 'NZ', 'OG', 'OG1', 'NE1', 'OH'])), @@ -412,8 +411,8 @@ class HydrogenBondAnalysis(object): 'other': tuple(set([]))} #: default atom names that are treated as hydrogen *acceptors* - #: (see :ref:`Default atom names for hydrogen bonding analysis`) - #: Use the keyword *acceptors* to add a list of additional acceptor names. + #: (see :ref:`Default atom names for hydrogen bonding analysis`); + #: use the keyword `acceptors` to add a list of additional acceptor names. DEFAULT_ACCEPTORS = { 'CHARMM27': tuple(set([ 'O', 'OH2', 'OW', 'OD1', 'OD2', 'SG', 'OE1', 'OE1', 'OE2', 'ND1', 'NE2', 'SD', 'OG', 'OG1', 'OH'])), @@ -439,35 +438,19 @@ def __init__(self, universe, selection1='protein', selection2='all', selection1_ The timeseries is accessible as the attribute :attr:`HydrogenBondAnalysis.timeseries`. Some initial checks are performed. If there are no atoms selected by - *selection1* or *selection2* or if no donor hydrogens or acceptor atoms + `selection1` or `selection2` or if no donor hydrogens or acceptor atoms are found then a :exc:`SelectionError` is raised for any selection that - does *not* update (*update_selection1* and *update_selection2* + does *not* update (`update_selection1` and `update_selection2` keywords). For selections that are set to update, only a warning is logged because it is assumed that the selection might contain atoms at a later frame (e.g. for distance based selections). If no hydrogen bonds are detected or if the initial check fails, look at the log output (enable with :func:`MDAnalysis.start_logging` and set - *verbose* = ``True``). It is likely that the default names for donors + `verbose` ``=True``). It is likely that the default names for donors and acceptors are not suitable (especially for non-standard - ligands). In this case, either change the *forcefield* or use - customized *donors* and/or *acceptors*. - - .. Note:: - - In order to speed up processing, atoms are filtered by a coarse - distance criterion before a detailed hydrogen bonding analysis is - performed (*filter_first* = ``True``). If one of your selections is - e.g. the solvent then *update_selection1* (or *update_selection2*) - must also be ``True`` so that the list of candidate atoms is updated - at each step: this is now the default. - - If your selections will essentially remain the same for all time - steps (i.e. residues are not moving farther than 3 x *distance*), for - instance, if no water or large conformational changes are involved - or if the optimization is disabled (*filter_first* = ``False``) then - you can improve performance by setting the *update_selection* - keywords to ``False``. + ligands). In this case, either change the `forcefield` or use + customized `donors` and/or `acceptors`. Parameters ---------- @@ -477,33 +460,35 @@ def __init__(self, universe, selection1='protein', selection2='all', selection1_ Selection string for first selection ['protein'] selection2 : str (optional) Selection string for second selection ['all'] - selection1_type : str (optional) + selection1_type : {"donor", "acceptor", "both"} (optional) Selection 1 can be 'donor', 'acceptor' or 'both'. Note that the - value for *selection1_type* automatically determines how - *selection2* handles donors and acceptors: If *selection1* contains - 'both' then *selection2* will also contain *both*. If *selection1* - is set to 'donor' then *selection2* is 'acceptor' (and vice versa). + value for `selection1_type` automatically determines how + `selection2` handles donors and acceptors: If `selection1` contains + 'both' then `selection2` will also contain 'both'. If `selection1` + is set to 'donor' then `selection2` is 'acceptor' (and vice versa). ['both']. update_selection1 : bool (optional) - Update selection 1 at each frame? [``False``] + Update selection 1 at each frame? Setting to ``False`` is recommended + for any static selection to increase performance. [``True``] update_selection2 : bool (optional) - Update selection 2 at each frame? [``False``] + Update selection 2 at each frame? Setting to ``False`` is recommended + for any static selection to increase performance. [``True``] filter_first : bool (optional) - Filter selection 2 first to only atoms 3*distance away [``True``] + Filter selection 2 first to only atoms 3 * `distance` away [``True``] distance : float (optional) Distance cutoff for hydrogen bonds; only interactions with a H-A distance - <= *distance* (and the appropriate D-H-A angle, see *angle*) are - recorded. (Note: *distance_type* can change this to the D-A distance.) [3.0] + <= `distance` (and the appropriate D-H-A angle, see `angle`) are + recorded. (Note: `distance_type` can change this to the D-A distance.) [3.0] angle : float (optional) Angle cutoff for hydrogen bonds; an ideal H-bond has an angle of 180º. A hydrogen bond is only recorded if the D-H-A angle is - >= *angle*. The default of 120º also finds fairly non-specific + >= `angle`. The default of 120º also finds fairly non-specific hydrogen interactions and a possibly better value is 150º. [120.0] forcefield : {"CHARMM27", "GLYCAM06", "other"} (optional) Name of the forcefield used. Switches between different :attr:`~HydrogenBondAnalysis.DEFAULT_DONORS` and :attr:`~HydrogenBondAnalysis.DEFAULT_ACCEPTORS` values. - Available values: "CHARMM27", "GLYCAM06", "other" ["CHARMM27"] + ["CHARMM27"] donors : sequence (optional) Extra H donor atom types (in addition to those in :attr:`~HydrogenBondAnalysis.DEFAULT_DONORS`), must be a sequence. @@ -512,18 +497,13 @@ def __init__(self, universe, selection1='protein', selection2='all', selection1_ :attr:`~HydrogenBondAnalysis.DEFAULT_ACCEPTORS`), must be a sequence. start : int (optional) starting frame-index for analysis, ``None`` is the first one, 0. - *start* and *stop* are 0-based frame indices and are used to slice + `start` and `stop` are 0-based frame indices and are used to slice the trajectory (if supported) [``None``] stop : int (optional) last trajectory frame for analysis, ``None`` is the last one [``None``] step : int (optional) - read every *step* between *start* and *stop*, ``None`` selects 1. - Note that not all trajectory reader from 1 [``None``] - debug : bool (optional) - If set to ``True`` enables per-frame debug logging. This is disabled - by default because it generates a very large amount of output in - the log file. (Note that a logger must have been started to see - the output, e.g. using :func:`MDAnalysis.start_logging`.) + read every `step` between `start` (included) and `stop` (excluded), + ``None`` selects 1. [``None``] detect_hydrogens : {"distance", "heuristic"} (optional) Determine the algorithm to find hydrogens connected to donor atoms. Can be "distance" (default; finds all hydrogens in the @@ -537,6 +517,14 @@ def __init__(self, universe, selection1='protein', selection2='all', selection1_ attoms ("heavy") or between donor hydrogen and acceptor heavy atom ("hydrogen"). If using "heavy" then one should set the *distance* cutoff to a higher value such as 3.5 Å. ["hydrogen"] + debug : bool (optional) + If set to ``True`` enables per-frame debug logging. This is disabled + by default because it generates a very large amount of output in + the log file. (Note that a logger must have been started to see + the output, e.g. using :func:`MDAnalysis.start_logging`.) + verbose : bool (optional) + Toggle progress output. (Can also be given as keyword argument to + :meth:`run`.) Raises ------ @@ -544,28 +532,44 @@ def __init__(self, universe, selection1='protein', selection2='all', selection1_ is raised for each static selection without the required donors and/or acceptors. + Notes + ----- + In order to speed up processing, atoms are filtered by a coarse + distance criterion before a detailed hydrogen bonding analysis is + performed (`filter_first` = ``True``). If one of your selections is + e.g. the solvent then `update_selection1` (or `update_selection2`) must + also be ``True`` so that the list of candidate atoms is updated at each + step: this is now the default. + + If your selections will essentially remain the same for all time steps + (i.e. residues are not moving farther than 3 x `distance`), for + instance, if neither water nor large conformational changes are + involved or if the optimization is disabled (`filter_first` = + ``False``) then you can improve performance by setting the + `update_selection1` and/or `update_selection2` keywords to ``False``. + .. versionchanged:: 0.7.6 - New *verbose* keyword (and per-frame debug logging disabled by + New `verbose` keyword (and per-frame debug logging disabled by default). - New *detect_hydrogens* keyword to switch between two different + New `detect_hydrogens` keyword to switch between two different algorithms to detect hydrogens bonded to donor. "distance" is a new, rigorous distance search within the residue of the donor atom, "heuristic" is the previous list scan (improved with an additional distance check). - New *forcefield* keyword to switch between different values of + New `forcefield` keyword to switch between different values of DEFAULT_DONORS/ACCEPTORS to accomodate different force fields. Also has an option "other" for no default values. .. versionchanged:: 0.8 - The new default for *update_selection1* and *update_selection2* is now + The new default for `update_selection1` and `update_selection2` is now ``True`` (see `Issue 138`_). Set to ``False`` if your selections only need to be determined once (will increase performance). .. versionchanged:: 0.9.0 - New keyword *distance_type* to select between calculation between + New keyword `distance_type` to select between calculation between heavy atoms or hydrogen-acceptor. It defaults to the previous behavior (i.e. "hydrogen"). @@ -573,17 +577,41 @@ def __init__(self, universe, selection1='protein', selection2='all', selection1_ Initial checks for selections that potentially raise :exc:`SelectionError`. .. deprecated:: 0.16 - The *verbose* keyword argument is replaced by *debug*. Note that the - *verbose* keyword argument is now comsistently used to toggle - progress meters throuthout the library. + The previous `verbose` keyword argument was replaced by + `debug`. Note that the `verbose` keyword argument is now + consistently used to toggle progress meters throughout the library. .. _`Issue 138`: https://github.com/MDAnalysis/mdanalysis/issues/138 + """ warnings.warn( "The donor and acceptor indices being 1-based is deprecated in favor" " of a zero-based index. These can be accessed by 'donor_index' or" " 'acceptor_index', removal of the 1-based indices is targeted for" - " version 0.16.0", category=DeprecationWarning) + " version 0.17.0", category=DeprecationWarning) + # REMOVAL (DEPRECATION) instructions for 0.17.0: "1-based indices" (aka "idx" vs 0-based "index") + # - remove the warning + # - replace 'deprecated 0.15.0' with 'versionchanged 0.17.0' (and adjust text) + # - update docs for "timeseries" by removing all "idx (1-based)" entries: + # one hydrogen bond should now look like + # , , , , + # , + # - update docs for "table": removed *_idx and renumber + # - in run(): removed the 1-based indices (h.index + 1, a.index + 1) from frame_results: + # for instance, should now read (two occurences!) + # frame_results.append( + # [h.index, a.index, + # (h.resname, h.resid, h.name), + # (a.resname, a.resid, a.name), + # dist, angle]) + # - _reformat_hb(): change indices hb[:4] --> hb[:2] + # - generate_table(), count_*(), timesteps_by_type(): + # remove any donor_idx, acceptor_idx variables and fields. + # - update tests (change indices in analysis/test_hbonds.py) + # - update any docs in this file that mention "1-based" + # + # I suggest to keep this removal a separate commit. [@orbeckst] + self._get_bonded_hydrogens_algorithms = { "distance": self._get_bonded_hydrogens_dist, # 0.7.6 default @@ -620,7 +648,7 @@ def __init__(self, universe, selection1='protein', selection2='all', selection1_ elif self.selection1_type not in ('both', 'donor', 'acceptor'): raise ValueError('HydrogenBondAnalysis: Invalid selection type {0!s}'.format(self.selection1_type)) - self.timeseries = None # final result + self._timeseries = None # final result accessed as self.timeseries self.timesteps = None # time for each frame self.table = None # placeholder for output table @@ -698,15 +726,25 @@ def _log_parameters(self): logger.info("HBond analysis: bonded hydrogen detection algorithm: %r", self.detect_hydrogens) def _get_bonded_hydrogens(self, atom, **kwargs): - """Find hydrogens bonded to *atom*. + """Find hydrogens bonded to `atom`. This method is typically not called by a user but it is documented to facilitate understanding of the internals of :class:`HydrogenBondAnalysis`. - :Returns: list of hydrogens (can be a - :class:`~MDAnalysis.core.groups.AtomGroup`) or empty list - ``[]`` if none were found. + Parameters + ---------- + atom : groups.Atom + heavy atom + **kwargs + passed through to the calculation method that was selected with + the `detect_hydrogens` kwarg of :class:`HydrogenBondAnalysis`. + + Returns + ------- + hydrogen_atoms : AtomGroup or [] + list of hydrogens (can be a :class:`~MDAnalysis.core.groups.AtomGroup`) + or empty list ``[]`` if none were found. See Also -------- @@ -715,30 +753,47 @@ def _get_bonded_hydrogens(self, atom, **kwargs): .. versionchanged:: 0.7.6 - Can switch algorithm by using the *detect_hydrogens* keyword to the + Can switch algorithm by using the `detect_hydrogens` keyword to the constructor. *kwargs* can be used to supply arguments for algorithm. + """ return self._get_bonded_hydrogens_algorithms[self.detect_hydrogens](atom, **kwargs) def _get_bonded_hydrogens_dist(self, atom): - """Find hydrogens bonded within *cutoff* to *atom*. + """Find hydrogens bonded within cutoff to `atom`. + + Hydrogens are detected by either name ("H*", "[123]H*") or type ("H"); + this is not fool-proof as the atom type is not always a character but + the name pattern should catch most typical occurrences. - * hydrogens are detected by either name ("H*", "[123]H*") or type - ("H"); this is not fool-proof as the atom type is not always a - character but the name pattern should catch most typical occurrences. + The distance from `atom` is calculated for all hydrogens in the residue + and only those within a cutoff are kept. The cutoff depends on the + heavy atom (more precisely, on its element, which is taken as the first + letter of its name ``atom.name[0]``) and is parameterized in + :attr:`HydrogenBondAnalysis.r_cov`. If no match is found then the + default of 1.5 Å is used. + + + Parameters + ---------- + atom : groups.Atom + heavy atom - * The distance from *atom* is calculated for all hydrogens in the - residue and only those within a cutoff are kept. The cutoff depends - on the heavy atom (more precisely, on its element, which is taken as - the first letter of its name ``atom.name[0]``) and is parameterized - in :attr:`HydrogenBondAnalysis.r_cov`. If no match is found then the - default of 1.5 Å is used. + Returns + ------- + hydrogen_atoms : AtomGroup or [] + list of hydrogens (can be a :class:`~MDAnalysis.core.groups.AtomGroup`) + or empty list ``[]`` if none were found. + Notes + ----- The performance of this implementation could be improved once the topology always contains bonded information; it currently uses the selection parser with an "around" selection. + .. versionadded:: 0.7.6 + """ try: return atom.residue.atoms.select_atoms( @@ -760,6 +815,20 @@ def _get_bonded_hydrogens_list(self, atom, **kwargs): depends on the heavy atom and is parameterized in :attr:`HydrogenBondAnalysis.r_cov`. + Parameters + ---------- + atom : groups.Atom + heavy atom + **kwargs + ignored + + Returns + ------- + hydrogen_atoms : AtomGroup or [] + list of hydrogens (can be a :class:`~MDAnalysis.core.groups.AtomGroup`) + or empty list ``[]`` if none were found. + + .. versionchanged:: 0.7.6 Added detection of ``[123]H`` and additional check that a @@ -851,36 +920,43 @@ def run(self, **kwargs): :attr:`HydrogenBondAnalysis.timeseries` (see there for output format). - The method accepts a number of keywords, amongst them *verbose* - (default ``True``), which toggles the porgress output (see - :class:`~MDAnalysis.lib.log.ProgressMeter`) and *debug* which can - be used to change the debug value provided to the class constructor. + Parameters + ---------- + verbose : bool (optional) + toggle progress meter output :class:`~MDAnalysis.lib.log.ProgressMeter` + [``True``] + debug : bool (optional) + enable detailed logging of debugging information; this can create + *very big* log files so it is disabled (``False``) by default; setting + `debug` toggles the debug status for :class:`HydrogenBondAnalysis`, + namely the value of :attr:`HydrogenBondAnalysis.debug`. - Note - ---- - Use :meth:`HydrogenBondAnalysis.generate_table` for processing the data into a different format. + Other Parameters + ---------------- + remove_duplicates : bool (optional) + duplicate hydrogen bonds are removed from output if set to the + default value ``True``; normally, this should not be changed. + + See Also + -------- + :meth:`HydrogenBondAnalysis.generate_table` : + processing the data into a different format. .. versionchanged:: 0.7.6 Results are not returned, only stored in :attr:`~HydrogenBondAnalysis.timeseries` and duplicate hydrogen bonds - are removed from output (can be suppressed with *remove_duplicates* = + are removed from output (can be suppressed with `remove_duplicates` = ``False``) .. versionchanged:: 0.11.0 - Accept *quiet* keyword. Analysis will now proceed through frames even if + Accept `quiet` keyword. Analysis will now proceed through frames even if no donors or acceptors were found in a particular frame. - .. deprecated:: 0.15.0 - The donor and acceptor indices being 1-based is deprecated in favor - of a zero-based index. This can be accessed by "donor_index" or - "acceptor_index" removal of the 1-based indices is targeted - for version 0.16.0 - .. deprecated:: 0.16 - The *quiet* keyword argument is deprecated in favor of the *verbose* - one. Previous use of *verbose* now corresponds to the new keyword - argument *debug*. + The `quiet` keyword argument is deprecated in favor of the `verbose` + one. Previous use of `verbose` now corresponds to the new keyword + argument `debug`. """ logger.info("HBond analysis: starting") @@ -898,7 +974,7 @@ def run(self, **kwargs): if not self.debug: logger.debug("HBond analysis: For full step-by-step debugging output use debug=True") - self.timeseries = [] + self._timeseries = [] self.timesteps = [] logger.info("checking trajectory...") # n_frames can take a while! @@ -917,16 +993,13 @@ def run(self, **kwargs): try: self.u.trajectory.time - def _get_timestep(): return self.u.trajectory.time - logger.debug("HBond analysis is recording time step") except NotImplementedError: # chained reader or xyz(?) cannot do time yet def _get_timestep(): return self.u.trajectory.frame - logger.warn("HBond analysis is recording frame number instead of time step") logger.info("Starting analysis (frame index start=%d stop=%d, step=%d)", @@ -936,7 +1009,7 @@ def _get_timestep(): for progress, ts in enumerate(self.u.trajectory[self.traj_slice]): # all bonds for this timestep frame_results = [] - # dict of tuples (atomid, atomid) for quick check if + # dict of tuples (atom.index, atom.index) for quick check if # we already have the bond (to avoid duplicates) already_found = {} @@ -964,15 +1037,14 @@ def _get_timestep(): dist = self.calc_eucl_distance(donor_atom, a) if angle >= self.angle and dist <= self.distance: self.logger_debug( - "S1-D: {0!s} <-> S2-A: {1!s} {2:f} A, {3:f} DEG".format(h.index + 1, a.index + 1, dist, angle)) - #self.logger_debug("S1-D: %r <-> S2-A: %r %f A, %f DEG" % (h, a, dist, angle)) + "S1-D: {0!s} <-> S2-A: {1!s} {2:f} A, {3:f} DEG".format(h.index, a.index, dist, angle)) frame_results.append( [h.index + 1, a.index + 1, h.index, a.index, - '{0!s}{1!s}:{2!s}'.format(h.resname, repr(h.resid), h.name), - '{0!s}{1!s}:{2!s}'.format(a.resname, repr(a.resid), a.name), + (h.resname, h.resid, h.name), + (a.resname, a.resid, a.name), dist, angle]) - already_found[(h.index + 1, a.index + 1)] = True + already_found[(h.index, a.index)] = True if self.selection1_type in ('acceptor', 'both') and self._s1_acceptors: self.logger_debug("Selection 1 Acceptors <-> Donors") ns_acceptors = AtomNeighborSearch(self._s1_acceptors) @@ -982,26 +1054,25 @@ def _get_timestep(): res = ns_acceptors.search(h, self.distance) for a in res: if remove_duplicates and ( - (h.index + 1, a.index + 1) in already_found - or (a.index + 1, h.index + 1) in already_found): + (h.index, a.index) in already_found or + (a.index, h.index) in already_found): continue angle = self.calc_angle(d, h, a) donor_atom = h if self.distance_type != 'heavy' else d dist = self.calc_eucl_distance(donor_atom, a) if angle >= self.angle and dist <= self.distance: self.logger_debug( - "S1-A: {0!s} <-> S2-D: {1!s} {2:f} A, {3:f} DEG".format(a.index + 1, h.index + 1, dist, angle)) - #self.logger_debug("S1-A: %r <-> S2-D: %r %f A, %f DEG" % (a, h, dist, angle)) + "S1-A: {0!s} <-> S2-D: {1!s} {2:f} A, {3:f} DEG".format(a.index, h.index, dist, angle)) frame_results.append( [h.index + 1, a.index + 1, h.index, a.index, - '{0!s}{1!s}:{2!s}'.format(h.resname, repr(h.resid), h.name), - '{0!s}{1!s}:{2!s}'.format(a.resname, repr(a.resid), a.name), + (h.resname, h.resid, h.name), + (a.resname, a.resid, a.name), dist, angle]) - self.timeseries.append(frame_results) + self._timeseries.append(frame_results) - logger.info("HBond analysis: complete; timeseries with %d hbonds in %s.timeseries", - self.count_by_time().count.sum(), self.__class__.__name__) + logger.info("HBond analysis: complete; timeseries %s.timeseries", + self.__class__.__name__) @staticmethod def calc_angle(d, h, a): @@ -1017,38 +1088,126 @@ def calc_eucl_distance(a1, a2): """Calculate the Euclidean distance between two atoms. """ return norm(a2.position - a1.position) + @property + def timeseries(self): + """Time series of hydrogen bonds. + + The results of the hydrogen bond analysis can be accessed as a `list` of `list` of `list`: + + 1. `timeseries[i]`: data for the i-th trajectory frame (at time + `timesteps[i]`, see :attr:`timesteps`) + 2. `timeseries[i][j]`: j-th hydrogen bond that was detected at the i-th + frame. + 3. ``donor_idx, acceptor_idx, donor_index, acceptor_index, + donor_name_str, acceptor_name_str, distance, angle = + timeseries[i][j]``: structure of one hydrogen bond data item + + + In the following description, ``#`` indicates comments that are not + part of the output:: + + results = [ + [ # frame 1 + [ # hbond 1 + , , , + , , , + , + ], + [ # hbond 2 + , , , + , , , + , + ], + .... + ], + [ # frame 2 + [ ... ], [ ... ], ... + ], + ... + ] + + The time of each step is not stored with each hydrogen bond frame but in + :attr:`~HydrogenBondAnalysis.timesteps`. + + + Note + ---- + The index variables named *donor_idx* and *acceptor_idx* are 1-based + indices, which were used historically. To get the :attr:`Atom.index` + (the 0-based index typically used in MDAnalysis) simply subtract 1, or + better, use the 0-based variables named *donor_index* and + *acceptor_index*. + + For instance, to find an acceptor atom in :attr:`Universe.atoms` by + *index* one would use ``u.atoms[acceptor_index]``. + + The :attr:`timeseries` is a managed attribute and it is generated + from the underlying data in :attr:`_timeseries` every time the + attribute is accessed. It is therefore costly to call and if + :attr:`timeseries` is needed repeatedly it is recommended that you + assign to a variable:: + + h = HydrogenBondAnalysis(u) + h.run() + timeseries = h.timeseries + + + See Also + -------- + :attr:`table` : structured array of the data + + + .. versionchanged:: 0.16.1 + :attr:`timeseries` has become a managed attribute and is generated from the stored + :attr:`_timeseries` when needed. :attr:`_timeseries` contains the donor atom and + acceptor atom specifiers as tuples `(resname, resid, atomid)` instead of strings. + + .. deprecated:: 0.15.0 + The 1-based indices "donor_idx" and "acceptor_idx" are being + deprecated in favor of the 0-based indices "donor_index" and + "acceptor_index" and they are targeted for removal in 0.17.0. + + """ + return [[self._reformat_hb(hb) for hb in hframe] for hframe in self._timeseries] + + @staticmethod + def _reformat_hb(hb, atomformat="{0[0]!s}{0[1]!s}:{0[2]!s}"): + """Convert 0.16.1 _timeseries hbond item to 0.16.0 hbond item. + + In 0.16.1, donor and acceptor are stored as a tuple(resname, + resid, atomid). In 0.16.0 and earlier they were stored as a string. + + .. deprecated:: 1.0 + This is a compatibility layer so that we can provide the same output + in timeseries as before. However, for 1.0 we should make timeseries + just return _timeseries, i.e., change the format of timeseries to + the un-ambiguous representation provided in _timeseries. + + """ + # change indices once we remove 1-based donor_idx and acceptor_idx: + # change indices hb[:4]... --> hb[:2], hb[2], hb[3], hb[4:] + return (hb[:4] + + [atomformat.format(hb[4]), atomformat.format(hb[5])] + + hb[6:]) + def generate_table(self): """Generate a normalised table of the results. The table is stored as a :class:`numpy.recarray` in the - attribute :attr:`~HydrogenBondAnalysis.table` and can be used - with e.g. `recsql`_. - - Columns: - 0. "time" - 1. "donor_idx" - 2. "acceptor_idx" - 3. "donor_index" - 4. "acceptor_index" - 5. "donor_resnm" - 6. "donor_resid" - 7. "donor_atom" - 8. "acceptor_resnm" - 9. "acceptor_resid" - 10. "acceptor_atom" - 11. "distance" - 12. "angle" + attribute :attr:`~HydrogenBondAnalysis.table`. + See Also + -------- + HydrogenBondAnalysis.table - .. _recsql: http://pypi.python.org/pypi/RecSQL """ - if self.timeseries is None: + if self._timeseries is None: msg = "No timeseries computed, do run() first." warnings.warn(msg, category=MissingDataWarning) logger.warn(msg) return - num_records = np.sum([len(hframe) for hframe in self.timeseries]) + num_records = np.sum([len(hframe) for hframe in self._timeseries]) # build empty output table dtype = [ ("time", float), ("donor_idx", int), ("acceptor_idx", int), @@ -1060,11 +1219,12 @@ def generate_table(self): # and speedups of ~x10 can be achieved by filling a standard array, like this: out = np.empty((num_records,), dtype=dtype) cursor = 0 # current row - for t, hframe in zip(self.timesteps, self.timeseries): + for t, hframe in zip(self.timesteps, self._timeseries): for (donor_idx, acceptor_idx, donor_index, acceptor_index, donor, - acceptor, distance, angle) in hframe: + acceptor, distance, angle) in hframe: + # donor|acceptor = (resname, resid, atomid) out[cursor] = (t, donor_idx, acceptor_idx, donor_index, acceptor_index) + \ - parse_residue(donor) + parse_residue(acceptor) + (distance, angle) + donor + acceptor + (distance, angle) cursor += 1 assert cursor == num_records, "Internal Error: Not all HB records stored" self.table = out.view(np.recarray) @@ -1073,6 +1233,16 @@ def generate_table(self): def save_table(self, filename="hbond_table.pickle"): """Saves :attr:`~HydrogenBondAnalysis.table` to a pickled file. + If :attr:`~HydrogenBondAnalysis.table` does not exist yet, + :meth:`generate_table` is called first. + + Parameters + ---------- + filename : str (optional) + path to the filename + + Example + ------- Load with :: import cPickle @@ -1083,47 +1253,68 @@ def save_table(self, filename="hbond_table.pickle"): self.generate_table() cPickle.dump(self.table, open(filename, 'wb'), protocol=cPickle.HIGHEST_PROTOCOL) + def _has_timeseries(self): + has_timeseries = self._timeseries is not None + if not has_timeseries: + msg = "No timeseries computed, do run() first." + warnings.warn(msg, category=MissingDataWarning) + logger.warn(msg) + return has_timeseries + def count_by_time(self): """Counts the number of hydrogen bonds per timestep. - :Returns: a class:`numpy.recarray` - """ + Processes :attr:`HydrogenBondAnalysis._timeseries` into the time series + ``N(t)`` where ``N`` is the total number of observed hydrogen bonds at + time ``t``. - if self.timeseries is None: - msg = "No timeseries computed, do run() first." - warnings.warn(msg, category=MissingDataWarning) - logger.warn(msg) + Returns + ------- + counts : numpy.recarray + The resulting array can be thought of as rows ``(time, N)`` where + ``time`` is the time (in ps) of the time step and ``N`` is the + total number of hydrogen bonds. + + """ + if not self._has_timeseries(): return out = np.empty((len(self.timesteps),), dtype=[('time', float), ('count', int)]) for cursor, time_count in enumerate(zip(self.timesteps, - (len(series) for series in self.timeseries))): + (len(series) for series in self._timeseries))): out[cursor] = time_count return out.view(np.recarray) def count_by_type(self): """Counts the frequency of hydrogen bonds of a specific type. - Processes :attr:`HydrogenBondAnalysis.timeseries` and returns - a :class:`numpy.recarray` containing atom indices, residue - names, residue numbers (for donors and acceptors) and the - fraction of the total time during which the hydrogen bond was - detected. + Processes :attr:`HydrogenBondAnalysis._timeseries` and returns a + :class:`numpy.recarray` containing atom indices, residue names, residue + numbers (for donors and acceptors) and the fraction of the total time + during which the hydrogen bond was detected. + + Returns + ------- + counts : numpy.recarray + Each row of the array contains data to define a unique hydrogen + bond together with the frequency (fraction of the total time) that + it has been observed. + - :Returns: a class:`numpy.recarray` + .. deprecated:: 0.15.0 + The 1-based indices "donor_idx" and "acceptor_idx" are being + deprecated in favor of zero-based indices and they are targeted for + removal in 0.17.0. """ - if self.timeseries is None: - msg = "No timeseries computed, do run() first." - warnings.warn(msg, category=MissingDataWarning) - logger.warn(msg) + if not self._has_timeseries(): return hbonds = defaultdict(int) - for hframe in self.timeseries: + for hframe in self._timeseries: for (donor_idx, acceptor_idx, donor_index, acceptor_index, donor, - acceptor, distance, angle) in hframe: - donor_resnm, donor_resid, donor_atom = parse_residue(donor) - acceptor_resnm, acceptor_resid, acceptor_atom = parse_residue(acceptor) + acceptor, distance, angle) in hframe: + donor_resnm, donor_resid, donor_atom = donor + acceptor_resnm, acceptor_resid, acceptor_atom = acceptor # generate unambigous key for current hbond \ # (the donor_heavy_atom placeholder '?' is added later) # idx_zero is redundant for an unambigous key, but included for @@ -1165,26 +1356,36 @@ def count_by_type(self): def timesteps_by_type(self): """Frames during which each hydrogen bond existed, sorted by hydrogen bond. - Processes :attr:`HydrogenBondAnalysis.timeseries` and returns - a :class:`numpy.recarray` containing atom indices, residue - names, residue numbers (for donors and acceptors) and a list - of timesteps at which the hydrogen bond was detected. + Processes :attr:`HydrogenBondAnalysis._timeseries` and returns a + :class:`numpy.recarray` containing atom indices, residue names, residue + numbers (for donors and acceptors) and each timestep at which the + hydrogen bond was detected. - :Returns: a class:`numpy.recarray` - """ + In principle, this is the same as :attr:`~HydrogenBondAnalysis.table` + but sorted by hydrogen bond and with additional data for the + *donor_heavy_atom* and angle and distance omitted. - if self.timeseries is None: - msg = "No timeseries computed, do run() first." - warnings.warn(msg, category=MissingDataWarning) - logger.warn(msg) + + Returns + ------- + data : numpy.recarray + + + .. deprecated:: 0.15.0 + The 1-based indices "donor_idx" and "acceptor_idx" are being + deprecated in favor of zero-based indices and they are targeted for + removal in 0.17.0. + + """ + if not self._has_timeseries(): return hbonds = defaultdict(list) - for (t, hframe) in zip(self.timesteps, self.timeseries): + for (t, hframe) in zip(self.timesteps, self._timeseries): for (donor_idx, acceptor_idx, donor_index, acceptor_index, donor, - acceptor, distance, angle) in hframe: - donor_resnm, donor_resid, donor_atom = parse_residue(donor) - acceptor_resnm, acceptor_resid, acceptor_atom = parse_residue(acceptor) + acceptor, distance, angle) in hframe: + donor_resnm, donor_resid, donor_atom = donor + acceptor_resnm, acceptor_resid, acceptor_atom = acceptor # generate unambigous key for current hbond # (the donor_heavy_atom placeholder '?' is added later) # idx_zero is redundant for key but added for consistency @@ -1235,7 +1436,7 @@ def _donor_lookup_table_byres(self): * selections have not changed (because we are simply looking at the last content of the donors and donor hydrogen lists) - Donors from *selection1* and *selection2* are merged. + Donors from `selection1` and `selection2` are merged. Output dictionary ``h2donor`` can be used as:: @@ -1273,22 +1474,12 @@ def _donor_lookup_table_byindex(self): * selections have not changed (because we are simply looking at the last content of the donors and donor hydrogen lists) - Donors from *selection1* and *selection2* are merged. + Donors from `selection1` and `selection2` are merged. Output dictionary ``h2donor`` can be used as:: heavy_atom_name = h2donor[index] - Note - ---- - *index* is the 0-based MDAnalysis index - (:attr:`MDAnalysis.core.groups.Atom.index`). The - tables generated by :class:`HydrogenBondAnalysis` contain - 1-based indices and zero-based indices. - - .. deprecated:: 0.15.0 - The 1-based indices are deprecated in favor of the zero-based indices - given by "idx_zero". """ s1d = self._s1_donors # list of donor Atom instances s1h = self._s1_donors_h # dict indexed by donor position in donor list, containg AtomGroups of H diff --git a/testsuite/MDAnalysisTests/analysis/test_hbonds.py b/testsuite/MDAnalysisTests/analysis/test_hbonds.py index aca854145f2..2e8d1082d46 100644 --- a/testsuite/MDAnalysisTests/analysis/test_hbonds.py +++ b/testsuite/MDAnalysisTests/analysis/test_hbonds.py @@ -26,14 +26,17 @@ from MDAnalysis import SelectionError, SelectionWarning from numpy.testing import (assert_, assert_equal, assert_array_equal, - assert_raises) + assert_almost_equal, assert_array_almost_equal, + assert_raises, dec) import numpy as np import itertools import warnings from six import StringIO -from MDAnalysisTests.datafiles import PDB_helix, GRO, XTC +from MDAnalysisTests import parser_not_found +from MDAnalysisTests.datafiles import PDB_helix, GRO, XTC, waterPSF, waterDCD + # For type guessing: from MDAnalysis.topology.core import guess_atom_type from MDAnalysis.core.topologyattrs import Atomtypes @@ -226,3 +229,92 @@ def run_HBA_dynamic_selections(*args): yield run_HBA_dynamic_selections, s1, s2, s1type finally: self._tearDown() + + +class TestHydrogenBondAnalysisTIP3P(object): + @dec.skipif(parser_not_found('DCD'), + 'DCD parser not available. Are you using python 3?') + def setUp(self): + self.universe = u = MDAnalysis.Universe(waterPSF, waterDCD) + self.kwargs = { + 'selection1': 'all', + 'selection2': 'all', + 'detect_hydrogens': "distance", + 'distance': 3.0, + 'angle': 120.0, + } + self.h = MDAnalysis.analysis.hbonds.HydrogenBondAnalysis(self.universe, **self.kwargs) + self.h.run(verbose=False) + self.h.generate_table() + self.normalized_timeseries = self._normalize_timeseries() + + # keys are the names in the h.table + self.reference = { + 'distance': {'mean': 2.0208776, 'std': 0.31740859}, + 'angle': {'mean': 155.13521, 'std': 12.98955}, + } + + # reference values for the table only + self.reference_table = { + 'donor_resnm': ["TIP3"] * len(self.normalized_timeseries), + 'acceptor_resnm': ["TIP3"] * len(self.normalized_timeseries), + } + + # index into timeseries (ADJUST ONCE donor_idx and acceptor_ndx are removed) + # with keys being field names in h.table + self.columns = { + 'time': 0, + 'donor_idx': 1, + 'acceptor_idx': 2, + 'donor_index': 3, + 'acceptor_index': 4, + 'distance': 7, + 'angle': 8, + } + + # hackish way to allow looping over self.reference and generating tests + self._functions = { + 'mean': np.mean, + 'std': np.std, + } + + def _normalize_timeseries(self): + # timeseries in normalized form: (t, d_indx1, a_indx1, d_index0, a_index0, donor, acceptor, dist, angle) + # array index: 0 1 2 3 4 5 6 7 8 + timeseries = [[t] + item + for t, hframe in zip(self.h.timesteps, self.h.timeseries) + for item in hframe] + return timeseries + + def test_timeseries(self): + h = self.h + assert_equal(len(h.timeseries), 10) + assert_equal(len(self.normalized_timeseries), 29) + + for observable in self.reference: + idx = self.columns[observable] + for quantity, reference in self.reference[observable].items(): + func = self._functions[quantity] + assert_almost_equal( + func([item[idx] for item in self.normalized_timeseries]), reference, + decimal=5, + err_msg="{quantity}({observable}) does not match reference".format(**vars())) + + def test_table_atoms(self): + h = self.h + table = h.table + + assert_equal(len(h.table), len(self.normalized_timeseries)) + + # test that timeseries and table agree on index data and + # hydrogen bond information at atom level + for name, idx in self.columns.items(): + assert_array_almost_equal(table.field(name), [data[idx] for data in self.normalized_timeseries], + err_msg="table[{name}] and timeseries[{idx} do not agree".format(**vars())) + + # test at residue level (issue #801 + # https://github.com/MDAnalysis/mdanalysis/issues/801) + for name, ref in self.reference_table.items(): + assert_array_equal(h.table.field(name), ref, + err_msg="resname for {0} do not match (Issue #801)") +