diff --git a/.travis.yml b/.travis.yml index d94dc955f0a..feb2d9f8775 100644 --- a/.travis.yml +++ b/.travis.yml @@ -38,6 +38,7 @@ env: - INSTALL_HOLE="true" - CYTHON_TRACE_NOGIL=1 - MPLBACKEND=agg + - MAMBA=true matrix: # Run a coverage test for most versions @@ -95,6 +96,29 @@ matrix: INSTALL_HOLE="false" CODECOV="true" SETUP_CMD="${PYTEST_FLAGS} --cov=MDAnalysis" + - os: linux + language: python + arch: arm64-graviton2 + python: + - "3.7" + dist: focal + virt: vm + group: edge + before_install: + - python -m pip install cython numpy scipy + - python -m pip install --no-build-isolation hypothesis matplotlib pytest pytest-cov pytest-xdist tqdm + install: + - cd package + - python setup.py install + - cd ../testsuite + - python setup.py install + - cd .. + script: + - cd testsuite + - python -m pytest ./MDAnalysisTests --disable-pytest-warnings -n 8 -rsx --cov=MDAnalysis + after_success: + - echo "Override this stage for ARM64" + allow_failures: - env: NUMPY_VERSION=dev EVENT_TYPE="cron" diff --git a/LICENSE b/LICENSE index 8437675a953..07f85a0db95 100644 --- a/LICENSE +++ b/LICENSE @@ -582,3 +582,45 @@ are distributed under the same license as the 'Atom' logo. ========================================================================== +pyh5md is released under the following 'BSD 3-clause' licence: + +----------------------------------------------------------------------------- +pyh5md + + Author of Original Implementation: + Pierre de Buyl + Institute for Theoretical Physics + KU Leuven University + Celestijnenlaan 200d - box 2415 + 3001 Leuven + + pierre.debuyl@kuleuven.be + + Copyright (c) 2012-2017, Pierre de Buyl and contributors + All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright notice, this list + of conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + * Neither the name of the nor the names of its contributors may be used to + endorse or promote products derived from this software without specific prior written + permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +========================================================================== \ No newline at end of file diff --git a/README.rst b/README.rst index e35df75bfa9..f952797e73d 100644 --- a/README.rst +++ b/README.rst @@ -58,7 +58,7 @@ of MDAnalysis. **Developers** may also want to refer to the `MDAnalysis API docs`_. -A growing number of **tutorials_** are available that explain how to +A growing number of `tutorials`_ are available that explain how to conduct RMSD calculations, structural alignment, distance and contact analysis, and many more. 
diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 0560cc04718..db6c20039ac 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -54,7 +54,7 @@ jobs: pytest-xdist scikit-learn scipy - h5py + h5py==2.10.0 tqdm displayName: 'Install dependencies' # TODO: recent rdkit is not on PyPI diff --git a/maintainer/deploy_docs_via_travis.sh b/maintainer/deploy_docs_via_travis.sh index af503e56727..52c613e6b89 100644 --- a/maintainer/deploy_docs_via_travis.sh +++ b/maintainer/deploy_docs_via_travis.sh @@ -53,8 +53,12 @@ git remote add upstream "https://${GH_TOKEN}@${GH_REPOSITORY}" git fetch --depth 50 upstream ${GH_DOC_BRANCH} gh-pages git reset upstream/gh-pages -# for dev, latest, home redirects -mkdir dev latest +# === REDIRECTS AND COPIES ==== +# home (index.html) redirects to stable/ +# latest (latest/index.html) redirects to most recent release +# dev/ is a copy of the dev docs with the highest number (so 2.0.0-dev instead of 1.0.1-dev) +# stable/ is a copy of the release docs with the highest number +mkdir latest export URL="https://docs.mdanalysis.org" python ${MAINTAIN_DIR}/update_json_stubs_sitemap.py touch . 
@@ -62,8 +66,13 @@ touch .nojekyll git add -A ${VERSION}/ git add .nojekyll versions.json -git add index.html dev latest -git add *.xml +git add index.html latest + +for dirname in dev stable documentation_pages ; do + if [ -d $dirname ]; then git add $dirname; fi +done + +git add *.xml *.html # check for anything to commit # https://stackoverflow.com/questions/3878624/how-do-i-programmatically-determine-if-there-are-uncommited-changes diff --git a/maintainer/update_json_stubs_sitemap.py b/maintainer/update_json_stubs_sitemap.py index a699a0729a5..a4cd9e537bd 100644 --- a/maintainer/update_json_stubs_sitemap.py +++ b/maintainer/update_json_stubs_sitemap.py @@ -13,6 +13,10 @@ import os import shutil import xml.etree.ElementTree as ET +import errno +import glob +import textwrap +import shutil try: from urllib.request import Request, urlopen @@ -22,6 +26,16 @@ URL = os.environ['URL'] VERSION = os.environ['VERSION'] +if "http" not in URL: + raise ValueError("URL should have the transfer protocol (HTTP/S). " + f"Given: $URL={URL}") + +try: + int(VERSION[0]) +except ValueError: + raise ValueError("$VERSION should start with a number. 
" + f"Given: $VERSION={VERSION}") from None + def get_web_file(filename, callback, default): url = os.path.join(URL, filename) @@ -40,14 +54,30 @@ def get_web_file(filename, callback, default): return callback(data) +def write_redirect(file, version='', outfile=None): + if outfile is None: + outfile = file + url = os.path.join(URL, version, file) + REDIRECT = textwrap.dedent(f""" + + + Redirecting to {url} + + + """) + with open(outfile, 'w') as f: + f.write(REDIRECT) + print(f"Wrote redirect from {url} to {outfile}") + + # ========= WRITE JSON ========= # Update $root/versions.json with links to the right version versions = get_web_file('versions.json', json.loads, []) existing = [item['version'] for item in versions] already_exists = VERSION in existing +latest = 'dev' not in VERSION if not already_exists: - latest = 'dev' not in VERSION if latest: for ver in versions: ver['latest'] = False @@ -59,52 +89,112 @@ def get_web_file(filename, callback, default): 'latest': latest }) -with open("versions.json", 'w') as f: - json.dump(versions, f, indent=2) - -# ========= WRITE HTML STUBS ========= -# Add HTML files to redirect: -# index.html -> latest release -# latest/index.html -> latest release -# dev/index.html -> dev docs - -REDIRECT = """ - - -Redirecting to {url} - - -""" - for ver in versions[::-1]: if ver['latest']: - latest_url = ver['url'] + latest_version = ver['version'] + break else: try: - latest_url = versions[-1]['url'] + latest_version = versions[-1]['version'] except IndexError: - latest_url = None + latest_version = None for ver in versions[::-1]: - if 'dev' in ver['version']: - dev_url = ver['url'] + if '-dev' in ver['version']: + dev_version = ver['version'] break else: try: - dev_url = versions[-1]['url'] + dev_version = versions[-1]['version'] except IndexError: - dev_url = None + dev_version = None -if latest_url: - with open('index.html', 'w') as f: - f.write(REDIRECT.format(url=latest_url)) +versions.sort(key=lambda x: x["version"]) - with 
open('latest/index.html', 'w') as f: - f.write(REDIRECT.format(url=latest_url)) +# ========= WRITE HTML STUBS AND COPY DOCS ========= +# Add HTML files to redirect: +# index.html -> stable/ docs +# latest/index.html -> latest release (not dev docs) +# stable/ : a copy of the release docs with the highest number (2.0.0 instead of 1.0.0) +# dev/ : a copy of the develop docs with the highest number (2.0.0-dev instead of 1.0.1-dev) +# sitemap.xml files are updated by replacing URL strings + + +def redirect_sitemap(old_version, new_version): + """Replace paths in copied sitemap.xml with new directory path + + Sitemaps can only contain URLs 'within' that directory structure. + For more, see https://www.sitemaps.org/faq.html#faq_sitemap_location + """ + file = f"{new_version}/sitemap.xml" + old = f"{URL}/{old_version}/" + new = f"{URL}/{new_version}/" + try: + with open(file, "r") as f: + contents = f.read() + except OSError: + raise ValueError(f"{file} not found") + redirected = contents.replace(old, new) + with open(file, "w") as f: + f.write(redirected) + print(f"Redirected URLs in {file} from {old} to {new}") + + +def add_or_update_version(version): + """Add or update the version path to versions.json""" + for ver in versions: + if ver["version"] == version: + ver["url"] = os.path.join(URL, version) + break + else: + versions.append({ + "version": version, + "display": version, + "url": os.path.join(URL, version), + "latest": False + }) + + +def copy_version(old_version, new_version): + """Copy docs from one directory to another with all bells and whistles""" + shutil.copytree(old_version, new_version) + print(f"Copied {old_version} to {new_version}") + redirect_sitemap(old_version, new_version) + add_or_update_version(new_version) + + +# Copy stable/ docs and write redirects from root level docs +if latest: + copy_version(VERSION, "stable") + html_files = glob.glob(f'stable/**/*.html', recursive=True) + for file in html_files: + # below should be true because we only 
globbed stable/* paths + assert file.startswith("stable/") + outfile = file[7:] # strip "stable/" + dirname = os.path.dirname(outfile) + if dirname and not os.path.exists(dirname): + try: + os.makedirs(dirname) + except OSError as exc: + if exc.errno != errno.EEXIST: + raise + + write_redirect(file, '', outfile) + +# Separate just in case we update versions.json or muck around manually +# with docs +if latest_version: + write_redirect('index.html', "stable") + write_redirect('index.html', latest_version, 'latest/index.html') + +# Copy dev/ docs +if dev_version and dev_version == VERSION: + copy_version(VERSION, "dev") + +# update versions.json online +with open("versions.json", 'w') as f: + json.dump(versions, f, indent=2) -if dev_url: - with open('dev/index.html', 'w') as f: - f.write(REDIRECT.format(url=dev_url)) # ========= WRITE SUPER SITEMAP.XML ========= # make one big sitemap.xml diff --git a/package/CHANGELOG b/package/CHANGELOG index 1df27904a8e..67b6f83f71e 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -15,11 +15,17 @@ The rules for this file: ------------------------------------------------------------------------------ ??/??/?? tylerjereddy, richardjgowers, IAlibay, hmacdope, orbeckst, cbouy, lilyminium, daveminh, jbarnoud, yuxuanzhuang, VOD555, ianmkenney, - calcraven,xiki-tempula, mieczyslaw + calcraven,xiki-tempula, mieczyslaw, manuel.nuno.melo * 2.0.0 Fixes + * AtomGroup.center now works correctly for compounds + unwrapping + (Issue #2984) + * Avoid using deprecated array indexing in topology attributes + (Issue #2990, PR #2991) + * ParmedParser no longer guesses elements if they are not recognised, instead + empty strings are assigned (Issue #2933) * Instead of using ATOM for both ATOM and HETATM, HETATM record type keyword is properly written out by PDB writer (Issue #1753, PR #2880). * Change referenced PDB standard version to 3.3 (Issue #2906). 
@@ -45,6 +51,7 @@ Fixes * In hydrogenbonds.hbond_analysis.HydrogenbondAnalysis an AttributeError was thrown when finding D-H pairs via the topology if `hydrogens` was an empty AtomGroup (Issue #2848) + * Fixed performance regression on select_atoms for string selections (#2751) * Fixed the DMSParser, allowing the creation of multiple segids sharing residues with identical resids (Issue #1387, PR #2872) * H5MD files are now picklable with H5PYPicklable (Issue #2890, PR #2894) @@ -52,9 +59,17 @@ Fixes * libmdaxdr and libdcd classes in their last frame can now be pickled (Issue #2878, PR #2911) * AtomGroup now are pickled/unpickled without looking for its anchored - Universe (PR #2893) + Universe (PR #2893) + * ensure that unistd.h is included on macOS when compiling ENCORE's spe.c + (Issue #2934) + * Fix tests for analysis.bat that could fail when run in parallel and that + would create a test artifact (Issue #2979, PR #2981) Enhancements + * The PDB writer gives more control over how to write the atom ids + (Issue #1072, PR #2886) + * Preliminary support for the Linux ARM64 platform with minimal + dependencies (PR #2956) * Refactored analysis.helanal into analysis.helix_analysis (Issue #2452, PR #2622) * Improved MSD code to accept `AtomGroup` and reject `UpdatingAtomGroup` @@ -79,8 +94,19 @@ Enhancements * Added new kwargs `select_remove` and `select_protein` to analysis.dihedrals.Janin analysis to give user more fine grained control over selections (PR #2899) + * Improved performance of select_atoms on strings (e.g. 
name, type, resname) and + 'protein' selection (#2751 PR #2755) + * Added an RDKit converter that works for any input with all hydrogens + explicit in the topology (Issue #2468, PR #2775) Changes + * Continuous integration uses mamba rather than conda to install the + dependencies (PR #2983) + * removes deprecated `as_Universe` function from MDAnalysis.core.universe, + as a result :class:`MDAnalysis.analysis.leaflet.LeafletFinder` now only + accepts Universes for its `universe` argument (Issue #2920) + * deprecated Python escape sequence usage has been fixed in our test suite, + and the test suite will now raise an error for this usage of `\` (PR #2885) * deprecated NumPy type aliases have been replaced with their actual types (see upstream NumPy PR 14882), and our pytest configuration will raise an error for any violation when testing with development NumPy builds @@ -103,6 +129,11 @@ Changes * Sets the minimal RDKit version for CI to 2020.03.1 (Issue #2827, PR #2831) * Removes deprecated waterdynamics.HydrogenBondLifetimes (PR #2842) * Make NeighborSearch return empty atomgroup, residue, segments instead of list (Issue #2892, PR #2907) + * Updated Universe creation function signatures to named arguments (Issue #2921) + * The transformation was changed from a function/closure to a class with + `__call__` (Issue #2860, PR #2859) + * deprecated ``analysis.helanal`` module has been removed in favour of + ``analysis.helix_analysis`` (PR #2929) Deprecations diff --git a/package/MDAnalysis/__init__.py b/package/MDAnalysis/__init__.py index 49307f2df84..3e67264d30c 100644 --- a/package/MDAnalysis/__init__.py +++ b/package/MDAnalysis/__init__.py @@ -150,7 +150,7 @@ """ -__all__ = ['Universe', 'as_Universe', 'Writer', 'fetch_mmtf', +__all__ = ['Universe', 'Writer', 'fetch_mmtf', 'AtomGroup', 'ResidueGroup', 'SegmentGroup'] import logging @@ -202,7 +202,7 @@ from . 
import units # Bring some often used objects into the current namespace -from .core.universe import Universe, as_Universe, Merge +from .core.universe import Universe, Merge from .core.groups import AtomGroup, ResidueGroup, SegmentGroup from .coordinates.core import writer as Writer diff --git a/package/MDAnalysis/analysis/__init__.py b/package/MDAnalysis/analysis/__init__.py index 5b1bb52b7e8..9a26a321c8e 100644 --- a/package/MDAnalysis/analysis/__init__.py +++ b/package/MDAnalysis/analysis/__init__.py @@ -56,7 +56,7 @@ Analyze hydrogen bonds, including both the per frame results as well as the dynamic properties and lifetimes. -:mod:`~MDAnalysis.analysis.helanal` +:mod:`~MDAnalysis.analysis.helix_analysis` Analysis of helices with the HELANAL_ algorithm. :mod:`~MDAnalysis.analysis.hole2` @@ -126,7 +126,7 @@ 'gnm', 'hbonds', 'hydrogenbonds', - 'helanal', + 'helix_analysis', 'hole2', 'leaflet', 'msd', diff --git a/package/MDAnalysis/analysis/align.py b/package/MDAnalysis/analysis/align.py index 0eab1ec8c76..490b3bb2e23 100644 --- a/package/MDAnalysis/analysis/align.py +++ b/package/MDAnalysis/analysis/align.py @@ -167,6 +167,7 @@ .. autofunction:: alignto .. autoclass:: AlignTraj +.. autoclass:: AverageStructure .. autofunction:: rotation_matrix @@ -796,13 +797,8 @@ def __init__(self, mobile, reference=None, select='all', filename=None, already a :class:`MemoryReader` then it is *always* treated as if ``in_memory`` had been set to ``True``. - .. versionadded:: 1.0.0 - - .. versionchanged:: 1.0.0 - Support for the ``start``, ``stop``, and ``step`` keywords has been - removed. These should instead be passed - to :meth:`AverageStructure.run`. + .. 
versionadded:: 1.0.0 """ if in_memory or isinstance(mobile.trajectory, MemoryReader): mobile.transfer_to_memory() diff --git a/package/MDAnalysis/analysis/data/filenames.py b/package/MDAnalysis/analysis/data/filenames.py index ffbee5a1a0b..8cde8217ee3 100644 --- a/package/MDAnalysis/analysis/data/filenames.py +++ b/package/MDAnalysis/analysis/data/filenames.py @@ -20,24 +20,26 @@ # MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. # J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 # -"""Analysis data files +r"""Analysis data files =================== :mod:`MDAnalysis.analysis.data` contains data files that are used as part of analysis. These can be experimental or theoretical data. Files are stored inside the package and made accessible via variables in :mod:`MDAnalysis.analysis.data.filenames`. These variables are documented -below, including references to the literature and where they are used -inside :mod:`MDAnalysis.analysis`. +below, including references to the literature and where they are used inside +:mod:`MDAnalysis.analysis`. Data files ---------- .. data:: Rama_ref - Reference Ramachandran histogram for :class:`MDAnalysis.analysis.dihedrals.Ramachandran`. - The data were calculated on a data set of 500 PDB structures taken from [Lovell2003]_. - This is a numpy array in the :math:`\phi` and :math:`psi` backbone dihedral angles. + Reference Ramachandran histogram for + :class:`MDAnalysis.analysis.dihedrals.Ramachandran`. The data were + calculated on a data set of 500 PDB structures taken from [Lovell2003]_. + This is a numpy array in the :math:`\phi` and :math:`\psi` backbone dihedral + angles. Load and plot it with :: @@ -55,8 +57,9 @@ .. data:: Janin_ref Reference Janin histogram for :class:`MDAnalysis.analysis.dihedrals.Janin`. - The data were calculated on a data set of 500 PDB structures taken from [Lovell2003]_. - This is a numpy array in the :math:`\chi_1` and :math:`chi_2` sidechain dihedral angles. 
+ The data were calculated on a data set of 500 PDB structures taken from + [Lovell2003]_. This is a numpy array in the :math:`\chi_1` and + :math:`\chi_2` sidechain dihedral angles. Load and plot it with :: @@ -73,7 +76,7 @@ """ __all__ = [ - "Rama_ref", "Janin_ref" # reference plots for Ramachandran and Janin classes + "Rama_ref", "Janin_ref", # reference plots for Ramachandran and Janin classes ] from pkg_resources import resource_filename diff --git a/package/MDAnalysis/analysis/dihedrals.py b/package/MDAnalysis/analysis/dihedrals.py index cbbf0c19bfa..8243e8c1a9b 100644 --- a/package/MDAnalysis/analysis/dihedrals.py +++ b/package/MDAnalysis/analysis/dihedrals.py @@ -349,6 +349,7 @@ class Ramachandran(AnalysisBase): .. versionchanged:: 1.0.0 added c_name, n_name, ca_name, and check_protein keyword arguments + """ def __init__(self, atomgroup, c_name='C', n_name='N', ca_name='CA', @@ -422,8 +423,8 @@ def _conclude(self): def plot(self, ax=None, ref=False, **kwargs): """Plots data into standard Ramachandran plot. - Each time step in :attr:`Ramachandran.angles` is plotted onto - the same graph. + Each time step in :attr:`Ramachandran.angles` is plotted onto the same + graph. Parameters ---------- @@ -468,8 +469,8 @@ def plot(self, ax=None, ref=False, **kwargs): class Janin(Ramachandran): - r"""Calculate :math:`\chi_1` and :math:`\chi_2` dihedral angles of - selected residues. + r"""Calculate :math:`\chi_1` and :math:`\chi_2` dihedral angles of selected + residues. :math:`\chi_1` and :math:`\chi_2` angles will be calculated for each residue corresponding to `atomgroup` for each time step in the trajectory. 
A diff --git a/package/MDAnalysis/analysis/encore/clustering/ClusterCollection.py b/package/MDAnalysis/analysis/encore/clustering/ClusterCollection.py index 2a9b28daf51..df8be6817f4 100644 --- a/package/MDAnalysis/analysis/encore/clustering/ClusterCollection.py +++ b/package/MDAnalysis/analysis/encore/clustering/ClusterCollection.py @@ -101,8 +101,9 @@ def __init__(self, elem_list=None, centroid=None, idn=None, metadata=None): if metadata: for name, data in metadata.items(): if len(data) != self.size: - raise TypeError("Size of metadata having label \"{0}\"\ -is not equal to the number of cluster elmements".format(name)) + raise TypeError('Size of metadata having label "{0}"' + 'is not equal to the number of cluster ' + 'elements'.format(name)) self.add_metadata(name, data) def __iter__(self): @@ -119,8 +120,8 @@ def __len__(self): def add_metadata(self, name, data): if len(data) != self.size: - raise TypeError("Size of metadata is not equal to the number of\ - cluster elmements") + raise TypeError("Size of metadata is not equal to the number of" + "cluster elmements") self.metadata[name] = np.array(data) def __repr__(self): @@ -192,8 +193,9 @@ def __init__(self, elements=None, metadata=None): centroids = np.unique(elements_array) for i in centroids: if elements[i] != i: - raise ValueError("element {0}, which is a centroid, doesn't \ -belong to its own cluster".format(elements[i])) + raise ValueError("element {0}, which is a centroid, doesn't " + "belong to its own cluster".format( + elements[i])) for c in centroids: this_metadata = {} this_array = np.where(elements_array == c) diff --git a/package/MDAnalysis/analysis/encore/confdistmatrix.py b/package/MDAnalysis/analysis/encore/confdistmatrix.py index e5389c67ebf..483964d5594 100644 --- a/package/MDAnalysis/analysis/encore/confdistmatrix.py +++ b/package/MDAnalysis/analysis/encore/confdistmatrix.py @@ -171,7 +171,7 @@ def conformational_distance_matrix(ensemble, # Initialize workers. 
Simple worker doesn't perform fitting, # fitter worker does. indices = trm_indices((0, 0), (framesn - 1, framesn - 1)) - Parallel(n_jobs=n_jobs, verbose=verbose, require='sharedmem', + Parallel(n_jobs=n_jobs, verbose=verbose, require='sharedmem', max_nbytes=max_nbytes)(delayed(conf_dist_function)( np.int64(element), rmsd_coordinates, @@ -247,8 +247,8 @@ def set_rmsd_matrix_elements(tasks, coords, rmsdmat, weights, fit_coords=None, rotated_i.astype(np.float64), translated_j.astype(np.float64), coords[j].shape[0], weights, sumweights) else: - raise TypeError("Both fit_coords and fit_weights must be specified \ - if one of them is given") + raise TypeError("Both fit_coords and fit_weights must be specified " + "if one of them is given") def get_distance_matrix(ensemble, diff --git a/package/MDAnalysis/analysis/encore/covariance.py b/package/MDAnalysis/analysis/encore/covariance.py index 07e65c292bd..352d934f9d5 100644 --- a/package/MDAnalysis/analysis/encore/covariance.py +++ b/package/MDAnalysis/analysis/encore/covariance.py @@ -119,7 +119,7 @@ def shrinkage_covariance_estimator( coordinates, xmkt = np.average(x, axis=1) # Call maximum likelihood estimator (note the additional column) - sample = ml_covariance_estimator(np.hstack([x, xmkt[:, np.newaxis]]), 0) \ + sample = ml_covariance_estimator(np.hstack([x, xmkt[:, np.newaxis]]), 0)\ * (t-1)/float(t) # Split covariance matrix into components diff --git a/package/MDAnalysis/analysis/encore/dimensionality_reduction/src/spe.c b/package/MDAnalysis/analysis/encore/dimensionality_reduction/src/spe.c index 7d129bcfd79..f3ae089a7c2 100644 --- a/package/MDAnalysis/analysis/encore/dimensionality_reduction/src/spe.c +++ b/package/MDAnalysis/analysis/encore/dimensionality_reduction/src/spe.c @@ -27,12 +27,10 @@ #include #include -#ifdef __unix__ - #include -#endif - #ifdef _WIN32 #include +#else /* unix-like __unix__ || __APPLE__ */ + #include #endif #define EPSILON 1e-8 diff --git 
a/package/MDAnalysis/analysis/encore/similarity.py b/package/MDAnalysis/analysis/encore/similarity.py index 6f8bb4bdd3e..c338ba28ee3 100644 --- a/package/MDAnalysis/analysis/encore/similarity.py +++ b/package/MDAnalysis/analysis/encore/similarity.py @@ -469,30 +469,33 @@ def gen_kde_pdfs(embedded_space, ensemble_assignment, nensembles, def dimred_ensemble_similarity(kde1, resamples1, kde2, resamples2, ln_P1_exp_P1=None, ln_P2_exp_P2=None, ln_P1P2_exp_P1=None, ln_P1P2_exp_P2=None): - """ - Calculate the Jensen-Shannon divergence according the the - Dimensionality reduction method. In this case, we have continuous - probability densities, this we need to integrate over the measurable - space. The aim is to first calculate the Kullback-Liebler divergence, which - is defined as: + r"""Calculate the Jensen-Shannon divergence according the Dimensionality + reduction method. + + In this case, we have continuous probability densities, this we need to + integrate over the measurable space. The aim is to first calculate the + Kullback-Liebler divergence, which is defined as: .. math:: - D_{KL}(P(x) || Q(x)) = \\int_{-\\infty}^{\\infty}P(x_i) ln(P(x_i)/Q(x_i)) = \\langle{}ln(P(x))\\rangle{}_P - \\langle{}ln(Q(x))\\rangle{}_P - where the :math:`\\langle{}.\\rangle{}_P` denotes an expectation calculated + D_{KL}(P(x) || Q(x)) = + \int_{-\infty}^{\infty}P(x_i) ln(P(x_i)/Q(x_i)) = + \langle{}ln(P(x))\rangle{}_P - \langle{}ln(Q(x))\rangle{}_P + + where the :math:`\langle{}.\rangle{}_P` denotes an expectation calculated under the distribution P. We can, thus, just estimate the expectation - values of the components to get an estimate of dKL. - Since the Jensen-Shannon distance is actually more complex, we need to - estimate four expectation values: + values of the components to get an estimate of dKL. Since the + Jensen-Shannon distance is actually more complex, we need to estimate four + expectation values: .. 
math:: - \\langle{}log(P(x))\\rangle{}_P + \langle{}log(P(x))\rangle{}_P - \\langle{}log(Q(x))\\rangle{}_Q + \langle{}log(Q(x))\rangle{}_Q - \\langle{}log(0.5*(P(x)+Q(x)))\\rangle{}_P + \langle{}log(0.5*(P(x)+Q(x)))\rangle{}_P - \\langle{}log(0.5*(P(x)+Q(x)))\\rangle{}_Q + \langle{}log(0.5*(P(x)+Q(x)))\rangle{}_Q Parameters ---------- @@ -512,22 +515,22 @@ def dimred_ensemble_similarity(kde1, resamples1, kde2, resamples2, calculate the expected values according to 'Q' as detailed before. ln_P1_exp_P1 : float or None - Use this value for :math:`\\langle{}log(P(x))\\rangle{}_P`; if None, + Use this value for :math:`\langle{}log(P(x))\rangle{}_P`; if ``None``, calculate it instead ln_P2_exp_P2 : float or None - Use this value for :math:`\\langle{}log(Q(x))\\rangle{}_Q`; if - None, calculate it instead + Use this value for :math:`\langle{}log(Q(x))\rangle{}_Q`; if + ``None``, calculate it instead ln_P1P2_exp_P1 : float or None Use this value for - :math:`\\langle{}log(0.5*(P(x)+Q(x)))\\rangle{}_P`; - if None, calculate it instead + :math:`\langle{}log(0.5*(P(x)+Q(x)))\rangle{}_P`; + if ``None``, calculate it instead ln_P1P2_exp_P2 : float or None Use this value for - :math:`\\langle{}log(0.5*(P(x)+Q(x)))\\rangle{}_Q`; - if None, calculate it instead + :math:`\langle{}log(0.5*(P(x)+Q(x)))\rangle{}_Q`; + if ``None``, calculate it instead Returns ------- @@ -720,11 +723,10 @@ def hes(ensembles, estimate_error=False, bootstrapping_samples=100, calc_diagonal=False): - """ + r"""Calculates the Harmonic Ensemble Similarity (HES) between ensembles. - Calculates the Harmonic Ensemble Similarity (HES) between ensembles using - the symmetrized version of Kullback-Leibler divergence as described - in [Tiberti2015]_. + The HES is calculated with the symmetrized version of Kullback-Leibler + divergence as described in [Tiberti2015]_. Parameters ---------- @@ -766,13 +768,13 @@ def hes(ensembles, symmetrized version of Kullback-Leibler divergence defined as: .. 
math:: - D_{KL}(P(x) || Q(x)) = \\int_{-\\infty}^{\\infty}P(x_i) - ln(P(x_i)/Q(x_i)) = \\langle{}ln(P(x))\\rangle{}_P - - \\langle{}ln(Q(x))\\rangle{}_P + D_{KL}(P(x) || Q(x)) = + \int_{-\infty}^{\infty}P(x_i) ln(P(x_i)/Q(x_i)) = + \langle{}ln(P(x))\rangle{}_P - \langle{}ln(Q(x))\rangle{}_P - where the :math:`\\langle{}.\\rangle{}_P` denotes an expectation - calculated under the distribution P. + where the :math:`\langle{}.\rangle{}_P` denotes an expectation + calculated under the distribution :math:`P`. For each ensemble, the mean conformation is estimated as the average over the ensemble, and the covariance matrix is calculated by default using a @@ -802,16 +804,17 @@ def hes(ensembles, [ 38279683.95892926 0. ]] - You can use the align=True option to align the ensembles first. This will + You can use the ``align=True`` option to align the ensembles first. This will align everything to the current timestep in the first ensemble. Note that - this changes the ens1 and ens2 objects: + this changes the ``ens1`` and ``ens2`` objects: >>> print(encore.hes([ens1, ens2], align=True)[0]) [[ 0. 6880.34140106] [ 6880.34140106 0. ]] Alternatively, for greater flexibility in how the alignment should be done - you can call use an AlignTraj object manually: + you can call use an :class:`~MDAnalysis.analysis.align.AlignTraj` object + manually: >>> from MDAnalysis.analysis import align >>> align.AlignTraj(ens1, ens1, select="name CA", in_memory=True).run() @@ -820,9 +823,10 @@ def hes(ensembles, [[ 0. 7032.19607004] [ 7032.19607004 0. ]] + .. 
versionchanged:: 1.0.0 - hes doesn't accept the *details* argument anymore, it always returns the - details of the calculation instead, in the form of a dictionary + ``hes`` doesn't accept the `details` argument anymore, it always returns + the details of the calculation instead, in the form of a dictionary """ diff --git a/package/MDAnalysis/analysis/encore/utils.py b/package/MDAnalysis/analysis/encore/utils.py index a41ebe30174..399eedd320c 100644 --- a/package/MDAnalysis/analysis/encore/utils.py +++ b/package/MDAnalysis/analysis/encore/utils.py @@ -190,7 +190,7 @@ def __str__(self): class ParallelCalculation(object): - """ + r""" Generic parallel calculation class. Can use arbitrary functions, arguments to functions and kwargs to functions. @@ -279,7 +279,7 @@ def worker(self, q, results): results.put((i, self.functions[i](*self.args[i], **self.kwargs[i]))) def run(self): - """ + r""" Run parallel calculation. Returns diff --git a/package/MDAnalysis/analysis/gnm.py b/package/MDAnalysis/analysis/gnm.py index aae83c9c0f2..65c5356f73c 100644 --- a/package/MDAnalysis/analysis/gnm.py +++ b/package/MDAnalysis/analysis/gnm.py @@ -352,7 +352,7 @@ def run(self, start=None, stop=None, step=None): class closeContactGNMAnalysis(GNMAnalysis): - """GNMAnalysis only using close contacts. + r"""GNMAnalysis only using close contacts. 
This is a version of the GNM where the Kirchoff matrix is constructed from the close contacts between individual atoms diff --git a/package/MDAnalysis/analysis/helanal.py b/package/MDAnalysis/analysis/helanal.py deleted file mode 100644 index 6a8f1b0861e..00000000000 --- a/package/MDAnalysis/analysis/helanal.py +++ /dev/null @@ -1,831 +0,0 @@ -# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- -# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 -# -# MDAnalysis --- https://www.mdanalysis.org -# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors -# (see the file AUTHORS for the full list of names) -# -# Released under the GNU Public Licence, v2 or any higher version -# -# Please cite your use of MDAnalysis in published work: -# -# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, -# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. -# MDAnalysis: A Python package for the rapid analysis of molecular dynamics -# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th -# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. -# doi: 10.25080/majora-629e541a-00e -# -# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. -# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. -# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 -# -# Python implementation of FORTRAN HELANAL -# [Bansal et al, J Biomol Struct Dyn. 17 (2000), 811] -# Copyright (c) 2009 Benjamin Hall -# Published under the GNU General Public License v2 or higher -# -# Copyright (c) 2011 Oliver Beckstein -# Integrated into MDAnalysis and NumPy-fied - - -""" -HELANAL (Deprecated) --- analysis of protein helices -==================================================== - -:Author: Benjamin Hall , Oliver Beckstein, Xavier Deupi -:Year: 2009, 2011, 2013 -:License: GNU General Public License v2 (or higher) - -.. 
note:: - - This module was deprecated in 1.0 and will be removed in 2.0. - Please use MDAnalysis.analysis.helix_analysis instead. - - -The :mod:`MDAnalysis.analysis.helanal` module is a Python implementation of the -HELANAL_ algorithm [Bansal2000]_ in `helanal.f`_, which is also available -through the `HELANAL webserver`_. - -Please cite the paper [Bansal2000]_ (and possibly [Kumar1996]_ and -[Kumar1998]_) in published work when using -:mod:`~MDAnalysis.analysis.helanal.helanal_trajectory` or -:mod:`~MDAnalysis.analysis.helanal.helanal_main`. - -HELANAL_ quantifies the geometry of helices in proteins on the basis of their Cα -atoms alone. It can extract the helices from the structure files and then -characterises the overall geometry of each helix as being linear, curved or -kinked, in terms of its local structural features, viz. local helical twist and -rise, virtual torsion angle, local helix origins and bending angles between -successive local helix axes. Even helices with large radius of curvature are -unambiguously identified as being linear or curved. The program can also be -used to differentiate a kinked helix and other motifs, such as helix-loop-helix -or a helix-turn-helix (with a single residue linker) with the help of local -bending angles. In addition to these, the program can also be used to -characterise the helix start and end as well as other types of secondary -structures. - -.. _HELANAL: http://www.ccrnp.ncifcrf.gov/users/kumarsan/HELANAL/helanal.html -.. _Helanal webserver: http://nucleix.mbu.iisc.ernet.in/helanal/helanal.shtml -.. `helanal.f`: http://www.webcitation.org/5y1RpVJtF -.. _`helanal.f`: http://www.ccrnp.ncifcrf.gov/users/kumarsan/HELANAL/helanal.f - -Background ----------- - -From the HELANAL_ home page: - -HELANAL_ can be used to characterize the geometry of helices with a minimum 9 -residues. 
The geometry of an alpha helix is characterized by computing local -helix axes and local helix origins for four contiguous C-Alpha atoms, using the -procedure of Sugeta and Miyazawa [Sugeta1967]_ and sliding this window over -the length of the helix in steps of one C-alpha atom. - -The angles between successive local helix axes can identify *local bends* or -*kinks* as well as occurrence of *smooth curvature* in the helix. A matrix, whose -elements *M(I, J)* are the *bending angles* between local helix axes *I* and *J*, -is obtained to get an idea about the overall geometry of the helix. - -*Unit twist* and *unit height* of the alpha helix are also computed to analyze the -uniformity of the helix. The *local helix origins* trace out the path described -by the helix in three dimensional space. The local helix origins are reoriented -in *X-Y* plane and the reoriented points are used to fit a circle as well as a -line, by least squares method. Based on the relative goodness of line and -circle fit to local helix origins, the helix is *classified as being linear or -curved*. A helix is classified as being *kinked*, if at least one local bending -angle in the middle of the helix is greater than 20 degrees. - - -References ----------- - -.. [Sugeta1967] Sugeta, H. and Miyazawa, T. 1967. General method for - calculating helical parameters of polymer chains from bond lengths, bond - angles and internal rotation angles. *Biopolymers* 5 673 - 679 - -.. [Kumar1996] Kumar, S. and Bansal, M. 1996. Structural and sequence - characteristics of long alpha-helices in globular proteins. *Biophysical - Journal* 71(3):1574-1586. - -.. [Kumar1998] Kumar, S. and Bansal, M. 1998. Geometrical and sequence - characteristics of alpha helices in globular proteins. *Biophysical Journal* - 75(4):1935-1944. - -.. [Bansal2000] Bansal M, Kumar S, Velavan R. 2000. - HELANAL - A program to characterise helix geometry in proteins. - *J Biomol Struct Dyn.* 17(5):811-819. - -Functions ---------- - -.. 
autofunction:: helanal_trajectory -.. autofunction:: helanal_main - -""" -import os - -import numpy as np - -import MDAnalysis -from MDAnalysis.lib.log import ProgressBar -from MDAnalysis.lib import mdamath - -import warnings -import logging -logger = logging.getLogger("MDAnalysis.analysis.helanal") - -warnings.warn("This module is deprecated as of MDAnalysis version 1.0. " - "It will be removed in MDAnalysis version 2.0." - "Please use MDAnalysis.analysis.helix_analysis instead.", - category=DeprecationWarning) - -def center(coordinates): - """Return the geometric center (centroid) of the coordinates. - - Coordinates must be "list of cartesians", i.e. a Nx3 array. - """ - return np.mean(coordinates, axis=0) - -def vecnorm(a): - """Return a/|a|""" - return a / mdamath.norm(a) - -def wrapangle(angle): - """Wrap angle (in radians) to be within -pi < angle =< pi""" - if angle > np.pi: - angle -= 2 * np.pi - elif angle <= -np.pi: - angle += 2 * np.pi - return angle - -def sample_sd(a, dummy): - return np.std(a, ddof=1) - -def mean_abs_dev(a, mean_a=None): - if mean_a is None: - mean_a = np.mean(a) - return np.mean(np.fabs(a - mean_a)) - -def helanal_trajectory(universe, select="name CA", - begin=None, finish=None, - matrix_filename="bending_matrix.dat", - origin_pdbfile="origin.pdb", - summary_filename="summary.txt", - screw_filename="screw.xvg", - tilt_filename="local_tilt.xvg", - fitted_tilt_filename="fit_tilt.xvg", - bend_filename="local_bend.xvg", - twist_filename="unit_twist.xvg", - prefix="helanal_", ref_axis=None, - verbose=False): - """Perform HELANAL helix analysis on all frames in `universe`. 
- - Parameters - ---------- - universe : Universe - select : str (optional) - selection string that selects Calpha atoms [``"name CA"``] - begin : float (optional) - start analysing for time (ps) >= *begin*; ``None`` starts from the - beginning [``None``] - finish : float (optional) - stop analysis for time (ps) =< *finish*; ``None`` goes to the - end of the trajectory [``None``] - matrix_filename : str (optional) - Output file- bending matrix [``"bending_matrix.dat"``] - origin_pdbfile : str (optional) - Output file- origin pdb file [``"origin.pdb"``] - summary_filename : str (optional) - Output file- all of the basic data [``"summary.txt"``] - screw_filename : str (optional) - Output file- local tilts of individual residues from 2 to n-1 - [``"screw.xvg"``] - tilt_filename : str (optional) - Output file- tilt of line of best fit applied to origin axes - [``"local_tilt.xvg"``] - bend_filename : str (optional) - Output file- local bend angles between successive local helix axes - [``"local_bend.xvg"``] - twist_filename : str (optional) - Output file- local unit twist between successive helix turns - [``"unit_twist.xvg"``] - prefix : str (optional) - Prefix to add to all output file names; set to ``None`` to disable - [``"helanal__"``] - ref_axis : array_like (optional) - Calculate tilt angle relative to the axis; if ``None`` then ``[0,0,1]`` - is chosen [``None``] - verbose : bool (optional) - Toggle diagnostic outputs. [``True``] - - Raises - ------ - ValueError - If the specified start (begin) time occurs after the end of the - trajectory object. - If the specified finish time precedes the specified start time or - current time stamp of trajectory object. - - Notes - ----- - Only a single helix is analyzed. Use the selection to specify the helix, - e.g. with "name CA and resid 1:20" or use start=1, stop=20. - - - .. 
versionchanged:: 0.13.0 - New `quiet` keyword to silence frame progress output and most of the - output that used to be printed to stdout is now logged to the logger - *MDAnalysis.analysis.helanal* (at logelevel *INFO*). - - .. versionchanged:: 0.16.0 - Removed the `start` and `end` keywords for selecting residues because this can - be accomplished more transparently with `select`. The first and last resid - are directly obtained from the selection. - - .. deprecated:: 0.16.0 - The `quiet` keyword argument is deprecated in favor of the new - `verbose` one. - - .. versionchanged:: 0.20.0 - ProgressMeter now iterates over the number of frames analysed. - - .. versionchanged:: 1.0.0 - Changed `selection` keyword to `select` - """ - - warnings.warn("This function is deprecated as of MDAnalysis version 1.0. " - "It will be removed in MDAnalysis version 2.0. Please use " - "MDAnalysis.analysis.helix_analysis.HELANAL instead.", - category=DeprecationWarning) - - if ref_axis is None: - ref_axis = np.array([0., 0., 1.]) - else: - # enable MDA API so that one can use a tuple of atoms or AtomGroup with - # two atoms - ref_axis = np.asarray(ref_axis) - - ca = universe.select_atoms(select) - start, end = ca.resids[[0, -1]] - trajectory = universe.trajectory - - # Validate user supplied begin / end times - traj_end_time = trajectory.ts.time + trajectory.totaltime - - if begin is not None: - if traj_end_time < begin: - # Begin occurs after the end of the trajectory, throw error - msg = ("The input begin time ({0} ps) occurs after the end " - "of the trajectory ({1} ps)".format(begin, traj_end_time)) - raise ValueError(msg) - elif trajectory.ts.time > begin: - # Begin occurs before trajectory start, warn and reset - msg = ("The input begin time ({0} ps) precedes the starting " - "trajectory time --- Setting starting frame to 0".format( - begin)) - warnings.warn(msg) - logger.warning(msg) - start_frame = None - else: - start_frame = int(np.ceil((begin - trajectory.ts.time) - / 
trajectory.ts.dt)) - else: - start_frame = None - - if finish is not None: - if (begin is not None) and (begin > finish): - # finish occurs before begin time - msg = ("The input finish time ({0} ps) precedes the input begin " - "time ({1} ps)".format(finish, begin)) - raise ValueError(msg) - elif trajectory.ts.time > finish: - # you'd be starting with a finish time(in ps) that has already - # passed or is not available - msg = ("The input finish time ({0} ps) precedes the current " - "trajectory time ({1} ps)".format(finish, trajectory.time)) - raise ValueError(msg) - elif traj_end_time < finish: - # finish time occurs after the end of trajectory, warn - msg = ("The input finish time ({0} ps) occurs after the end of " - "the trajectory ({1} ps). Finish time will be set to the " - "end of the trajectory".format(finish, traj_end_time)) - warnings.warn(msg) - logger.warning(msg) - end_frame = None - else: - # To replicate the original behaviour of break when - # trajectory.time > finish, we add 1 here. 
- end_frame = int(np.floor((finish - trajectory.ts.time) - // trajectory.ts.dt) + 1) - else: - end_frame = None - - start_frame, end_frame, frame_step = trajectory.check_slice_indices( - start_frame, end_frame, 1) - n_frames = len(range(start_frame, end_frame, frame_step)) - - if start is not None and end is not None: - logger.info("Analysing from residue %d to %d", start, end) - elif start is not None and end is None: - logger.info("Analysing from residue %d to the C termini", start) - elif start is None and end is not None: - logger.info("Analysing from the N termini to %d", end) - logger.info("Analysing %d/%d residues", ca.n_atoms, universe.atoms.n_residues) - - if prefix is not None: - prefix = str(prefix) - matrix_filename = prefix + matrix_filename - origin_pdbfile = prefix + origin_pdbfile - summary_filename = prefix + summary_filename - screw_filename = prefix + screw_filename - tilt_filename = prefix + tilt_filename - fitted_tilt_filename = prefix + fitted_tilt_filename - bend_filename = prefix + bend_filename - twist_filename = prefix + twist_filename - backup_file(matrix_filename) - backup_file(origin_pdbfile) - backup_file(summary_filename) - backup_file(screw_filename) - backup_file(tilt_filename) - backup_file(fitted_tilt_filename) - backup_file(bend_filename) - backup_file(twist_filename) - - global_height = [] - global_twist = [] - global_rnou = [] - global_bending = [] - global_bending_matrix = [] - global_tilt = [] - global_fitted_tilts = [] - global_screw = [] - - for ts in ProgressBar(trajectory[start_frame:end_frame:frame_step], - verbose=verbose, desc="Helix analysis"): - - frame = ts.frame - ca_positions = ca.positions - twist, bending_angles, height, rnou, origins, local_helix_axes, local_screw_angles = \ - main_loop(ca_positions, ref_axis=ref_axis) - - origin_pdb(origins, origin_pdbfile) - - #calculate local bending matrix( it is looking at all i, j combinations) - if len(global_bending_matrix) == 0: - global_bending_matrix = [[[] for item 
in local_helix_axes] for item in local_helix_axes] - - for i in range(len(local_helix_axes)): - for j in range(i + 1, len(local_helix_axes)): - angle = np.rad2deg(np.arccos(np.dot(local_helix_axes[i], local_helix_axes[j]))) - global_bending_matrix[i][j].append(angle) - #global_bending_matrix[j][i].append(angle) - #global_bending_matrix[i][i].append(0.) - - fit_vector, fit_tilt = vector_of_best_fit(origins) - global_height += height - global_twist += twist - global_rnou += rnou - #global_screw.append(local_screw_angles) - global_fitted_tilts.append(np.rad2deg(fit_tilt)) - - #print out rotations across the helix to a file - with open(twist_filename, "a") as twist_output: - print(frame, end='', file=twist_output) - for loc_twist in twist: - print(loc_twist, end='', file=twist_output) - print("", file=twist_output) - - with open(bend_filename, "a") as bend_output: - print(frame, end='', file=bend_output) - for loc_bend in bending_angles: - print(loc_bend, end='', file=bend_output) - print("", file=bend_output) - - with open(screw_filename, "a") as rot_output: - print(frame, end='', file=rot_output) - for rotation in local_screw_angles: - print(rotation, end='', file=rot_output) - print("", file=rot_output) - - with open(tilt_filename, "a") as tilt_output: - print(frame, end='', file=tilt_output) - for tilt in local_helix_axes: - print(np.rad2deg(mdamath.angle(tilt, ref_axis)), - end='', file=tilt_output) - print("", file=tilt_output) - - with open(fitted_tilt_filename, "a") as tilt_output: - print(frame, np.rad2deg(fit_tilt), file=tilt_output) - - if len(global_bending) == 0: - global_bending = [[] for item in bending_angles] - #global_tilt = [ [] for item in local_helix_axes ] - for store, tmp in zip(global_bending, bending_angles): - store.append(tmp) - #for store,tmp in zip(global_tilt,local_helix_axes): store.append(mdamath.angle(tmp,ref_axis)) - - twist_mean, twist_sd, twist_abdev = stats(global_twist) - height_mean, height_sd, height_abdev = stats(global_height) 
- rnou_mean, rnou_sd, rnou_abdev = stats(global_rnou) - ftilt_mean, ftilt_sd, ftilt_abdev = stats(global_fitted_tilts) - - bending_statistics = [stats(item) for item in global_bending] - #tilt_statistics = [ stats(item) for item in global_tilt] - - bending_statistics_matrix = [[stats(col) for col in row] for row in global_bending_matrix] - with open(matrix_filename, 'w') as mat_output: - print("Mean", file=mat_output) - for row in bending_statistics_matrix: - for col in row: - formatted_angle = "{0:6.1f}".format(col[0]) - print(formatted_angle, end='', file=mat_output) - print('', file=mat_output) - - print('\nSD', file=mat_output) - for row in bending_statistics_matrix: - for col in row: - formatted_angle = "{0:6.1f}".format(col[1]) - print(formatted_angle, end='', file=mat_output) - print('', file=mat_output) - - print("\nABDEV", file=mat_output) - for row in bending_statistics_matrix: - for col in row: - formatted_angle = "{0:6.1f}".format(col[2]) - print(formatted_angle, end='', file=mat_output) - print('', file=mat_output) - - logger.info("Height: %g SD: %g ABDEV: %g (Angstroem)", height_mean, height_sd, height_abdev) - logger.info("Twist: %g SD: %g ABDEV: %g", twist_mean, twist_sd, twist_abdev) - logger.info("Residues/turn: %g SD: %g ABDEV: %g", rnou_mean, rnou_sd, rnou_abdev) - logger.info("Fitted tilt: %g SD: %g ABDEV: %g", ftilt_mean, ftilt_sd, ftilt_abdev) - logger.info("Local bending angles:") - residue_statistics = list(zip(*bending_statistics)) - measure_names = ["Mean ", "SD ", "ABDEV"] - if start is None: - output = " ".join(["{0:8d}".format(item) - for item in range(4, len(residue_statistics[0]) + 4)]) - else: - output = " ".join(["{0:8d}".format(item) - for item in range(start + 3, len(residue_statistics[0]) + start + 3)]) - logger.info("ResID %s", output) - for measure, name in zip(residue_statistics, measure_names): - output = str(name) + " " - output += " ".join(["{0:8.1f}".format(residue) for residue in measure]) - logger.info(output) - - with 
open(summary_filename, 'w') as summary_output: - print("Height:", height_mean, "SD", height_sd, "ABDEV", height_abdev, '(nm)', file=summary_output) - print("Twist:", twist_mean, "SD", twist_sd, "ABDEV", twist_abdev, - file=summary_output) - print("Residues/turn:", rnou_mean, "SD", rnou_sd, "ABDEV", rnou_abdev, - file=summary_output) - print("Local bending angles:", file=summary_output) - residue_statistics = list(zip(*bending_statistics)) - measure_names = ["Mean ", "SD ", "ABDEV"] - print("ResID", end='', file=summary_output) - if start is None: - for item in range(4, len(residue_statistics[0]) + 4): - output = "{0:8d}".format(item) - print(output, end='', file=summary_output) - else: - for item in range(start + 3, len(residue_statistics[0]) + start + 3): - output = "{0:8d}".format(item) - print(output, end='', file=summary_output) - print('', file=summary_output) - - for measure, name in zip(residue_statistics, measure_names): - print(name, end='', file=summary_output) - for residue in measure: - output = "{0:8.1f}".format(residue) - print(output, end='', file=summary_output) - print('', file=summary_output) - - -def tilt_correct(number): - """Changes an angle (in degrees) so that it is between 0º and 90º""" - if number < 90.: - return number - else: - return 180. - number - - -def backup_file(filename): - if os.path.exists(filename): - target_name = "#" + filename - failure = True - if not os.path.exists(target_name): - os.rename(filename, target_name) - failure = False - else: - for i in range(20): - alt_target_name = target_name + "." + str(i) - if os.path.exists(alt_target_name): - continue - else: - os.rename(filename, alt_target_name) - failure = False - break - if failure: - raise IOError("Too many backups. 
Clean up and try again") - - -def stats(some_list): - if len(some_list) == 0: - return [0, 0, 0] - list_mean = np.mean(some_list) - list_sd = sample_sd(some_list, list_mean) - list_abdev = mean_abs_dev(some_list, list_mean) - return [list_mean, list_sd, list_abdev] - - -def helanal_main(pdbfile, select="name CA", ref_axis=None): - """Simple HELANAL_ run on a single frame PDB/GRO. - - Computed data are returned as a dict and also logged at level INFO to the - logger *MDAnalysis.analysis.helanal*. A simple way to enable a logger is to - use :func:`~MDAnalysis.lib.log.start_logging`. - - Parameters - ---------- - pdbfile : str - filename of the single-frame input file - select : str (optional) - selection string, default is "name CA" to select all C-alpha atoms. - ref_axis : array_like (optional) - Calculate tilt angle relative to the axis; if ``None`` then ``[0,0,1]`` - is chosen [``None``] - - Returns - ------- - result : dict - The `result` contains keys - * Height: mean, stdev, abs dev - * Twist: mean, stdev, abs dev - * Residues/turn: mean, stdev, abs dev - * Local bending angles: array for computed angles (per residue) - * Unit twist angles: array for computed angles (per residue) - * Best fit tilt - * Rotation angles: local screw angles (per residue) - - - Notes - ----- - Only a single helix is analyzed. Use the selection to specify the - helix, e.g. with "name CA and resid 1:20". - - - Example - ------- - Analyze helix 8 in AdK (PDB 4AKE); the standard logger is started and - writes output to the file ``MDAnalysis.log``:: - - MDAnalysis.start_logging() - data = MDAnalysis.analysis.helanal_main("4ake_A.pdb", select="name CA and resnum 161-187") - - - .. versionchanged:: 0.13.0 - All output is returned as a dict and logged to the logger - *MDAnalysis.analysis.helanal* instead of being printed to stdout. - - .. 
versionchanged:: 0.16.0 - Removed the `start` and `end` keywords for selecting residues because this can - be accomplished more transparently with `select`. - - .. versionchanged:: 1.0.0 - Changed `selection` keyword to `select` - - """ - - warnings.warn("This function is deprecated as of MDAnalysis version 1.0. " - "It will be removed in MDAnalysis version 2.0. Please use " - "MDAnalysis.analysis.helix_analysis.helix_analysis on " - "a Universe instead.", - category=DeprecationWarning) - - universe = MDAnalysis.Universe(pdbfile) - ca = universe.select_atoms(select) - - logger.info("Analysing %d/%d residues", ca.n_atoms, universe.atoms.n_residues) - - twist, bending_angles, height, rnou, origins, local_helix_axes, local_screw_angles = \ - main_loop(ca.positions, ref_axis=ref_axis) - - #TESTED- origins are correct - #print current_origin - #print origins - - max_angle = np.max(bending_angles) - mean_angle = np.mean(bending_angles) - #sd calculated using n-1 to replicate original fortran- assumes a limited sample so uses the sample standard - # deviation - sd_angle = sample_sd(bending_angles, mean_angle) - mean_absolute_deviation_angle = mean_abs_dev(bending_angles, mean_angle) - #TESTED- stats correct - #print max_angle, mean_angle, sd_angle, mean_absolute_deviation_angle - - #calculate local bending matrix(now it is looking at all i, j combinations) - # (not used for helanal_main()) -# for i in local_helix_axes: -# for j in local_helix_axes: -# if (i == j).all(): -# angle = 0. -# else: -# angle = np.rad2deg(np.arccos(np.dot(i, j))) -# #string_angle = "%6.0f\t" % angle -# #print string_angle, -# #print '' -# #TESTED- local bending matrix! 
- - #Average helical parameters - mean_twist = np.mean(twist) - sd_twist = sample_sd(twist, mean_twist) - abdev_twist = mean_abs_dev(twist, mean_twist) - #TESTED-average twists - #print mean_twist, sd_twist, abdev_twist - mean_rnou = np.mean(rnou) - sd_rnou = sample_sd(rnou, mean_rnou) - abdev_rnou = mean_abs_dev(rnou, mean_rnou) - #TESTED-average residues per turn - #print mean_rnou, sd_rnou, abdev_rnou - mean_height = np.mean(height) - sd_height = sample_sd(height, mean_height) - abdev_height = mean_abs_dev(height, mean_height) - #TESTED- average rises - - #calculate best fit vector and tilt of said vector - fit_vector, fit_tilt = vector_of_best_fit(origins) - - data = { - 'Height': np.array([mean_height, sd_height, abdev_height]), - 'Twist': np.array([mean_twist, sd_twist, abdev_twist]), - 'Residues/turn': np.array([mean_rnou, sd_rnou, abdev_rnou]), - 'Local bending angles': np.asarray(bending_angles), - 'Unit twist angles': np.asarray(twist), - 'Best fit tilt': fit_tilt, - 'Rotation Angles': np.asarray(local_screw_angles), - } - - logger.info("Height: %g SD: %g ABDEV: %g (Angstroem)", mean_height, sd_height, abdev_height) - logger.info("Twist: %g SD: %g ABDEV: %g", mean_twist, sd_twist, abdev_twist) - logger.info("Residues/turn: %g SD: %g ABDEV: %g", mean_rnou, sd_rnou, abdev_rnou) - - output = " ".join(["{0:8.1f}\t".format(angle) for angle in bending_angles]) - logger.info("Local bending angles: %s", output) - - output = " ".join(["{0:8.1f}\t".format(twist_ang) for twist_ang in twist]) - logger.info("Unit twist angles: %s", output) - - logger.info("Best fit tilt: %g", fit_tilt) - - output = " ".join(["{0:.1f}".format(item) for item in local_screw_angles]) - logger.info("Rotation Angles from 1 to n-1 (local screw angles): %s", output) - - return data - -def origin_pdb(origins, pdbfile): - """Write origins to PDB (multi-frame). - - This PDB can be loaded together with the trajectory into, e.g. VMD_, to - view the helix axis together with all the atoms. 
- """ - with open(pdbfile, 'a') as output: - i = 1 - for xyz in origins: - tmp = "ATOM {0:3d} CA ALA {1:3d} {2:8.3f}{3:8.3f}{4:8.3f} 1.00 0.00".format(i, i, xyz[0], xyz[1], xyz[2]) - print(tmp, file=output) - i += 1 - print("TER\nENDMDL", file=output) - - -def main_loop(positions, ref_axis=None): - # rewrite in cython? - - if ref_axis is None: - ref_axis = np.array([0., 0., 1.]) - else: - ref_axis = np.asarray(ref_axis) - twist = [] - rnou = [] - height = [] - origins = [[0., 0., 0.] for item in positions[:-2]] - local_helix_axes = [] - location_rotation_vectors = [] - for i in range(len(positions) - 3): - vec12 = positions[i + 1] - positions[i] - vec23 = positions[i + 2] - positions[i + 1] - vec34 = positions[i + 3] - positions[i + 2] - - dv13 = vec12 - vec23 - dv24 = vec23 - vec34 - - #direction of the local helix axis - current_uloc = vecnorm(np.cross(dv13, dv24)) - local_helix_axes.append(current_uloc) - - #TESTED- Axes correct - #print current_uloc - - dmag = mdamath.norm(dv13) - emag = mdamath.norm(dv24) - - costheta = np.dot(dv13, dv24) / (dmag * emag) - #rnou is the number of residues per turn - current_twist = np.arccos(costheta) - twist.append(np.rad2deg(current_twist)) - rnou.append(2 * np.pi / current_twist) - #radius of local helix cylinder radmag - - costheta1 = 1.0 - costheta - radmag = (dmag * emag) ** 0.5 / (2 * costheta1) - - #Height of local helix cylinder - current_height = np.dot(vec23, current_uloc) - height.append(current_height) - #TESTED- Twists etc correct - #print current_twist*180/np.pi, 2*np.pi/current_twist, height - - dv13 = vecnorm(dv13) - dv24 = vecnorm(dv24) - - #record local rotation - location_rotation_vectors.append(dv13) - - rad = [radmag * item for item in dv13] - current_origin = [(item[0] - item[1]) for item in zip(positions[i + 1], rad)] - origins[i] = current_origin - - #TESTED- origins are correct - #print current_origin - - rad = [radmag * item for item in dv24] - current_origin = [(item[0] - item[1]) for item in 
zip(positions[i + 2], rad)] - origins[i + 1] = current_origin - #Record final rotation vector - location_rotation_vectors.append(dv24) - - #local bending angles (eg i > i+3, i+3 > i+6) - - bending_angles = [0 for item in range(len(local_helix_axes) - 3)] - for axis in range(len(local_helix_axes) - 3): - angle = np.arccos(np.dot(local_helix_axes[axis], local_helix_axes[axis + 3])) - bending_angles[axis] = np.rad2deg(angle) - #TESTED- angles are correct - #print np.rad2deg(angle) - - local_screw_angles = [] - #Calculate rotation angles for (+1) to (n-1) - fit_vector, fit_tilt = vector_of_best_fit(origins) - for item in location_rotation_vectors: - local_screw_tmp = np.rad2deg(rotation_angle(fit_vector, ref_axis, item)) - #print local_screw_tmp - local_screw_angles.append(local_screw_tmp) - - return twist, bending_angles, height, rnou, origins, local_helix_axes, local_screw_angles - - -def rotation_angle(helix_vector, axis_vector, rotation_vector): - reference_vector = np.cross(np.cross(helix_vector, axis_vector), helix_vector) - second_reference_vector = np.cross(axis_vector, helix_vector) - screw_angle = mdamath.angle(reference_vector, rotation_vector) - alt_screw_angle = mdamath.angle(second_reference_vector, rotation_vector) - updown = np.cross(reference_vector, rotation_vector) - - if not (np.pi < screw_angle < 3 * np.pi / 4): - if screw_angle < np.pi / 4 and alt_screw_angle < np.pi / 2: - screw_angle = np.pi / 2 - alt_screw_angle - elif screw_angle < np.pi / 4 and alt_screw_angle > np.pi / 2: - screw_angle = alt_screw_angle - np.pi / 2 - elif screw_angle > 3 * np.pi / 4 and alt_screw_angle < np.pi / 2: - screw_angle = np.pi / 2 + alt_screw_angle - elif screw_angle > 3 * np.pi / 4 and alt_screw_angle > np.pi / 2: - screw_angle = 3 * np.pi / 2 - alt_screw_angle - else: - logger.debug("Big Screw Up: screw_angle=%g degrees", np.rad2deg(screw_angle)) - - if mdamath.norm(updown) == 0: - logger.warning("PROBLEM (vector is at 0 or 180)") - - helix_dot_rehelix = 
mdamath.angle(updown, helix_vector) - - #if ( helix_dot_rehelix < np.pi/2 and helix_dot_rehelix >= 0 )or helix_dot_rehelix <-np.pi/2: - if (-np.pi / 2 < helix_dot_rehelix < np.pi / 2) or (helix_dot_rehelix > 3 * np.pi / 2): - screw_angle = -screw_angle - - return screw_angle - - -def vector_of_best_fit(origins): - origins = np.asarray(origins) - centroids = center(origins) - M = origins - centroids - A = np.dot(M.transpose(), M) - u, s, vh = np.linalg.linalg.svd(A) - vector = vh[0] - #Correct vector to face towards first residues - rough_helix = origins[0] - centroids - agreement = mdamath.angle(rough_helix, vector) - if not (-np.pi / 2 < agreement < np.pi / 2): - vector *= -1 - best_fit_tilt = mdamath.angle(vector, [0, 0, 1]) - return vector, best_fit_tilt diff --git a/package/MDAnalysis/analysis/helix_analysis.py b/package/MDAnalysis/analysis/helix_analysis.py index ce3b40ce6a0..197ef6d048a 100644 --- a/package/MDAnalysis/analysis/helix_analysis.py +++ b/package/MDAnalysis/analysis/helix_analysis.py @@ -42,6 +42,15 @@ .. _HELANAL: https://pubmed.ncbi.nlm.nih.gov/10798526/ +.. [Sugeta1967] Sugeta, H. and Miyazawa, T. 1967. General method for + calculating helical parameters of polymer chains from bond lengths, bond + angles and internal rotation angles. *Biopolymers* 5 673 - 679 + +.. [Bansal2000] Bansal M, Kumar S, Velavan R. 2000. + HELANAL - A program to characterise helix geometry in proteins. + *J Biomol Struct Dyn.* 17(5):811-819. + + Example use ----------- diff --git a/package/MDAnalysis/analysis/hole2/hole.py b/package/MDAnalysis/analysis/hole2/hole.py index b8532ebdd41..d3f0faf9edc 100644 --- a/package/MDAnalysis/analysis/hole2/hole.py +++ b/package/MDAnalysis/analysis/hole2/hole.py @@ -195,12 +195,12 @@ def hole(pdbfile, dcd_iniskip=0, dcd_step=1, keep_files=True): - """Run :program:`hole` on a single frame or a DCD trajectory. + r"""Run :program:`hole` on a single frame or a DCD trajectory. :program:`hole` is part of the HOLE_ suite of programs. 
It is used to analyze channels and cavities in proteins, especially ion channels. - Only a subset of all `HOLE control parameters `_ + Only a subset of all `HOLE control parameters `_ is supported and can be set with keyword arguments. Parameters @@ -238,7 +238,7 @@ def hole(pdbfile, path to the file specifying van der Waals radii for each atom. If set to ``None``, then a set of default radii, :data:`SIMPLE2_RAD`, is used (an extension of ``simple.rad`` from - the HOLE distribution). + the HOLE distribution). executable: str, optional Path to the :program:`hole` executable. (e.g. ``~/hole2/exe/hole``). If @@ -291,13 +291,13 @@ def hole(pdbfile, (1,0,0), Y axis (0,1,0) or Z axis (0,0,1). If the structure is not aligned on one of these axis the results will clearly be approximate. If a guess is used then results should be carefully - checked. + checked. random_seed : int, optional integer number to start the random number generator. By default, :program:`hole` will use the time of the day. For reproducible runs (e.g., for testing) set ``random_seed`` - to an integer. + to an integer. ignore_residues : array_like, optional sequence of three-letter residues that are not taken into account during the calculation; wildcards are *not* @@ -326,7 +326,7 @@ def hole(pdbfile, initially. dcd_step : int, optional step size for going through the trajectory (skips ``dcd_step-1`` - frames). + frames). keep_files : bool, optional Whether to keep the HOLE output files and possible temporary symlinks after running the function. Default: ``True`` @@ -407,14 +407,13 @@ def hole(pdbfile, class HoleAnalysis(AnalysisBase): - - """ + r""" Run :program:`hole` on a trajectory. :program:`hole` is part of the HOLE_ suite of programs. It is used to analyze channels and cavities in proteins, especially ion channels. - Only a subset of all `HOLE control parameters `_ + Only a subset of all `HOLE control parameters `_ is supported and can be set with keyword arguments. 
This class creates temporary PDB files for each frame and runs HOLE on @@ -477,7 +476,7 @@ class HoleAnalysis(AnalysisBase): (1,0,0), Y axis (0,1,0) or Z axis (0,0,1). If the structure is not aligned on one of these axis the results will clearly be approximate. If a guess is used then results should be carefully - checked. + checked. sample : float, optional distance of sample points in Å. Specifies the distance between the planes used in the HOLE @@ -485,13 +484,13 @@ class HoleAnalysis(AnalysisBase): purposes. However, if you wish to visualize a very tight constriction then specify a smaller value. This value determines how many points in the pore profile are - calculated. + calculated. end_radius : float, optional Radius in Å, which is considered to be the end of the pore. This keyword can be used to specify the radius above which the program regards a result as indicating that the end of the pore has been reached. This may need to be increased for large channels, - or reduced for small channels. + or reduced for small channels. output_level : int, optional Determines the output of output in the ``outfile``. For automated processing, this must be < 3. @@ -508,10 +507,10 @@ class HoleAnalysis(AnalysisBase): supported. Note that all residues must have 3 letters. Pad with space on the right-hand side if necessary. prefix: str, optional - Prefix for HOLE output files. + Prefix for HOLE output files. write_input_files: bool, optional Whether to write out the input HOLE text as files. - Files are called `hole.inp`. + Files are called `hole.inp`. Returns @@ -783,7 +782,7 @@ def create_vmd_surface(self, filename='hole.vmd', dot_density=15, source hole.vmd The level of detail is determined by ``dot_density``. - The surface will be colored by ``no_water_color``, ``one_water_color``, and + The surface will be colored by ``no_water_color``, ``one_water_color``, and ``double_water_color``. 
You can change these in the Tk Console:: @@ -805,14 +804,14 @@ def create_vmd_surface(self, filename='hole.vmd', dot_density=15, (few dots per sphere) and 35 (many dots per sphere). no_water_color: str, optional - Color of the surface where the pore radius is too tight for a + Color of the surface where the pore radius is too tight for a water molecule. one_water_color: str, optional Color of the surface where the pore can fit one water molecule. double_water_color: str, optional - Color of the surface where the radius is at least double the + Color of the surface where the radius is at least double the minimum radius for one water molecule. @@ -859,7 +858,7 @@ def create_vmd_surface(self, filename='hole.vmd', dot_density=15, frames.append('set triangles({i}) '.format(i=i) + tri) trinorms = '\n'.join(frames) - vmd_1 = vmd_script_array.format(no_water_color=no_water_color, + vmd_1 = vmd_script_array.format(no_water_color=no_water_color, one_water_color=one_water_color, double_water_color=double_water_color) vmd_text = vmd_1 + trinorms + vmd_script_function @@ -868,7 +867,7 @@ def create_vmd_surface(self, filename='hole.vmd', dot_density=15, f.write(vmd_text) return filename - + def min_radius(self): """Return the minimum radius over all profiles as a function of q""" if not self.profiles: @@ -948,7 +947,7 @@ def plot(self, frames=None, linestyle='-', y_shift=0.0, label=True, ax=None, legend_loc='best', **kwargs): - """Plot HOLE profiles :math:`R(\zeta)` in a 1D graph. + r"""Plot HOLE profiles :math:`R(\zeta)` in a 1D graph. Lines are colored according to the specified ``color`` or drawn from the color map ``cmap``. One line is @@ -1016,7 +1015,7 @@ def plot3D(self, frames=None, color=None, cmap='viridis', linestyle='-', ax=None, r_max=None, ylabel='Frames', **kwargs): - """Stacked 3D graph of profiles :math:`R(\zeta)`. + r"""Stacked 3D graph of profiles :math:`R(\zeta)`. Lines are colored according to the specified ``color`` or drawn from the color map ``cmap``. 
One line is @@ -1122,7 +1121,7 @@ def over_order_parameters(self, order_parameters, frames=None): 'or a filename with array data ' 'that can be read by np.loadtxt') raise ValueError(msg.format(order_parameters)) - + order_parameters = np.asarray(order_parameters) @@ -1298,13 +1297,13 @@ def bin_radii(self, frames=None, bins=100, range=None): def histogram_radii(self, aggregator=np.mean, frames=None, bins=100, range=None): """Histograms the pore radii into bins by reaction coordinate, - aggregate the radii with an `aggregator` function, and returns the + aggregate the radii with an `aggregator` function, and returns the aggregated radii and bin edges. Parameters ---------- aggregator: callable, optional - this function must take an iterable of floats and return a + this function must take an iterable of floats and return a single value. Default: np.mean frames: int or iterable of ints, optional @@ -1418,7 +1417,7 @@ def plot3D_order_parameters(self, order_parameters, r_max=None, ylabel=r'Order parameter', **kwargs): - """Plot HOLE radii over order parameters as a 3D graph. + r"""Plot HOLE radii over order parameters as a 3D graph. Lines are colored according to the specified ``color`` or drawn from the color map ``cmap``. One line is diff --git a/package/MDAnalysis/analysis/hole2/templates.py b/package/MDAnalysis/analysis/hole2/templates.py index 855104aaddb..cc4182f7b11 100644 --- a/package/MDAnalysis/analysis/hole2/templates.py +++ b/package/MDAnalysis/analysis/hole2/templates.py @@ -45,7 +45,7 @@ #: *Simple* - Only use one value for each element C O H etc. #: Added radii for K+, NA+, CL- (Pauling hydration radius from Hille 2002). #: The data file can be written with the convenience function :func:`write_simplerad2`. -SIMPLE2_RAD = """ +SIMPLE2_RAD = r""" remark: Time-stamp: <2005-11-21 13:57:55 oliver> [OB] remark: van der Waals radii: AMBER united atom remark: from Weiner et al. 
(1984), JACS, vol 106 pp765-768 @@ -111,7 +111,7 @@ array set triangles {{}} """ -vmd_script_function = """ +vmd_script_function = r""" global vmd_frame; trace add variable vmd_frame([molinfo top]) write drawFrame @@ -134,6 +134,6 @@ } } -drawFrame 0 0 0 +drawFrame 0 0 0 """ diff --git a/package/MDAnalysis/analysis/hole2/utils.py b/package/MDAnalysis/analysis/hole2/utils.py index 317060ce6d2..a13aeab42c9 100644 --- a/package/MDAnalysis/analysis/hole2/utils.py +++ b/package/MDAnalysis/analysis/hole2/utils.py @@ -22,8 +22,9 @@ # -"""HOLE Analysis --- :mod:`MDAnalysis.analysis.hole2.helper` -===================================================================================== +""" +HOLE Analysis --- :mod:`MDAnalysis.analysis.hole2.helper` +========================================================= :Author: Lily Wang :Year: 2020 @@ -130,10 +131,10 @@ def check_and_fix_long_filename(filename, tmpdir=os.path.curdir, fd, newname = tempfile.mkstemp(suffix=ext, dir=dirname) os.close(fd) os.unlink(newname) - + if len(newname) > max_length: newname = os.path.relpath(newname) - + if len(newname) <= max_length: os.symlink(filename, newname) msg = 'Using symlink: {} -> {}' @@ -173,13 +174,13 @@ def set_up_hole_input(pdbfile, are read. infile_text: str, optional - HOLE input text or template. If set to ``None``, the function will + HOLE input text or template. If set to ``None``, the function will create the input text from the other parameters. Default: ``None`` infile: str, optional - File to write the HOLE input text for later inspection. If set to - ``None``, the input text is not written out. + File to write the HOLE input text for later inspection. If set to + ``None``, the input text is not written out. Default: ``None`` sphpdb_file : str, optional @@ -194,7 +195,7 @@ def set_up_hole_input(pdbfile, distance of particular atoms from the sphere-centre line. 
.sph files can be used to produce molecular graphical output from a hole run, by using the - :program:`sph_process` program to read the .sph file. + :program:`sph_process` program to read the .sph file. Default: 'hole.sph' vdwradii_file: str, optional @@ -204,7 +205,7 @@ def set_up_hole_input(pdbfile, the HOLE distribution). Default: ``None`` tmpdir: str, optional - The temporary directory that files can be symlinked to, to shorten + The temporary directory that files can be symlinked to, to shorten the path name. HOLE can only read filenames up to a certain length. Default: current working directory @@ -215,13 +216,13 @@ def set_up_hole_input(pdbfile, purposes. However, if you wish to visualize a very tight constriction then specify a smaller value. This value determines how many points in the pore profile are - calculated. Default: 0.2 + calculated. Default: 0.2 end_radius : float, optional Radius in Å, which is considered to be the end of the pore. This keyword can be used to specify the radius above which the program regards a result as indicating that the end of the pore - has been reached. This may need to be increased for large channels, + has been reached. This may need to be increased for large channels, or reduced for small channels. Default: 22.0 cpoint : array_like, 'center_of_geometry' or None, optional @@ -249,7 +250,7 @@ def set_up_hole_input(pdbfile, cvect : array_like, optional Search direction, should be parallel to the pore axis, - e.g. ``[0,0,1]`` for the z-axis. + e.g. ``[0,0,1]`` for the z-axis. If this keyword is ``None`` (the default), then HOLE attempts to guess where the channel will be. The procedure assumes that the channel is reasonably symmetric. The guess will be either along the X axis @@ -261,15 +262,15 @@ def set_up_hole_input(pdbfile, random_seed : int, optional integer number to start the random number generator. By default, - :program:`hole` will use the time of the day. 
- For reproducible runs (e.g., for testing) set ``random_seed`` + :program:`hole` will use the time of the day. + For reproducible runs (e.g., for testing) set ``random_seed`` to an integer. Default: ``None`` ignore_residues : array_like, optional sequence of three-letter residues that are not taken into account during the calculation; wildcards are *not* - supported. Note that all residues must have 3 letters. Pad - with space on the right-hand side if necessary. + supported. Note that all residues must have 3 letters. Pad + with space on the right-hand side if necessary. Default: {}. output_level : int, optional @@ -281,7 +282,7 @@ def set_up_hole_input(pdbfile, 2: Ditto plus no graph type output - only leaving minimum radius and conductance calculations. 3: All text output other than input card mirroring and error messages - turned off. + turned off. Default: 0 dcd : str, optional @@ -380,7 +381,7 @@ def run_hole(outfile, infile_text, executable): outfile: str Output file name infile_text: str - HOLE input text + HOLE input text (typically generated by :func:`set_up_hole_input`) executable: str HOLE executable @@ -487,19 +488,19 @@ def create_vmd_surface(sphpdb='hole.sph', sphpdb: str, optional sphpdb file to read. Default: 'hole.sph' filename: str, optional - output VMD surface file. If ``None``, a temporary file + output VMD surface file. If ``None``, a temporary file is generated. Default: ``None`` sph_process: str, optional Executable for ``sph_process`` program. Default: 'sph_process' sos_triangle: str, optional - Executable for ``sos_triangle`` program. Default: 'sos_triangle' + Executable for ``sos_triangle`` program. Default: 'sos_triangle' dot_density: int, optional density of facets for generating a 3D pore representation. The number controls the density of dots that will be used. - A sphere of dots is placed on each centre determined in the - Monte Carlo procedure. 
The actual number of dots written is - controlled by ``dot_density`` and the ``sample`` level of the - original analysis. ``dot_density`` should be set between 5 + A sphere of dots is placed on each centre determined in the + Monte Carlo procedure. The actual number of dots written is + controlled by ``dot_density`` and the ``sample`` level of the + original analysis. ``dot_density`` should be set between 5 (few dots per sphere) and 35 (many dots per sphere). Default: 15 diff --git a/package/MDAnalysis/analysis/leaflet.py b/package/MDAnalysis/analysis/leaflet.py index b9416a381c7..1749faf593d 100644 --- a/package/MDAnalysis/analysis/leaflet.py +++ b/package/MDAnalysis/analysis/leaflet.py @@ -93,9 +93,8 @@ class LeafletFinder(object): Parameters ---------- - universe : Universe or str - :class:`MDAnalysis.Universe` or a file name (e.g., in PDB or - GRO format) + universe : Universe + :class:`~MDAnalysis.core.universe.Universe` object. select : AtomGroup or str A AtomGroup instance or a :meth:`Universe.select_atoms` selection string @@ -119,7 +118,8 @@ class LeafletFinder(object): consecutively, starting at 0. To obtain the atoms in the input structure use :meth:`LeafletFinder.groups`:: - L = LeafletFinder(PDB, 'name P*') + u_PDB = mda.Universe(PDB) + L = LeafletFinder(u_PDB, 'name P*') leaflet0 = L.groups(0) leaflet1 = L.groups(1) @@ -132,12 +132,15 @@ class LeafletFinder(object): leaflet0.residues.atoms + .. versionchanged:: 1.0.0 Changed `selection` keyword to `select` + .. versionchanged:: 2.0.0 + The universe keyword no longer accepts non-Universe arguments. Please + create a :class:`~MDAnalysis.core.universe.Universe` first. 
""" def __init__(self, universe, select, cutoff=15.0, pbc=False, sparse=None): - universe = core.universe.as_Universe(universe) self.universe = universe self.selectionstring = select if isinstance(self.selectionstring, core.groups.AtomGroup): diff --git a/package/MDAnalysis/coordinates/H5MD.py b/package/MDAnalysis/coordinates/H5MD.py index a49594282d5..a6fb5d4ab80 100644 --- a/package/MDAnalysis/coordinates/H5MD.py +++ b/package/MDAnalysis/coordinates/H5MD.py @@ -23,7 +23,6 @@ """ H5MD trajectories --- :mod:`MDAnalysis.coordinates.H5MD` ======================================================== - The `H5MD`_ trajectory file format is based upon the general, high performance `HDF5`_ file format. HDF5 files are self documenting and can be accessed with the `h5py`_ library. @@ -182,7 +181,6 @@ .. autoclass:: H5PYPicklable :members: - """ import numpy as np @@ -236,7 +234,7 @@ def dimensions(self, box): class H5MDReader(base.ReaderBase): - """Reader for the H5MD format. + r"""Reader for the H5MD format. See `h5md documentation `_ for a detailed overview of the H5MD file format. 
@@ -385,6 +383,7 @@ def __init__(self, filename, convert_units=True, driver=None, comm=None, + read_direct=None, **kwargs): """ Parameters @@ -419,8 +418,9 @@ def __init__(self, filename, NoDataError when the H5MD file has no 'position', 'velocity', or 'force' group - """ + + self.read_direct = read_direct if not HAS_H5PY: raise RuntimeError("Please install h5py") super(H5MDReader, self).__init__(filename, **kwargs) @@ -600,7 +600,7 @@ def _read_frame(self, frame): else: raise NoDataError("Provide at least a position, velocity" " or force group in the h5md file.") - except ValueError: + except (ValueError, IndexError): raise IOError from None self._frame = frame @@ -623,12 +623,24 @@ def _read_frame(self, frame): # set the timestep positions, velocities, and forces with # current frame dataset - if self._has['position']: - ts.positions = self._get_frame_dataset('position') - if self._has['velocity']: - ts.velocities = self._get_frame_dataset('velocity') - if self._has['force']: - ts.forces = self._get_frame_dataset('force') + if self.read_direct is True: + if self._has['position']: + self._particle_group['position/value'].read_direct(ts.positions, source_sel=np.s_[frame, :]) + if self._has['velocity']: + self._particle_group['velocity/value'].read_direct(ts.velocities, source_sel=np.s_[frame, :]) + if self._has['force']: + self._particle_group['force/value'].read_direct(ts.forces, source_sel=np.s_[frame, :]) + + else: + if self._has['position']: + ts.positions = self._get_frame_dataset('position') + if self._has['velocity']: + ts.velocities = self._get_frame_dataset('velocity') + if self._has['force']: + ts.forces = self._get_frame_dataset('force') + + + if self.convert_units: self._convert_units() @@ -826,3 +838,274 @@ def __setstate__(self, state): def __getnewargs__(self): """Override the h5py getnewargs to skip its error message""" return () + + +class H5MDWriter(base.WriterBase): + """Writer for the H5MD format. + + Classes + ------- + + .. 
autoclass:: Timestep + :members: + + .. attribute:: positions + + coordinates of the atoms as a :class:`numpy.ndarray` of shape + `(n_atoms, 3)`; only available if the trajectory contains positions + or if the ``positions = True`` keyword has been supplied. + + .. attribute:: velocities + + velocities of the atoms as a :class:`numpy.ndarray` of shape + `(n_atoms, 3)`; only available if the trajectory contains velocities + or if the ``velocities = True`` keyword has been supplied. + + .. attribute:: forces + + forces of the atoms as a :class:`numpy.ndarray` of shape + `(n_atoms, 3)`; only available if the trajectory contains forces + or if the ``forces = True`` keyword has been supplied. + + + .. autoclass:: H5MDReader + :members: + + .. versionadded:: 2.0.0 + + """ + + format = 'H5MD' + multiframe = True + _unit_translation_dict = { + 'time': { + 'ps': 'ps', + 'fs': 'fs', + 'ns': 'ns', + 'second': 's', + 'sec': 's', + 's': 's', + 'AKMA': 'AKMA', + }, + 'length': { + 'Angstrom': 'Angstrom', + 'angstrom': 'Angstrom', + 'A': 'Angstrom', + 'nm': 'nm', + 'pm': 'pm', + 'fm': 'fm', + }, + 'velocity': { + 'Angstrom/ps': 'Angstrom ps-1', + 'A/ps': 'Angstrom ps-1', + 'Angstrom/fs': 'Angstrom fs-1', + 'A/fs': 'Angstrom fs-1', + 'Angstrom/AKMA': 'Angstrom AKMA-1', + 'A/AKMA': 'Angstrom AKMA-1', + 'nm/ps': 'nm ps-1', + 'nm/ns': 'nm ns-1', + 'pm/ps': 'pm ps-1', + 'm/s': 'm s-1' + }, + 'force': { + 'kJ/(mol*Angstrom)': 'kJ mol-1 Angstrom-1', + 'kJ/(mol*nm)': 'kJ mol-1 nm-1', + 'Newton': 'Newton', + 'N': 'N', + 'J/m': 'J m-1', + 'kcal/(mol*Angstrom)': 'kcal mol-1 Angstrom-1', + 'kcal/(mol*A)': 'kcal mol-1 Angstrom-1' + } + } + + def __init__(self, + filename, + n_atoms, + n_frames, + driver=None, + comm=None, + convert_units=True, + units=None, + **kwargs): + """ + Parameters + ---------- + filename : str or :class:`h5py.File` + trajectory filename or open h5py file + n_atoms : int + number of atoms in trajectory + n_frames: int + number of frames in trajectory + convert_units : 
bool (optional) + convert units from MDAnalysis to desired units + **kwargs : dict + + """ + + self.filename = filename + if n_atoms == 0: + raise ValueError("H5MDWriter: no atoms in output trajectory") + self._driver = driver + self._comm = comm + self.n_atoms = n_atoms + self.n_frames = n_frames + self.units = None + self.convert_units = convert_units + + self.h5md_file = None + self.has_positions = kwargs.get('positions', False) + self.has_velocities = kwargs.get('velocities', False) + self.has_forces = kwargs.get('forces', False) + self._has = {'position': self.has_positions, + 'velocity': self.has_velocities, + 'force': self.has_forces} + self.chunks = kwargs.get('chunks', None) + + # Pull out various keywords to store in 'h5md' group + self.author = kwargs.pop('author', 'N/A') + self.author_email = kwargs.pop('author_email', None) + self.creator = kwargs.pop('creator', 'MDAnalysis') + self.creator_version = kwargs.pop('creator_version', mda.__version__) + + def _write_next_frame(self, ag): + """Write information associated with ``ag`` at current frame + into trajectory + + Parameters + ---------- + ag : AtomGroup or Universe + + .. versionadded:: 2.0.0 + + """ + try: + # Atomgroup? + ts = ag.ts + self.units = ag.universe.trajectory.units + except AttributeError: + try: + # Universe? + ts = ag.trajectory.ts + self.units = ag.trajectory.units + except AttributeError: + errmsg = "Input obj is neither an AtomGroup or Universe" + raise TypeError(errmsg) from None + + if ts.n_atoms != self.n_atoms: + raise IOError("H5MDWriter: Timestep does not have" + " the correct number of atoms") + + # Opens H5MD file to write with H5PY library and initializes + # 'h5md' group and checks if unitcell is periodic + if self.h5md_file is None: + self._init_h5md(ts) + + return self._write_next_timestep(ts) + + def is_periodic(self, ts): + """Test if timestep ``ts`` contains a periodic box. 
+ + Parameters + ---------- + ts : :class:`Timestep` + :class:`Timestep` instance containing coordinates to + be written to trajectory file + + Returns + ------- + bool + Return ``True`` if `ts` contains a valid simulation box + """ + return np.all(ts.dimensions > 0) + + def _init_h5md(self, ts): + """Initializes H5MD trajectory. + + The `H5MD`_ file is opened using the `H5PY`_ library. """ + + if self._comm is not None: + h5md_file = h5py.File(self.filename, 'w', + driver=self._driver, + comm=self._comm) + else: + h5md_file = h5py.File(self.filename, 'w') + self.h5md_file = h5md_file + + # fill in H5MD file metadata + h5md_group = h5md_file.create_group('h5md') + h5md_group.attrs['version'] = np.array([1,1]) + author_group = h5md_file.create_group('h5md/author') + author_group.attrs['name'] = self.author + if self.author_email is not None: + author_group.attrs['email'] = self.author_email + creator_group = h5md_file.create_group('h5md/creator') + creator_group.attrs['name'] = self.creator + creator_group.attrs['version'] = self.creator_version + + h5md_file.require_group('particles').require_group('trajectory') + trajectory = self.h5md_file['particles/trajectory'] + trajectory.create_group('box').create_group('edges') + trajectory['box'].attrs['dimension'] = 3 + if self.is_periodic(ts): + trajectory.create_dataset('box/edges/value', shape=(self.n_frames, 3, 3), dtype=np.float32) + trajectory.create_dataset('box/edges/step', shape=(self.n_frames,), dtype=np.int32) + trajectory.create_dataset('box/edges/time', shape=(self.n_frames,), dtype=np.float32) + trajectory['box'].attrs['boundary'] = ['periodic', 'periodic', 'periodic'] + else: + trajectory['box'].attrs['boundary'] = [None, None, None] + + for name, value in self._has.items(): + if value: + self._create_dataset(name) + + def _create_dataset(self, group): + """ """ + + # need this dictionary to associate 'position': 'length' + _group_unit_dict = {'position': 'length', + 'velocity': 'velocity', + 'force': 
'force'} + + trajectory = self.h5md_file['particles/trajectory'] + + trajectory.create_group(group) + trajectory.create_dataset(f'{group}/value', shape=(self.n_frames, self.n_atoms, 3), dtype=np.float32, chunks=self.chunks) + trajectory.create_dataset(f'{group}/step', shape=(self.n_frames,), dtype=np.int32) + trajectory.create_dataset(f'{group}/time', shape=(self.n_frames,), dtype=np.float32) + if self.units is not None: + trajectory[f'{group}/value'].attrs['unit'] = self._unit_translation_dict[_group_unit_dict[group]][self.units[_group_unit_dict[group]]] + trajectory[f'{group}/time'].attrs['unit'] = self._unit_translation_dict['time'][self.units['time']] + + def _write_next_timestep(self, ts): + """Write coordinates and unitcell information to H5MD file. + + Do not call this method directly; instead use + :meth:`write` because some essential setup is done + there before writing the first frame. + + """ + + file = self.h5md_file + + '''_group_ts_dict = {'position': ts._pos, + 'velocity': ts._velocities, + 'force': ts._forces}''' + + if self.has_positions: + file['particles/trajectory/position/value'][ts.frame] = ts._pos + file['particles/trajectory/position/step'][ts.frame] = ts.data['step'] + file['particles/trajectory/position/time'][ts.frame] = ts.time + + if self.has_velocities: + file['particles/trajectory/velocity/value'][ts.frame] = ts._velocities + file['particles/trajectory/velocity/step'][ts.frame] = ts.data['step'] + file['particles/trajectory/velocity/time'][ts.frame] = ts.time + + if self.has_forces: + file['particles/trajectory/force/value'][ts.frame] = ts._forces + file['particles/trajectory/force/step'][ts.frame] = ts.data['step'] + file['particles/trajectory/force/time'][ts.frame] = ts.time + + file['particles/trajectory/box/edges/value'][ts.frame] = ts._unitcell + file['particles/trajectory/box/edges/step'][ts.frame] = ts.data['step'] + file['particles/trajectory/box/edges/time'][ts.frame] = ts.time diff --git 
a/package/MDAnalysis/coordinates/PDB.py b/package/MDAnalysis/coordinates/PDB.py index 8b90c1bda9e..3dd15e1a380 100644 --- a/package/MDAnalysis/coordinates/PDB.py +++ b/package/MDAnalysis/coordinates/PDB.py @@ -533,6 +533,10 @@ class PDBWriter(base.WriterBase): ChainID now comes from the last character of segid, as stated in the documentation. An indexing issue meant it previously used the first charater (Issue #2224) + .. versionchanged:: 2.0.0 + Add the `redindex` argument. Setting this keyword to ``True`` + (the default) preserves the behavior in earlier versions of MDAnalysis. + """ fmt = { 'ATOM': ( @@ -596,7 +600,7 @@ class PDBWriter(base.WriterBase): def __init__(self, filename, bonds="conect", n_atoms=None, start=0, step=1, remarks="Created by PDBWriter", - convert_units=True, multiframe=None): + convert_units=True, multiframe=None, reindex=True): """Create a new PDBWriter Parameters @@ -624,6 +628,9 @@ def __init__(self, filename, bonds="conect", n_atoms=None, start=0, step=1, ``False``: write a single frame to the file; ``True``: create a multi frame PDB file in which frames are written as MODEL_ ... ENDMDL_ records. If ``None``, then the class default is chosen. [``None``] + reindex: bool (optional) + If ``True`` (default), the atom serial is set to be consecutive + numbers starting at 1. Else, use the atom id. 
""" # n_atoms = None : dummy keyword argument @@ -637,6 +644,7 @@ def __init__(self, filename, bonds="conect", n_atoms=None, start=0, step=1, self.convert_units = convert_units self._multiframe = self.multiframe if multiframe is None else multiframe self.bonds = bonds + self._reindex = reindex if start < 0: raise ValueError("'Start' must be a positive value") @@ -792,14 +800,35 @@ def _write_pdb_bonds(self): if not self.obj or not hasattr(self.obj.universe, 'bonds'): return - mapping = {index: i for i, index in enumerate(self.obj.atoms.indices)} - bondset = set(itertools.chain(*(a.bonds for a in self.obj.atoms))) + if self._reindex: + index_attribute = 'index' + mapping = { + index: i + for i, index in enumerate(self.obj.atoms.indices, start=1) + } + atoms = np.sort(self.obj.atoms.indices) + else: + index_attribute = 'id' + mapping = {id_: id_ for id_ in self.obj.atoms.ids} + atoms = np.sort(self.obj.atoms.ids) if self.bonds == "conect": # Write out only the bonds that were defined in CONECT records - bonds = ((bond[0].index, bond[1].index) for bond in bondset if not bond.is_guessed) + bonds = ( + ( + getattr(bond[0], index_attribute), + getattr(bond[1], index_attribute), + ) + for bond in bondset if not bond.is_guessed + ) elif self.bonds == "all": - bonds = ((bond[0].index, bond[1].index) for bond in bondset) + bonds = ( + ( + getattr(bond[0], index_attribute), + getattr(bond[1], index_attribute), + ) + for bond in bondset + ) else: raise ValueError("bonds has to be either None, 'conect' or 'all'") @@ -810,8 +839,6 @@ def _write_pdb_bonds(self): con[a2].append(a1) con[a1].append(a2) - atoms = np.sort(self.obj.atoms.indices) - conect = ([mapping[a]] + sorted([mapping[at] for at in con[a]]) for a in atoms if a in con) @@ -1042,9 +1069,22 @@ def get_attr(attrname, default): atomnames = get_attr('names', 'X') record_types = get_attr('record_types', 'ATOM') + # If reindex == False, we use the atom ids for the serial. We do not + # want to use a fallback here. 
+ if not self._reindex: + try: + atom_ids = atoms.ids + except AttributeError: + raise NoDataError( + 'The "id" topology attribute is not set. ' + 'Either set the attribute or use reindex=True.' + ) + else: + atom_ids = np.arange(len(atoms)) + 1 + for i, atom in enumerate(atoms): vals = {} - vals['serial'] = util.ltruncate_int(i + 1, 5) # check for overflow here? + vals['serial'] = util.ltruncate_int(atom_ids[i], 5) # check for overflow here? vals['name'] = self._deduce_PDB_atom_name(atomnames[i], resnames[i]) vals['altLoc'] = altlocs[i][:1] vals['resName'] = resnames[i][:4] @@ -1153,7 +1193,7 @@ def CONECT(self, conect): """Write CONECT_ record. """ - conect = ["{0:5d}".format(entry + 1) for entry in conect] + conect = ["{0:5d}".format(entry) for entry in conect] conect = "".join(conect) self.pdbfile.write(self.fmt['CONECT'].format(conect)) diff --git a/package/MDAnalysis/coordinates/RDKit.py b/package/MDAnalysis/coordinates/RDKit.py index 3c64241d109..30175e7fbcf 100644 --- a/package/MDAnalysis/coordinates/RDKit.py +++ b/package/MDAnalysis/coordinates/RDKit.py @@ -21,25 +21,30 @@ # J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 # -"""RDKit molecule --- :mod:`MDAnalysis.coordinates.RDKit` +"""RDKit molecule I/O --- :mod:`MDAnalysis.coordinates.RDKit` ================================================================ -Read coordinates data from an `RDKit `_ :class:`rdkit.Chem.rdchem.Mol` with :class:`RDKitReader` -into a MDAnalysis Universe. Convert it back to a :class:`rdkit.Chem.rdchem.Mol` with -:class:`RDKitConverter`. +Read coordinates data from an `RDKit `__ :class:`rdkit.Chem.rdchem.Mol` with +:class:`RDKitReader` into an MDAnalysis Universe. Convert it back to a +:class:`rdkit.Chem.rdchem.Mol` with :class:`RDKitConverter`. 
Example ------- ->>> from rdkit import Chem ->>> import MDAnalysis as mda ->>> mol = Chem.MolFromMol2File("docking_poses.mol2", removeHs=False) ->>> u = mda.Universe(mol) ->>> u - ->>> u.trajectory - +To read an RDKit molecule and then convert the AtomGroup back to an RDKit +molecule:: + + >>> from rdkit import Chem + >>> import MDAnalysis as mda + >>> mol = Chem.MolFromMol2File("docking_poses.mol2", removeHs=False) + >>> u = mda.Universe(mol) + >>> u + + >>> u.trajectory + + >>> u.atoms.convert_to("RDKIT") + Classes @@ -51,19 +56,59 @@ .. autoclass:: RDKitConverter :members: +.. autofunction:: _infer_bo_and_charges + +.. autofunction:: _standardize_patterns + +.. autofunction:: _rebuild_conjugated_bonds """ import warnings +import re +import copy import numpy as np +from ..exceptions import NoDataError +from ..topology.guessers import guess_atom_element +from ..core.topologyattrs import _TOPOLOGY_ATTRS from . import memory +from . import base + +try: + from rdkit import Chem + from rdkit.Chem import AllChem +except ImportError: + pass +else: + RDBONDORDER = { + 1: Chem.BondType.SINGLE, + 1.5: Chem.BondType.AROMATIC, + "ar": Chem.BondType.AROMATIC, + 2: Chem.BondType.DOUBLE, + 3: Chem.BondType.TRIPLE, + } + # add string version of the key for each bond + RDBONDORDER.update({str(key): value for key, value in RDBONDORDER.items()}) + RDATTRIBUTES = { + "altLocs": "AltLoc", + "chainIDs": "ChainId", + "icodes": "InsertionCode", + "names": "Name", + "occupancies": "Occupancy", + "resnames": "ResidueName", + "resids": "ResidueNumber", + "segindices": "SegmentNumber", + "tempfactors": "TempFactor", + "bfactors": "TempFactor", + } + PERIODIC_TABLE = Chem.GetPeriodicTable() class RDKitReader(memory.MemoryReader): """Coordinate reader for RDKit. - + .. versionadded:: 2.0.0 """ format = 'RDKIT' @@ -84,21 +129,713 @@ def _format_hint(thing): def __init__(self, filename, **kwargs): """Read coordinates from an RDKit molecule. 
- Each conformer in the original RDKit molecule will be read as a frame + Each conformer in the original RDKit molecule will be read as a frame in the resulting universe. Parameters ---------- - filename : rdkit.Chem.rdchem.Mol RDKit molecule """ n_atoms = filename.GetNumAtoms() coordinates = np.array([ - conf.GetPositions() for conf in filename.GetConformers()], + conf.GetPositions() for conf in filename.GetConformers()], dtype=np.float32) if coordinates.size == 0: warnings.warn("No coordinates found in the RDKit molecule") - coordinates = np.empty((1,n_atoms,3), dtype=np.float32) + coordinates = np.empty((1, n_atoms, 3), dtype=np.float32) coordinates[:] = np.nan - super(RDKitReader, self).__init__(coordinates, order='fac', **kwargs) \ No newline at end of file + super(RDKitReader, self).__init__(coordinates, order='fac', **kwargs) + + +class RDKitConverter(base.ConverterBase): + """Convert MDAnalysis :class:`~MDAnalysis.core.groups.AtomGroup` or + :class:`~MDAnalysis.core.universe.Universe` to RDKit + :class:`~rdkit.Chem.rdchem.Mol` + + MDanalysis attributes are stored in each RDKit + :class:`~rdkit.Chem.rdchem.Atom` of the resulting molecule in two different + ways: + + * in an :class:`~rdkit.Chem.rdchem.AtomPDBResidueInfo` object available + through the :meth:`~rdkit.Chem.rdchem.Atom.GetMonomerInfo` method if it's + an attribute that is typically found in a PDB file, + * directly as an atom property available through the + :meth:`~rdkit.Chem.rdchem.Atom.GetProp` methods for the others. 
+ + Supported attributes: + + +-----------------------+-------------------------------------------+ + | MDAnalysis attribute | RDKit | + +=======================+===========================================+ + | altLocs | atom.GetMonomerInfo().GetAltLoc() | + +-----------------------+-------------------------------------------+ + | chainIDs | atom.GetMonomerInfo().GetChainId() | + +-----------------------+-------------------------------------------+ + | icodes | atom.GetMonomerInfo().GetInsertionCode() | + +-----------------------+-------------------------------------------+ + | names | atom.GetMonomerInfo().GetName() | + +-----------------------+-------------------------------------------+ + | occupancies | atom.GetMonomerInfo().GetOccupancy() | + +-----------------------+-------------------------------------------+ + | resnames | atom.GetMonomerInfo().GetResidueName() | + +-----------------------+-------------------------------------------+ + | resids | atom.GetMonomerInfo().GetResidueNumber() | + +-----------------------+-------------------------------------------+ + | segindices | atom.GetMonomerInfo().GetSegmentNumber() | + +-----------------------+-------------------------------------------+ + | tempfactors | atom.GetMonomerInfo().GetTempFactor() | + +-----------------------+-------------------------------------------+ + | bfactors | atom.GetMonomerInfo().GetTempFactor() | + +-----------------------+-------------------------------------------+ + | charges | atom.GetDoubleProp("_MDAnalysis_charge") | + +-----------------------+-------------------------------------------+ + | indices | atom.GetIntProp("_MDAnalysis_index") | + +-----------------------+-------------------------------------------+ + | segids | atom.GetProp("_MDAnalysis_segid") | + +-----------------------+-------------------------------------------+ + | types | atom.GetProp("_MDAnalysis_type") | + +-----------------------+-------------------------------------------+ + + Example + ------- + + To 
access MDAnalysis properties::
+
+        >>> import MDAnalysis as mda
+        >>> from MDAnalysis.tests.datafiles import PDB_full
+        >>> u = mda.Universe(PDB_full)
+        >>> mol = u.select_atoms('resname DMS').convert_to('RDKIT')
+        >>> mol.GetAtomWithIdx(0).GetMonomerInfo().GetResidueName()
+        'DMS'
+
+    To create a molecule for each frame of a trajectory::
+
+        from MDAnalysisTests.datafiles import PSF, DCD
+        from rdkit.Chem.Descriptors3D import Asphericity
+
+        u = mda.Universe(PSF, DCD)
+        elements = mda.topology.guessers.guess_types(u.atoms.names)
+        u.add_TopologyAttr('elements', elements)
+        ag = u.select_atoms("resid 1-10")
+
+        for ts in u.trajectory:
+            mol = ag.convert_to("RDKIT")
+            x = Asphericity(mol)
+
+
+    Notes
+    -----
+
+    The converter requires the :class:`~MDAnalysis.core.topologyattrs.Elements`
+    attribute to be present in the topology, else it will fail.
+
+    It also requires the `bonds` attribute, although they will be automatically
+    guessed if not present.
+
+    If both ``tempfactors`` and ``bfactors`` attributes are present, the
+    conversion will fail, since only one of these should be present.
+    Refer to Issue #1901 for a solution.
+
+    Hydrogens should be explicit in the topology file. If this is not the case,
+    use the parameter ``NoImplicit=False`` when using the converter to allow
+    implicit hydrogens and disable inferring bond orders and charges.
+
+    Since one of the main use cases of the converter is converting trajectories
+    and not just a topology, creating a new molecule from scratch for every
+    frame would be too slow so the converter uses a caching system. The cache
+    only remembers the id of the last AtomGroup that was converted, as well
+    as the arguments that were passed to the converter. This means that using
+    ``u.select_atoms("protein").convert_to("RDKIT")`` will not benefit from the
+    cache since the selection is deleted from memory as soon as the conversion
+    is finished. 
Instead, users should do this in two steps by first saving the
+    selection in a variable and then converting the saved AtomGroup. It also
+    means that ``ag.convert_to("RDKIT")`` followed by
+    ``ag.convert_to("RDKIT", NoImplicit=False)`` will not use the cache.
+    Finally, if you're modifying the AtomGroup in place between two conversions,
+    the id of the AtomGroup won't change and thus the converter will use the
+    cached molecule. For this reason, you can pass a ``cache=False`` argument
+    to the converter to bypass the caching system.
+    Note that the cached molecule doesn't contain the coordinates of the atoms.
+
+
+    .. versionadded:: 2.0.0
+
+    """
+
+    lib = 'RDKIT'
+    units = {'time': None, 'length': 'Angstrom'}
+    _cache = dict()
+
+    def convert(self, obj, cache=True, NoImplicit=True, max_iter=200):
+        """Write selection at current trajectory frame to
+        :class:`~rdkit.Chem.rdchem.Mol`.
+
+        Parameters
+        ----------
+        obj : :class:`~MDAnalysis.core.groups.AtomGroup` or :class:`~MDAnalysis.core.universe.Universe`
+
+        cache : bool
+            Use a cached copy of the molecule's topology when available. To be
+            used, the cached molecule and the new one have to be made from the
+            same AtomGroup object (same id) and with the same arguments passed
+            to the converter (with the exception of this `cache` argument)
+        NoImplicit : bool
+            Prevent adding hydrogens to the molecule
+        max_iter : int
+            Maximum number of iterations to standardize conjugated systems.
+            See :func:`_rebuild_conjugated_bonds`
+        """
+        # parameters passed to atomgroup_to_mol and used by the cache
+        kwargs = dict(NoImplicit=NoImplicit, max_iter=max_iter)
+
+        try:
+            from rdkit import Chem
+        except ImportError:
+            raise ImportError("RDKit is required for the RDKitConverter but "
+                              "it's not installed. 
Try installing it with \n" + "conda install -c conda-forge rdkit") + try: + # make sure to use atoms (Issue 46) + ag = obj.atoms + except AttributeError: + raise TypeError("No `atoms` attribute in object of type {}, " + "please use a valid AtomGroup or Universe".format( + type(obj))) from None + + if cache: + # key used to search the cache + key = f"<{id(ag):#x}>" + ",".join(f"{key}={value}" + for key, value in kwargs.items()) + try: + mol = self._cache[key] + except KeyError: + # only keep the current molecule in cache + self._cache.clear() + # create the topology + self._cache[key] = mol = self.atomgroup_to_mol(ag, **kwargs) + # continue on copy of the cached molecule + mol = copy.deepcopy(mol) + else: + self._cache.clear() + mol = self.atomgroup_to_mol(ag, **kwargs) + + # add a conformer for the current Timestep + if hasattr(ag, "positions"): + if np.isnan(ag.positions).any(): + warnings.warn("NaN detected in coordinates, the output " + "molecule will not have 3D coordinates assigned") + else: + # assign coordinates + conf = Chem.Conformer(mol.GetNumAtoms()) + for atom in mol.GetAtoms(): + idx = atom.GetIntProp("_MDAnalysis_index") + xyz = ag.positions[idx].astype(float) + conf.SetAtomPosition(atom.GetIdx(), xyz) + mol.AddConformer(conf) + # assign R/S to atoms and Z/E to bonds + Chem.AssignStereochemistryFrom3D(mol) + Chem.SetDoubleBondNeighborDirections(mol) + + return mol + + def atomgroup_to_mol(self, ag, NoImplicit=True, max_iter=200): + """Converts an AtomGroup to an RDKit molecule. + + Parameters + ----------- + ag : MDAnalysis.core.groups.AtomGroup + The AtomGroup to convert + NoImplicit : bool + Prevent adding hydrogens to the molecule + max_iter : int + Maximum number of iterations to standardize conjugated systems. + See :func:`_rebuild_conjugated_bonds` + """ + try: + elements = ag.elements + except NoDataError: + raise AttributeError( + "The `elements` attribute is required for the RDKitConverter " + "but is not present in this AtomGroup. 
Please refer to the " + "documentation to guess elements from other attributes or " + "type `help(mda.topology.guessers)`") from None + + if "H" not in ag.elements: + warnings.warn( + "No hydrogen atom could be found in the topology, but the " + "converter requires all hydrogens to be explicit. Please " + "check carefully the output molecule as the converter is " + "likely to add negative charges and assign incorrect bond " + "orders to structures with implicit hydrogens. Alternatively, " + "you can use the parameter `NoImplicit=False` when using the " + "converter to allow implicit hydrogens and disable inferring " + "bond orders and charges." + ) + + # attributes accepted in PDBResidueInfo object + pdb_attrs = {} + if hasattr(ag, "bfactors") and hasattr(ag, "tempfactors"): + raise AttributeError( + "Both `tempfactors` and `bfactors` attributes are present but " + "only one can be assigned to the RDKit molecule. Please " + "delete the unnecessary one and retry." + ) + for attr in RDATTRIBUTES.keys(): + if hasattr(ag, attr): + pdb_attrs[attr] = getattr(ag, attr) + + other_attrs = {} + for attr in ["charges", "segids", "types"]: + if hasattr(ag, attr): + other_attrs[attr] = getattr(ag, attr) + + mol = Chem.RWMol() + # map index in universe to index in mol + atom_mapper = {} + + for i, (atom, element) in enumerate(zip(ag, elements)): + # create atom + rdatom = Chem.Atom(element.capitalize()) + # enable/disable adding implicit H to the molecule + rdatom.SetNoImplicit(NoImplicit) + # add PDB-like properties + mi = Chem.AtomPDBResidueInfo() + for attr, values in pdb_attrs.items(): + _add_mda_attr_to_rdkit(attr, values[i], mi) + rdatom.SetMonomerInfo(mi) + # other properties + for attr in other_attrs.keys(): + value = other_attrs[attr][i] + attr = "_MDAnalysis_%s" % _TOPOLOGY_ATTRS[attr].singular + _set_atom_property(rdatom, attr, value) + _set_atom_property(rdatom, "_MDAnalysis_index", i) + # add atom + index = mol.AddAtom(rdatom) + atom_mapper[atom.ix] = index + + try: 
+ ag.bonds + except NoDataError: + warnings.warn( + "No `bonds` attribute in this AtomGroup. Guessing bonds based " + "on atoms coordinates") + ag.guess_bonds() + + for bond in ag.bonds: + try: + bond_indices = [atom_mapper[i] for i in bond.indices] + except KeyError: + continue + bond_type = RDBONDORDER.get(bond.order, Chem.BondType.SINGLE) + mol.AddBond(*bond_indices, bond_type) + + mol.UpdatePropertyCache(strict=False) + + if NoImplicit: + # infer bond orders and formal charges from the connectivity + _infer_bo_and_charges(mol) + mol = _standardize_patterns(mol, max_iter) + + # sanitize + Chem.SanitizeMol(mol) + + return mol + + +def _add_mda_attr_to_rdkit(attr, value, mi): + """Converts an MDAnalysis atom attribute into the RDKit equivalent and + stores it into an RDKit :class:`~rdkit.Chem.rdchem.AtomPDBResidueInfo`. + + Parameters + ---------- + attr : str + Name of the atom attribute in MDAnalysis in the singular form + value : object, np.int or np.float + Attribute value as found in the AtomGroup + mi : rdkit.Chem.rdchem.AtomPDBResidueInfo + MonomerInfo object that will store the relevant atom attributes + """ + if isinstance(value, np.generic): + # convert numpy types to python standard types + value = value.item() + if attr == "names": + # RDKit needs the name to be properly formatted for a + # PDB file (1 letter elements start at col 14) + name = re.findall(r'(\D+|\d+)', value) + if len(name) == 2: + symbol, number = name + if len(number) > 2 and len(symbol) == 1: + value = "{}{}".format(symbol, number) + else: + value = "{:>2.2}{:<2.2}".format(symbol, number) + else: + # no number in the name + value = " {:<}".format(name[0]) + + # set attribute value in RDKit MonomerInfo + rdattr = RDATTRIBUTES[attr] + getattr(mi, "Set%s" % rdattr)(value) + + +def _set_atom_property(atom, attr, value): + """Saves any attribute and value into an RDKit atom property""" + if isinstance(value, (float, np.float)): + atom.SetDoubleProp(attr, float(value)) + elif 
isinstance(value, (int, np.int)):
+        atom.SetIntProp(attr, int(value))
+    else:
+        atom.SetProp(attr, value)
+
+
+def _infer_bo_and_charges(mol):
+    """Infer bond orders and formal charges from a molecule.
+
+    Since most MD topology files don't explicitly retain information on bond
+    orders or charges, it has to be guessed from the topology. This is done by
+    looping over each atom and comparing its expected valence to the current
+    valence to get the Number of Unpaired Electrons (NUE).
+    If an atom has a negative NUE, it needs a positive formal charge (-NUE).
+    If two neighbouring atoms have UEs, the bond between them most
+    likely has to be increased by the value of the smallest NUE.
+    If after this process, an atom still has UEs, it needs a negative formal
+    charge of -NUE.
+
+    Parameters
+    ----------
+    mol : rdkit.Chem.rdchem.RWMol
+        The molecule is modified inplace and must have all hydrogens added
+
+    Notes
+    -----
+    This algorithm is order dependent. For example, for a carboxylate group
+    R-C(-O)-O the first oxygen read will receive a double bond and the other
+    one will be charged. It will also affect more complex conjugated systems. 
+ """ + + for atom in sorted(mol.GetAtoms(), reverse=True, + key=lambda a: _get_nb_unpaired_electrons(a)[0]): + # get NUE for each possible valence + nue = _get_nb_unpaired_electrons(atom) + # if there's only one possible valence state and the corresponding + # NUE is negative, it means we can only add a positive charge to + # the atom + if (len(nue) == 1) and (nue[0] < 0): + atom.SetFormalCharge(-nue[0]) + mol.UpdatePropertyCache(strict=False) + # go to next atom if above case or atom has no unpaired electron + if (len(nue) == 1) and (nue[0] <= 0): + continue + else: + neighbors = sorted(atom.GetNeighbors(), reverse=True, + key=lambda a: _get_nb_unpaired_electrons(a)[0]) + # check if one of the neighbors has a common NUE + for i, na in enumerate(neighbors, start=1): + # get NUE for the neighbor + na_nue = _get_nb_unpaired_electrons(na) + # smallest common NUE + common_nue = min( + min([i for i in nue if i >= 0], default=0), + min([i for i in na_nue if i >= 0], default=0) + ) + # a common NUE of 0 means we don't need to do anything + if common_nue != 0: + # increase bond order + bond = mol.GetBondBetweenAtoms( + atom.GetIdx(), na.GetIdx()) + order = common_nue + 1 + bond.SetBondType(RDBONDORDER[order]) + mol.UpdatePropertyCache(strict=False) + if i < len(neighbors): + # recalculate nue for atom + nue = _get_nb_unpaired_electrons(atom) + + # if the atom still has unpaired electrons + nue = _get_nb_unpaired_electrons(atom)[0] + if nue > 0: + # transform it to a negative charge + atom.SetFormalCharge(-nue) + atom.SetNumRadicalElectrons(0) + mol.UpdatePropertyCache(strict=False) + + +def _get_nb_unpaired_electrons(atom): + """Calculate the number of unpaired electrons (NUE) of an atom + + Parameters + ---------- + atom: rdkit.Chem.rdchem.Atom + The atom for which the NUE will be computed + + Returns + ------- + nue : list + The NUE for each possible valence of the atom + """ + expected_vs = PERIODIC_TABLE.GetValenceList(atom.GetAtomicNum()) + current_v = 
atom.GetTotalValence() - atom.GetFormalCharge() + return [v - current_v for v in expected_vs] + + +def _standardize_patterns(mol, max_iter=200): + """Standardizes functional groups + + Uses :func:`_rebuild_conjugated_bonds` to standardize conjugated systems, + and SMARTS reactions for other functional groups. + Due to the way reactions work, we first have to split the molecule by + fragments. Then, for each fragment, we apply the standardization reactions. + Finally, the fragments are recombined. + + Parameters + ---------- + mol : rdkit.Chem.rdchem.RWMol + The molecule to standardize + max_iter : int + Maximum number of iterations to standardize conjugated systems + + Returns + ------- + mol : rdkit.Chem.rdchem.Mol + The standardized molecule + + Notes + ----- + The following functional groups are transformed in this order: + + +---------------+------------------------------------------------------------------------------+ + | Name | Reaction | + +===============+==============================================================================+ + | conjugated | ``[*-;!O:1]-[*:2]=[*:3]-[*-:4]>>[*+0:1]=[*:2]-[*:3]=[*+0:4]`` | + +---------------+------------------------------------------------------------------------------+ + | conjugated-N+ | ``[N;X3;v3:1]-[*:2]=[*:3]-[*-:4]>>[N+:1]=[*:2]-[*:3]=[*+0:4]`` | + +---------------+------------------------------------------------------------------------------+ + | conjugated-O- | ``[O:1]=[#6:2]-[*:3]=[*:4]-[*-:5]>>[O-:1]-[*:2]=[*:3]-[*:4]=[*+0:5]`` | + +---------------+------------------------------------------------------------------------------+ + | Cterm | ``[C-;X2:1]=[O:2]>>[C+0:1]=[O:2]`` | + +---------------+------------------------------------------------------------------------------+ + | Nterm | ``[N-;X2;H1:1]>>[N+0:1]`` | + +---------------+------------------------------------------------------------------------------+ + | keto-enolate | ``[#6-:1]-[#6:2]=[O:3]>>[#6+0:1]=[#6:2]-[O-:3]`` | + 
+---------------+------------------------------------------------------------------------------+ + | arginine | ``[N;H1:1]-[C-;X3;H0:2](-[N;H2:3])-[N;H2:4]>>[N:1]-[C+0:2](-[N:3])=[N+:4]`` | + +---------------+------------------------------------------------------------------------------+ + | histidine | ``[#6+0;H0:1]=[#6+0:2]-[#7;X3:3]-[#6-;X3:4]>>[#6:1]=[#6:2]-[#7+:3]=[#6+0:4]``| + +---------------+------------------------------------------------------------------------------+ + | sulfone | ``[S;X4;v4:1](-[O-;X1:2])-[O-;X1:3]>>[S:1](=[O+0:2])=[O+0:3]`` | + +---------------+------------------------------------------------------------------------------+ + | nitro | ``[N;X3;v3:1](-[O-;X1:2])-[O-;X1:3]>>[N+:1](-[O-:2])=[O+0:3]`` | + +---------------+------------------------------------------------------------------------------+ + + """ + + # standardize conjugated systems + _rebuild_conjugated_bonds(mol, max_iter) + + fragments = [] + for reactant in Chem.GetMolFrags(mol, asMols=True): + + for name, reaction in [ + ("Cterm", "[C-;X2:1]=[O:2]>>[C+0:1]=[O:2]"), + ("Nterm", "[N-;X2;H1:1]>>[N+0:1]"), + ("keto-enolate", "[#6-:1]-[#6:2]=[O:3]>>[#6+0:1]=[#6:2]-[O-:3]"), + ("ARG", "[N;H1:1]-[C-;X3;H0:2](-[N;H2:3])-[N;H2:4]" + ">>[N:1]-[C+0:2](-[N:3])=[N+:4]"), + ("HIP", "[#6+0;H0:1]=[#6+0:2]-[#7;X3:3]-[#6-;X3:4]" + ">>[#6:1]=[#6:2]-[#7+:3]=[#6+0:4]"), + ("sulfone", "[S;X4;v4:1](-[O-;X1:2])-[O-;X1:3]" + ">>[S:1](=[O+0:2])=[O+0:3]"), + ("nitro", "[N;X3;v3:1](-[O-;X1:2])-[O-;X1:3]" + ">>[N+:1](-[O-:2])=[O+0:3]"), + ]: + reactant.UpdatePropertyCache(strict=False) + Chem.Kekulize(reactant) + reactant = _run_reaction(reaction, reactant) + + fragments.append(reactant) + + # recombine fragments + mol = fragments.pop(0) + for fragment in fragments: + mol = Chem.CombineMols(mol, fragment) + + return mol + + +def _run_reaction(reaction, reactant): + """Runs a reaction until all reactants are transformed + + If a pattern is matched N times in the molecule, the reaction will return N + 
products as an array of shape (N, 1). Only the first product will be kept + and the same reaction will be reapplied to the product N times in total. + + Parameters + ---------- + reaction : str + SMARTS reaction + reactant : rdkit.Chem.rdchem.Mol + The molecule to transform + + Returns + ------- + product : rdkit.Chem.rdchem.Mol + The final product of the reaction + """ + # count how many times the reaction should be run + pattern = Chem.MolFromSmarts(reaction.split(">>")[0]) + n_matches = len(reactant.GetSubstructMatches(pattern)) + + # run the reaction for each matched pattern + rxn = AllChem.ReactionFromSmarts(reaction) + for n in range(n_matches): + products = rxn.RunReactants((reactant,)) + # only keep the first product + if products: + product = products[0][0] + # map back atom properties from the reactant to the product + _reassign_props_after_reaction(reactant, product) + # apply the next reaction to the product + product.UpdatePropertyCache(strict=False) + reactant = product + else: + # exit the n_matches loop if there's no product. Example + # where this is needed: SO^{4}_{2-} will match the sulfone + # pattern 6 times but the reaction is only needed once + break + return reactant + + +def _rebuild_conjugated_bonds(mol, max_iter=200): + """Rebuild conjugated bonds without negatively charged atoms at the + beginning and end of the conjugated system + + Depending on the order in which atoms are read during the conversion, the + :func:`_infer_bo_and_charges` function might write conjugated systems with + a double bond less and both edges of the system as anions instead of the + usual alternating single and double bonds. This function corrects this + behaviour by using an iterative procedure. + The problematic molecules always follow the same pattern: + ``anion[-*=*]n-anion`` instead of ``*=[*-*=]n*``, where ``n`` is the number + of successive single and double bonds. 
The goal of the iterative procedure + is to make ``n`` as small as possible by consecutively transforming + ``anion-*=*`` into ``*=*-anion`` until it reaches the smallest pattern with + ``n=1``. This last pattern is then transformed from ``anion-*=*-anion`` to + ``*=*-*=*``. + Since ``anion-*=*`` is the same as ``*=*-anion`` in terms of SMARTS, we can + control that we don't transform the same triplet of atoms back and forth by + adding their indices to a list. + The molecule needs to be kekulized first to also cover systems + with aromatic rings. + + Parameters + ---------- + mol : rdkit.Chem.rdchem.RWMol + The molecule to transform, modified inplace + max_iter : int + Maximum number of iterations performed by the function + """ + mol.UpdatePropertyCache(strict=False) + Chem.Kekulize(mol) + pattern = Chem.MolFromSmarts("[*-;!O]-[*+0]=[*+0]") + # number of unique matches with the pattern + n_matches = len(set([match[0] + for match in mol.GetSubstructMatches(pattern)])) + if n_matches == 0: + # nothing to standardize + return + # check if there's an even number of anion-*=* patterns + elif n_matches % 2 == 0: + end_pattern = Chem.MolFromSmarts("[*-;!O]-[*+0]=[*+0]-[*-]") + else: + # as a last resort, the only way to standardize is to find a nitrogen + # that can accept a double bond and a positive charge + # or a carbonyl that will become an enolate + end_pattern = Chem.MolFromSmarts( + "[*-;!O]-[*+0]=[*+0]-[$([#7;X3;v3]),$([#6+0]=O)]") + backtrack = [] + for _ in range(max_iter): + # simplest case where n=1 + end_match = mol.GetSubstructMatch(end_pattern) + if end_match: + # index of each atom + anion1, a1, a2, anion2 = end_match + term_atom = mol.GetAtomWithIdx(anion2) + # [*-]-*=*-C=O + if term_atom.GetAtomicNum() == 6 and term_atom.GetFormalCharge() == 0: + for neighbor in term_atom.GetNeighbors(): + bond = mol.GetBondBetweenAtoms(anion2, neighbor.GetIdx()) + if neighbor.GetAtomicNum() == 8 and bond.GetBondTypeAsDouble() == 2: + 
bond.SetBondType(Chem.BondType.SINGLE) + neighbor.SetFormalCharge(-1) + else: + # [*-]-*=*-N + if term_atom.GetAtomicNum() == 7 and term_atom.GetFormalCharge() == 0: + end_charge = 1 + # [*-]-*=*-[*-] + else: + end_charge = 0 + mol.GetAtomWithIdx(anion2).SetFormalCharge(end_charge) + # common part of the conjugated systems: [*-]-*=* + mol.GetAtomWithIdx(anion1).SetFormalCharge(0) + mol.GetBondBetweenAtoms(anion1, a1).SetBondType( + Chem.BondType.DOUBLE) + mol.GetBondBetweenAtoms(a1, a2).SetBondType(Chem.BondType.SINGLE) + mol.GetBondBetweenAtoms(a2, anion2).SetBondType( + Chem.BondType.DOUBLE) + mol.UpdatePropertyCache(strict=False) + + # shorten the anion-anion pattern from n to n-1 + matches = mol.GetSubstructMatches(pattern) + if matches: + # check if we haven't already transformed this triplet + for match in matches: + # sort the indices for the comparison + g = tuple(sorted(match)) + if g in backtrack: + # already transformed + continue + else: + # take the first one that hasn't been tried + anion, a1, a2 = match + backtrack.append(g) + break + else: + anion, a1, a2 = matches[0] + # charges + mol.GetAtomWithIdx(anion).SetFormalCharge(0) + mol.GetAtomWithIdx(a2).SetFormalCharge(-1) + # bonds + mol.GetBondBetweenAtoms(anion, a1).SetBondType( + Chem.BondType.DOUBLE) + mol.GetBondBetweenAtoms(a1, a2).SetBondType(Chem.BondType.SINGLE) + mol.UpdatePropertyCache(strict=False) + # start new iteration + continue + + # no more changes to apply + return + + # reached max_iter + warnings.warn("The standardization could not be completed within a " + "reasonable number of iterations") + + +def _reassign_props_after_reaction(reactant, product): + """Maps back atomic properties from the reactant to the product. + The product molecule is modified inplace. 
+ """ + for atom in product.GetAtoms(): + try: + atom.GetIntProp("old_mapno") + except KeyError: + pass + else: + atom.ClearProp("old_mapno") + idx = atom.GetUnsignedProp("react_atom_idx") + old_atom = reactant.GetAtomWithIdx(idx) + for prop, value in old_atom.GetPropsAsDict().items(): + _set_atom_property(atom, prop, value) + # fix bonds with "crossed" stereo + for bond in atom.GetBonds(): + if bond.GetStereo() == Chem.BondStereo.STEREOANY: + bond.SetStereo(Chem.BondStereo.STEREONONE) + atom.ClearProp("react_atom_idx") diff --git a/package/MDAnalysis/core/groups.py b/package/MDAnalysis/core/groups.py index 31f6da06f71..274ace30947 100644 --- a/package/MDAnalysis/core/groups.py +++ b/package/MDAnalysis/core/groups.py @@ -121,7 +121,7 @@ def _unpickle_uag(basepickle, selections, selstrs): def make_classes(): - """Make a fresh copy of all classes + r"""Make a fresh copy of all classes Returns ------- @@ -129,7 +129,6 @@ def make_classes(): to serve as bases for :class:`~MDAnalysis.core.universe.Universe`\ -specific MDA container classes. Another with the final merged versions of those classes. The classes themselves are used as hashing keys. - """ bases = {} classes = {} @@ -356,7 +355,7 @@ def __getattr__(self, attr): raise NoDataError(err.format(selfcls=selfcls, attrname=attrname, topattr=topattr)) - + else: clean = attr.lower().replace('_', '') err = '{selfcls} has no attribute {attr}. '.format(selfcls=selfcls, @@ -410,7 +409,7 @@ def wrapped(self, other): class GroupBase(_MutableBase): - """Base class from which a :class:`<~MDAnalysis.core.universe.Universe`\ 's + r"""Base class from which a :class:`<~MDAnalysis.core.universe.Universe`\ 's Group class is built. Instances of :class:`GroupBase` provide the following operations that @@ -525,7 +524,7 @@ def __getitem__(self, item): # subclasses, such as UpdatingAtomGroup, to control the class # resulting from slicing. 
return self._derived_class(self.ix[item], self.universe) - + def __getattr__(self, attr): selfcls = type(self).__name__ if attr in _TOPOLOGY_ATTRS: @@ -831,12 +830,18 @@ def center(self, weights, pbc=False, compound='group', unwrap=False): # Sort positions and weights by compound index and promote to dtype if # required: - sort_indices = np.argsort(compound_indices) + + # are we already sorted? argsorting and fancy-indexing can be expensive + if np.any(np.diff(compound_indices) < 0): + sort_indices = np.argsort(compound_indices) + else: + sort_indices = slice(None) compound_indices = compound_indices[sort_indices] # Unwrap Atoms if unwrap: - coords = atoms.unwrap(compound=comp, reference=None, inplace=False) + coords = atoms.unwrap(compound=comp, reference=None, + inplace=False)[sort_indices] else: coords = atoms.positions[sort_indices] if weights is None: @@ -921,7 +926,7 @@ def center_of_geometry(self, pbc=False, compound='group', unwrap=False): @warn_if_not_unique def accumulate(self, attribute, function=np.sum, compound='group'): - """Accumulates the attribute associated with (compounds of) the group. + r"""Accumulates the attribute associated with (compounds of) the group. Accumulates the attribute of :class:`Atoms` in the group. The accumulation per :class:`Residue`, :class:`Segment`, molecule, @@ -1347,7 +1352,7 @@ def pack_into_box(self, box=None, inplace=True): return self.wrap(box=box, inplace=inplace) def wrap(self, compound="atoms", center="com", box=None, inplace=True): - """Shift the contents of this group back into the primary unit cell + r"""Shift the contents of this group back into the primary unit cell according to periodic boundary conditions. 
Specifying a `compound` will keep the :class:`Atoms` in each @@ -1368,8 +1373,8 @@ def wrap(self, compound="atoms", center="com", box=None, inplace=True): box : array_like, optional The unitcell dimensions of the system, which can be orthogonal or triclinic and must be provided in the same format as returned by - :attr:`MDAnalysis.coordinates.base.Timestep.dimensions`:\n - ``[lx, ly, lz, alpha, beta, gamma]``.\n + :attr:`MDAnalysis.coordinates.base.Timestep.dimensions`: + ``[lx, ly, lz, alpha, beta, gamma]``. If ``None``, uses the dimensions of the current time step. inplace: bool, optional @@ -1406,14 +1411,14 @@ def wrap(self, compound="atoms", center="com", box=None, inplace=True): .. math:: - x_i' = x_i - \left\lfloor\\frac{x_i}{L_i}\\right\\rfloor\,. + x_i' = x_i - \left\lfloor\frac{x_i}{L_i}\right\rfloor\,. When specifying a `compound`, the translation is calculated based on each compound. The same translation is applied to all atoms within this compound, meaning it will not be broken by the shift. This might however mean that not all atoms of a compound will be inside the unit cell after wrapping, but rather will be the center of - the compound.\n + the compound. Be aware of the fact that only atoms *belonging to the group* will be taken into account! @@ -1548,7 +1553,7 @@ def wrap(self, compound="atoms", center="com", box=None, inplace=True): return positions def unwrap(self, compound='fragments', reference='com', inplace=True): - """Move atoms of this group so that bonds within the + r"""Move atoms of this group so that bonds within the group's compounds aren't split across periodic boundaries. This function is most useful when atoms have been packed into the @@ -1601,7 +1606,7 @@ def unwrap(self, compound='fragments', reference='com', inplace=True): ---- Be aware of the fact that only atoms *belonging to the group* will be unwrapped! 
If you want entire molecules to be unwrapped, make sure - that all atoms of these molecules are part of the group.\n + that all atoms of these molecules are part of the group. An AtomGroup containing all atoms of all fragments in the group ``ag`` can be created with:: @@ -2486,7 +2491,7 @@ def unique(self): @property def positions(self): - """Coordinates of the :class:`Atoms` in the :class:`AtomGroup`. + r"""Coordinates of the :class:`Atoms` in the :class:`AtomGroup`. A :class:`numpy.ndarray` with :attr:`~numpy.ndarray.shape`\ ``=(``\ :attr:`~AtomGroup.n_atoms`\ ``, 3)`` @@ -2524,7 +2529,7 @@ def positions(self, values): @property def velocities(self): - """Velocities of the :class:`Atoms` in the :class:`AtomGroup`. + r"""Velocities of the :class:`Atoms` in the :class:`AtomGroup`. A :class:`numpy.ndarray` with :attr:`~numpy.ndarray.shape`\ ``=(``\ :attr:`~AtomGroup.n_atoms`\ ``, 3)`` @@ -2553,7 +2558,7 @@ def velocities(self, values): @property def forces(self): - """Forces on each :class:`Atom` in the :class:`AtomGroup`. + r"""Forces on each :class:`Atom` in the :class:`AtomGroup`. A :class:`numpy.ndarray` with :attr:`~numpy.ndarray.shape`\ ``=(``\ :attr:`~AtomGroup.n_atoms`\ ``, 3)`` @@ -2737,6 +2742,10 @@ def select_atoms(self, sel, *othersel, **selgroups): record_type *record_type* for selecting either ATOM or HETATM from PDB-like files. e.g. ``select_atoms('name CA and not record_type HETATM')`` + smarts *SMARTS-query* + select atoms using Daylight's SMARTS queries, e.g. ``smarts + [#7;R]`` to find nitrogen atoms in rings. Requires RDKit. + All matches (max 1000) are combined as a unique match **Boolean** @@ -2883,6 +2892,8 @@ def select_atoms(self, sel, *othersel, **selgroups): equivalent ``global group`` selection. Removed flags affecting default behaviour for periodic selections; periodic are now on by default (as with default flags) + .. versionchanged:: 2.0.0 + Added the *smarts* selection. 
""" if not sel: @@ -3585,7 +3596,7 @@ def unique(self): @functools.total_ordering class ComponentBase(_MutableBase): - """Base class from which a :class:`~MDAnalysis.core.universe.Universe`\ 's + r"""Base class from which a :class:`~MDAnalysis.core.universe.Universe`\ 's Component class is built. Components are the individual objects that are found in Groups. diff --git a/package/MDAnalysis/core/selection.py b/package/MDAnalysis/core/selection.py index bdb156ff249..d3589caf6d4 100644 --- a/package/MDAnalysis/core/selection.py +++ b/package/MDAnalysis/core/selection.py @@ -297,7 +297,7 @@ def __init__(self, parser, tokens): self.inRadius = float(tokens.popleft()) self.exRadius = float(tokens.popleft()) self.sel = parser.parse_expression(self.precedence) - + @return_empty_on_apply def apply(self, group): indices = [] @@ -515,7 +515,7 @@ def apply(self, group): return group[mask] -class StringSelection(Selection): +class _ProtoStringSelection(Selection): """Selections based on text attributes .. versionchanged:: 1.0.0 @@ -530,11 +530,23 @@ def __init__(self, parser, tokens): @return_empty_on_apply def apply(self, group): - mask = np.zeros(len(group), dtype=bool) - for val in self.values: - values = getattr(group, self.field) - mask |= [fnmatch.fnmatch(x, val) for x in values] - return group[mask].unique + # rather than work on group.names, cheat and look at the lookup table + nmattr = getattr(group.universe._topology, self.field) + + matches = [] # list of passing indices + # iterate through set of known atom names, check which pass + for nm, ix in nmattr.namedict.items(): + if any(fnmatch.fnmatchcase(nm, val) for val in self.values): + matches.append(ix) + + # atomname indices for members of this group + nmidx = nmattr.nmidx[getattr(group, self.level)] + + return group[np.in1d(nmidx, matches)].unique + + +class StringSelection(_ProtoStringSelection): + level = 'ix' # operates on atom level attribute, i.e. 
'.ix' class AtomNameSelection(StringSelection): @@ -561,22 +573,27 @@ class AtomICodeSelection(StringSelection): field = 'icodes' -class ResidueNameSelection(StringSelection): +class _ResidueStringSelection(_ProtoStringSelection): + level= 'resindices' + + +class ResidueNameSelection(_ResidueStringSelection): """Select atoms based on 'resnames' attribute""" token = 'resname' field = 'resnames' -class MoleculeTypeSelection(StringSelection): +class MoleculeTypeSelection(_ResidueStringSelection): """Select atoms based on 'moltypes' attribute""" token = 'moltype' field = 'moltypes' -class SegmentNameSelection(StringSelection): +class SegmentNameSelection(_ProtoStringSelection): """Select atoms based on 'segids' attribute""" token = 'segid' field = 'segids' + level = 'segindices' class AltlocSelection(StringSelection): @@ -587,8 +604,8 @@ class AltlocSelection(StringSelection): class AromaticSelection(Selection): """Select aromatic atoms. - - Aromaticity is available in the `aromaticities` attribute and is made + + Aromaticity is available in the `aromaticities` attribute and is made available through RDKit""" token = 'aromatic' field = 'aromaticities' @@ -600,6 +617,60 @@ def apply(self, group): return group[group.aromaticities].unique +class SmartsSelection(Selection): + """Select atoms based on SMARTS queries. + + Uses RDKit to run the query and converts the result to MDAnalysis. + Supports chirality. 
+ """ + token = 'smarts' + + def __init__(self, parser, tokens): + # The parser will add spaces around parentheses and then split the + # selection based on spaces to create the tokens + # If the input SMARTS query contained parentheses, the query will be + # split because of that and we need to reconstruct it + # We also need to keep the parentheses that are not part of the smarts + # query intact + pattern = [] + counter = {"(": 0, ")": 0} + # loop until keyword but ignore parentheses as a keyword + while tokens[0] in ["(", ")"] or not is_keyword(tokens[0]): + # keep track of the number of open and closed parentheses + if tokens[0] in ["(", ")"]: + counter[tokens[0]] += 1 + # if the char is a closing ")" but there's no corresponding + # open "(" then we've reached then end of the smarts query and + # the current token ")" is part of a grouping parenthesis + if tokens[0] == ")" and counter["("] < (counter[")"]): + break + # add the token to the pattern and remove it from the tokens + val = tokens.popleft() + pattern.append(val) + self.pattern = "".join(pattern) + + def apply(self, group): + try: + from rdkit import Chem + except ImportError: + raise ImportError("RDKit is required for SMARTS-based atom " + "selection but it's not installed. 
Try " + "installing it with \n" + "conda install -c conda-forge rdkit") + pattern = Chem.MolFromSmarts(self.pattern) + if not pattern: + raise ValueError(f"{self.pattern!r} is not a valid SMARTS query") + mol = group.convert_to("RDKIT") + matches = mol.GetSubstructMatches(pattern, useChirality=True) + # convert rdkit indices to mdanalysis' + indices = [ + mol.GetAtomWithIdx(idx).GetIntProp("_MDAnalysis_index") + for match in matches for idx in match] + # create boolean mask for atoms based on index + mask = np.in1d(range(group.n_atoms), np.unique(indices)) + return group[mask] + + class ResidSelection(Selection): """Select atoms based on numerical fields @@ -620,7 +691,7 @@ def __init__(self, parser, tokens): lowers = [] for val in values: - m1 = re.match("(\d+)(\w?)$", val) + m1 = re.match(r"(\d+)(\w?)$", val) if not m1 is None: res = m1.groups() lower = int(res[0]), res[1] @@ -628,7 +699,7 @@ def __init__(self, parser, tokens): else: # check if in appropriate format 'lower:upper' or 'lower-upper' # each val is one or more digits, maybe a letter - selrange = re.match("(\d+)(\w?)[:-](\d+)(\w?)", val) + selrange = re.match(r"(\d+)(\w?)[:-](\d+)(\w?)", val) if selrange is None: # re.match returns None on failure raise ValueError("Failed to parse value: {0}".format(val)) res = selrange.groups() @@ -740,7 +811,7 @@ def __init__(self, parser, tokens): upper = None except ValueError: # check if in appropriate format 'lower:upper' or 'lower-upper' - selrange = re.match("(\d+)[:-](\d+)", val) + selrange = re.match(r"(\d+)[:-](\d+)", val) if not selrange: errmsg = f"Failed to parse number: {val}" raise ValueError(errmsg) from None @@ -802,10 +873,15 @@ class ProteinSelection(Selection): See Also -------- :func:`MDAnalysis.lib.util.convert_aa_code` + + + .. 
versionchanged:: 2.0.0 + prot_res changed to set (from numpy array) + performance improved by ~100x on larger systems """ token = 'protein' - prot_res = np.array([ + prot_res = { # CHARMM top_all27_prot_lipid.rtf 'ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLN', 'GLU', 'GLY', 'HSD', 'HSE', 'HSP', 'ILE', 'LEU', 'LYS', 'MET', 'PHE', 'PRO', 'SER', 'THR', @@ -828,14 +904,20 @@ class ProteinSelection(Selection): 'CLEU', 'CILE', 'CVAL', 'CASF', 'CASN', 'CGLN', 'CARG', 'CHID', 'CHIE', 'CHIP', 'CTRP', 'CPHE', 'CTYR', 'CGLU', 'CASP', 'CLYS', 'CPRO', 'CCYS', 'CCYX', 'CMET', 'CME', 'ASF', - ]) + } def __init__(self, parser, tokens): pass def apply(self, group): - mask = np.in1d(group.resnames, self.prot_res) - return group[mask].unique + resname_attr = group.universe._topology.resnames + # which values in resname attr are in prot_res? + matches = [ix for (nm, ix) in resname_attr.namedict.items() + if nm in self.prot_res] + # index of each atom's resname + nmidx = resname_attr.nmidx[group.resindices] + # intersect atom's resname index and matches to prot_res + return group[np.in1d(nmidx, matches)].unique class NucleicSelection(Selection): @@ -850,23 +932,32 @@ class NucleicSelection(Selection): .. versionchanged:: 0.8 additional Gromacs selections + .. 
versionchanged:: 2.0.0 + nucl_res changed to set (from numpy array) + performance improved by ~100x on larger systems """ token = 'nucleic' - nucl_res = np.array([ + nucl_res = { 'ADE', 'URA', 'CYT', 'GUA', 'THY', 'DA', 'DC', 'DG', 'DT', 'RA', 'RU', 'RG', 'RC', 'A', 'T', 'U', 'C', 'G', 'DA5', 'DC5', 'DG5', 'DT5', 'DA3', 'DC3', 'DG3', 'DT3', 'RA5', 'RU5', 'RG5', 'RC5', 'RA3', 'RU3', 'RG3', 'RC3' - ]) + } def __init__(self, parser, tokens): pass def apply(self, group): - mask = np.in1d(group.resnames, self.nucl_res) + resnames = group.universe._topology.resnames + nmidx = resnames.nmidx[group.resindices] + + matches = [ix for (nm, ix) in resnames.namedict.items() + if nm in self.nucl_res] + mask = np.in1d(nmidx, matches) + return group[mask].unique @@ -875,14 +966,32 @@ class BackboneSelection(ProteinSelection): This excludes OT* on C-termini (which are included by, eg VMD's backbone selection). + + + .. versionchanged:: 2.0.0 + bb_atoms changed to set (from numpy array) + performance improved by ~100x on larger systems """ token = 'backbone' - bb_atoms = np.array(['N', 'CA', 'C', 'O']) + bb_atoms = {'N', 'CA', 'C', 'O'} def apply(self, group): - mask = np.in1d(group.names, self.bb_atoms) - mask &= np.in1d(group.resnames, self.prot_res) - return group[mask].unique + atomnames = group.universe._topology.names + resnames = group.universe._topology.resnames + + # filter by atom names + name_matches = [ix for (nm, ix) in atomnames.namedict.items() + if nm in self.bb_atoms] + nmidx = atomnames.nmidx[group.ix] + group = group[np.in1d(nmidx, name_matches)] + + # filter by resnames + resname_matches = [ix for (nm, ix) in resnames.namedict.items() + if nm in self.prot_res] + nmidx = resnames.nmidx[group.resindices] + group = group[np.in1d(nmidx, resname_matches)] + + return group.unique class NucleicBackboneSelection(NucleicSelection): @@ -890,14 +999,32 @@ class NucleicBackboneSelection(NucleicSelection): These atoms are only recognized if they are in a residue matched by 
the :class:`NucleicSelection`. + + + .. versionchanged:: 2.0.0 + bb_atoms changed to set (from numpy array) + performance improved by ~100x on larger systems """ token = 'nucleicbackbone' - bb_atoms = np.array(["P", "C5'", "C3'", "O3'", "O5'"]) + bb_atoms = {"P", "C5'", "C3'", "O3'", "O5'"} def apply(self, group): - mask = np.in1d(group.names, self.bb_atoms) - mask &= np.in1d(group.resnames, self.nucl_res) - return group[mask].unique + atomnames = group.universe._topology.names + resnames = group.universe._topology.resnames + + # filter by atom names + name_matches = [ix for (nm, ix) in atomnames.namedict.items() + if nm in self.bb_atoms] + nmidx = atomnames.nmidx[group.ix] + group = group[np.in1d(nmidx, name_matches)] + + # filter by resnames + resname_matches = [ix for (nm, ix) in resnames.namedict.items() + if nm in self.nucl_res] + nmidx = resnames.nmidx[group.resindices] + group = group[np.in1d(nmidx, resname_matches)] + + return group.unique class BaseSelection(NucleicSelection): @@ -907,29 +1034,65 @@ class BaseSelection(NucleicSelection): 'N9', 'N7', 'C8', 'C5', 'C4', 'N3', 'C2', 'N1', 'C6', 'O6','N2','N6', 'O2','N4','O4','C5M' + + + .. 
versionchanged:: 2.0.0 + base_atoms changed to set (from numpy array) + performance improved by ~100x on larger systems """ token = 'nucleicbase' - base_atoms = np.array([ + base_atoms = { 'N9', 'N7', 'C8', 'C5', 'C4', 'N3', 'C2', 'N1', 'C6', 'O6', 'N2', 'N6', - 'O2', 'N4', 'O4', 'C5M']) + 'O2', 'N4', 'O4', 'C5M'} def apply(self, group): - mask = np.in1d(group.names, self.base_atoms) - mask &= np.in1d(group.resnames, self.nucl_res) - return group[mask].unique + atomnames = group.universe._topology.names + resnames = group.universe._topology.resnames + + # filter by atom names + name_matches = [ix for (nm, ix) in atomnames.namedict.items() + if nm in self.base_atoms] + nmidx = atomnames.nmidx[group.ix] + group = group[np.in1d(nmidx, name_matches)] + + # filter by resnames + resname_matches = [ix for (nm, ix) in resnames.namedict.items() + if nm in self.nucl_res] + nmidx = resnames.nmidx[group.resindices] + group = group[np.in1d(nmidx, resname_matches)] + + return group.unique class NucleicSugarSelection(NucleicSelection): """Contains all atoms with name C1', C2', C3', C4', O2', O4', O3'. + + + .. 
versionchanged:: 2.0.0 + sug_atoms changed to set (from numpy array) + performance improved by ~100x on larger systems """ token = 'nucleicsugar' - sug_atoms = np.array(["C1'", "C2'", "C3'", "C4'", "O4'"]) + sug_atoms = {"C1'", "C2'", "C3'", "C4'", "O4'"} def apply(self, group): - mask = np.in1d(group.names, self.sug_atoms) - mask &= np.in1d(group.resnames, self.nucl_res) - return group[mask].unique + atomnames = group.universe._topology.names + resnames = group.universe._topology.resnames + + # filter by atom names + name_matches = [ix for (nm, ix) in atomnames.namedict.items() + if nm in self.sug_atoms] + nmidx = atomnames.nmidx[group.ix] + group = group[np.in1d(nmidx, name_matches)] + + # filter by resnames + resname_matches = [ix for (nm, ix) in resnames.namedict.items() + if nm in self.nucl_res] + nmidx = resnames.nmidx[group.resindices] + group = group[np.in1d(nmidx, resname_matches)] + + return group.unique class PropertySelection(Selection): @@ -1044,7 +1207,7 @@ class SameSelection(Selection): Selects all atoms that have the same subkeyword value as any atom in selection .. versionchanged:: 1.0.0 - Map :code:`"residue"` to :code:`"resindices"` and :code:`"segment"` to + Map :code:`"residue"` to :code:`"resindices"` and :code:`"segment"` to :code:`"segindices"` (see #2669 and #2672) """ diff --git a/package/MDAnalysis/core/topologyattrs.py b/package/MDAnalysis/core/topologyattrs.py index c600ada3eb1..46b8eb65556 100644 --- a/package/MDAnalysis/core/topologyattrs.py +++ b/package/MDAnalysis/core/topologyattrs.py @@ -21,7 +21,7 @@ # J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 # -"""\ +r""" Topology attribute objects --- :mod:`MDAnalysis.core.topologyattrs` =================================================================== @@ -31,6 +31,7 @@ TopologyAttrs are used to contain attributes such as atom names or resids. These are usually read by the TopologyParser. 
""" + import Bio.Seq import Bio.SeqRecord from collections import defaultdict @@ -473,8 +474,65 @@ def _gen_initial_values(na, nr, ns): return np.arange(1, na + 1) +class _AtomStringAttr(AtomAttr): + def __init__(self, vals, guessed=False): + self._guessed = guessed + + self.namedict = dict() # maps str to nmidx + name_lookup = [] # maps idx to str + # eg namedict['O'] = 5 & name_lookup[5] = 'O' + + self.nmidx = np.zeros_like(vals, dtype=int) # the lookup for each atom + # eg Atom 5 is 'C', so nmidx[5] = 7, where name_lookup[7] = 'C' + + for i, val in enumerate(vals): + try: + self.nmidx[i] = self.namedict[val] + except KeyError: + nextidx = len(self.namedict) + self.namedict[val] = nextidx + name_lookup.append(val) + + self.nmidx[i] = nextidx + + self.name_lookup = np.array(name_lookup, dtype=object) + self.values = self.name_lookup[self.nmidx] + + @staticmethod + def _gen_initial_values(na, nr, ns): + return np.array(['' for _ in range(na)], dtype=object) + + @_check_length + def set_atoms(self, ag, values): + newnames = [] + + # two possibilities, either single value given, or one per Atom + if isinstance(values, str): + try: + newidx = self.namedict[values] + except KeyError: + newidx = len(self.namedict) + self.namedict[values] = newidx + newnames.append(values) + else: + newidx = np.zeros_like(values, dtype=int) + for i, val in enumerate(values): + try: + newidx[i] = self.namedict[val] + except KeyError: + nextidx = len(self.namedict) + self.namedict[val] = nextidx + newnames.append(val) + newidx[i] = nextidx + + self.nmidx[ag.ix] = newidx # newidx either single value or same size array + if newnames: + self.name_lookup = np.concatenate([self.name_lookup, newnames]) + self.values = self.name_lookup[self.nmidx] + + # TODO: update docs to property doc -class Atomnames(AtomAttr): +class Atomnames(_AtomStringAttr): """Name for each atom. 
""" attrname = 'names' @@ -483,10 +541,6 @@ class Atomnames(AtomAttr): dtype = object transplants = defaultdict(list) - @staticmethod - def _gen_initial_values(na, nr, ns): - return np.array(['' for _ in range(na)], dtype=object) - def phi_selection(residue, c_name='C', n_name='N', ca_name='CA'): """Select AtomGroup corresponding to the phi protein backbone dihedral C'-N-CA-C. @@ -536,7 +590,7 @@ def phi_selection(residue, c_name='C', n_name='N', ca_name='CA'): transplants[Residue].append(('phi_selection', phi_selection)) def phi_selections(residues, c_name='C', n_name='N', ca_name='CA'): - """Select list of AtomGroups corresponding to the phi protein + """Select list of AtomGroups corresponding to the phi protein backbone dihedral C'-N-CA-C. Parameters @@ -655,13 +709,13 @@ def psi_selection(residue, c_name='C', n_name='N', ca_name='CA'): transplants[Residue].append(('psi_selection', psi_selection)) def _get_next_residues_by_resid(residues): - """Select list of Residues corresponding to the next resid for each + """Select list of Residues corresponding to the next resid for each residue in `residues`. Returns ------- List of Residues - List of the next residues in the Universe, by resid and segid. + List of the next residues in the Universe, by resid and segid. If not found, the corresponding item in the list is ``None``. .. versionadded:: 1.0.0 @@ -695,13 +749,13 @@ def _get_next_residues_by_resid(residues): _get_next_residues_by_resid)) def _get_prev_residues_by_resid(residues): - """Select list of Residues corresponding to the previous resid for each + """Select list of Residues corresponding to the previous resid for each residue in `residues`. Returns ------- List of Residues - List of the previous residues in the Universe, by resid and segid. + List of the previous residues in the Universe, by resid and segid. If not found, the corresponding item in the list is ``None``. .. 
versionadded:: 1.0.0 @@ -727,7 +781,7 @@ def _get_prev_residues_by_resid(residues): _get_prev_residues_by_resid)) def psi_selections(residues, c_name='C', n_name='N', ca_name='CA'): - """Select list of AtomGroups corresponding to the psi protein + """Select list of AtomGroups corresponding to the psi protein backbone dihedral N-CA-C-N'. Parameters @@ -743,7 +797,7 @@ def psi_selections(residues, c_name='C', n_name='N', ca_name='CA'): ------- List of AtomGroups 4-atom selections in the correct order. If no N' found in the - following residue (by resid) then the corresponding item in the + following residue (by resid) then the corresponding item in the list is ``None``. .. versionadded:: 1.0.0 @@ -831,7 +885,7 @@ def omega_selection(residue, c_name='C', n_name='N', ca_name='CA'): transplants[Residue].append(('omega_selection', omega_selection)) def omega_selections(residues, c_name='C', n_name='N', ca_name='CA'): - """Select list of AtomGroups corresponding to the omega protein + """Select list of AtomGroups corresponding to the omega protein backbone dihedral CA-C-N'-CA'. omega describes the -C-N- peptide bond. Typically, it is trans (180 @@ -851,7 +905,7 @@ def omega_selections(residues, c_name='C', n_name='N', ca_name='CA'): ------- List of AtomGroups 4-atom selections in the correct order. If no C' found in the - previous residue (by resid) then the corresponding item in the + previous residue (by resid) then the corresponding item in the list is ``None``. .. versionadded:: 1.0.0 @@ -920,7 +974,7 @@ def chi1_selection(residue, n_name='N', ca_name='CA', cb_name='CB', def chi1_selections(residues, n_name='N', ca_name='CA', cb_name='CB', cg_name='CG'): - """Select list of AtomGroups corresponding to the chi1 sidechain dihedral + """Select list of AtomGroups corresponding to the chi1 sidechain dihedral N-CA-CB-CG. 
Parameters @@ -958,20 +1012,16 @@ def chi1_selections(residues, n_name='N', ca_name='CA', cb_name='CB', # TODO: update docs to property doc -class Atomtypes(AtomAttr): +class Atomtypes(_AtomStringAttr): """Type for each atom""" attrname = 'types' singular = 'type' per_object = 'atom' dtype = object - @staticmethod - def _gen_initial_values(na, nr, ns): - return np.array(['' for _ in range(na)], dtype=object) - # TODO: update docs to property doc -class Elements(AtomAttr): +class Elements(_AtomStringAttr): """Element for each atom""" attrname = 'elements' singular = 'element' @@ -995,7 +1045,7 @@ def _gen_initial_values(na, nr, ns): return np.zeros(na) -class RecordTypes(AtomAttr): +class RecordTypes(_AtomStringAttr): """For PDB-like formats, indicates if ATOM or HETATM Defaults to 'ATOM' @@ -1013,7 +1063,7 @@ def _gen_initial_values(na, nr, ns): return np.array(['ATOM'] * na, dtype=object) -class ChainIDs(AtomAttr): +class ChainIDs(_AtomStringAttr): """ChainID per atom Note @@ -1025,10 +1075,6 @@ class ChainIDs(AtomAttr): per_object = 'atom' dtype = object - @staticmethod - def _gen_initial_values(na, nr, ns): - return np.array(['' for _ in range(na)], dtype=object) - class Tempfactors(AtomAttr): """Tempfactor for atoms""" @@ -1070,7 +1116,7 @@ def get_residues(self, rg): if isinstance(rg._ix, numbers.Integral): # for a single residue - masses = self.values[resatoms].sum() + masses = self.values[tuple(resatoms)].sum() else: # for a residuegroup masses = np.empty(len(rg)) @@ -1094,7 +1140,7 @@ def get_segments(self, sg): @warn_if_not_unique @check_pbc_and_unwrap def center_of_mass(group, pbc=False, compound='group', unwrap=False): - """Center of mass of (compounds of) the group. + r"""Center of mass of (compounds of) the group. Computes the center of mass of :class:`Atoms` in the group. 
Centers of mass per :class:`Residue`, :class:`Segment`, molecule, or @@ -1155,7 +1201,7 @@ def center_of_mass(group, pbc=False, compound='group', unwrap=False): @warn_if_not_unique def total_mass(group, compound='group'): - """Total mass of (compounds of) the group. + r"""Total mass of (compounds of) the group. Computes the total mass of :class:`Atoms` in the group. Total masses per :class:`Residue`, :class:`Segment`, molecule, or @@ -1422,7 +1468,7 @@ def principal_axes(group, pbc=False): .. versionchanged:: 0.8 Added *pbc* keyword - .. versionchanged:: 1.0.0 + .. versionchanged:: 1.0.0 Always return principal axes in right-hand convention. """ @@ -1491,7 +1537,7 @@ def get_residues(self, rg): resatoms = self.top.tt.residues2atoms_2d(rg.ix) if isinstance(rg._ix, numbers.Integral): - charges = self.values[resatoms].sum() + charges = self.values[tuple(resatoms)].sum() else: charges = np.empty(len(rg)) for i, row in enumerate(resatoms): @@ -1513,7 +1559,7 @@ def get_segments(self, sg): @warn_if_not_unique def total_charge(group, compound='group'): - """Total charge of (compounds of) the group. + r"""Total charge of (compounds of) the group. Computes the total charge of :class:`Atoms` in the group. 
Total charges per :class:`Residue`, :class:`Segment`, molecule, or @@ -1574,7 +1620,7 @@ def _gen_initial_values(na, nr, ns): # TODO: update docs to property doc -class AltLocs(AtomAttr): +class AltLocs(_AtomStringAttr): """AltLocs for each atom""" attrname = 'altLocs' singular = 'altLoc' @@ -1727,8 +1773,65 @@ def _gen_initial_values(na, nr, ns): return np.arange(1, nr + 1) +class _ResidueStringAttr(ResidueAttr): + def __init__(self, vals, guessed=False): + self._guessed = guessed + + self.namedict = dict() # maps str to nmidx + name_lookup = [] # maps idx to str + # eg namedict['O'] = 5 & name_lookup[5] = 'O' + + self.nmidx = np.zeros_like(vals, dtype=int) # the lookup for each atom + # eg Atom 5 is 'C', so nmidx[5] = 7, where name_lookup[7] = 'C' + + for i, val in enumerate(vals): + try: + self.nmidx[i] = self.namedict[val] + except KeyError: + nextidx = len(self.namedict) + self.namedict[val] = nextidx + name_lookup.append(val) + + self.nmidx[i] = nextidx + + self.name_lookup = np.array(name_lookup, dtype=object) + self.values = self.name_lookup[self.nmidx] + + @staticmethod + def _gen_initial_values(na, nr, ns): + return np.array(['' for _ in range(nr)], dtype=object) + + @_check_length + def set_residues(self, rg, values): + newnames = [] + + # two possibilities, either single value given, or one per Atom + if isinstance(values, str): + try: + newidx = self.namedict[values] + except KeyError: + newidx = len(self.namedict) + self.namedict[values] = newidx + newnames.append(values) + else: + newidx = np.zeros_like(values, dtype=int) + for i, val in enumerate(values): + try: + newidx[i] = self.namedict[val] + except KeyError: + nextidx = len(self.namedict) + self.namedict[val] = nextidx + newnames.append(val) + newidx[i] = nextidx + + self.nmidx[rg.ix] = newidx # newidx either single value or same size array + if newnames: + self.name_lookup = np.concatenate([self.name_lookup, newnames]) + self.values = self.name_lookup[self.nmidx] + + # TODO: update docs to 
property doc -class Resnames(ResidueAttr): +class Resnames(_ResidueStringAttr): attrname = 'resnames' singular = 'resname' transplants = defaultdict(list) @@ -1847,18 +1950,14 @@ def _gen_initial_values(na, nr, ns): return np.arange(1, nr + 1) -class ICodes(ResidueAttr): +class ICodes(_ResidueStringAttr): """Insertion code for Atoms""" attrname = 'icodes' singular = 'icode' dtype = object - @staticmethod - def _gen_initial_values(na, nr, ns): - return np.array(['' for _ in range(nr)], dtype=object) - -class Moltypes(ResidueAttr): +class Moltypes(_ResidueStringAttr): """Name of the molecule type Two molecules that share a molecule type share a common template topology. @@ -1910,8 +2009,65 @@ def set_segments(self, sg, values): self.values[sg.ix] = values +class _SegmentStringAttr(SegmentAttr): + def __init__(self, vals, guessed=False): + self._guessed = guessed + + self.namedict = dict() # maps str to nmidx + name_lookup = [] # maps idx to str + # eg namedict['O'] = 5 & name_lookup[5] = 'O' + + self.nmidx = np.zeros_like(vals, dtype=int) # the lookup for each atom + # eg Atom 5 is 'C', so nmidx[5] = 7, where name_lookup[7] = 'C' + + for i, val in enumerate(vals): + try: + self.nmidx[i] = self.namedict[val] + except KeyError: + nextidx = len(self.namedict) + self.namedict[val] = nextidx + name_lookup.append(val) + + self.nmidx[i] = nextidx + + self.name_lookup = np.array(name_lookup, dtype=object) + self.values = self.name_lookup[self.nmidx] + + @staticmethod + def _gen_initial_values(na, nr, ns): + return np.array(['' for _ in range(nr)], dtype=object) + + @_check_length + def set_segments(self, sg, values): + newnames = [] + + # two possibilities, either single value given, or one per Atom + if isinstance(values, str): + try: + newidx = self.namedict[values] + except KeyError: + newidx = len(self.namedict) + self.namedict[values] = newidx + newnames.append(values) + else: + newidx = np.zeros_like(values, dtype=int) + for i, val in enumerate(values): + try: + 
newidx[i] = self.namedict[val] + except KeyError: + nextidx = len(self.namedict) + self.namedict[val] = nextidx + newnames.append(val) + newidx[i] = nextidx + + self.nmidx[sg.ix] = newidx # newidx either single value or same size array + if newnames: + self.name_lookup = np.concatenate([self.name_lookup, newnames]) + self.values = self.name_lookup[self.nmidx] + + # TODO: update docs to property doc -class Segids(SegmentAttr): +class Segids(_SegmentStringAttr): attrname = 'segids' singular = 'segid' transplants = defaultdict(list) @@ -2113,7 +2269,7 @@ def fragindex(self): return self.universe._fragdict[self.ix].ix def fragindices(self): - """The + r"""The :class:`fragment indices` of all :class:`Atoms` in this :class:`~MDAnalysis.core.groups.AtomGroup`. diff --git a/package/MDAnalysis/core/universe.py b/package/MDAnalysis/core/universe.py index cbf8041359a..e4596ee6aba 100644 --- a/package/MDAnalysis/core/universe.py +++ b/package/MDAnalysis/core/universe.py @@ -138,11 +138,8 @@ def _topology_from_file_like(topology_file, topology_format=None, "Error: {2}".format(topology_file, parser, err)) return topology -# py3 TODO -#def _resolve_formats(*coordinates, format=None, topology_format=None): -def _resolve_formats(*coordinates, **kwargs): - format = kwargs.get('format', None) - topology_format = kwargs.get('topology_format', None) + +def _resolve_formats(*coordinates, format=None, topology_format=None): if not coordinates: if format is None: format = topology_format @@ -150,15 +147,9 @@ def _resolve_formats(*coordinates, **kwargs): topology_format = format return format, topology_format -# py3 TODO -#def _resolve_coordinates(filename, *coordinates, format=None, -# all_coordinates=False): -def _resolve_coordinates(*args, **kwargs): - filename = args[0] - coordinates = args[1:] - format = kwargs.get('format', None) - all_coordinates = kwargs.get('all_coordinates', False) +def _resolve_coordinates(filename, *coordinates, format=None, + all_coordinates=False): if 
all_coordinates or not coordinates and filename is not None: try: get_reader_for(filename, format=format) @@ -287,6 +278,7 @@ class Universe(object): :mod:`ChainReader`, which contains the functionality to treat independent trajectory files as a single virtual trajectory. + **kwargs: extra arguments are passed to the topology parser. Attributes ---------- @@ -310,23 +302,10 @@ class Universe(object): ``topology`` and ``trajectory`` are reserved upon unpickle. """ -# Py3 TODO -# def __init__(self, topology=None, *coordinates, all_coordinates=False, -# format=None, topology_format=None, transformations=None, -# guess_bonds=False, vdwradii=None, -# in_memory=False, in_memory_step=1, -# **kwargs): - def __init__(self, *args, **kwargs): - topology = args[0] if args else None - coordinates = args[1:] - all_coordinates = kwargs.pop('all_coordinates', False) - format = kwargs.pop('format', None) - topology_format = kwargs.pop('topology_format', None) - transformations = kwargs.pop('transformations', None) - guess_bonds = kwargs.pop('guess_bonds', False) - vdwradii = kwargs.pop('vdwradii', None) - in_memory = kwargs.pop('in_memory', False) - in_memory_step = kwargs.pop('in_memory_step', 1) + def __init__(self, topology=None, *coordinates, all_coordinates=False, + format=None, topology_format=None, transformations=None, + guess_bonds=False, vdwradii=None, in_memory=False, + in_memory_step=1, **kwargs): self._trajectory = None # managed attribute holding Reader self._cache = {} @@ -687,15 +666,21 @@ def __repr__(self): return "".format( n_atoms=len(self.atoms)) - def __getstate__(self): - # Universe's two "legs" of topology and traj both serialise themselves - return self._topology, self._trajectory + @classmethod + def _unpickle_U(cls, top, traj): + """Special method used by __reduce__ to deserialise a Universe""" + # top is a Topology obj at this point, but Universe can handle that. 
+ u = cls(top) + u.trajectory = traj - def __setstate__(self, args): - self._topology = args[0] - _generate_from_topology(self) + return u - self._trajectory = args[1] + def __reduce__(self): + # __setstate__/__getstate__ will raise an error when Universe has a + # transformation (that has AtomGroup inside). Use __reduce__ instead. + # Universe's two "legs" of top and traj both serialise themselves. + return (self._unpickle_U, (self._topology, + self._trajectory)) # Properties @property @@ -1324,32 +1309,6 @@ def from_smiles(cls, smiles, sanitize=True, addHs=True, return cls(mol, **kwargs) -# TODO: what is the point of this function??? -def as_Universe(*args, **kwargs): - """Return a universe from the input arguments. - - 1. If the first argument is a universe, just return it:: - - as_Universe(universe) --> universe - - 2. Otherwise try to build a universe from the first or the first - and second argument:: - - as_Universe(PDB, **kwargs) --> Universe(PDB, **kwargs) - as_Universe(PSF, DCD, **kwargs) --> Universe(PSF, DCD, **kwargs) - as_Universe(*args, **kwargs) --> Universe(*args, **kwargs) - - Returns - ------- - :class:`~MDAnalysis.core.groups.Universe` - """ - if len(args) == 0: - raise TypeError("as_Universe() takes at least one argument (%d given)" % len(args)) - elif len(args) == 1 and isinstance(args[0], Universe): - return args[0] - return Universe(*args, **kwargs) - - def Merge(*args): """Create a new new :class:`Universe` from one or more :class:`~MDAnalysis.core.groups.AtomGroup` instances. 
diff --git a/package/MDAnalysis/lib/distances.py b/package/MDAnalysis/lib/distances.py index e1755511b5c..74782a67797 100644 --- a/package/MDAnalysis/lib/distances.py +++ b/package/MDAnalysis/lib/distances.py @@ -195,7 +195,7 @@ def distance_array(reference, configuration, box=None, result=None, box : array_like, optional The unitcell dimensions of the system, which can be orthogonal or triclinic and must be provided in the same format as returned by - :attr:`MDAnalysis.coordinates.base.Timestep.dimensions`:\n + :attr:`MDAnalysis.coordinates.base.Timestep.dimensions`: ``[lx, ly, lz, alpha, beta, gamma]``. result : numpy.ndarray, optional Preallocated result array which must have the shape ``(n, m)`` and dtype @@ -263,7 +263,7 @@ def self_distance_array(reference, box=None, result=None, backend="serial"): box : array_like, optional The unitcell dimensions of the system, which can be orthogonal or triclinic and must be provided in the same format as returned by - :attr:`MDAnalysis.coordinates.base.Timestep.dimensions`:\n + :attr:`MDAnalysis.coordinates.base.Timestep.dimensions`: ``[lx, ly, lz, alpha, beta, gamma]``. result : numpy.ndarray, optional Preallocated result array which must have the shape ``(n*(n-1)/2,)`` and @@ -346,7 +346,7 @@ def capped_distance(reference, configuration, max_cutoff, min_cutoff=None, box : array_like, optional The unitcell dimensions of the system, which can be orthogonal or triclinic and must be provided in the same format as returned by - :attr:`MDAnalysis.coordinates.base.Timestep.dimensions`:\n + :attr:`MDAnalysis.coordinates.base.Timestep.dimensions`: ``[lx, ly, lz, alpha, beta, gamma]``. 
method : {'bruteforce', 'nsgrid', 'pkdtree'}, optional Keyword to override the automatic guessing of the employed search @@ -422,7 +422,7 @@ def _determine_method(reference, configuration, max_cutoff, min_cutoff=None, box : numpy.ndarray The unitcell dimensions of the system, which can be orthogonal or triclinic and must be provided in the same format as returned by - :attr:`MDAnalysis.coordinates.base.Timestep.dimensions`:\n + :attr:`MDAnalysis.coordinates.base.Timestep.dimensions`: ``[lx, ly, lz, alpha, beta, gamma]``. method : {'bruteforce', 'nsgrid', 'pkdtree'}, optional Keyword to override the automatic guessing of the employed search @@ -498,7 +498,7 @@ def _bruteforce_capped(reference, configuration, max_cutoff, min_cutoff=None, box : numpy.ndarray, optional The unitcell dimensions of the system, which can be orthogonal or triclinic and must be provided in the same format as returned by - :attr:`MDAnalysis.coordinates.base.Timestep.dimensions`:\n + :attr:`MDAnalysis.coordinates.base.Timestep.dimensions`: ``[lx, ly, lz, alpha, beta, gamma]``. return_distances : bool, optional If set to ``True``, distances will also be returned. @@ -575,7 +575,7 @@ def _pkdtree_capped(reference, configuration, max_cutoff, min_cutoff=None, box : numpy.ndarray, optional The unitcell dimensions of the system, which can be orthogonal or triclinic and must be provided in the same format as returned by - :attr:`MDAnalysis.coordinates.base.Timestep.dimensions`:\n + :attr:`MDAnalysis.coordinates.base.Timestep.dimensions`: ``[lx, ly, lz, alpha, beta, gamma]``. return_distances : bool, optional If set to ``True``, distances will also be returned. 
@@ -657,7 +657,7 @@ def _nsgrid_capped(reference, configuration, max_cutoff, min_cutoff=None, box : numpy.ndarray (``dtype=numpy.float64``, ``shape=(n_pairs,)``), optional The unitcell dimensions of the system, which can be orthogonal or triclinic and must be provided in the same format as returned by - :attr:`MDAnalysis.coordinates.base.Timestep.dimensions`:\n + :attr:`MDAnalysis.coordinates.base.Timestep.dimensions`: ``[lx, ly, lz, alpha, beta, gamma]``. return_distances : bool, optional If set to ``True``, distances will also be returned. @@ -749,7 +749,7 @@ def self_capped_distance(reference, max_cutoff, min_cutoff=None, box=None, box : array_like, optional The unitcell dimensions of the system, which can be orthogonal or triclinic and must be provided in the same format as returned by - :attr:`MDAnalysis.coordinates.base.Timestep.dimensions`:\n + :attr:`MDAnalysis.coordinates.base.Timestep.dimensions`: ``[lx, ly, lz, alpha, beta, gamma]``. method : {'bruteforce', 'nsgrid', 'pkdtree'}, optional Keyword to override the automatic guessing of the employed search @@ -825,7 +825,7 @@ def _determine_method_self(reference, max_cutoff, min_cutoff=None, box=None, box : numpy.ndarray The unitcell dimensions of the system, which can be orthogonal or triclinic and must be provided in the same format as returned by - :attr:`MDAnalysis.coordinates.base.Timestep.dimensions`:\n + :attr:`MDAnalysis.coordinates.base.Timestep.dimensions`: ``[lx, ly, lz, alpha, beta, gamma]``. 
method : {'bruteforce', 'nsgrid', 'pkdtree'}, optional Keyword to override the automatic guessing of the employed search @@ -889,7 +889,7 @@ def _bruteforce_capped_self(reference, max_cutoff, min_cutoff=None, box=None, box : numpy.ndarray, optional The unitcell dimensions of the system, which can be orthogonal or triclinic and must be provided in the same format as returned by - :attr:`MDAnalysis.coordinates.base.Timestep.dimensions`:\n + :attr:`MDAnalysis.coordinates.base.Timestep.dimensions`: ``[lx, ly, lz, alpha, beta, gamma]``. return_distances : bool, optional If set to ``True``, distances will also be returned. @@ -964,7 +964,7 @@ def _pkdtree_capped_self(reference, max_cutoff, min_cutoff=None, box=None, box : numpy.ndarray, optional The unitcell dimensions of the system, which can be orthogonal or triclinic and must be provided in the same format as returned by - :attr:`MDAnalysis.coordinates.base.Timestep.dimensions`:\n + :attr:`MDAnalysis.coordinates.base.Timestep.dimensions`: ``[lx, ly, lz, alpha, beta, gamma]``. return_distances : bool, optional If set to ``True``, distances will also be returned. @@ -1040,7 +1040,7 @@ def _nsgrid_capped_self(reference, max_cutoff, min_cutoff=None, box=None, box : numpy.ndarray, optional The unitcell dimensions of the system, which can be orthogonal or triclinic and must be provided in the same format as returned by - :attr:`MDAnalysis.coordinates.base.Timestep.dimensions`:\n + :attr:`MDAnalysis.coordinates.base.Timestep.dimensions`: ``[lx, ly, lz, alpha, beta, gamma]``. Returns @@ -1125,7 +1125,7 @@ def transform_RtoS(coords, box, backend="serial"): box : numpy.ndarray The unitcell dimensions of the system, which can be orthogonal or triclinic and must be provided in the same format as returned by - :attr:`MDAnalysis.coordinates.base.Timestep.dimensions`:\n + :attr:`MDAnalysis.coordinates.base.Timestep.dimensions`: ``[lx, ly, lz, alpha, beta, gamma]``. 
backend : {'serial', 'OpenMP'}, optional Keyword selecting the type of acceleration. @@ -1174,7 +1174,7 @@ def transform_StoR(coords, box, backend="serial"): box : numpy.ndarray The unitcell dimensions of the system, which can be orthogonal or triclinic and must be provided in the same format as returned by - :attr:`MDAnalysis.coordinates.base.Timestep.dimensions`:\n + :attr:`MDAnalysis.coordinates.base.Timestep.dimensions`: ``[lx, ly, lz, alpha, beta, gamma]``. backend : {'serial', 'OpenMP'}, optional Keyword selecting the type of acceleration. @@ -1238,7 +1238,7 @@ def calc_bonds(coords1, coords2, box=None, result=None, backend="serial"): box : numpy.ndarray, optional The unitcell dimensions of the system, which can be orthogonal or triclinic and must be provided in the same format as returned by - :attr:`MDAnalysis.coordinates.base.Timestep.dimensions`:\n + :attr:`MDAnalysis.coordinates.base.Timestep.dimensions`: ``[lx, ly, lz, alpha, beta, gamma]``. result : numpy.ndarray, optional Preallocated result array of dtype ``numpy.float64`` and shape ``(n,)`` @@ -1327,7 +1327,7 @@ def calc_angles(coords1, coords2, coords3, box=None, result=None, box : numpy.ndarray, optional The unitcell dimensions of the system, which can be orthogonal or triclinic and must be provided in the same format as returned by - :attr:`MDAnalysis.coordinates.base.Timestep.dimensions`:\n + :attr:`MDAnalysis.coordinates.base.Timestep.dimensions`: ``[lx, ly, lz, alpha, beta, gamma]``. 
result : numpy.ndarray, optional Preallocated result array of dtype ``numpy.float64`` and shape ``(n,)`` @@ -1379,7 +1379,7 @@ def calc_angles(coords1, coords2, coords3, box=None, result=None, @check_coords('coords1', 'coords2', 'coords3', 'coords4') def calc_dihedrals(coords1, coords2, coords3, coords4, box=None, result=None, backend="serial"): - """Calculates the dihedral angles formed between quadruplets of positions + r"""Calculates the dihedral angles formed between quadruplets of positions from the four coordinate arrays `coords1`, `coords2`, `coords3`, and `coords4`, which must contain the same number of coordinates. @@ -1429,7 +1429,7 @@ def calc_dihedrals(coords1, coords2, coords3, coords4, box=None, result=None, box : numpy.ndarray, optional The unitcell dimensions of the system, which can be orthogonal or triclinic and must be provided in the same format as returned by - :attr:`MDAnalysis.coordinates.base.Timestep.dimensions`:\n + :attr:`MDAnalysis.coordinates.base.Timestep.dimensions`: ``[lx, ly, lz, alpha, beta, gamma]``. result : numpy.ndarray, optional Preallocated result array of dtype ``numpy.float64`` and shape ``(n,)`` @@ -1493,7 +1493,7 @@ def apply_PBC(coords, box, backend="serial"): box : numpy.ndarray The unitcell dimensions of the system, which can be orthogonal or triclinic and must be provided in the same format as returned by - :attr:`MDAnalysis.coordinates.base.Timestep.dimensions`:\n + :attr:`MDAnalysis.coordinates.base.Timestep.dimensions`: ``[lx, ly, lz, alpha, beta, gamma]``. backend : {'serial', 'OpenMP'}, optional Keyword selecting the type of acceleration. diff --git a/package/MDAnalysis/lib/transformations.py b/package/MDAnalysis/lib/transformations.py index 2723c53dec6..c1079d49784 100644 --- a/package/MDAnalysis/lib/transformations.py +++ b/package/MDAnalysis/lib/transformations.py @@ -65,14 +65,14 @@ Documentation in HTML format can be generated with epydoc. 
-Matrices (M) can be inverted using numpy.linalg.inv(M), concatenated using -numpy.dot(M0, M1), or used to transform homogeneous coordinates (v) using -numpy.dot(M, v) for shape (4, \*) "point of arrays", respectively -numpy.dot(v, M.T) for shape (\*, 4) "array of points". +Matrices (M) can be inverted using ``numpy.linalg.inv(M)``, concatenated using +``numpy.dot(M0, M1)``, or used to transform homogeneous coordinates (v) using +``numpy.dot(M, v)`` for shape ``(4, *)`` "point of arrays", respectively +``numpy.dot(v, M.T)`` for shape ``(*, 4)`` "array of points". -Use the transpose of transformation matrices for OpenGL glMultMatrixd(). +Use the transpose of transformation matrices for OpenGL ``glMultMatrixd()``. -Calculations are carried out with numpy.float64 precision. +Calculations are carried out with ``numpy.float64`` precision. Vector, point, quaternion, and matrix function arguments are expected to be "array like", i.e. tuple, list, or numpy arrays. @@ -81,23 +81,23 @@ Angles are in radians unless specified otherwise. -Quaternions w+ix+jy+kz are represented as [w, x, y, z]. +Quaternions w+ix+jy+kz are represented as ``[w, x, y, z]``. A triple of Euler angles can be applied/interpreted in 24 ways, which can be specified using a 4 character string or encoded 4-tuple: - *Axes 4-string*: e.g. 'sxyz' or 'ryxy' + - *Axes 4-string*: e.g. 'sxyz' or 'ryxy' - - first character : rotations are applied to 's'tatic or 'r'otating frame - - remaining characters : successive rotation axis 'x', 'y', or 'z' + - first character : rotations are applied to 's'tatic or 'r'otating frame + - remaining characters : successive rotation axis 'x', 'y', or 'z' - *Axes 4-tuple*: e.g. (0, 0, 0, 0) or (1, 1, 1, 1) + - *Axes 4-tuple*: e.g. (0, 0, 0, 0) or (1, 1, 1, 1) - - inner axis: code of axis ('x':0, 'y':1, 'z':2) of rightmost matrix. - - parity : even (0) if inner axis 'x' is followed by 'y', 'y' is followed - by 'z', or 'z' is followed by 'x'. Otherwise odd (1). 
- - repetition : first and last axis are same (1) or different (0). - - frame : rotations are applied to static (0) or rotating (1) frame. + - inner axis: code of axis ('x':0, 'y':1, 'z':2) of rightmost matrix. + - parity : even (0) if inner axis 'x' is followed by 'y', 'y' is followed + by 'z', or 'z' is followed by 'x'. Otherwise odd (1). + - repetition : first and last axis are same (1) or different (0). + - frame : rotations are applied to static (0) or rotating (1) frame. References ---------- @@ -896,9 +896,9 @@ def orthogonalization_matrix(lengths, angles): def superimposition_matrix(v0, v1, scaling=False, usesvd=True): """Return matrix to transform given vector set into second vector set. - v0 and v1 are shape (3, \*) or (4, \*) arrays of at least 3 vectors. + `v0` and `v1` are shape `(3, *)` or `(4, *)` arrays of at least 3 vectors. - If usesvd is True, the weighted sum of squared deviations (RMSD) is + If `usesvd` is ``True``, the weighted sum of squared deviations (RMSD) is minimized according to the algorithm by W. Kabsch [8]. Otherwise the quaternion based algorithm by B. Horn [9] is used (slower when using this Python implementation). @@ -1377,7 +1377,7 @@ def quaternion_imag(quaternion): def quaternion_slerp(quat0, quat1, fraction, spin=0, shortestpath=True): - """Return spherical linear interpolation between two quaternions. + r"""Return spherical linear interpolation between two quaternions. >>> q0 = random_quaternion() >>> q1 = random_quaternion() diff --git a/package/MDAnalysis/lib/util.py b/package/MDAnalysis/lib/util.py index 1e2a893556f..9916c45bd8d 100644 --- a/package/MDAnalysis/lib/util.py +++ b/package/MDAnalysis/lib/util.py @@ -20,7 +20,7 @@ # MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. # J. Comput. Chem. 
32 (2011), 2319--2327, doi:10.1002/jcc.21787 # -""" +r""" Helper functions --- :mod:`MDAnalysis.lib.util` ==================================================== @@ -1057,7 +1057,8 @@ def asiterable(obj): #: ``(?P\d?)(?P[IFELAX])(?P(?P\d+)(\.(?P\d+))?)?`` #: #: .. _FORTRAN edit descriptor: http://www.cs.mtu.edu/~shene/COURSES/cs201/NOTES/chap05/format.html -FORTRAN_format_regex = "(?P\d+?)(?P[IFEAX])(?P(?P\d+)(\.(?P\d+))?)?" +FORTRAN_format_regex = (r"(?P\d+?)(?P[IFEAX])" + r"(?P(?P\d+)(\.(?P\d+))?)?") _FORTRAN_format_pattern = re.compile(FORTRAN_format_regex) @@ -1424,7 +1425,7 @@ def convert_aa_code(x): #: Regular expression to match and parse a residue-atom selection; will match #: "LYS300:HZ1" or "K300:HZ1" or "K300" or "4GB300:H6O" or "4GB300" or "YaA300". -RESIDUE = re.compile(""" +RESIDUE = re.compile(r""" (?P([ACDEFGHIKLMNPQRSTVWY]) # 1-letter amino acid | # or ([0-9A-Z][a-zA-Z][A-Z][A-Z]?) # 3-letter or 4-letter residue name @@ -2176,7 +2177,7 @@ def newfunc(*args, **kwds): def deprecate(*args, **kwargs): - """Issues a DeprecationWarning, adds warning to `old_name`'s + r"""Issues a DeprecationWarning, adds warning to `old_name`'s docstring, rebinds ``old_name.__name__`` and returns the new function object. @@ -2310,7 +2311,7 @@ def check_box(box): box : array_like The unitcell dimensions of the system, which can be orthogonal or triclinic and must be provided in the same format as returned by - :attr:`MDAnalysis.coordinates.base.Timestep.dimensions`:\n + :attr:`MDAnalysis.coordinates.base.Timestep.dimensions`: ``[lx, ly, lz, alpha, beta, gamma]``. 
Returns diff --git a/package/MDAnalysis/topology/ParmEdParser.py b/package/MDAnalysis/topology/ParmEdParser.py index 2961f8c4d17..e1525c87af0 100644 --- a/package/MDAnalysis/topology/ParmEdParser.py +++ b/package/MDAnalysis/topology/ParmEdParser.py @@ -1,5 +1,5 @@ # -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- -# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 # # MDAnalysis --- https://www.mdanalysis.org # Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors @@ -25,14 +25,15 @@ ParmEd topology parser ====================== -Converts a `ParmEd `_ :class:`parmed.structure.Structure` into a :class:`MDAnalysis.core.Topology`. +Converts a `ParmEd `_ +:class:`parmed.structure.Structure` into a :class:`MDAnalysis.core.Topology`. Example ------- -If you want to use an MDAnalysis-written ParmEd structure for simulation -in ParmEd, you need to first read your files with ParmEd to include the +If you want to use an MDAnalysis-written ParmEd structure for simulation +in ParmEd, you need to first read your files with ParmEd to include the necessary topology parameters. :: >>> import parmed as pmd @@ -42,7 +43,7 @@ >>> prm -We can then convert this to an MDAnalysis structure, select only the +We can then convert this to an MDAnalysis structure, select only the protein atoms, and then convert it back to ParmEd. :: >>> u = mda.Universe(prm) @@ -53,7 +54,8 @@ >>> prm_prot -From here you can create an OpenMM simulation system and minimize the energy. :: +From here you can create an OpenMM simulation system and minimize the +energy. 
:: >>> import simtk.openmm as mm >>> import simtk.openmm.app as app @@ -81,12 +83,8 @@ """ import logging -import functools -from math import ceil import numpy as np -from ..lib.util import openany -from .guessers import guess_atom_element from .base import TopologyReaderBase, change_squash from .tables import Z2SYMB from ..core.topologyattrs import ( @@ -122,12 +120,14 @@ logger = logging.getLogger("MDAnalysis.topology.ParmEdParser") + def squash_identical(values): if len(values) == 1: return values[0] else: return tuple(values) + class ParmEdParser(TopologyReaderBase): """ For ParmEd structures @@ -153,6 +153,12 @@ def parse(self, **kwargs): Returns ------- MDAnalysis *Topology* object + + + .. versionchanged:: 2.0.0 + Elements are no longer guessed, if the elements present in the + parmed object are not recoginsed (usually given an atomic mass of 0) + then they will be assigned an empty string. """ structure = self.filename @@ -182,8 +188,6 @@ def parse(self, **kwargs): rmin14s = [] epsilon14s = [] - - for atom in structure.atoms: names.append(atom.name) masses.append(atom.mass) @@ -209,7 +213,7 @@ def parse(self, **kwargs): epsilons.append(atom.epsilon) rmin14s.append(atom.rmin_14) epsilon14s.append(atom.epsilon_14) - + attrs = [] n_atoms = len(names) @@ -220,7 +224,7 @@ def parse(self, **kwargs): try: elements.append(Z2SYMB[z]) except KeyError: - elements.append(guess_atom_element(name)) + elements.append('') # Make Atom TopologyAttrs for vals, Attr, dtype in ( @@ -232,7 +236,7 @@ def parse(self, **kwargs): (serials, Atomids, np.int32), (chainids, ChainIDs, object), - (altLocs, AltLocs, object), + (altLocs, AltLocs, object), (bfactors, Tempfactors, np.float32), (occupancies, Occupancies, np.float32), @@ -253,7 +257,8 @@ def parse(self, **kwargs): segids = np.array(segids, dtype=object) residx, (resids, resnames, chainids, segids) = change_squash( - (resids, resnames, chainids, segids), (resids, resnames, chainids, segids)) + (resids, resnames, chainids, 
segids), + (resids, resnames, chainids, segids)) n_residues = len(resids) attrs.append(Resids(resids)) @@ -284,7 +289,6 @@ def parse(self, **kwargs): cmap_values = {} cmap_types = [] - for bond in structure.bonds: idx = (bond.atom1.idx, bond.atom2.idx) if idx not in bond_values: @@ -299,11 +303,11 @@ def parse(self, **kwargs): bond_values, bond_types, bond_orders = [], [], [] else: bond_types, bond_orders = zip(*values) - + bond_types = list(map(squash_identical, bond_types)) bond_orders = list(map(squash_identical, bond_orders)) - attrs.append(Bonds(bond_values, types=bond_types, guessed=False, + attrs.append(Bonds(bond_values, types=bond_types, guessed=False, order=bond_orders)) for pmdlist, na, values, types in ( @@ -343,4 +347,3 @@ def parse(self, **kwargs): residue_segindex=segidx) return top - diff --git a/package/MDAnalysis/transformations/__init__.py b/package/MDAnalysis/transformations/__init__.py index 13ce68dfbd4..91083fff18b 100644 --- a/package/MDAnalysis/transformations/__init__.py +++ b/package/MDAnalysis/transformations/__init__.py @@ -22,38 +22,107 @@ # -"""\ +""" Trajectory transformations --- :mod:`MDAnalysis.transformations` ================================================================ -The transformations submodule contains a collection of functions to modify the -trajectory. Coordinate transformations, such as PBC corrections and molecule fitting -are often required for some analyses and visualization, and the functions in this -module allow transformations to be applied on-the-fly. -These transformation functions can be called by the user for any given -timestep of the trajectory, added as a workflow using :meth:`add_transformations` -of the :mod:`~MDAnalysis.coordinates.base` module, or upon Universe creation using +The transformations submodule contains a collection of function-like classes to +modify the trajectory. 
+Coordinate transformations, such as PBC corrections and molecule fitting +are often required for some analyses and visualization, and the functions in +this module allow transformations to be applied on-the-fly. + +A typical transformation class looks like this (note that we keep its name +lowercase because we will treat it as a function, thanks to the ``__call__`` +method): + +.. code-blocks:: python + + class transformation(object): + def __init__(self, *args, **kwargs): + # do some things + # save needed args as attributes. + self.needed_var = args[0] + + def __call__(self, ts): + # apply changes to the Timestep, + # or modify an AtomGroup and return Timestep + + return ts + +As a concrete example we will write a transformation that rotates a group of +atoms around the z-axis through the center of geometry by a fixed increment +for every time step. We will use +:meth:`MDAnalysis.core.groups.AtomGroup.rotateby` +and simply increment the rotation angle every time the +transformation is called :: + + class spin_atoms(object): + def __init__(self, atoms, dphi): + # Rotate atoms by dphi degrees for every ts around the z axis + self.atoms = atoms + self.dphi = dphi + self.axis = np.array([0, 0, 1]) + + def __call__(self, ts): + phi = self.dphi * ts.frame + self.atoms.rotateby(phi, self.axis) + return ts + +This transformation can be used as :: + + u = mda.Universe(PSF, DCD) + u.trajectory.add_transformations(spin_atoms(u.select_atoms("protein"), 1.0)) + +Also see :mod:`MDAnalysis.transformations.translate` for a simple example. + +These transformation functions can be called by the user for any given timestep +of the trajectory, added as a workflow using :meth:`add_transformations` +of the :mod:`~MDAnalysis.coordinates.base`, or upon Universe creation using the keyword argument `transformations`. Note that in the two latter cases, the -workflow cannot be changed after being defined. +workflow cannot be changed after being defined. for example: + +.. 
code-block:: python -In addition to the specific arguments that each transformation can take, they also -contain a wrapped function that takes a `Timestep` object as argument. -So, a transformation can be roughly defined as follows: + u = mda.Universe(GRO, XTC) + trans = transformation(args) + u.trajectory.add_transformations(trans) + + # it is equivalent to applying this transforamtion to each Timestep by + ts = u.trajectory[0] + ts_trans = trans(ts) + +Transformations can also be created as a closure/nested function. +In addition to the specific arguments that each transformation can take, they +also contain a wrapped function that takes a `Timestep` object as argument. +So, a closure-style transformation can be roughly defined as follows: .. code-block:: python - def transformations(*args,**kwargs): + def transformation(*args,**kwargs): # do some things + def wrapped(ts): - # apply changes to the Timestep object + # apply changes to the Timestep, + # or modify an AtomGroup and return Timestep + return ts return wrapped - -See `MDAnalysis.transformations.translate` for a simple example. +.. Note:: + Although functions (closures) work as transformations, they are not used in + in MDAnalysis from release 2.0.0 onwards because they cannot be reliably + serialized and thus a :class:`Universe` with such transformations cannot be + used with common parallelization schemes (e.g., ones based on + :mod:`multiprocessing`). + For detailed descriptions about how to write a closure-style transformation, + please refer to MDAnalysis 1.x documentation. +.. versionchanged:: 2.0.0 + Transformations should now be created as classes with a :meth:`__call__` + method instead of being written as a function/closure. 
""" from .translate import translate, center_in_box diff --git a/package/MDAnalysis/transformations/fit.py b/package/MDAnalysis/transformations/fit.py index 084868a0e55..85a2c93e615 100644 --- a/package/MDAnalysis/transformations/fit.py +++ b/package/MDAnalysis/transformations/fit.py @@ -27,21 +27,18 @@ Translate and/or rotates the coordinates of a given trajectory to align a given AtomGroup to a reference structure. -.. autofunction:: fit_translation +.. autoclass:: fit_translation -.. autofunction:: fit_rot_trans +.. autoclass:: fit_rot_trans """ import numpy as np -from functools import partial from ..analysis import align -from ..lib.util import get_weights from ..lib.transformations import euler_from_matrix, euler_matrix -def fit_translation(ag, reference, plane=None, weights=None): - +class fit_translation(object): """Translates a given AtomGroup so that its center of geometry/mass matches the respective center of the given reference. A plane can be given by the user using the option `plane`, and will result in the removal of @@ -49,8 +46,8 @@ def fit_translation(ag, reference, plane=None, weights=None): Example ------- - Removing the translations of a given AtomGroup `ag` on the XY plane by fitting - its center of mass to the center of mass of a reference `ref`: + Removing the translations of a given AtomGroup `ag` on the XY plane by + fitting its center of mass to the center of mass of a reference `ref`: .. 
code-block:: python @@ -67,11 +64,12 @@ def fit_translation(ag, reference, plane=None, weights=None): :class:`~MDAnalysis.core.groups.AtomGroup` or a whole :class:`~MDAnalysis.core.universe.Universe` reference : Universe or AtomGroup - reference structure, a :class:`~MDAnalysis.core.groups.AtomGroup` or a whole - :class:`~MDAnalysis.core.universe.Universe` + reference structure, a :class:`~MDAnalysis.core.groups.AtomGroup` or a + whole :class:`~MDAnalysis.core.universe.Universe` plane: str, optional - used to define the plane on which the translations will be removed. Defined as a - string of the plane. Suported planes are yz, xz and xy planes. + used to define the plane on which the translations will be removed. + Defined as a string of the plane. + Suported planes are yz, xz and xy planes. weights : {"mass", ``None``} or array_like, optional choose weights. With ``"mass"`` uses masses as weights; with ``None`` weigh each atom equally. If a float array of the same length as @@ -81,39 +79,56 @@ def fit_translation(ag, reference, plane=None, weights=None): Returns ------- MDAnalysis.coordinates.base.Timestep - """ - if plane is not None: - axes = {'yz' : 0, 'xz' : 1, 'xy' : 2} + + .. versionchanged:: 2.0.0 + The transformation was changed from a function/closure to a class + with ``__call__``. 
+ """ + def __init__(self, ag, reference, plane=None, weights=None): + self.ag = ag + self.reference = reference + self.plane = plane + self.weights = weights + + if self.plane is not None: + axes = {'yz': 0, 'xz': 1, 'xy': 2} + try: + self.plane = axes[self.plane] + except (TypeError, KeyError): + raise ValueError(f'{self.plane} is not a valid plane') \ + from None try: - plane = axes[plane] - except (TypeError, KeyError): - raise ValueError(f'{plane} is not a valid plane') from None - try: - if ag.atoms.n_residues != reference.atoms.n_residues: - errmsg = f"{ag} and {reference} have mismatched number of residues" - raise ValueError(errmsg) - except AttributeError: - errmsg = f"{ag} or {reference} is not valid Universe/AtomGroup" - raise AttributeError(errmsg) from None - ref, mobile = align.get_matching_atoms(reference.atoms, ag.atoms) - weights = align.get_weights(ref.atoms, weights=weights) - ref_com = ref.center(weights) - ref_coordinates = ref.atoms.positions - ref_com - - def wrapped(ts): - mobile_com = np.asarray(mobile.atoms.center(weights), np.float32) - vector = ref_com - mobile_com - if plane is not None: - vector[plane] = 0 + if self.ag.atoms.n_residues != self.reference.atoms.n_residues: + errmsg = ( + f"{self.ag} and {self.reference} have mismatched" + f"number of residues" + ) + + raise ValueError(errmsg) + except AttributeError: + errmsg = ( + f"{self.ag} or {self.reference} is not valid" + f"Universe/AtomGroup" + ) + raise AttributeError(errmsg) from None + self.ref, self.mobile = align.get_matching_atoms(self.reference.atoms, + self.ag.atoms) + self.weights = align.get_weights(self.ref.atoms, weights=self.weights) + self.ref_com = self.ref.center(self.weights) + + def __call__(self, ts): + mobile_com = np.asarray(self.mobile.atoms.center(self.weights), + np.float32) + vector = self.ref_com - mobile_com + if self.plane is not None: + vector[self.plane] = 0 ts.positions += vector return ts - return wrapped - -def fit_rot_trans(ag, reference, 
plane=None, weights=None): +class fit_rot_trans(object): """Perform a spatial superposition by minimizing the RMSD. Spatially align the group of atoms `ag` to `reference` by doing a RMSD @@ -160,41 +175,59 @@ def fit_rot_trans(ag, reference, plane=None, weights=None): ------- MDAnalysis.coordinates.base.Timestep """ - if plane is not None: - axes = {'yz' : 0, 'xz' : 1, 'xy' : 2} + def __init__(self, ag, reference, plane=None, weights=None): + self.ag = ag + self.reference = reference + self.plane = plane + self.weights = weights + + if self.plane is not None: + axes = {'yz': 0, 'xz': 1, 'xy': 2} + try: + self.plane = axes[self.plane] + except (TypeError, KeyError): + raise ValueError(f'{self.plane} is not a valid plane') \ + from None try: - plane = axes[plane] - except (TypeError, KeyError): - raise ValueError(f'{plane} is not a valid plane') from None - try: - if ag.atoms.n_residues != reference.atoms.n_residues: - errmsg = f"{ag} and {reference} have mismatched number of residues" - raise ValueError(errmsg) - except AttributeError: - errmsg = f"{ag} or {reference} is not valid Universe/AtomGroup" - raise AttributeError(errmsg) from None - ref, mobile = align.get_matching_atoms(reference.atoms, ag.atoms) - weights = align.get_weights(ref.atoms, weights=weights) - ref_com = ref.center(weights) - ref_coordinates = ref.atoms.positions - ref_com - - def wrapped(ts): - mobile_com = mobile.atoms.center(weights) - mobile_coordinates = mobile.atoms.positions - mobile_com - rotation, dump = align.rotation_matrix(mobile_coordinates, ref_coordinates, weights=weights) - vector = ref_com - if plane is not None: - matrix = np.r_[rotation, np.zeros(3).reshape(1,3)] + if self.ag.atoms.n_residues != self.reference.atoms.n_residues: + errmsg = ( + f"{self.ag} and {self.reference} have mismatched " + f"number of residues" + ) + raise ValueError(errmsg) + except AttributeError: + errmsg = ( + f"{self.ag} or {self.reference} is not valid " + f"Universe/AtomGroup" + ) + raise 
AttributeError(errmsg) from None + self.ref, self.mobile = align.get_matching_atoms(self.reference.atoms, + self.ag.atoms) + self.weights = align.get_weights(self.ref.atoms, weights=self.weights) + self.ref_com = self.ref.center(self.weights) + self.ref_coordinates = self.ref.atoms.positions - self.ref_com + + def __call__(self, ts): + mobile_com = self.mobile.atoms.center(self.weights) + mobile_coordinates = self.mobile.atoms.positions - mobile_com + rotation, dump = align.rotation_matrix(mobile_coordinates, + self.ref_coordinates, + weights=self.weights) + vector = self.ref_com + if self.plane is not None: + matrix = np.r_[rotation, np.zeros(3).reshape(1, 3)] matrix = np.c_[matrix, np.zeros(4)] - euler_angs = np.asarray(euler_from_matrix(matrix, axes='sxyz'), np.float32) + euler_angs = np.asarray(euler_from_matrix(matrix, axes='sxyz'), + np.float32) for i in range(0, euler_angs.size): - euler_angs[i] = ( euler_angs[plane] if i == plane else 0) - rotation = euler_matrix(euler_angs[0], euler_angs[1], euler_angs[2], axes='sxyz')[:3, :3] - vector[plane] = mobile_com[plane] + euler_angs[i] = (euler_angs[self.plane] if i == self.plane + else 0) + rotation = euler_matrix(euler_angs[0], + euler_angs[1], + euler_angs[2], + axes='sxyz')[:3, :3] + vector[self.plane] = mobile_com[self.plane] ts.positions = ts.positions - mobile_com ts.positions = np.dot(ts.positions, rotation.T) ts.positions = ts.positions + vector - return ts - - return wrapped diff --git a/package/MDAnalysis/transformations/positionaveraging.py b/package/MDAnalysis/transformations/positionaveraging.py index 179b98163f1..930a77774b3 100644 --- a/package/MDAnalysis/transformations/positionaveraging.py +++ b/package/MDAnalysis/transformations/positionaveraging.py @@ -36,15 +36,14 @@ import warnings -class PositionAverager(object): +class PositionAverager(object): """ - Averages the coordinates of a given timestep so that the coordinates of the AtomGroup correspond to the average positions of the N previous 
frames. For frames < N, the average of the frames iterated up to that point will be returned. - + Example ------- @@ -55,13 +54,13 @@ class PositionAverager(object): complete, or if the frames iterated are not sequential. .. code-block:: python - + N=3 transformation = PositionAverager(N, check_reset=True) u.trajectory.add_transformations(transformation) for ts in u.trajectory: print(ts.positions) - + In this case, ``ts.positions`` will return the average coordinates of the last N iterated frames. @@ -136,52 +135,59 @@ class PositionAverager(object): """ - + def __init__(self, avg_frames, check_reset=True): self.avg_frames = avg_frames self.check_reset = check_reset self.current_avg = 0 self.resetarrays() - + self.current_frame = 0 + def resetarrays(self): self.idx_array = np.empty(self.avg_frames) self.idx_array[:] = np.nan - - def rollidx(self,ts): - self.idx_array = np.roll(self.idx_array, 1) + + def rollidx(self, ts): + self.idx_array = np.roll(self.idx_array, 1) self.idx_array[0] = ts.frame - - def rollposx(self,ts): + + def rollposx(self, ts): try: self.coord_array.size except AttributeError: size = (ts.positions.shape[0], ts.positions.shape[1], self.avg_frames) self.coord_array = np.empty(size) - + self.coord_array = np.roll(self.coord_array, 1, axis=2) - self.coord_array[...,0] = ts.positions.copy() - - + self.coord_array[..., 0] = ts.positions.copy() + def __call__(self, ts): + # calling the same timestep will not add new data to coord_array + # This can prevent from getting different values when + # call `u.trajectory[i]` multiple times. 
+ if (ts.frame == self.current_frame + and hasattr(self, 'coord_array') + and not np.isnan(self.idx_array).all()): + test = ~np.isnan(self.idx_array) + ts.positions = np.mean(self.coord_array[..., test], axis=2) + return ts + else: + self.current_frame = ts.frame + self.rollidx(ts) test = ~np.isnan(self.idx_array) self.current_avg = sum(test) - if self.current_avg == 1: - return ts - if self.check_reset: sign = np.sign(np.diff(self.idx_array[test])) - - if not (np.all(sign == 1) or np.all(sign==-1)): + if not (np.all(sign == 1) or np.all(sign == -1)): warnings.warn('Cannot average position for non sequential' 'iterations. Averager will be reset.', Warning) self.resetarrays() return self(ts) - + self.rollposx(ts) - ts.positions = np.mean(self.coord_array[...,test], axis=2) - + ts.positions = np.mean(self.coord_array[..., test], axis=2) + return ts - diff --git a/package/MDAnalysis/transformations/rotate.py b/package/MDAnalysis/transformations/rotate.py index 57871292ff3..afbba495a61 100644 --- a/package/MDAnalysis/transformations/rotate.py +++ b/package/MDAnalysis/transformations/rotate.py @@ -25,20 +25,20 @@ Trajectory rotation --- :mod:`MDAnalysis.transformations.rotate` ================================================================ -Rotates the coordinates by a given angle arround an axis formed by a direction and a -point +Rotates the coordinates by a given angle arround an axis formed by a direction +and a point. -.. autofunction:: rotateby +.. autoclass:: rotateby """ -import math import numpy as np from functools import partial from ..lib.transformations import rotation_matrix from ..lib.util import get_weights -def rotateby(angle, direction, point=None, ag=None, weights=None, wrap=False): + +class rotateby(object): ''' Rotates the trajectory by a given angle on a given axis. The axis is defined by the user, combining the direction vector and a point. 
This point can be the center @@ -107,48 +107,69 @@ def rotateby(angle, direction, point=None, ag=None, weights=None, wrap=False): Wrapping/unwrapping the trajectory or performing PBC corrections may not be possible after rotating the trajectory. + + .. versionchanged:: 2.0.0 + The transformation was changed from a function/closure to a class + with ``__call__``. ''' - angle = np.deg2rad(angle) - try: - direction = np.asarray(direction, np.float32) - if direction.shape != (3, ) and direction.shape != (1, 3): - raise ValueError('{} is not a valid direction'.format(direction)) - direction = direction.reshape(3, ) - except ValueError: - raise ValueError(f'{direction} is not a valid direction') from None - if point is not None: - point = np.asarray(point, np.float32) - if point.shape != (3, ) and point.shape != (1, 3): - raise ValueError('{} is not a valid point'.format(point)) - point = point.reshape(3, ) - elif ag: + def __init__(self, + angle, + direction, + point=None, + ag=None, + weights=None, + wrap=False): + self.angle = angle + self.direction = direction + self.point = point + self.ag = ag + self.weights = weights + self.wrap = wrap + + self.angle = np.deg2rad(self.angle) try: - atoms = ag.atoms - except AttributeError: - raise ValueError(f'{ag} is not an AtomGroup object') from None - else: + self.direction = np.asarray(self.direction, np.float32) + if self.direction.shape != (3, ) and \ + self.direction.shape != (1, 3): + raise ValueError('{} is not a valid direction' + .format(self.direction)) + self.direction = self.direction.reshape(3, ) + except ValueError: + raise ValueError(f'{self.direction} is not a valid direction') \ + from None + if self.point is not None: + self.point = np.asarray(self.point, np.float32) + if self.point.shape != (3, ) and self.point.shape != (1, 3): + raise ValueError('{} is not a valid point'.format(self.point)) + self.point = self.point.reshape(3, ) + elif self.ag: try: - weights = get_weights(atoms, weights=weights) - except 
(ValueError, TypeError): - errmsg = ("weights must be {'mass', None} or an iterable of " - "the same size as the atomgroup.") - raise TypeError(errmsg) from None - center_method = partial(atoms.center, weights, pbc=wrap) - else: - raise ValueError('A point or an AtomGroup must be specified') - - def wrapped(ts): - if point is None: - position = center_method() + self.atoms = self.ag.atoms + except AttributeError: + raise ValueError(f'{self.ag} is not an AtomGroup object') \ + from None + else: + try: + self.weights = get_weights(self.atoms, + weights=self.weights) + except (ValueError, TypeError): + errmsg = ("weights must be {'mass', None} or an iterable " + "of the same size as the atomgroup.") + raise TypeError(errmsg) from None + self.center_method = partial(self.atoms.center, + self.weights, + pbc=self.wrap) + else: + raise ValueError('A point or an AtomGroup must be specified') + + def __call__(self, ts): + if self.point is None: + position = self.center_method() else: - position = point - matrix = rotation_matrix(angle, direction, position) + position = self.point + matrix = rotation_matrix(self.angle, self.direction, position) rotation = matrix[:3, :3].T translation = matrix[:3, 3] - ts.positions= np.dot(ts.positions, rotation) + ts.positions = np.dot(ts.positions, rotation) ts.positions += translation - return ts - - return wrapped - diff --git a/package/MDAnalysis/transformations/translate.py b/package/MDAnalysis/transformations/translate.py index 6a4d28113be..1b1be2d92ed 100644 --- a/package/MDAnalysis/transformations/translate.py +++ b/package/MDAnalysis/transformations/translate.py @@ -30,18 +30,17 @@ or defined by centering an AtomGroup in the unit cell using the function :func:`center_in_box` -.. autofunction:: translate +.. autoclass:: translate -.. autofunction:: center_in_box +.. 
autoclass:: center_in_box """ import numpy as np from functools import partial -from ..lib.mdamath import triclinic_vectors -def translate(vector): +class translate(object): """ Translates the coordinates of a given :class:`~MDAnalysis.coordinates.base.Timestep` instance by a given vector. @@ -60,20 +59,20 @@ def translate(vector): :class:`~MDAnalysis.coordinates.base.Timestep` object """ - if len(vector)>2: - vector = np.float32(vector) - else: - raise ValueError("{} vector is too short".format(vector)) + def __init__(self, vector): + self.vector = vector - def wrapped(ts): - ts.positions += vector + if len(self.vector) > 2: + self.vector = np.float32(self.vector) + else: + raise ValueError("{} vector is too short".format(self.vector)) + def __call__(self, ts): + ts.positions += self.vector return ts - return wrapped - -def center_in_box(ag, center='geometry', point=None, wrap=False): +class center_in_box(object): """ Translates the coordinates of a given :class:`~MDAnalysis.coordinates.base.Timestep` instance so that the center of geometry/mass of the given :class:`~MDAnalysis.core.groups.AtomGroup` @@ -108,39 +107,48 @@ def center_in_box(ag, center='geometry', point=None, wrap=False): ------- :class:`~MDAnalysis.coordinates.base.Timestep` object - """ - - pbc_arg = wrap - if point: - point = np.asarray(point, np.float32) - if point.shape != (3, ) and point.shape != (1, 3): - raise ValueError('{} is not a valid point'.format(point)) - try: - if center == 'geometry': - center_method = partial(ag.center_of_geometry, pbc=pbc_arg) - elif center == 'mass': - center_method = partial(ag.center_of_mass, pbc=pbc_arg) - else: - raise ValueError('{} is not a valid argument for center'.format(center)) - except AttributeError: - if center == 'mass': - errmsg = f'{ag} is not an AtomGroup object with masses' - raise AttributeError(errmsg) from None - else: - raise ValueError(f'{ag} is not an AtomGroup object') from None - def wrapped(ts): - if point is None: + .. 
versionchanged:: 2.0.0 + The transformation was changed from a function/closure to a class + with ``__call__``. + """ + def __init__(self, ag, center='geometry', point=None, wrap=False): + self.ag = ag + self.center = center + self.point = point + self.wrap = wrap + + pbc_arg = self.wrap + if self.point: + self.point = np.asarray(self.point, np.float32) + if self.point.shape != (3, ) and self.point.shape != (1, 3): + raise ValueError('{} is not a valid point'.format(self.point)) + try: + if self.center == 'geometry': + self.center_method = partial(self.ag.center_of_geometry, + pbc=pbc_arg) + elif self.center == 'mass': + self.center_method = partial(self.ag.center_of_mass, + pbc=pbc_arg) + else: + raise ValueError(f'{self.center} is valid for center') + except AttributeError: + if self.center == 'mass': + errmsg = f'{self.ag} is not an AtomGroup object with masses' + raise AttributeError(errmsg) from None + else: + raise ValueError(f'{self.ag} is not an AtomGroup object') \ + from None + + def __call__(self, ts): + if self.point is None: boxcenter = np.sum(ts.triclinic_dimensions, axis=0) / 2 else: - boxcenter = point + boxcenter = self.point - ag_center = center_method() + ag_center = self.center_method() vector = boxcenter - ag_center ts.positions += vector return ts - - return wrapped - diff --git a/package/MDAnalysis/transformations/wrap.py b/package/MDAnalysis/transformations/wrap.py index 99b60f943a1..10e565fc072 100644 --- a/package/MDAnalysis/transformations/wrap.py +++ b/package/MDAnalysis/transformations/wrap.py @@ -28,16 +28,16 @@ translates the atoms back in the unit cell. :func:`unwrap` translates the atoms of each molecule so that bons don't split over images. -.. autofunction:: wrap +.. autoclass:: wrap -.. autofunction:: unwrap +.. autoclass:: unwrap """ from ..lib._cutil import make_whole -def wrap(ag, compound='atoms'): +class wrap(object): """ Shift the contents of a given AtomGroup back into the unit cell. 
:: @@ -79,18 +79,22 @@ def wrap(ag, compound='atoms'): Returns ------- MDAnalysis.coordinates.base.Timestep - + + + .. versionchanged:: 2.0.0 + The transformation was changed from a function/closure to a class + with ``__call__``. """ - - def wrapped(ts): - ag.wrap(compound=compound) - + def __init__(self, ag, compound='atoms'): + self.ag = ag + self.compound = compound + + def __call__(self, ts): + self.ag.wrap(compound=self.compound) return ts - - return wrapped -def unwrap(ag): +class unwrap(object): """ Move all atoms in an AtomGroup so that bonds don't split over images @@ -129,19 +133,21 @@ def unwrap(ag): Returns ------- MDAnalysis.coordinates.base.Timestep - + + + .. versionchanged:: 2.0.0 + The transformation was changed from a function/closure to a class + with ``__call__``. """ - - try: - ag.fragments - except AttributeError: - raise AttributeError("{} has no fragments".format(ag)) - - def wrapped(ts): - for frag in ag.fragments: + def __init__(self, ag): + self.ag = ag + + try: + self.ag.fragments + except AttributeError: + raise AttributeError("{} has no fragments".format(self.ag)) + + def __call__(self, ts): + for frag in self.ag.fragments: make_whole(frag) - return ts - - return wrapped - diff --git a/package/doc/sphinx/source/conf.py b/package/doc/sphinx/source/conf.py index 296b479d6f9..a149c769a8f 100644 --- a/package/doc/sphinx/source/conf.py +++ b/package/doc/sphinx/source/conf.py @@ -346,4 +346,5 @@ 'https://gsd.readthedocs.io/en/stable/': None, 'https://parmed.github.io/ParmEd/html/': None, 'https://docs.h5py.org/en/stable': None, + 'https://www.rdkit.org/docs/': None, } diff --git a/package/doc/sphinx/source/documentation_pages/analysis/helanal.rst b/package/doc/sphinx/source/documentation_pages/analysis/helanal.rst deleted file mode 100644 index d61297342e2..00000000000 --- a/package/doc/sphinx/source/documentation_pages/analysis/helanal.rst +++ /dev/null @@ -1,2 +0,0 @@ -.. 
automodule:: MDAnalysis.analysis.helanal - diff --git a/package/doc/sphinx/source/documentation_pages/analysis_modules.rst b/package/doc/sphinx/source/documentation_pages/analysis_modules.rst index 06474b4f8c1..95605bace5b 100644 --- a/package/doc/sphinx/source/documentation_pages/analysis_modules.rst +++ b/package/doc/sphinx/source/documentation_pages/analysis_modules.rst @@ -108,7 +108,6 @@ Macromolecules :maxdepth: 1 analysis/gnm - analysis/helanal analysis/helix_analysis analysis/dihedrals diff --git a/package/doc/sphinx/source/documentation_pages/converters.rst b/package/doc/sphinx/source/documentation_pages/converters.rst index 8bdcb913fc0..c70d2324184 100644 --- a/package/doc/sphinx/source/documentation_pages/converters.rst +++ b/package/doc/sphinx/source/documentation_pages/converters.rst @@ -33,4 +33,5 @@ you will have to specify a package name (case-insensitive). :: :maxdepth: 1 converters/ParmEdParser + converters/RDKitParser diff --git a/package/doc/sphinx/source/documentation_pages/converters/RDKitParser.rst b/package/doc/sphinx/source/documentation_pages/converters/RDKitParser.rst new file mode 100644 index 00000000000..174a1ff1115 --- /dev/null +++ b/package/doc/sphinx/source/documentation_pages/converters/RDKitParser.rst @@ -0,0 +1,3 @@ +.. automodule:: MDAnalysis.topology.RDKitParser + +.. automodule:: MDAnalysis.coordinates.RDKit diff --git a/package/doc/sphinx/source/documentation_pages/selections.rst b/package/doc/sphinx/source/documentation_pages/selections.rst index 80dc3aa8b60..121f380fb46 100644 --- a/package/doc/sphinx/source/documentation_pages/selections.rst +++ b/package/doc/sphinx/source/documentation_pages/selections.rst @@ -12,7 +12,7 @@ syntax`_):: >>> kalp = universe.select_atoms("segid KALP") .. 
_`CHARMM's atom selection syntax`: - http://www.charmm.org/documentation/c37b1/select.html + https://www.charmm.org/charmm/documentation/by-version/c45b1/select.html The :meth:`~MDAnalysis.core.groups.AtomGroup.select_atoms` method of a :class:`~MDAnalysis.core.groups.AtomGroup` or a @@ -99,6 +99,12 @@ moltype *molecule-type* select by molecule type, e.g. ``moltype Protein_A``. At the moment, only the TPR format defines the molecule type. +smarts *SMARTS-query* + select atoms using Daylight's SMARTS queries, e.g. ``smarts [#7;R]`` to + find nitrogen atoms in rings. Requires RDKit. All matches (max 1000) are + combined as a unique match. + + Pattern matching ---------------- diff --git a/package/doc/sphinx/source/documentation_pages/topology/RDKitParser.rst b/package/doc/sphinx/source/documentation_pages/topology/RDKitParser.rst deleted file mode 100644 index 9f6bbd1dac9..00000000000 --- a/package/doc/sphinx/source/documentation_pages/topology/RDKitParser.rst +++ /dev/null @@ -1 +0,0 @@ -.. 
automodule:: MDAnalysis.topology.RDKitParser \ No newline at end of file diff --git a/package/doc/sphinx/source/documentation_pages/topology_modules.rst b/package/doc/sphinx/source/documentation_pages/topology_modules.rst index 04ebf433e27..ed8caba8ce6 100644 --- a/package/doc/sphinx/source/documentation_pages/topology_modules.rst +++ b/package/doc/sphinx/source/documentation_pages/topology_modules.rst @@ -43,7 +43,6 @@ topology file format in the *topology_format* keyword argument to topology/PDBQTParser topology/PQRParser topology/PSFParser - topology/RDKitParser topology/TOPParser topology/TPRParser topology/TXYZParser diff --git a/package/doc/sphinx/source/documentation_pages/trajectory_transformations.rst b/package/doc/sphinx/source/documentation_pages/trajectory_transformations.rst index 8b91466e856..9662f687442 100644 --- a/package/doc/sphinx/source/documentation_pages/trajectory_transformations.rst +++ b/package/doc/sphinx/source/documentation_pages/trajectory_transformations.rst @@ -7,8 +7,8 @@ Trajectory transformations ("on-the-fly" transformations) .. module:: MDAnalysis.transformations -In MDAnalysis, a *transformation* is a function that modifies the data -for the current :class:`Timestep` and returns the +In MDAnalysis, a *transformation* is a function/function-like class +that modifies the data for the current :class:`Timestep` and returns the :class:`Timestep`. For instance, coordinate transformations, such as PBC corrections and molecule fitting are often required for some analyses and visualization. Transformation functions @@ -34,7 +34,7 @@ trajectory is **transformed on-the-fly**, i.e., the data read from the trajectory file will be changed before it is made available in, say, the :attr:`AtomGroup.positions` attribute. 
-The submodule :mod:`MDAnalysis.transformations` contains a +The submodule :mod:`MDAnalysis.transformations` contains a collection of transformations (see :ref:`transformations-module`) that can be immediately used but one can always write custom transformations (see :ref:`custom-transformations`). @@ -90,40 +90,67 @@ being added. Creating transformations ------------------------ -A *transformation* is a function that takes a +A simple *transformation* can also be a function that takes a :class:`~MDAnalysis.coordinates.base.Timestep` as input, modifies it, and -returns it. - -A simple transformation that takes no other arguments but a :class:`Timestep` +returns it. If it takes no other arguments but a :class:`Timestep` can be defined as the following example: .. code-block:: python def up_by_2(ts): - """ - Translate all coordinates by 2 angstroms up along the Z dimension. - """ - ts.positions = ts.positions + np.array([0, 0, 2], dtype=np.float32) - return ts - + """ + Translate all coordinates by 2 angstroms up along the Z dimension. + """ + ts.positions = ts.positions + np.array([0, 0, 2], dtype=np.float32) + return ts If the transformation requires other arguments besides the :class:`Timestep`, -the transformation takes these arguments, while a wrapped function takes the -:class:`Timestep` object as argument. So, a transformation can be roughly -defined as follows: +the following two methods can be used to create such transformation: + + +.. _custom-transformations-class: + +Creating complex transformation classes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +It is implemented by defining :func:`__call__` for the transformation class +and can be applied directly to a :class:`Timestep`. +So, a transformation class can be roughly defined as follows: .. code-block:: python - def up_by_x(distance): - """ - Creates a transformation that will translate all coordinates by a given amount along the Z dimension. 
- """ - def wrapped(ts): - ts.positions = ts.positions + np.array([0, 0, distance], dtype=np.float32) - return ts - return wrapped - - + class up_by_x_class(object): + def __init__(self, distance): + self.distance = distance + + def __call__(self, ts): + ts.positions = ts.positions + np.array([0, 0, self.distance], dtype=np.float32) + return ts + +It is the default construction method in :mod:`MDAnalysis.transformations` +from release 2.0.0 onwards because it can be reliably serialized. +See :class:`MDAnalysis.transformations.translate` for a simple example. + + +.. _custom-transformations-closure: + +Creating complex transformation closure functions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Transformation can also be a wrapped function takes the :class:`Timestep` object as argument. +So in this case, a transformation function (closure) can be roughly defined as follows: + +.. code-block:: python + + def up_by_x_func(distance): + """ + Creates a transformation that will translate all coordinates by a given amount along the Z dimension. + """ + def wrapped(ts): + ts.positions = ts.positions + np.array([0, 0, distance], dtype=np.float32) + return ts + return wrapped + An alternative to using a wrapped function is using partials from :mod:`functools`. The above function can be written as: @@ -132,15 +159,18 @@ above function can be written as: import functools def up_by_x(ts, distance): - ts.positions = ts.positions + np.array([0, 0, distance], dtype=np.float32) - return ts + ts.positions = ts.positions + np.array([0, 0, distance], dtype=np.float32) + return ts up_by_2 = functools.partial(up_by_x, distance=2) - -See :func:`MDAnalysis.transformations.translate` for a simple -example of such a type of function. 
- +Although functions (closures) work as transformations, they are not used in +in MDAnalysis from release 2.0.0 onwards because they cannot be reliably +serialized and thus a :class:`Universe` with such transformations cannot be +used with common parallelization schemes (e.g., ones based on +:mod:`multiprocessing`). +For detailed descriptions about how to write a closure-style transformation, +please refer to MDAnalysis 1.x documentation. .. _transformations-module: diff --git a/package/setup.py b/package/setup.py index ca5967a355f..436798d6898 100755 --- a/package/setup.py +++ b/package/setup.py @@ -261,8 +261,15 @@ def extensions(config): use_cython = config.get('use_cython', default=not is_release) use_openmp = config.get('use_openmp', default=True) - extra_compile_args = ['-std=c99', '-ffast-math', '-O3', '-funroll-loops', - '-fsigned-zeros'] # see #2722 + if platform.machine() == 'aarch64': + # reduce optimization level for ARM64 machines + # because of issues with test failures sourcing to: + # MDAnalysis/analysis/encore/clustering/src/ap.c + extra_compile_args = ['-std=c99', '-ffast-math', '-O1', '-funroll-loops', + '-fsigned-zeros'] + else: + extra_compile_args = ['-std=c99', '-ffast-math', '-O3', '-funroll-loops', + '-fsigned-zeros'] # see #2722 define_macros = [] if config.get('debug_cflags', default=False): extra_compile_args.extend(['-Wall', '-pedantic']) diff --git a/testsuite/MDAnalysisTests/analysis/test_bat.py b/testsuite/MDAnalysisTests/analysis/test_bat.py index 36ef684319b..9fb0e93b977 100644 --- a/testsuite/MDAnalysisTests/analysis/test_bat.py +++ b/testsuite/MDAnalysisTests/analysis/test_bat.py @@ -45,6 +45,14 @@ def bat(self, selected_residues): R.run() return R.bat + @pytest.fixture + def bat_npz(self, tmpdir, selected_residues): + filename = str(tmpdir / 'test_bat_IO.npy') + R = BAT(selected_residues) + R.run() + R.save(filename) + return filename + def test_bat_root_selection(self, selected_residues): R = BAT(selected_residues) 
assert_equal(R._root.indices, [8, 2, 1], @@ -79,11 +87,8 @@ def test_bat_reconstruction(self, selected_residues, bat): err_msg="error: Reconstructed Cartesian coordinates " + \ "don't match original") - def test_bat_IO(self, selected_residues, bat): - R = BAT(selected_residues) - R.run() - R.save('test_bat_IO.npy') - R2 = BAT(selected_residues, filename='test_bat_IO.npy') + def test_bat_IO(self, bat_npz, selected_residues, bat): + R2 = BAT(selected_residues, filename=bat_npz) test_bat = R2.bat assert_almost_equal( bat, @@ -107,8 +112,8 @@ def test_bat_disconnected_atom_group(self): with pytest.raises(ValueError): R = BAT(selected_residues) - def test_bat_incorrect_dims(self): + def test_bat_incorrect_dims(self, bat_npz): u = mda.Universe(PSF, DCD) selected_residues = u.select_atoms("resid 1-3") with pytest.raises(ValueError): - R = BAT(selected_residues, filename='test_bat_IO.npy') + R = BAT(selected_residues, filename=bat_npz) diff --git a/testsuite/MDAnalysisTests/analysis/test_density.py b/testsuite/MDAnalysisTests/analysis/test_density.py index 600de2590f7..8db8b2df1a3 100644 --- a/testsuite/MDAnalysisTests/analysis/test_density.py +++ b/testsuite/MDAnalysisTests/analysis/test_density.py @@ -213,8 +213,8 @@ def test_userdefn_boxshape(self, universe): assert D.density.grid.shape == (8, 12, 17) def test_warn_userdefn_padding(self, universe): - regex = ("Box padding \(currently set at 1\.0\) is not used " - "in user defined grids\.") + regex = (r"Box padding \(currently set at 1\.0\) is not used " + r"in user defined grids\.") with pytest.warns(UserWarning, match=regex): D = density.DensityAnalysis( universe.select_atoms(self.selections['static']), diff --git a/testsuite/MDAnalysisTests/analysis/test_helanal.py b/testsuite/MDAnalysisTests/analysis/test_helanal.py deleted file mode 100644 index f6aaa40bb01..00000000000 --- a/testsuite/MDAnalysisTests/analysis/test_helanal.py +++ /dev/null @@ -1,209 +0,0 @@ -# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; 
coding:utf-8 -*- -# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 fileencoding=utf-8 -# -# MDAnalysis --- https://www.mdanalysis.org -# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors -# (see the file AUTHORS for the full list of names) -# -# Released under the GNU Public Licence, v2 or any higher version -# -# Please cite your use of MDAnalysis in published work: -# -# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, -# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. -# MDAnalysis: A Python package for the rapid analysis of molecular dynamics -# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th -# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. -# doi: 10.25080/majora-629e541a-00e -# -# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. -# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. -# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 -# -import re - -import numpy as np -import pytest -from numpy.testing import assert_equal, assert_almost_equal - -import MDAnalysis as mda -import MDAnalysis.analysis.helanal -from MDAnalysisTests.datafiles import (GRO, XTC, PSF, DCD, PDB_small, - HELANAL_BENDING_MATRIX, - HELANAL_BENDING_MATRIX_SUBSET) - -# reference data from a single PDB file: -# data = MDAnalysis.analysis.helanal.helanal_main(PDB_small, -# select="name CA and resnum 161-187") -HELANAL_SINGLE_DATA = { - 'Best fit tilt': 1.3309656332019535, - 'Height': np.array([ 1.5286051 , 0.19648294, 0.11384312], - dtype=np.float32), - 'Local bending angles': - np.array([ 3.44526005, 4.85425806, 4.69548464, 2.39473653, - 3.56172442, 3.97128344, 3.41563916, 1.86140978, - 5.22997046, 5.41381264, 27.49601364, 39.69839478, - 35.05921936, 21.78928566, 9.8632431 , 8.80066967, - 5.5344553 , 6.14356709, 10.15450764, 11.07686138, - 9.23541832], dtype=np.float32), - 'Residues/turn': np.array([ 
3.64864163, 0.152694 , 0.1131402 ]), - 'Rotation Angles': - np.array([ 87.80540079, -171.86019984, -75.2341296 , 24.61695962, - 121.77104796, -134.94786976, -35.07857424, 58.9621866 , - 159.40210233, -104.83368122, -7.54816243, 87.40202629, - -176.13071955, -89.13196878, 17.17321345, 105.26627814, - -147.00075298, -49.36850775, 54.24557615, 156.06486532, - -110.82698327, -5.72138626, 85.36050546, -167.28218858, - -68.23076936]), - 'Twist': np.array([ 98.83011627, 4.08171701, 3.07253003], - dtype=np.float32), - 'Unit twist angles': - np.array([ 97.23709869, 99.09676361, 97.25350952, 101.76019287, - 100.42689514, 97.08784485, 97.07430267, 98.33553314, - 97.86578369, 95.45792389, 97.10089111, 95.26415253, - 87.93136597, 108.38458252, 95.27167511, 104.01931763, - 100.7199707 , 101.48034668, 99.64170074, 94.78946686, - 102.50147247, 97.25154877, 104.54204559, 101.42829895], - dtype=np.float32), - } - - -def read_bending_matrix(fn): - """Read helanal_bending_matrix.dat into dict of numpy arrays. - - This is a quick and dirty parser with no error checking. - - Format:: - - Mean - 0.0 5.7 10.9 .... - . - - SD - .... - . - - ABDEV - .... - . - - Returns - ------- - data : dict - dictionary with keys 'Mean', 'SD', 'ABDEV' and NxN matrices. 
- """ - data = {} - current = None - with open(fn) as bendingmatrix: - for line in bendingmatrix: - line = line.strip() - if not line: - continue - if re.match("\D+", line): - label = line.split()[0] - current = data[label] = [] - else: - current.append([float(x) for x in line.split()]) - for k,v in data.items(): - data[k] = np.array(v) - return data - - -def test_helanal_trajectory(tmpdir, reference=HELANAL_BENDING_MATRIX, - outfile="helanal_bending_matrix.dat"): - u = mda.Universe(PSF, DCD) - with tmpdir.as_cwd(): - # Helix 8: 161 - 187 http://www.rcsb.org/pdb/explore.do?structureId=4AKE - MDAnalysis.analysis.helanal.helanal_trajectory( - u, select="name CA and resnum 161-187") - bendingmatrix = read_bending_matrix(outfile) - ref = read_bending_matrix(reference) - assert_equal(sorted(bendingmatrix.keys()), sorted(ref.keys()), - err_msg="different contents in bending matrix data file") - for label in ref.keys(): - assert_almost_equal( - bendingmatrix[label], ref[label], err_msg="bending matrix " - "stats for {0} mismatch".format(label)) - - -def test_helanal_trajectory_slice(tmpdir, - reference=HELANAL_BENDING_MATRIX_SUBSET, - outfile="helanal_bending_matrix.dat"): - """Simple regression test to validate that begin/finish work as - intended. 
In this case, frames 10 (time: 10.999999031120204) through to - 79 (time: 79.99999295360149) should be picked up.""" - u = mda.Universe(PSF, DCD) - with tmpdir.as_cwd(): - # Helix 8: 161 - 187 http://www.rcsb.org/pdb/explore.do?structureId=4AKE - MDAnalysis.analysis.helanal.helanal_trajectory( - u, select="name CA and resnum 161-187", begin=10, finish=80 - ) - bendingmatrix = read_bending_matrix(outfile) - ref = read_bending_matrix(reference) - assert_equal(sorted(bendingmatrix.keys()), sorted(ref.keys()), - err_msg="different contents in bending matrix data file") - for label in ref.keys(): - assert_almost_equal( - bendingmatrix[label], ref[label], err_msg="bending matrix " - "starts for {0} mismatch".format(label)) - - -def test_helanal_main(reference=HELANAL_SINGLE_DATA): - u = mda.Universe(PDB_small) - # Helix 8: 161 - 187 http://www.rcsb.org/pdb/explore.do?structureId=4AKE - data = MDAnalysis.analysis.helanal.helanal_main( - PDB_small, select="name CA and resnum 161-187") - ref = reference - assert_equal(sorted(data.keys()), sorted(ref.keys()), - err_msg="different contents in data dict") - for label in ref.keys(): - assert_almost_equal(data[label], ref[label], decimal=4, - err_msg="data[{0}] mismatch".format(label)) - - -def test_exceptions(tmpdir): - """Testing exceptions which can be raised""" - u = MDAnalysis.Universe(GRO, XTC) - u.trajectory[1] - - # Testing xtc striding: Check for resolution of Issue #188 - with tmpdir.as_cwd(): - with pytest.raises(ValueError): - MDAnalysis.analysis.helanal.helanal_trajectory( - u, select="name CA", finish=5 - ) - - with tmpdir.as_cwd(): - with pytest.raises(ValueError): - MDAnalysis.analysis.helanal.helanal_trajectory( - u, select="name CA", begin=1, finish=0 - ) - - with tmpdir.as_cwd(): - with pytest.raises(ValueError): - MDAnalysis.analysis.helanal.helanal_trajectory( - u, select="name CA", begin=99999 - ) - - -def test_warnings(tmpdir): - """Testing that a warning which can be raised""" - u = 
MDAnalysis.Universe(GRO, XTC) - u.trajectory[1] - - with tmpdir.as_cwd(): - with pytest.warns(UserWarning) as record: - MDAnalysis.analysis.helanal.helanal_trajectory( - u, select="name CA", begin=-1, finish=99999 - ) - - wmsg1 = ("The input begin time (-1 ps) precedes the starting " - "trajectory time --- Setting starting frame to 0".format( - -1,0)) - assert str(record[-2].message.args[0]) == wmsg1 - wmsg2 = ("The input finish time ({0} ps) occurs after the end of " - "the trajectory ({1} ps). Finish time will be set to " - "the end of the trajectory".format( - 99999,1000.0000762939453)) - assert str(record[-1].message.args[0]) == wmsg2 - diff --git a/testsuite/MDAnalysisTests/analysis/test_hydrogenbonds_analysis.py b/testsuite/MDAnalysisTests/analysis/test_hydrogenbonds_analysis.py index b12b8087867..d90a2ce0488 100644 --- a/testsuite/MDAnalysisTests/analysis/test_hydrogenbonds_analysis.py +++ b/testsuite/MDAnalysisTests/analysis/test_hydrogenbonds_analysis.py @@ -474,7 +474,7 @@ def test_guess_hydrogens_empty_selection(self, h): def test_guess_hydrogens_min_max_mass(self, h): - errmsg = "min_mass is higher than \(or equal to\) max_mass" + errmsg = r"min_mass is higher than \(or equal to\) max_mass" with pytest.raises(ValueError, match=errmsg): diff --git a/testsuite/MDAnalysisTests/coordinates/test_pdb.py b/testsuite/MDAnalysisTests/coordinates/test_pdb.py index 9cd3fbc5989..3d4b2c9737a 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_pdb.py +++ b/testsuite/MDAnalysisTests/coordinates/test_pdb.py @@ -42,6 +42,8 @@ assert_array_almost_equal, assert_almost_equal) +IGNORE_NO_INFORMATION_WARNING = 'ignore:Found no information for attr:UserWarning' + class TestPDBReader(_SingleFrameReader): __test__ = True @@ -218,6 +220,22 @@ def universe_and_expected_dims(self, request): def outfile(self, tmpdir): return str(tmpdir.mkdir("PDBWriter").join('primitive-pdb-writer' + self.ext)) + @pytest.fixture + def u_no_ids(self): + # The test universe does not have atom 
ids, but it has everything + # else the PDB writer expects to avoid issuing warnings. + universe = make_Universe( + [ + 'names', 'resids', 'resnames', 'altLocs', + 'segids', 'occupancies', 'tempfactors', + ], + trajectory=True, + ) + universe.add_TopologyAttr('icodes', [' '] * len(universe.residues)) + universe.add_TopologyAttr('record_types', ['ATOM'] * len(universe.atoms)) + universe.dimensions = [10, 10, 10, 90, 90, 90] + return universe + @pytest.fixture def u_no_resnames(self): return make_Universe(['names', 'resids'], trajectory=True) @@ -513,6 +531,55 @@ def test_abnormal_record_type(self, universe5, tmpdir, outfile): with pytest.raises(ValueError, match=expected_msg): u.atoms.write(outfile) + @pytest.mark.filterwarnings(IGNORE_NO_INFORMATION_WARNING) + def test_no_reindex(self, universe, outfile): + """ + When setting the `reindex` keyword to False, the atom are + not reindexed. + """ + universe.atoms.ids = universe.atoms.ids + 23 + universe.atoms.write(outfile, reindex=False) + read_universe = mda.Universe(outfile) + assert np.all(read_universe.atoms.ids == universe.atoms.ids) + + @pytest.mark.filterwarnings(IGNORE_NO_INFORMATION_WARNING) + def test_no_reindex_bonds(self, universe, outfile): + """ + When setting the `reindex` keyword to False, the connect + record match the non-reindexed atoms. + """ + universe.atoms.ids = universe.atoms.ids + 23 + universe.atoms.write(outfile, reindex=False, bonds='all') + with open(outfile) as infile: + for line in infile: + if line.startswith('CONECT'): + assert line.strip() == "CONECT 23 24 25 26 27" + break + else: + raise AssertError('No CONECT record fond in the output.') + + @pytest.mark.filterwarnings(IGNORE_NO_INFORMATION_WARNING) + def test_reindex(self, universe, outfile): + """ + When setting the `reindex` keyword to True, the atom are + reindexed. 
+ """ + universe.atoms.ids = universe.atoms.ids + 23 + universe.atoms.write(outfile, reindex=True) + read_universe = mda.Universe(outfile) + # AG.ids is 1-based, while AG.indices is 0-based, hence the +1 + assert np.all(read_universe.atoms.ids == universe.atoms.indices + 1) + + def test_no_reindex_missing_ids(self, u_no_ids, outfile): + """ + When setting `reindex` to False, if there is no AG.ids, + then an exception is raised. + """ + # Making sure AG.ids is indeed missing + assert not hasattr(u_no_ids.atoms, 'ids') + with pytest.raises(mda.exceptions.NoDataError): + u_no_ids.atoms.write(outfile, reindex=False) + class TestMultiPDBReader(object): @staticmethod diff --git a/testsuite/MDAnalysisTests/coordinates/test_rdkit.py b/testsuite/MDAnalysisTests/coordinates/test_rdkit.py index 7bdb845f95a..1fc1bea543d 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_rdkit.py +++ b/testsuite/MDAnalysisTests/coordinates/test_rdkit.py @@ -20,59 +20,507 @@ # MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. # J. Comput. Chem. 
32 (2011), 2319--2327, doi:10.1002/jcc.21787 # -import warnings +import copy import pytest import MDAnalysis as mda +from MDAnalysis.topology.guessers import guess_atom_element import numpy as np from numpy.testing import (assert_equal, assert_almost_equal) -from MDAnalysisTests.datafiles import mol2_molecule +from MDAnalysisTests.datafiles import mol2_molecule, PDB_full, GRO, PDB_helix +from MDAnalysisTests.util import import_not_available -Chem = pytest.importorskip("rdkit.Chem") -AllChem = pytest.importorskip("rdkit.Chem.AllChem") -def mol2_mol(): - return Chem.MolFromMol2File(mol2_molecule, removeHs=False) +try: + from rdkit import Chem + from rdkit.Chem import AllChem + from MDAnalysis.coordinates.RDKit import ( + RDATTRIBUTES, + _add_mda_attr_to_rdkit, + _infer_bo_and_charges, + _standardize_patterns, + _rebuild_conjugated_bonds, + _set_atom_property, + _reassign_props_after_reaction, + ) +except ImportError: + pass -def smiles_mol(): - mol = Chem.MolFromSmiles("CCO") - mol = Chem.AddHs(mol) - cids = AllChem.EmbedMultipleConfs(mol, numConfs=3) - return mol +requires_rdkit = pytest.mark.skipif(import_not_available("rdkit"), + reason="requires RDKit") + + +@pytest.mark.skipif(not import_not_available("rdkit"), + reason="only for min dependencies build") +class TestRequiresRDKit(object): + def test_converter_requires_rdkit(self): + u = mda.Universe(PDB_full) + with pytest.raises(ImportError, + match="RDKit is required for the RDKitConverter"): + u.atoms.convert_to("RDKIT") + + +@requires_rdkit +class MolFactory: + def mol2_mol(): + return Chem.MolFromMol2File(mol2_molecule, removeHs=False) + + def smiles_mol(): + mol = Chem.MolFromSmiles("CN1C=NC2=C1C(=O)N(C(=O)N2C)C") + mol = Chem.AddHs(mol) + cids = AllChem.EmbedMultipleConfs(mol, numConfs=3) + return mol + + def dummy_product(): + mol = Chem.RWMol() + atom = Chem.Atom(1) + atom.SetIntProp("old_mapno", 0) + atom.SetUnsignedProp("react_atom_idx", 0) + mol.AddAtom(atom) + return mol + + def 
dummy_product_nomap(): + mol = Chem.RWMol() + atom = Chem.Atom(1) + atom.SetUnsignedProp("react_atom_idx", 0) + mol.AddAtom(atom) + return mol + + def dummy_reactant_noprops(): + mol = Chem.RWMol() + atom = Chem.Atom(1) + mol.AddAtom(atom) + return mol + + def dummy_reactant(): + mol = Chem.RWMol() + atom = Chem.Atom(1) + atom.SetProp("foo", "bar") + atom.SetIntProp("_MDAnalysis_index", 1) + atom.SetDoubleProp("_MDAnalysis_charge", 4.2) + atom.SetProp("_MDAnalysis_type", "C.3") + mol.AddAtom(atom) + return mol + + +@pytest.fixture(scope="function") +def rdmol(request): + return getattr(MolFactory, request.param)() + + +@pytest.fixture(scope="function") +def product(request): + return getattr(MolFactory, request.param)() + + +@requires_rdkit class TestRDKitReader(object): @pytest.mark.parametrize("rdmol, n_frames", [ - (mol2_mol(), 1), - (smiles_mol(), 3), - ]) + ("mol2_mol", 1), + ("smiles_mol", 3), + ], indirect=["rdmol"]) def test_coordinates(self, rdmol, n_frames): universe = mda.Universe(rdmol) assert universe.trajectory.n_frames == n_frames expected = np.array([ - conf.GetPositions() for conf in rdmol.GetConformers()], + conf.GetPositions() for conf in rdmol.GetConformers()], dtype=np.float32) assert_equal(expected, universe.trajectory.coordinate_array) def test_no_coordinates(self): - with warnings.catch_warnings(record=True) as w: - # Cause all warnings to always be triggered. - warnings.simplefilter("always") - # Trigger a warning. 
+ with pytest.warns(UserWarning, match="No coordinates found"): u = mda.Universe.from_smiles("CCO", generate_coordinates=False) - # Verify the warning - assert len(w) == 1 - assert "No coordinates found" in str( - w[-1].message) - expected = np.empty((1,u.atoms.n_atoms,3), dtype=np.float32) + expected = np.empty((1, u.atoms.n_atoms, 3), dtype=np.float32) expected[:] = np.nan assert_equal(u.trajectory.coordinate_array, expected) def test_compare_mol2reader(self): - universe = mda.Universe(mol2_mol()) + universe = mda.Universe(MolFactory.mol2_mol()) mol2 = mda.Universe(mol2_molecule) assert universe.trajectory.n_frames == mol2.trajectory.n_frames - assert_equal(universe.trajectory.ts.positions, + assert_equal(universe.trajectory.ts.positions, mol2.trajectory.ts.positions) - + + +@requires_rdkit +class TestRDKitConverter(object): + @pytest.fixture + def pdb(self): + return mda.Universe(PDB_full) + + @pytest.fixture + def mol2(self): + u = mda.Universe(mol2_molecule) + # add elements + elements = np.array([guess_atom_element(x) for x in u.atoms.types], + dtype=object) + u.add_TopologyAttr('elements', elements) + return u + + @pytest.fixture + def peptide(self): + u = mda.Universe(GRO) + elements = mda.topology.guessers.guess_types(u.atoms.names) + u.add_TopologyAttr('elements', elements) + return u.select_atoms("resid 2-12") + + @pytest.mark.parametrize("smi", ["[H]", "C", "O", "[He]"]) + def test_single_atom_mol(self, smi): + u = mda.Universe.from_smiles(smi, addHs=False, + generate_coordinates=False) + mol = u.atoms.convert_to("RDKIT") + assert mol.GetNumAtoms() == 1 + assert mol.GetAtomWithIdx(0).GetSymbol() == smi.strip("[]") + + @pytest.mark.parametrize("resname, n_atoms, n_fragments", [ + ("PRO", 14, 1), + ("ILE", 38, 1), + ("ALA", 20, 2), + ("GLY", 21, 3), + ]) + def test_mol_from_selection(self, peptide, resname, n_atoms, n_fragments): + mol = peptide.select_atoms("resname %s" % resname).convert_to("RDKIT") + assert n_atoms == mol.GetNumAtoms() + assert 
n_fragments == len(Chem.GetMolFrags(mol)) + + @pytest.mark.parametrize("sel_str, atom_index", [ + ("resid 1", 0), + ("resname LYS and name NZ", 1), + ("resid 34 and altloc B", 2), + ]) + def test_monomer_info(self, pdb, sel_str, atom_index): + sel = pdb.select_atoms(sel_str) + umol = sel.convert_to("RDKIT") + atom = umol.GetAtomWithIdx(atom_index) + mda_index = atom.GetIntProp("_MDAnalysis_index") + mi = atom.GetMonomerInfo() + + for mda_attr, rd_attr in RDATTRIBUTES.items(): + rd_value = getattr(mi, "Get%s" % rd_attr)() + if hasattr(sel, mda_attr): + mda_value = getattr(sel, mda_attr)[mda_index] + if mda_attr == "names": + rd_value = rd_value.strip() + assert rd_value == mda_value + + @pytest.mark.parametrize("rdmol", ["mol2_mol", "smiles_mol"], + indirect=True) + def test_identical_topology(self, rdmol): + u = mda.Universe(rdmol) + umol = u.atoms.convert_to("RDKIT") + assert rdmol.HasSubstructMatch(umol) and umol.HasSubstructMatch(rdmol) + u2 = mda.Universe(umol) + assert_equal(u.atoms.bonds, u2.atoms.bonds) + assert_equal(u.atoms.elements, u2.atoms.elements) + assert_equal(u.atoms.names, u2.atoms.names) + assert_almost_equal(u.atoms.positions, u2.atoms.positions, decimal=7) + + def test_raise_requires_elements(self): + u = mda.Universe(mol2_molecule) + with pytest.raises( + AttributeError, + match="`elements` attribute is required for the RDKitConverter" + ): + u.atoms.convert_to("RDKIT") + + def test_warn_guess_bonds(self): + u = mda.Universe(PDB_helix) + with pytest.warns(UserWarning, + match="No `bonds` attribute in this AtomGroup"): + u.atoms.convert_to("RDKIT") + + def test_warn_no_hydrogen(self): + u = mda.Universe.from_smiles("O=O") + with pytest.warns( + UserWarning, + match="No hydrogen atom could be found in the topology" + ): + u.atoms.convert_to("RDKIT") + + @pytest.mark.parametrize("attr, value, expected", [ + ("names", "C1", " C1 "), + ("names", "C12", " C12"), + ("names", "Cl1", "Cl1 "), + ("altLocs", "A", "A"), + ("chainIDs", "B", "B"), + 
("icodes", "C", "C"), + ("occupancies", 0.5, 0.5), + ("resnames", "LIG", "LIG"), + ("resids", 123, 123), + ("segindices", 1, 1), + ("tempfactors", 0.8, 0.8), + ("bfactors", 0.8, 0.8), + ]) + def test_add_mda_attr_to_rdkit(self, attr, value, expected): + mi = Chem.AtomPDBResidueInfo() + _add_mda_attr_to_rdkit(attr, value, mi) + rdvalue = getattr(mi, "Get%s" % RDATTRIBUTES[attr])() + assert rdvalue == expected + + def test_bfactors_tempfactors_raises_error(self): + u = mda.Universe.from_smiles("C") + bfactors = np.array(u.atoms.n_atoms*[1.0], dtype=np.float32) + u.add_TopologyAttr('bfactors', bfactors) + u.add_TopologyAttr('tempfactors', bfactors) + with pytest.raises( + AttributeError, + match="Both `tempfactors` and `bfactors` attributes are present" + ): + u.atoms.convert_to("RDKIT") + + @pytest.mark.parametrize("idx", [0, 10, 42]) + def test_other_attributes(self, mol2, idx): + mol = mol2.atoms.convert_to("RDKIT") + rdatom = mol.GetAtomWithIdx(idx) + rdprops = rdatom.GetPropsAsDict() + mda_idx = int(rdprops["_MDAnalysis_index"]) + for prop in ["charge", "segid", "type"]: + rdprop = rdprops["_MDAnalysis_%s" % prop] + mdaprop = getattr(mol2.atoms[mda_idx], prop) + assert rdprop == mdaprop + + @pytest.mark.parametrize("sel_str", [ + "resname ALA", + "resname PRO and segid A", + ]) + def test_index_property(self, pdb, sel_str): + ag = pdb.select_atoms(sel_str) + mol = ag.convert_to("RDKIT") + expected = [i for i in range(len(ag))] + indices = sorted([a.GetIntProp("_MDAnalysis_index") + for a in mol.GetAtoms()]) + assert_equal(indices, expected) + + def test_assign_coordinates(self, pdb): + mol = pdb.atoms.convert_to("RDKIT") + positions = mol.GetConformer().GetPositions() + indices = sorted(mol.GetAtoms(), + key=lambda a: a.GetIntProp("_MDAnalysis_index")) + indices = [a.GetIdx() for a in indices] + assert_almost_equal(positions[indices], pdb.atoms.positions) + + def test_assign_stereochemistry(self, mol2): + umol = mol2.atoms.convert_to("RDKIT") + rdmol = 
Chem.MolFromMol2File(mol2_molecule, removeHs=False) + assert rdmol.HasSubstructMatch( + umol, useChirality=True) and umol.HasSubstructMatch( + rdmol, useChirality=True) + + def test_trajectory_coords(self): + u = mda.Universe.from_smiles( + "CCO", numConfs=3, rdkit_kwargs=dict(randomSeed=42)) + for ts in u.trajectory: + mol = u.atoms.convert_to("RDKIT") + positions = mol.GetConformer().GetPositions() + indices = sorted(mol.GetAtoms(), + key=lambda a: a.GetIntProp("_MDAnalysis_index")) + indices = [a.GetIdx() for a in indices] + assert_almost_equal(positions[indices], ts.positions) + + def test_nan_coords(self): + u = mda.Universe.from_smiles("CCO") + xyz = u.atoms.positions + xyz[0][2] = np.nan + u.atoms.positions = xyz + with pytest.warns(UserWarning, match="NaN detected"): + mol = u.atoms.convert_to("RDKIT") + with pytest.raises(ValueError, match="Bad Conformer Id"): + mol.GetConformer() + + def test_cache(self): + u = mda.Universe.from_smiles("CCO", numConfs=5) + ag = u.atoms + cache = mda.coordinates.RDKit.RDKitConverter._cache + previous_cache = None + for ts in u.trajectory: + mol = ag.convert_to("RDKIT") + if previous_cache: + # the cache shouldn't change when iterating on timesteps + assert cache == previous_cache + previous_cache = copy.deepcopy(cache) + # cached molecule shouldn't store coordinates + mol = list(cache.values())[0] + with pytest.raises(ValueError, match="Bad Conformer Id"): + mol.GetConformer() + # only 1 molecule should be cached + u = mda.Universe.from_smiles("C") + mol = u.atoms.convert_to("RDKIT") + assert len(cache) == 1 + assert cache != previous_cache + # converter with kwargs + rdkit_converter = mda.coordinates.RDKit.RDKitConverter().convert + # cache should depend on passed arguments + previous_cache = copy.deepcopy(cache) + mol = rdkit_converter(u.atoms, NoImplicit=False) + assert cache != previous_cache + # skip cache + mol = rdkit_converter(u.atoms, cache=False) + assert cache == {} + + +@requires_rdkit +class 
TestRDKitFunctions(object): + @pytest.mark.parametrize("smi, out", [ + ("C(-[H])(-[H])(-[H])-[H]", "C"), + ("[C](-[H])(-[H])-[C](-[H])-[H]", "C=C"), + ("[C]1(-[H])-[C](-[H])-[C](-[H])-[C](-[H])-[C](-[H])-[C]1(-[H])", + "c1ccccc1"), + ("C-[C](-[H])-[O]", "C(=O)C"), + ("[H]-[C](-[O])-[N](-[H])-[H]", "C(=O)N"), + ("[N]-[C]-[H]", "N#C"), + ("C-[C](-[O]-[H])-[O]", "CC(=O)O"), + ("[P](-[O]-[H])(-[O]-[H])(-[O]-[H])-[O]", "P(O)(O)(O)=O"), + ("[P](-[O]-[H])(-[O]-[H])(-[O])-[O]", "P([O-])(O)(O)=O"), + ("[P](-[O]-[H])(-[O])(-[O])-[O]", "P([O-])([O-])(O)=O"), + ("[P](-[O])(-[O])(-[O])-[O]", "P([O-])([O-])([O-])=O"), + ("[H]-[O]-[N]-[O]", "ON=O"), + ("[N]-[C]-[O]", "N#C[O-]"), + ]) + def test_infer_bond_orders(self, smi, out): + mol = Chem.MolFromSmiles(smi, sanitize=False) + mol.UpdatePropertyCache(strict=False) + _infer_bo_and_charges(mol) + Chem.SanitizeMol(mol) + mol = Chem.RemoveHs(mol) + molref = Chem.MolFromSmiles(out) + assert mol.HasSubstructMatch(molref) and molref.HasSubstructMatch( + mol), "{} != {}".format(Chem.MolToSmiles(mol), out) + + @pytest.mark.parametrize("smi, atom_idx, charge", [ + ("[C](-[H])(-[H])(-[H])-[O]", 4, -1), + ("[N]-[C]-[O]", 2, -1), + ("[N](-[H])(-[H])(-[H])-[H]", 0, 1), + ("C-[C](-[O])-[O]", 3, -1), + ]) + def test_infer_charges(self, smi, atom_idx, charge): + mol = Chem.MolFromSmiles(smi, sanitize=False) + mol.UpdatePropertyCache(strict=False) + _infer_bo_and_charges(mol) + Chem.SanitizeMol(mol) + assert mol.GetAtomWithIdx(atom_idx).GetFormalCharge() == charge + + @pytest.mark.parametrize("smi, out", [ + ("[S](-[O]-[H])(-[O]-[H])(-[O])-[O]", "S(=O)(=O)(O)O"), + ("[S](-[O]-[H])(-[O])(-[O])-[O]", "S(=O)(=O)([O-])O"), + ("[S](-[O])(-[O])(-[O])-[O]", "S(=O)(=O)([O-])[O-]"), + ("C-[N](-[H])-[C](-[N](-[H])-[H])-[N](-[H])-[H]", + "CNC(N)=[N+](-[H])-[H]"), + ("[O]-[C](-[H])-[C](-[H])-[H]", "C([O-])=C"), + ("C-[N](-[O])-[O]", "C[N+](=O)[O-]"), + ("C(-[N](-[O])-[O])-[N](-[O])-[O]", "C([N+](=O)[O-])[N+](=O)[O-]"), + ("C-[N](-[O])-[O].C-[N](-[O])-[O]", 
"C[N+](=O)[O-].C[N+](=O)[O-]"), + ("[C-](=O)-C", "[C](=O)-C"), + ("[H]-[N-]-C", "[H]-[N]-C"), + ("[O]-[C]1-[C](-[H])-[C](-[H])-[C](-[H])-[C](-[H])-[C](-[H])1", + "[O-]c1ccccc1"), + ("[O]-[C]1-[C](-[H])-[C](-[H])-[C](-[H])-[C]1-[O]", + "[O-]C1=CC=CC1=O"), + ("[H]-[C]-[C]-[C](-[H])-[C](-[H])-[H]", "C#CC=C"), + ("[H]-[C]-[C]-[C]-[C]-[H]", "C#CC#C"), + ]) + def test_standardize_patterns(self, smi, out): + mol = Chem.MolFromSmiles(smi, sanitize=False) + mol.UpdatePropertyCache(strict=False) + _infer_bo_and_charges(mol) + mol = _standardize_patterns(mol) + Chem.SanitizeMol(mol) + mol = Chem.RemoveHs(mol) + molref = Chem.MolFromSmiles(out) + assert mol.HasSubstructMatch(molref) and molref.HasSubstructMatch( + mol), "{} != {}".format(Chem.MolToSmiles(mol), out) + + @pytest.mark.parametrize("attr, value, getter", [ + ("index", 42, "GetIntProp"), + ("index", np.int(42), "GetIntProp"), + ("charge", 4.2, "GetDoubleProp"), + ("charge", np.float(4.2), "GetDoubleProp"), + ("type", "C.3", "GetProp"), + ]) + def test_set_atom_property(self, attr, value, getter): + atom = Chem.Atom(1) + prop = "_MDAnalysis_%s" % attr + _set_atom_property(atom, prop, value) + assert getattr(atom, getter)(prop) == value + + @pytest.mark.parametrize("rdmol, product, name", [ + ("dummy_reactant", "dummy_product", "props"), + ("dummy_reactant_noprops", "dummy_product", "noprops"), + ("dummy_reactant", "dummy_product_nomap", "nomap"), + ], indirect=["rdmol", "product"]) + def test_reassign_props_after_reaction(self, rdmol, product, name): + _reassign_props_after_reaction(rdmol, product) + atom = product.GetAtomWithIdx(0) + if name == "props": + assert atom.GetProp("foo") == "bar" + assert atom.GetIntProp("_MDAnalysis_index") == 1 + assert atom.GetDoubleProp("_MDAnalysis_charge") == 4.2 + assert atom.GetProp("_MDAnalysis_type") == "C.3" + with pytest.raises(KeyError, match="old_mapno"): + atom.GetIntProp("old_mapno") + with pytest.raises(KeyError, match="react_atom_idx"): + 
atom.GetUnsignedProp("react_atom_idx") + elif name == "noprops": + with pytest.raises(KeyError, match="old_mapno"): + atom.GetIntProp("old_mapno") + with pytest.raises(KeyError, match="react_atom_idx"): + atom.GetUnsignedProp("react_atom_idx") + elif name == "nomap": + with pytest.raises(KeyError, match="react_atom_idx"): + atom.GetUnsignedProp("react_atom_idx") + with pytest.raises(KeyError, match="_MDAnalysis_index"): + atom.GetIntProp("_MDAnalysis_index") + + @pytest.mark.parametrize("smi_in", [ + "c1ccc(cc1)-c1ccccc1-c1ccccc1", + "c1cc[nH]c1", + "c1ccc(cc1)-c1ccc(-c2ccccc2)c(-c2ccccc2)c1-c1ccccc1", + "c1ccc2c(c1)c1ccccc1c1ccccc1c1ccccc1c1ccccc21", + "c1csc(c1)-c1ccoc1-c1cc[nH]c1", + "C1=C2C(=NC=N1)N=CN2", + "CN1C=NC(=C1SC2=NC=NC3=C2NC=N3)[N+](=O)[O-]", + "c1c[nH]c(c1)-c1ccc(s1)-c1ccoc1-c1c[nH]cc1-c1ccccc1", + "C=CC=CC=CC=CC=CC=C", + "NCCCCC([NH3+])C(=O)[O-]", + "CC(C=CC1=C(C)CCCC1(C)C)=CC=CC(C)=CC=[NH+]C", + "C#CC=C", + # HID HIE HIP residues, see PR #2941 + "O=C([C@H](CC1=CNC=N1)N)O", + "O=C([C@H](CC1=CN=CN1)N)O", + "O=C([C@H](CC1=C[NH1+]=CN1)[NH3+])[O-]", + ]) + def test_order_independant(self, smi_in): + # generate mol with hydrogens but without bond orders + ref = Chem.MolFromSmiles(smi_in) + template = Chem.AddHs(ref) + for atom in template.GetAtoms(): + atom.SetIsAromatic(False) + atom.SetFormalCharge(0) + for bond in template.GetBonds(): + bond.SetIsAromatic(False) + bond.SetBondType(Chem.BondType.SINGLE) + + # go through each possible starting atom + for a in template.GetAtoms(): + smi = Chem.MolToSmiles(template, rootedAtAtom=a.GetIdx()) + m = Chem.MolFromSmiles(smi, sanitize=False) + for atom in m.GetAtoms(): + atom.SetFormalCharge(0) + atom.SetNoImplicit(True) + m.UpdatePropertyCache(strict=False) + _infer_bo_and_charges(m) + m = _standardize_patterns(m) + Chem.SanitizeMol(m) + m = Chem.RemoveHs(m) + assert m.HasSubstructMatch(ref) and ref.HasSubstructMatch( + m), (f"(input) {Chem.MolToSmiles(ref)} != " + f"{Chem.MolToSmiles(m)} (output) root atom 
{a.GetIdx()}") + + def test_warn_conjugated_max_iter(self): + smi = "[C-]C=CC=CC=CC=CC=CC=C[C-]" + mol = Chem.MolFromSmiles(smi) + with pytest.warns(UserWarning, + match="reasonable number of iterations"): + _rebuild_conjugated_bonds(mol, 2) diff --git a/testsuite/MDAnalysisTests/core/test_accumulate.py b/testsuite/MDAnalysisTests/core/test_accumulate.py index 2d096e72d2e..df0a96afcbc 100644 --- a/testsuite/MDAnalysisTests/core/test_accumulate.py +++ b/testsuite/MDAnalysisTests/core/test_accumulate.py @@ -116,6 +116,11 @@ def test_total_charge_compounds(self, group, name, compound): ref = [sum(a.charges) for a in group.atoms.groupby(name).values()] assert_almost_equal(group.total_charge(compound=compound), ref) + @pytest.mark.filterwarnings( # Prevents regression of issue #2990 + "error:" + "Using a non-tuple sequence for multidimensional indexing is deprecated:" + "FutureWarning" + ) def test_total_charge_duplicates(self, group): group2 = group + group[0] ref = group.total_charge() + group[0].charge @@ -133,6 +138,11 @@ def test_total_mass_compounds(self, group, name, compound): ref = [sum(a.masses) for a in group.atoms.groupby(name).values()] assert_almost_equal(group.total_mass(compound=compound), ref) + @pytest.mark.filterwarnings( # Prevents regression of issue #2990 + "error:" + "Using a non-tuple sequence for multidimensional indexing is deprecated:" + "FutureWarning" + ) def test_total_mass_duplicates(self, group): group2 = group + group[0] ref = group.total_mass() + group2[0].mass diff --git a/testsuite/MDAnalysisTests/core/test_atomgroup.py b/testsuite/MDAnalysisTests/core/test_atomgroup.py index 520519046e3..5d6421d1856 100644 --- a/testsuite/MDAnalysisTests/core/test_atomgroup.py +++ b/testsuite/MDAnalysisTests/core/test_atomgroup.py @@ -991,6 +991,12 @@ def ag(self): group.wrap(inplace=True) return group + @pytest.fixture() + def unordered_ag(self, ag): + ndx = np.arange(len(ag)) + np.random.shuffle(ndx) + return ag[ndx] + @pytest.fixture() def 
ref_noUnwrap_residues(self): return { @@ -1013,12 +1019,12 @@ def ref_Unwrap_residues(self): 'COG': np.array([[21.356, 41.685, 40.501], [44.577, 43.312, 79.039], [ 2.204, 27.722, 54.023]], dtype=np.float32), - 'COM': np.array([[20.815, 42.013, 39.802], - [44.918, 43.282, 79.325], - [2.045, 28.243, 54.127]], dtype=np.float32), - 'MOI': np.array([[16747.486, -1330.489, 2938.243], - [-1330.489, 19315.253, 3306.212], - [ 2938.243, 3306.212, 8990.481]]), + 'COM': np.array([[21.286, 41.664, 40.465], + [44.528, 43.426, 78.671], + [ 2.111, 27.871, 53.767]], dtype=np.float32), + 'MOI': np.array([[16687.941, -1330.617, 2925.883], + [-1330.617, 19256.178, 3354.832], + [ 2925.883, 3354.832, 8989.946]]), 'Asph': 0.2969491080, } @@ -1058,6 +1064,12 @@ def test_UnWrapFlag_residues(self, ag, ref_Unwrap_residues): assert_almost_equal(ag.moment_of_inertia(unwrap=True, compound='residues'), ref_Unwrap_residues['MOI'], self.prec) assert_almost_equal(ag.asphericity(unwrap=True, compound='residues'), ref_Unwrap_residues['Asph'], self.prec) + def test_UnWrapFlag_residues_unordered(self, unordered_ag, ref_Unwrap_residues): + assert_almost_equal(unordered_ag.center_of_geometry(unwrap=True, compound='residues'), ref_Unwrap_residues['COG'], self.prec) + assert_almost_equal(unordered_ag.center_of_mass(unwrap=True, compound='residues'), ref_Unwrap_residues['COM'], self.prec) + assert_almost_equal(unordered_ag.moment_of_inertia(unwrap=True, compound='residues'), ref_Unwrap_residues['MOI'], self.prec) + assert_almost_equal(unordered_ag.asphericity(unwrap=True, compound='residues'), ref_Unwrap_residues['Asph'], self.prec) + def test_default(self, ref_noUnwrap): u = UnWrapUniverse(is_triclinic=False) group = u.atoms[31:39] # molecules 11 @@ -1281,22 +1293,27 @@ def test_center_of_mass_compounds(self, ag, name, compound): @pytest.mark.parametrize('name, compound', (('resids', 'residues'), ('segids', 'segments'))) - def test_center_of_geometry_compounds_pbc(self, ag, name, compound): + 
@pytest.mark.parametrize('unwrap', (True, False)) + def test_center_of_geometry_compounds_pbc(self, ag, name, compound, + unwrap): ag.dimensions = [50, 50, 50, 90, 90, 90] - ref = [a.center_of_geometry() for a in ag.groupby(name).values()] + ref = [a.center_of_geometry(unwrap=unwrap) + for a in ag.groupby(name).values()] ref = distances.apply_PBC(np.asarray(ref, dtype=np.float32), - ag.dimensions) - cog = ag.center_of_geometry(pbc=True, compound=compound) + ag.dimensions) + cog = ag.center_of_geometry(pbc=True, compound=compound, unwrap=unwrap) assert_almost_equal(cog, ref, decimal=5) @pytest.mark.parametrize('name, compound', (('resids', 'residues'), ('segids', 'segments'))) - def test_center_of_mass_compounds_pbc(self, ag, name, compound): + @pytest.mark.parametrize('unwrap', (True, False)) + def test_center_of_mass_compounds_pbc(self, ag, name, compound, unwrap): ag.dimensions = [50, 50, 50, 90, 90, 90] - ref = [a.center_of_mass() for a in ag.groupby(name).values()] + ref = [a.center_of_mass(unwrap=unwrap) + for a in ag.groupby(name).values()] ref = distances.apply_PBC(np.asarray(ref, dtype=np.float32), - ag.dimensions) - com = ag.center_of_mass(pbc=True, compound=compound) + ag.dimensions) + com = ag.center_of_mass(pbc=True, compound=compound, unwrap=unwrap) assert_almost_equal(com, ref, decimal=5) @pytest.mark.parametrize('name, compound', (('molnums', 'molecules'), @@ -1317,24 +1334,30 @@ def test_center_of_mass_compounds_special(self, ag_molfrg, @pytest.mark.parametrize('name, compound', (('molnums', 'molecules'), ('fragindices', 'fragments'))) + @pytest.mark.parametrize('unwrap', (True, False)) def test_center_of_geometry_compounds_special_pbc(self, ag_molfrg, - name, compound): + name, compound, unwrap): ag_molfrg.dimensions = [50, 50, 50, 90, 90, 90] - ref = [a.center_of_geometry() for a in ag_molfrg.groupby(name).values()] + ref = [a.center_of_geometry(unwrap=unwrap) + for a in ag_molfrg.groupby(name).values()] ref = distances.apply_PBC(np.asarray(ref, 
dtype=np.float32), - ag_molfrg.dimensions) - cog = ag_molfrg.center_of_geometry(pbc=True, compound=compound) + ag_molfrg.dimensions) + cog = ag_molfrg.center_of_geometry(pbc=True, compound=compound, + unwrap=unwrap) assert_almost_equal(cog, ref, decimal=5) @pytest.mark.parametrize('name, compound', (('molnums', 'molecules'), ('fragindices', 'fragments'))) + @pytest.mark.parametrize('unwrap', (True, False)) def test_center_of_mass_compounds_special_pbc(self, ag_molfrg, - name, compound): + name, compound, unwrap): ag_molfrg.dimensions = [50, 50, 50, 90, 90, 90] - ref = [a.center_of_mass() for a in ag_molfrg.groupby(name).values()] + ref = [a.center_of_mass(unwrap=unwrap) + for a in ag_molfrg.groupby(name).values()] ref = distances.apply_PBC(np.asarray(ref, dtype=np.float32), - ag_molfrg.dimensions) - com = ag_molfrg.center_of_mass(pbc=True, compound=compound) + ag_molfrg.dimensions) + com = ag_molfrg.center_of_mass(pbc=True, compound=compound, + unwrap=unwrap) assert_almost_equal(com, ref, decimal=5) def test_center_wrong_compound(self, ag): diff --git a/testsuite/MDAnalysisTests/core/test_atomselections.py b/testsuite/MDAnalysisTests/core/test_atomselections.py index b7d0f515b7f..d0768e499a6 100644 --- a/testsuite/MDAnalysisTests/core/test_atomselections.py +++ b/testsuite/MDAnalysisTests/core/test_atomselections.py @@ -42,6 +42,7 @@ TRZ_psf, TRZ, PDB_icodes, PDB_HOLE, + PDB_helix, ) from MDAnalysisTests import make_Universe @@ -74,7 +75,7 @@ def test_protein(self, universe): sorted(universe.select_atoms('segid 4AKE').indices), "selected protein is not the same as auto-generated protein segment s4AKE") - @pytest.mark.parametrize('resname', MDAnalysis.core.selection.ProteinSelection.prot_res) + @pytest.mark.parametrize('resname', sorted(MDAnalysis.core.selection.ProteinSelection.prot_res)) def test_protein_resnames(self, resname): u = make_Universe(('resnames',)) # set half the residues' names to the resname we're testing @@ -526,17 +527,43 @@ def u(self): u = 
MDAnalysis.Universe.from_smiles(smi, addHs=False, generate_coordinates=False) return u - + + @pytest.fixture + def u2(self): + u = MDAnalysis.Universe.from_smiles("Nc1cc(C[C@H]([O-])C=O)c[nH]1") + return u + @pytest.mark.parametrize("sel_str, n_atoms", [ ("aromatic", 5), ("not aromatic", 1), ("type N and aromatic", 1), ("type C and aromatic", 4), ]) - def test_selection(self, u, sel_str, n_atoms): + def test_aromatic_selection(self, u, sel_str, n_atoms): sel = u.select_atoms(sel_str) assert sel.n_atoms == n_atoms + @pytest.mark.parametrize("sel_str, indices", [ + ("smarts n", [10]), + ("smarts [#7]", [0, 10]), + ("smarts a", [1, 2, 3, 9, 10]), + ("smarts c", [1, 2, 3, 9]), + ("smarts [*-]", [6]), + ("smarts [$([!#1]);$([!R][R])]", [0, 4]), + ("smarts [$([C@H](-[CH2])(-[O-])-C=O)]", [5]), + ("smarts [$([C@@H](-[CH2])(-[O-])-C=O)]", []), + ("smarts a and type C", [1, 2, 3, 9]), + ("(smarts a) and (type C)", [1, 2, 3, 9]), + ("smarts a and type N", [10]), + ]) + def test_smarts_selection(self, u2, sel_str, indices): + sel = u2.select_atoms(sel_str) + assert_equal(sel.indices, indices) + + def test_invalid_smarts_sel_raises_error(self, u2): + with pytest.raises(ValueError, match="not a valid SMARTS"): + u2.select_atoms("smarts foo") + class TestSelectionsNucleicAcids(object): @pytest.fixture(scope='class') diff --git a/testsuite/MDAnalysisTests/core/test_fragments.py b/testsuite/MDAnalysisTests/core/test_fragments.py index 55c4d22d271..f71c48866e5 100644 --- a/testsuite/MDAnalysisTests/core/test_fragments.py +++ b/testsuite/MDAnalysisTests/core/test_fragments.py @@ -78,7 +78,7 @@ def case2(): class TestFragments(object): - """Use 125 atom test Universe + r"""Use 125 atom test Universe 5 segments of 5 residues of 5 atoms diff --git a/testsuite/MDAnalysisTests/core/test_segmentgroup.py b/testsuite/MDAnalysisTests/core/test_segmentgroup.py index 3f0c251e543..546c5ad44cf 100644 --- a/testsuite/MDAnalysisTests/core/test_segmentgroup.py +++ 
b/testsuite/MDAnalysisTests/core/test_segmentgroup.py @@ -88,6 +88,24 @@ def test_set_segid_updates_(universe): err_msg="old selection was not changed in place after set_segid") +def test_set_segids_many(): + u = mda.Universe.empty(n_atoms=6, n_residues=2, n_segments=2, + atom_resindex=[0, 0, 0, 1, 1, 1], residue_segindex=[0,1]) + u.add_TopologyAttr('segids', ['A', 'B']) + + # universe with 2 segments, A and B + + u.segments.segids = ['X', 'Y'] + + assert u.segments[0].segid == 'X' + assert u.segments[1].segid == 'Y' + + assert len(u.select_atoms('segid A')) == 0 + assert len(u.select_atoms('segid B')) == 0 + assert len(u.select_atoms('segid X')) == 3 + assert len(u.select_atoms('segid Y')) == 3 + + def test_atom_order(universe): assert_equal(universe.segments.atoms.indices, sorted(universe.segments.atoms.indices)) diff --git a/testsuite/MDAnalysisTests/core/test_topologyattrs.py b/testsuite/MDAnalysisTests/core/test_topologyattrs.py index 6fa082b3b3f..270491514af 100644 --- a/testsuite/MDAnalysisTests/core/test_topologyattrs.py +++ b/testsuite/MDAnalysisTests/core/test_topologyattrs.py @@ -93,6 +93,7 @@ class TestAtomAttr(TopologyAttrMixin): """ values = np.array([7, 3, 69, 9993, 84, 194, 263, 501, 109, 5873]) + single_value = 567 attrclass = tpattrs.AtomAttr def test_set_atom_VE(self): @@ -112,8 +113,9 @@ def test_get_atoms(self, attr): def test_set_atoms_singular(self, attr): # set len 2 Group to len 1 value dg = DummyGroup([3, 7]) - attr.set_atoms(dg, 567) - assert_equal(attr.get_atoms(dg), np.array([567, 567])) + attr.set_atoms(dg, self.single_value) + assert_equal(attr.get_atoms(dg), + np.array([self.single_value, self.single_value])) def test_set_atoms_plural(self, attr): # set len 2 Group to len 2 values @@ -175,6 +177,7 @@ def test_cant_set_segment_indices(self, u): class TestAtomnames(TestAtomAttr): values = np.array(['O', 'C', 'CA', 'N', 'CB', 'CG', 'CD', 'NA', 'CL', 'OW'], dtype=np.object) + single_value = 'Ca2' attrclass = tpattrs.Atomnames @@ -206,18 
+209,19 @@ class TestResidueAttr(TopologyAttrMixin): """Test residue-level TopologyAttrs. """ + single_value = 2 values = np.array([15.2, 395.6, 0.1, 9.8]) attrclass = tpattrs.ResidueAttr - def test_set_residue_VE(self): - u = make_Universe(('resnames',)) - res = u.residues[0] + def test_set_residue_VE(self, universe): + # setting e.g. resname to 2 values should fail with VE + res = universe.residues[0] with pytest.raises(ValueError): - setattr(res, 'resname', ['wrong', 'length']) + setattr(res, self.attrclass.singular, self.values[:2]) def test_get_atoms(self, attr): assert_equal(attr.get_atoms(DummyGroup([7, 3, 9])), - self.values[[3, 2, 2]]) + self.values[[3, 2, 2]]) def test_get_atom(self, universe): attr = getattr(universe.atoms[0], self.attrclass.singular) @@ -225,14 +229,14 @@ def test_get_atom(self, universe): def test_get_residues(self, attr): assert_equal(attr.get_residues(DummyGroup([1, 2, 1, 3])), - self.values[[1, 2, 1, 3]]) + self.values[[1, 2, 1, 3]]) def test_set_residues_singular(self, attr): dg = DummyGroup([3, 0, 1]) - attr.set_residues(dg, 2) + attr.set_residues(dg, self.single_value) - assert_almost_equal(attr.get_residues(dg), - np.array([2, 2, 2])) + assert_equal(attr.get_residues(dg), + np.array([self.single_value]*3, dtype=self.values.dtype)) def test_set_residues_plural(self, attr): attr.set_residues(DummyGroup([3, 0, 1]), @@ -254,10 +258,17 @@ def test_get_segments(self, attr): assert_equal(attr.get_segments(DummyGroup([0, 1, 1])), [self.values[[0, 3]], self.values[[1, 2]], self.values[[1, 2]]]) -class TestICodes(TestResidueAttr): - values = np.array(['a', 'b', '', 'd']) + +class TestResnames(TestResidueAttr): + attrclass = tpattrs.Resnames + single_value = 'xyz' + values = np.array(['a', 'b', '', 'd'], dtype=object) + + +class TestICodes(TestResnames): attrclass = tpattrs.ICodes + class TestResids(TestResidueAttr): values = np.array([10, 11, 18, 20]) attrclass = tpattrs.Resids diff --git a/testsuite/MDAnalysisTests/datafiles.py 
b/testsuite/MDAnalysisTests/datafiles.py index 898bd05b1ce..e4db121fbba 100644 --- a/testsuite/MDAnalysisTests/datafiles.py +++ b/testsuite/MDAnalysisTests/datafiles.py @@ -315,6 +315,7 @@ PDB_xvf = resource_filename(__name__, 'data/cobrotoxin.pdb') TPR_xvf = resource_filename(__name__, 'data/cobrotoxin.tpr') +H5MD_xvf = resource_filename(__name__, 'data/cobrotoxin.h5md') TRR_xvf = resource_filename(__name__, 'data/cobrotoxin.trr') H5MD_xvf = resource_filename(__name__, 'data/cobrotoxin.h5md') XVG_BZ2 = resource_filename(__name__, 'data/cobrotoxin_protein_forces.xvg.bz2') diff --git a/testsuite/MDAnalysisTests/formats/test_libdcd.py b/testsuite/MDAnalysisTests/formats/test_libdcd.py index 85006c9a879..40edbd15296 100644 --- a/testsuite/MDAnalysisTests/formats/test_libdcd.py +++ b/testsuite/MDAnalysisTests/formats/test_libdcd.py @@ -20,6 +20,7 @@ import sys import string import struct +import platform import hypothesis.strategies as strategies from hypothesis import example, given @@ -351,8 +352,10 @@ def write_dcd(in_name, out_name, remarks='testing', header=None): f_out.write(xyz=frame.xyz, box=frame.unitcell) -@pytest.mark.xfail(os.name == 'nt' and sys.maxsize <= 2**32, - reason="occasional fail on 32-bit windows") +@pytest.mark.xfail((os.name == 'nt' + and sys.maxsize <= 2**32) or + platform.machine() == 'aarch64', + reason="occasional fail on 32-bit windows and ARM") @given(remarks=strategies.text( alphabet=string.printable, min_size=0, max_size=239)) # handle the printable ASCII strings diff --git a/testsuite/MDAnalysisTests/parallelism/test_pickle_transformation.py b/testsuite/MDAnalysisTests/parallelism/test_pickle_transformation.py new file mode 100644 index 00000000000..d663c75cff5 --- /dev/null +++ b/testsuite/MDAnalysisTests/parallelism/test_pickle_transformation.py @@ -0,0 +1,153 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 fileencoding=utf-8 +# +# MDAnalysis --- 
https://www.mdanalysis.org +# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, +# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. +# MDAnalysis: A Python package for the rapid analysis of molecular dynamics +# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th +# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. +# doi: 10.25080/majora-629e541a-00e +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 +# +import pytest +import pickle +from numpy.testing import assert_almost_equal + +import MDAnalysis as mda + +from MDAnalysis.transformations.fit import fit_translation, fit_rot_trans +from MDAnalysis.transformations.positionaveraging import PositionAverager +from MDAnalysis.transformations.rotate import rotateby +from MDAnalysis.transformations.translate import translate, center_in_box +from MDAnalysis.transformations.wrap import wrap, unwrap + +from MDAnalysisTests.datafiles import PSF_TRICLINIC, DCD_TRICLINIC + + +@pytest.fixture(params=[ + (PSF_TRICLINIC, DCD_TRICLINIC), +]) +def u(request): + top, traj = request.param + return mda.Universe(top, traj) + + +@pytest.fixture() +def fit_translation_transformation(u): + ag = u.atoms[0:10] + return fit_translation(ag, ag) + + +@pytest.fixture() +def fit_rot_trans_transformation(u): + ag = u.atoms[0:10] + return fit_rot_trans(ag, ag) + + +@pytest.fixture() +def PositionAverager_transformation(u): + return PositionAverager(3) + + +@pytest.fixture() +def rotateby_transformation(u): + return rotateby(90, [0, 0, 
1], [1, 2, 3]) + + +@pytest.fixture() +def translate_transformation(u): + return translate([1, 2, 3]) + + +@pytest.fixture() +def center_in_box_transformation(u): + ag = u.atoms[0:10] + return center_in_box(ag) + + +@pytest.fixture() +def wrap_transformation(u): + ag = u.atoms + return wrap(ag) + + +@pytest.fixture() +def unwrap_transformation(u): + ag = u.atoms + return unwrap(ag) + + +def test_add_fit_translation_pickle(fit_translation_transformation, u): + u.trajectory.add_transformations(fit_translation_transformation) + u_p = pickle.loads(pickle.dumps(u)) + u.trajectory[0] + for u_ts, u_p_ts in zip(u.trajectory[:5], u_p.trajectory[:5]): + assert_almost_equal(u_ts.positions, u_p_ts.positions) + + +def test_add_fit_rot_trans_pickle(fit_rot_trans_transformation, + u): + u.trajectory.add_transformations(fit_rot_trans_transformation) + u_p = pickle.loads(pickle.dumps(u)) + u.trajectory[0] + for u_ts, u_p_ts in zip(u.trajectory[:5], u_p.trajectory[:5]): + assert_almost_equal(u_ts.positions, u_p_ts.positions) + + +def test_add_PositionAverager_pickle(PositionAverager_transformation, u): + u.trajectory.add_transformations(PositionAverager_transformation) + u_p = pickle.loads(pickle.dumps(u)) + u.trajectory[0] + for u_ts, u_p_ts in zip(u.trajectory[:5], u_p.trajectory[:5]): + assert_almost_equal(u_ts.positions, u_p_ts.positions) + + +def test_add_rotateby_pickle(rotateby_transformation, u): + u.trajectory.add_transformations(rotateby_transformation) + u_p = pickle.loads(pickle.dumps(u)) + u.trajectory[0] + for u_ts, u_p_ts in zip(u.trajectory[:5], u_p.trajectory[:5]): + assert_almost_equal(u_ts.positions, u_p_ts.positions) + + +def test_add_translate_pickle(translate_transformation, u): + u.trajectory.add_transformations(translate_transformation) + u_p = pickle.loads(pickle.dumps(u)) + u.trajectory[0] + for u_ts, u_p_ts in zip(u.trajectory[:5], u_p.trajectory[:5]): + assert_almost_equal(u_ts.positions, u_p_ts.positions) + + +def 
test_add_center_in_box_pickle(center_in_box_transformation, u): + u.trajectory.add_transformations(center_in_box_transformation) + u_p = pickle.loads(pickle.dumps(u)) + u.trajectory[0] + for u_ts, u_p_ts in zip(u.trajectory[:5], u_p.trajectory[:5]): + assert_almost_equal(u_ts.positions, u_p_ts.positions) + + +def test_add_wrap_pickle(wrap_transformation, u): + u.trajectory.add_transformations(wrap_transformation) + u_p = pickle.loads(pickle.dumps(u)) + u.trajectory[0] + for u_ts, u_p_ts in zip(u.trajectory[:5], u_p.trajectory[:5]): + assert_almost_equal(u_ts.positions, u_p_ts.positions) + + +def test_add_unwrap_pickle(unwrap_transformation, u): + u.trajectory.add_transformations(unwrap_transformation) + u_p = pickle.loads(pickle.dumps(u)) + u.trajectory[0] + for u_ts, u_p_ts in zip(u.trajectory[:5], u_p.trajectory[:5]): + assert_almost_equal(u_ts.positions, u_p_ts.positions) diff --git a/testsuite/MDAnalysisTests/test_api.py b/testsuite/MDAnalysisTests/test_api.py index 752fb516953..211865db985 100644 --- a/testsuite/MDAnalysisTests/test_api.py +++ b/testsuite/MDAnalysisTests/test_api.py @@ -26,23 +26,26 @@ import MDAnalysis as mda + def test_Universe(): assert mda.Universe is mda.core.universe.Universe -def test_as_Universe(): - assert mda.as_Universe is mda.core.universe.as_Universe def test_fetch_mmtf(): assert mda.fetch_mmtf is mda.coordinates.MMTF.fetch_mmtf + def test_Writer(): assert mda.Writer is mda.coordinates.core.writer + def test_AtomGroup(): assert mda.AtomGroup is mda.core.groups.AtomGroup + def test_ResidueGroup(): assert mda.ResidueGroup is mda.core.groups.ResidueGroup + def test_SegmentGroup(): assert mda.SegmentGroup is mda.core.groups.SegmentGroup diff --git a/testsuite/MDAnalysisTests/topology/test_parmed.py b/testsuite/MDAnalysisTests/topology/test_parmed.py index 9676e694591..2d7278577d6 100644 --- a/testsuite/MDAnalysisTests/topology/test_parmed.py +++ b/testsuite/MDAnalysisTests/topology/test_parmed.py @@ -21,8 +21,10 @@ # J. Comput. Chem. 
32 (2011), 2319--2327, doi:10.1002/jcc.21787 # import pytest -import MDAnalysis as mda +import numpy as np +from numpy.testing import assert_equal +import MDAnalysis as mda from MDAnalysisTests.topology.base import ParserBase from MDAnalysisTests.datafiles import ( PSF_NAMD_GBIS, @@ -31,6 +33,7 @@ pmd = pytest.importorskip('parmed') + class BaseTestParmedParser(ParserBase): parser = mda.topology.ParmEdParser.ParmEdParser expected_attrs = ['ids', 'names', 'types', 'masses', @@ -40,7 +43,7 @@ class BaseTestParmedParser(ParserBase): 'epsilon14s', 'elements', 'chainIDs', 'resids', 'resnames', 'resnums', 'segids', - 'bonds', 'ureybradleys', 'angles', + 'bonds', 'ureybradleys', 'angles', 'dihedrals', 'impropers', 'cmaps'] expected_n_atoms = 0 @@ -69,42 +72,46 @@ def test_bonds_total_counts(self, top, filename): unique = set([(a.atom1.idx, a.atom2.idx) for a in filename.bonds]) assert len(top.bonds.values) == len(unique) - + def test_angles_total_counts(self, top, filename): - unique = set([(a.atom1.idx, a.atom2.idx, a.atom3.idx) + unique = set([(a.atom1.idx, a.atom2.idx, a.atom3.idx) for a in filename.angles]) assert len(top.angles.values) == len(unique) def test_dihedrals_total_counts(self, top, filename): unique = set([(a.atom1.idx, a.atom2.idx, a.atom3.idx, a.atom4.idx) - for a in filename.dihedrals]) + for a in filename.dihedrals]) assert len(top.dihedrals.values) == len(unique) - + def test_impropers_total_counts(self, top, filename): unique = set([(a.atom1.idx, a.atom2.idx, a.atom3.idx, a.atom4.idx) for a in filename.impropers]) assert len(top.impropers.values) == len(unique) def test_cmaps_total_counts(self, top, filename): - unique = set([(a.atom1.idx, a.atom2.idx, a.atom3.idx, + unique = set([(a.atom1.idx, a.atom2.idx, a.atom3.idx, a.atom4.idx, a.atom5.idx) for a in filename.cmaps]) assert len(top.cmaps.values) == len(unique) - + def test_ureybradleys_total_counts(self, top, filename): unique = set([(a.atom1.idx, a.atom2.idx) for a in filename.urey_bradleys]) 
assert len(top.ureybradleys.values) == len(unique) + def test_elements(self, top): + for erange, evals in zip(self.elems_ranges, self.expected_elems): + assert_equal(top.elements.values[erange[0]:erange[1]], evals, + "unexpected element match") class TestParmedParserPSF(BaseTestParmedParser): """ PSF with CMAPs """ - + ref_filename = PSF_NAMD_GBIS - + expected_n_atoms = 3341 expected_n_residues = 214 expected_n_bonds = 3365 @@ -112,21 +119,24 @@ class TestParmedParserPSF(BaseTestParmedParser): expected_n_dihedrals = 8921 expected_n_impropers = 541 expected_n_cmaps = 212 + elems_ranges = ((0, 3342),) + # No atomic numbers set by parmed == no elements + expected_elems = (np.array(['' for i in range(3341)], dtype=object),) def test_bonds_atom_counts(self, universe): assert len(universe.atoms[[0]].bonds) == 4 assert len(universe.atoms[[42]].bonds) == 1 @pytest.mark.parametrize('value', ( - (0, 1), - (0, 2), - (0, 3), + (0, 1), + (0, 2), + (0, 3), (0, 4), )) def test_bonds_identity(self, top, value): vals = top.bonds.values assert value in vals or value[::-1] in vals - + def test_bond_types(self, universe): b1 = universe.bonds[0] @@ -139,8 +149,8 @@ def test_angles_atom_counts(self, universe): assert len(universe.atoms[[42]].angles), 2 @pytest.mark.parametrize('value', ( - (1, 0, 2), - (1, 0, 3), + (1, 0, 2), + (1, 0, 3), (1, 0, 4), )) def test_angles_identity(self, top, value): @@ -151,15 +161,14 @@ def test_dihedrals_atom_counts(self, universe): assert len(universe.atoms[[0]].dihedrals) == 14 @pytest.mark.parametrize('value', ( - (0, 4, 6, 7), - (0, 4, 6, 8), - (0, 4, 6, 9), + (0, 4, 6, 7), + (0, 4, 6, 8), + (0, 4, 6, 9), (0, 4, 17, 18), )) def test_dihedrals_identity(self, top, value): vals = top.dihedrals.values assert value in vals or value[::-1] in vals - @pytest.mark.parametrize('value', ( (17, 19, 21, 41, 43), @@ -168,16 +177,22 @@ def test_dihedrals_identity(self, top, value): def test_cmaps_identity(self, top, value): vals = top.cmaps.values assert value in 
vals or value[::-1] in vals - + + class TestParmedParserPRM(BaseTestParmedParser): """ PRM """ - + ref_filename = PRM - + expected_n_atoms = 252 expected_n_residues = 14 + elems_ranges = ((0, 8), (30, 38)) + expected_elems = (np.array(['N', 'H', 'H', 'H', 'C', 'H', 'C', 'H'], + dtype=object), + np.array(['H', 'C', 'H', 'H', 'C', 'C', 'H', 'C'], + dtype=object)) def test_bonds_atom_counts(self, universe): assert len(universe.atoms[[0]].bonds) == 4 @@ -192,7 +207,7 @@ def test_bonds_atom_counts(self, universe): def test_bonds_identity(self, top, value): vals = top.bonds.values assert value in vals or value[::-1] in vals - + def test_bond_types(self, universe): b1 = universe.bonds[0] @@ -225,9 +240,9 @@ def test_dihedrals_atom_counts(self, universe): def test_dihedrals_identity(self, top, value): vals = top.dihedrals.values assert value in vals or value[::-1] in vals - + def test_dihedral_types(self, universe): - ag = universe.atoms[[10,12,14,16]] + ag = universe.atoms[[10, 12, 14, 16]] dih = universe.dihedrals.atomgroup_intersection(ag, strict=True)[0] assert len(dih.type) == 4 diff --git a/testsuite/MDAnalysisTests/util.py b/testsuite/MDAnalysisTests/util.py index 91a58508808..45b7121346c 100644 --- a/testsuite/MDAnalysisTests/util.py +++ b/testsuite/MDAnalysisTests/util.py @@ -153,7 +153,7 @@ def in_dir(dirname): def assert_nowarns(warning_class, *args, **kwargs): - """Fail if the given callable throws the specified warning. + r"""Fail if the given callable throws the specified warning. A warning of class warning_class should NOT be thrown by the callable when invoked with arguments args and keyword arguments kwargs. 
diff --git a/testsuite/MDAnalysisTests/utils/test_meta.py b/testsuite/MDAnalysisTests/utils/test_meta.py index 3de7b828de9..15a30961c44 100644 --- a/testsuite/MDAnalysisTests/utils/test_meta.py +++ b/testsuite/MDAnalysisTests/utils/test_meta.py @@ -44,7 +44,7 @@ def test_version_format(version=None): import MDAnalysis.version version = MDAnalysis.version.__version__ # see https://github.com/MDAnalysis/mdanalysis/wiki/SemanticVersioning for format definition - m = re.match('(?P\d+)\.(?P\d+)\.(?P\d+)(-(?P\w+))?$', + m = re.match(r'(?P\d+)\.(?P\d+)\.(?P\d+)(-(?P\w+))?$', version) assert m, "version {0} does not match the MAJOR.MINOR.PATCH(-suffix) format".format(version) diff --git a/testsuite/setup.cfg b/testsuite/setup.cfg index 0473b68e784..0c926b54caf 100644 --- a/testsuite/setup.cfg +++ b/testsuite/setup.cfg @@ -10,6 +10,7 @@ filterwarnings= # don't enforce for third party packages though: ignore:`np.*` is a deprecated alias for the builtin:DeprecationWarning:networkx.*: error:Creating an ndarray from ragged nested sequences + error:invalid escape sequence \\:DeprecationWarning # Settings to test for warnings we throw # [tool:pytest] diff --git a/testsuite/setup.py b/testsuite/setup.py index f454ba9e9cd..0bb0d73ca73 100755 --- a/testsuite/setup.py +++ b/testsuite/setup.py @@ -136,10 +136,11 @@ def run(self): 'MDAnalysisTests.plugins': 'MDAnalysisTests/plugins'}, package_data={'MDAnalysisTests': ['data/*.psf', 'data/*.dcd', 'data/*.pdb', - 'data/tprs/*.tpr', 'data/tprs/all_bonded/*.tpr', + 'data/tprs/*.tpr', 'data/tprs/all_bonded/*.gro', 'data/tprs/all_bonded/*.top', 'data/tprs/all_bonded/*.mdp', 'data/*.tpr', + 'data/tprs/*/*.tpr', 'data/*.gro', 'data/*.xtc', 'data/*.trr', 'data/*npy', 'data/*.crd', 'data/*.xyz', 'data/Amber/*.bz2', @@ -172,9 +173,12 @@ def run(self): 'data/analysis/*', 'data/*.gsd', 'data/windows/*', - 'data/*.itp', + 'data/*.itp', "data/gromacs/gromos54a7_edited.ff/*", 'data/*.coor', 'data/*.h5md', + 'data/*.in', + 'data/*.top', + 'data/*.sdf', 
], }, install_requires=[