diff --git a/.appveyor.yml b/.appveyor.yml index d61f9c52050..8982aae61eb 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -10,7 +10,7 @@ cache: environment: global: CONDA_CHANNELS: conda-forge - CONDA_DEPENDENCIES: pip setuptools wheel cython mock six biopython networkx joblib matplotlib scipy vs2015_runtime pytest mmtf-python GridDataFormats hypothesis pytest-cov codecov chemfiles tqdm tidynamics>=1.0.0 + CONDA_DEPENDENCIES: pip setuptools wheel cython mock six biopython networkx joblib matplotlib scipy vs2015_runtime pytest mmtf-python GridDataFormats hypothesis pytest-cov codecov chemfiles tqdm tidynamics>=1.0.0 rdkit PIP_DEPENDENCIES: gsd==1.9.3 duecredit parmed DEBUG: "False" MINGW_64: C:\mingw-w64\x86_64-6.3.0-posix-seh-rt_v5-rev1\mingw64\bin diff --git a/.travis.yml b/.travis.yml index 8ec63ea5791..866a29fa2e2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -29,7 +29,7 @@ env: - SETUP_CMD="${PYTEST_FLAGS}" - BUILD_CMD="pip install -e package/ && (cd testsuite/ && python setup.py build)" - CONDA_MIN_DEPENDENCIES="mmtf-python mock six biopython networkx cython matplotlib scipy griddataformats hypothesis gsd codecov" - - CONDA_DEPENDENCIES="${CONDA_MIN_DEPENDENCIES} seaborn>=0.7.0 clustalw=2.1 netcdf4 scikit-learn joblib>=0.12 chemfiles tqdm>=4.43.0 tidynamics>=1.0.0" + - CONDA_DEPENDENCIES="${CONDA_MIN_DEPENDENCIES} seaborn>=0.7.0 clustalw=2.1 netcdf4 scikit-learn joblib>=0.12 chemfiles tqdm>=4.43.0 tidynamics>=1.0.0 rdkit" - CONDA_CHANNELS='biobuilds conda-forge' - CONDA_CHANNEL_PRIORITY=True - PIP_DEPENDENCIES="duecredit parmed" diff --git a/package/CHANGELOG b/package/CHANGELOG index 4b67ccbc3cf..db3c967b1a8 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -13,7 +13,7 @@ The rules for this file: * release numbers follow "Semantic Versioning" http://semver.org ------------------------------------------------------------------------------ -??/??/?? richardjgowers, IAlibay, hmacdope, orbeckst +??/??/?? richardjgowers, IAlibay, hmacdope, orbeckst, cbouy * 2.0.0 @@ -23,6 +23,12 @@ Fixes * Testsuite does not any more matplotlib.use('agg') (#2191) Enhancements + * Added the RDKitParser which creates a `core.topology.Topology` object from + an `rdkit.Chem.rdchem.Mol` object, and the RDKitReader to read coordinates + from RDKit conformers (Issue #2468, PR #2707) + * Added the `Aromaticities` topology attribute, and the `aromatic` selection + token (Issue #2468, PR #2707) + * Added the `from_smiles` classmethod to the Universe (Issue #2468, PR #2707) * Added computation of Mean Squared Displacements (#2438, PR #2619) Changes diff --git a/package/MDAnalysis/coordinates/RDKit.py b/package/MDAnalysis/coordinates/RDKit.py new file mode 100644 index 00000000000..3c64241d109 --- /dev/null +++ b/package/MDAnalysis/coordinates/RDKit.py @@ -0,0 +1,104 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 +# +# MDAnalysis --- https://www.mdanalysis.org +# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, +# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. +# MDAnalysis: A Python package for the rapid analysis of molecular dynamics +# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th +# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. +# doi: 10.25080/majora-629e541a-00e +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 +# + +"""RDKit molecule --- :mod:`MDAnalysis.coordinates.RDKit` +================================================================ + +Read coordinates data from an `RDKit `_ :class:`rdkit.Chem.rdchem.Mol` with :class:`RDKitReader` +into a MDAnalysis Universe. Convert it back to a :class:`rdkit.Chem.rdchem.Mol` with +:class:`RDKitConverter`. + + +Example +------- + +>>> from rdkit import Chem +>>> import MDAnalysis as mda +>>> mol = Chem.MolFromMol2File("docking_poses.mol2", removeHs=False) +>>> u = mda.Universe(mol) +>>> u + +>>> u.trajectory + + + +Classes +------- + +.. autoclass:: RDKitReader + :members: + +.. autoclass:: RDKitConverter + :members: + + +""" + +import warnings + +import numpy as np + +from . import memory + + +class RDKitReader(memory.MemoryReader): + """Coordinate reader for RDKit. + + .. versionadded:: 2.0.0 + """ + format = 'RDKIT' + + # Structure.coordinates always in Angstrom + units = {'time': None, 'length': 'Angstrom'} + + @staticmethod + def _format_hint(thing): + """Can this reader read *thing*?""" + try: + from rdkit import Chem + except ImportError: + # if we can't import rdkit, it's probably not rdkit + return False + else: + return isinstance(thing, Chem.Mol) + + def __init__(self, filename, **kwargs): + """Read coordinates from an RDKit molecule. + Each conformer in the original RDKit molecule will be read as a frame + in the resulting universe. + + Parameters + ---------- + + filename : rdkit.Chem.rdchem.Mol + RDKit molecule + """ + n_atoms = filename.GetNumAtoms() + coordinates = np.array([ + conf.GetPositions() for conf in filename.GetConformers()], + dtype=np.float32) + if coordinates.size == 0: + warnings.warn("No coordinates found in the RDKit molecule") + coordinates = np.empty((1,n_atoms,3), dtype=np.float32) + coordinates[:] = np.nan + super(RDKitReader, self).__init__(coordinates, order='fac', **kwargs) \ No newline at end of file diff --git a/package/MDAnalysis/coordinates/__init__.py b/package/MDAnalysis/coordinates/__init__.py index 5bdcff1f4b7..ad3d5b53cb8 100644 --- a/package/MDAnalysis/coordinates/__init__.py +++ b/package/MDAnalysis/coordinates/__init__.py @@ -738,3 +738,4 @@ class can choose an appropriate reader automatically. from . import null from . import NAMDBIN from . import FHIAIMS +from . import RDKit diff --git a/package/MDAnalysis/core/selection.py b/package/MDAnalysis/core/selection.py index 6784110d2db..ced2b91763a 100644 --- a/package/MDAnalysis/core/selection.py +++ b/package/MDAnalysis/core/selection.py @@ -568,6 +568,21 @@ class AltlocSelection(StringSelection): field = 'altLocs' +class AromaticSelection(Selection): + """Select aromatic atoms. + + Aromaticity is available in the `aromaticities` attribute and is made + available through RDKit""" + token = 'aromatic' + field = 'aromaticities' + + def __init__(self, parser, tokens): + pass + + def apply(self, group): + return group[group.aromaticities].unique + + class ResidSelection(Selection): """Select atoms based on numerical fields diff --git a/package/MDAnalysis/core/topologyattrs.py b/package/MDAnalysis/core/topologyattrs.py index 5345fcfc831..f623b57d979 100644 --- a/package/MDAnalysis/core/topologyattrs.py +++ b/package/MDAnalysis/core/topologyattrs.py @@ -1672,6 +1672,18 @@ def _gen_initial_values(na, nr, ns): return np.zeros(na) +class Aromaticities(AtomAttr): + """Aromaticity (RDKit)""" + attrname = "aromaticities" + singular = "aromaticity" + per_object = "atom" + dtype = bool + + @staticmethod + def _gen_initial_values(na, nr, ns): + return np.zeros(na, dtype=bool) + + class ResidueAttr(TopologyAttr): attrname = 'residueattrs' singular = 'residueattr' diff --git a/package/MDAnalysis/core/universe.py b/package/MDAnalysis/core/universe.py index 16d2e4c90db..b2562effb97 100644 --- a/package/MDAnalysis/core/universe.py +++ b/package/MDAnalysis/core/universe.py @@ -1285,6 +1285,87 @@ def _fragdict(self): fragdict[a.ix] = fraginfo(i, f) return fragdict + + @classmethod + def from_smiles(cls, smiles, sanitize=True, addHs=True, + generate_coordinates=True, numConfs=1, + rdkit_kwargs={}, **kwargs): + """Create a Universe from a SMILES string with rdkit + + Parameters + ---------- + smiles : str + SMILES string + + sanitize : bool (optional, default True) + Toggle the sanitization of the molecule + + addHs : bool (optional, default True) + Add all necessary hydrogens to the molecule + + generate_coordinates : bool (optional, default True) + Generate 3D coordinates using RDKit's `AllChem.EmbedMultipleConfs` + function. Requires adding hydrogens with the `addHs` parameter + + numConfs : int (optional, default 1) + Number of frames to generate coordinates for. Ignored if + `generate_coordinates=False` + + rdkit_kwargs : dict (optional) + Other arguments passed to the RDKit `EmbedMultipleConfs` function + + kwargs : dict + Parameters passed on Universe creation + + Returns + ------- + :class:`~MDAnalysis.core.Universe` + + Examples + -------- + To create a Universe with 10 conformers of ethanol: + + >>> u = mda.Universe.from_smiles('CCO', numConfs=10) + >>> u + + >>> u.trajectory + + + To use a different conformer generation algorithm, like ETKDGv3: + + >>> u = mda.Universe.from_smiles('CCO', rdkit_kwargs=dict( + params=AllChem.ETKDGv3())) + >>> u.trajectory + + + + .. versionadded:: 2.0.0 + """ + try: + from rdkit import Chem + from rdkit.Chem import AllChem + except ImportError as e: + raise ImportError( + "Creating a Universe from a SMILES string requires RDKit but " + "it does not appear to be installed") from e + + mol = Chem.MolFromSmiles(smiles, sanitize=sanitize) + if mol is None: + raise SyntaxError('Error while parsing SMILES {0}'.format(smiles)) + if addHs: + mol = Chem.AddHs(mol) + if generate_coordinates: + if not addHs: + raise ValueError("Generating coordinates requires adding " + "hydrogens with `addHs=True`") + + numConfs = rdkit_kwargs.pop("numConfs", numConfs) + if not (type(numConfs) is int and numConfs > 0): + raise SyntaxError("numConfs must be a non-zero positive " + "integer instead of {0}".format(numConfs)) + AllChem.EmbedMultipleConfs(mol, numConfs, **rdkit_kwargs) + + return cls(mol, **kwargs) # TODO: what is the point of this function??? diff --git a/package/MDAnalysis/topology/RDKitParser.py b/package/MDAnalysis/topology/RDKitParser.py new file mode 100644 index 00000000000..e483ba667ea --- /dev/null +++ b/package/MDAnalysis/topology/RDKitParser.py @@ -0,0 +1,285 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 +# +# MDAnalysis --- https://www.mdanalysis.org +# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, +# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. +# MDAnalysis: A Python package for the rapid analysis of molecular dynamics +# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th +# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. +# doi: 10.25080/majora-629e541a-00e +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 +# + +""" +RDKit topology parser +===================== + +Converts an `RDKit `_ :class:`rdkit.Chem.rdchem.Mol` into a :class:`MDAnalysis.core.Topology`. + + +See Also +-------- +:mod:`MDAnalysis.coordinates.RDKit` + + +Classes +------- + +.. autoclass:: RDKitParser + :members: + :inherited-members: + +""" + +import logging +import warnings +import numpy as np + +from .base import TopologyReaderBase, change_squash +from . import guessers +from ..core.topologyattrs import ( + Atomids, + Atomnames, + Atomtypes, + Elements, + Masses, + Charges, + Aromaticities, + Bonds, + Resids, + Resnums, + Resnames, + Segids, + AltLocs, + ChainIDs, + Occupancies, + Tempfactors, +) +from ..core.topology import Topology + +logger = logging.getLogger("MDAnalysis.topology.RDKitParser") + + +class RDKitParser(TopologyReaderBase): + """ + For RDKit structures + + Creates the following Attributes: + - Atomids + - Atomnames + - Aromaticities + - Elements + - Masses + - Bonds + - Resids + - Resnums + - Segids + + Guesses the following: + - Atomtypes + + Depending on RDKit's input, the following Attributes might be present: + - Charges + - Resnames + - AltLocs + - ChainIDs + - Occupancies + - Tempfactors + + + .. versionadded:: 2.0.0 + """ + format = 'RDKIT' + + @staticmethod + def _format_hint(thing): + """Can this Parser read object *thing*?""" + try: + from rdkit import Chem + except ImportError: # if no rdkit, probably not rdkit + return False + else: + return isinstance(thing, Chem.Mol) + + def parse(self, **kwargs): + """Parse RDKit into Topology + + Returns + ------- + MDAnalysis Topology object + """ + + mol = self.filename + + # Atoms + names = [] + resnums = [] + resnames = [] + elements = [] + masses = [] + charges = [] + aromatics = [] + ids = [] + atomtypes = [] + segids = [] + altlocs = [] + chainids = [] + occupancies = [] + tempfactors = [] + + # check if multiple charges present + atom = mol.GetAtomWithIdx(0) + if atom.HasProp('_GasteigerCharge') and ( + atom.HasProp('_TriposPartialCharge') + ): + warnings.warn( + 'Both _GasteigerCharge and _TriposPartialCharge properties ' + 'are present. Using Gasteiger charges by default.') + + for atom in mol.GetAtoms(): + ids.append(atom.GetIdx()) + elements.append(atom.GetSymbol()) + masses.append(atom.GetMass()) + aromatics.append(atom.GetIsAromatic()) + mi = atom.GetMonomerInfo() + if mi: # atom name and residue info are present + names.append(mi.GetName().strip()) + resnums.append(mi.GetResidueNumber()) + resnames.append(mi.GetResidueName()) + segids.append(mi.GetSegmentNumber()) + altlocs.append(mi.GetAltLoc().strip()) + chainids.append(mi.GetChainId().strip()) + occupancies.append(mi.GetOccupancy()) + tempfactors.append(mi.GetTempFactor()) + else: + # atom name (MOL2 only) + try: + names.append(atom.GetProp('_TriposAtomName')) + except KeyError: + pass + # atom type (MOL2 only) + try: + atomtypes.append(atom.GetProp('_TriposAtomType')) + except KeyError: + pass + # gasteiger charge (computed): + # if the user took the time to compute them, make it a priority + # over charges read from a MOL2 file + try: + charges.append(atom.GetDoubleProp('_GasteigerCharge')) + except KeyError: + # partial charge (MOL2 only) + try: + charges.append(atom.GetDoubleProp('_TriposPartialCharge')) + except KeyError: + pass + + + # make Topology attributes + attrs = [] + n_atoms = len(ids) + + # * Attributes always present * + + # Atom attributes + for vals, Attr, dtype in ( + (ids, Atomids, np.int32), + (elements, Elements, object), + (masses, Masses, np.float32), + (aromatics, Aromaticities, bool), + ): + attrs.append(Attr(np.array(vals, dtype=dtype))) + + # Bonds + bonds = [] + bond_types = [] + bond_orders = [] + for bond in mol.GetBonds(): + bonds.append((bond.GetBeginAtomIdx(), bond.GetEndAtomIdx())) + bond_orders.append(bond.GetBondTypeAsDouble()) + bond_types.append(str(bond.GetBondType())) + attrs.append(Bonds(bonds, types=bond_types, order=bond_orders)) + + # * Optional attributes * + + # Atom name + if names: + attrs.append(Atomnames(np.array(names, dtype=object))) + else: + for atom in mol.GetAtoms(): + name = "%s%d" % (atom.GetSymbol(), atom.GetIdx()) + names.append(name) + attrs.append(Atomnames(np.array(names, dtype=object))) + + # Atom type + if atomtypes: + attrs.append(Atomtypes(np.array(atomtypes, dtype=object))) + else: + atomtypes = guessers.guess_types(names) + attrs.append(Atomtypes(atomtypes, guessed=True)) + + # Partial charges + if charges: + attrs.append(Charges(np.array(charges, dtype=np.float32))) + else: + pass # no guesser yet + + # PDB only + for vals, Attr, dtype in ( + (altlocs, AltLocs, object), + (chainids, ChainIDs, object), + (occupancies, Occupancies, np.float32), + (tempfactors, Tempfactors, np.float32), + ): + if vals: + attrs.append(Attr(np.array(vals, dtype=dtype))) + + # Residue + if any(resnums) and not any(val is None for val in resnums): + resnums = np.array(resnums, dtype=np.int32) + resnames = np.array(resnames, dtype=object) + segids = np.array(segids, dtype=object) + residx, (resnums, resnames, segids) = change_squash( + (resnums, resnames, segids), + (resnums, resnames, segids)) + n_residues = len(resnums) + for vals, Attr, dtype in ( + (resnums, Resids, np.int32), + (resnums.copy(), Resnums, np.int32), + (resnames, Resnames, object) + ): + attrs.append(Attr(np.array(vals, dtype=dtype))) + else: + attrs.append(Resids(np.array([1]))) + attrs.append(Resnums(np.array([1]))) + residx = None + n_residues = 1 + + # Segment + if any(segids) and not any(val is None for val in segids): + segidx, (segids,) = change_squash((segids,), (segids,)) + n_segments = len(segids) + attrs.append(Segids(segids)) + else: + n_segments = 1 + attrs.append(Segids(np.array(['SYSTEM'], dtype=object))) + segidx = None + + # create topology + top = Topology(n_atoms, n_residues, n_segments, + attrs=attrs, + atom_resindex=residx, + residue_segindex=segidx) + + return top \ No newline at end of file diff --git a/package/MDAnalysis/topology/__init__.py b/package/MDAnalysis/topology/__init__.py index 415343d813d..8d7628693e9 100644 --- a/package/MDAnalysis/topology/__init__.py +++ b/package/MDAnalysis/topology/__init__.py @@ -306,7 +306,8 @@ __all__ = ['core', 'PSFParser', 'PDBParser', 'PQRParser', 'GROParser', 'CRDParser', 'TOPParser', 'PDBQTParser', 'TPRParser', 'LAMMPSParser', 'XYZParser', 'GMSParser', 'DLPolyParser', - 'HoomdXMLParser','GSDParser', 'ITPParser', 'ParmEdParser'] + 'HoomdXMLParser','GSDParser', 'ITPParser', 'ParmEdParser', + 'RDKitParser'] from . import core from . import PSFParser @@ -331,4 +332,5 @@ from . import MinimalParser from . import ITPParser from . import ParmEdParser +from . import RDKitParser from . import FHIAIMSParser diff --git a/package/doc/sphinx/source/documentation_pages/topology/RDKitParser.rst b/package/doc/sphinx/source/documentation_pages/topology/RDKitParser.rst new file mode 100644 index 00000000000..9f6bbd1dac9 --- /dev/null +++ b/package/doc/sphinx/source/documentation_pages/topology/RDKitParser.rst @@ -0,0 +1 @@ +.. automodule:: MDAnalysis.topology.RDKitParser \ No newline at end of file diff --git a/package/doc/sphinx/source/documentation_pages/topology_modules.rst b/package/doc/sphinx/source/documentation_pages/topology_modules.rst index ed8caba8ce6..04ebf433e27 100644 --- a/package/doc/sphinx/source/documentation_pages/topology_modules.rst +++ b/package/doc/sphinx/source/documentation_pages/topology_modules.rst @@ -43,6 +43,7 @@ topology file format in the *topology_format* keyword argument to topology/PDBQTParser topology/PQRParser topology/PSFParser + topology/RDKitParser topology/TOPParser topology/TPRParser topology/TXYZParser diff --git a/testsuite/MDAnalysisTests/coordinates/test_rdkit.py b/testsuite/MDAnalysisTests/coordinates/test_rdkit.py new file mode 100644 index 00000000000..7bdb845f95a --- /dev/null +++ b/testsuite/MDAnalysisTests/coordinates/test_rdkit.py @@ -0,0 +1,78 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 fileencoding=utf-8 +# +# MDAnalysis --- https://www.mdanalysis.org +# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, +# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. +# MDAnalysis: A Python package for the rapid analysis of molecular dynamics +# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th +# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. +# doi: 10.25080/majora-629e541a-00e +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 +# +import warnings + +import pytest +import MDAnalysis as mda +import numpy as np +from numpy.testing import (assert_equal, + assert_almost_equal) + +from MDAnalysisTests.datafiles import mol2_molecule + +Chem = pytest.importorskip("rdkit.Chem") +AllChem = pytest.importorskip("rdkit.Chem.AllChem") + +def mol2_mol(): + return Chem.MolFromMol2File(mol2_molecule, removeHs=False) + +def smiles_mol(): + mol = Chem.MolFromSmiles("CCO") + mol = Chem.AddHs(mol) + cids = AllChem.EmbedMultipleConfs(mol, numConfs=3) + return mol + +class TestRDKitReader(object): + @pytest.mark.parametrize("rdmol, n_frames", [ + (mol2_mol(), 1), + (smiles_mol(), 3), + ]) + def test_coordinates(self, rdmol, n_frames): + universe = mda.Universe(rdmol) + assert universe.trajectory.n_frames == n_frames + expected = np.array([ + conf.GetPositions() for conf in rdmol.GetConformers()], + dtype=np.float32) + assert_equal(expected, universe.trajectory.coordinate_array) + + def test_no_coordinates(self): + with warnings.catch_warnings(record=True) as w: + # Cause all warnings to always be triggered. + warnings.simplefilter("always") + # Trigger a warning. + u = mda.Universe.from_smiles("CCO", generate_coordinates=False) + # Verify the warning + assert len(w) == 1 + assert "No coordinates found" in str( + w[-1].message) + expected = np.empty((1,u.atoms.n_atoms,3), dtype=np.float32) + expected[:] = np.nan + assert_equal(u.trajectory.coordinate_array, expected) + + def test_compare_mol2reader(self): + universe = mda.Universe(mol2_mol()) + mol2 = mda.Universe(mol2_molecule) + assert universe.trajectory.n_frames == mol2.trajectory.n_frames + assert_equal(universe.trajectory.ts.positions, + mol2.trajectory.ts.positions) + diff --git a/testsuite/MDAnalysisTests/core/test_atomselections.py b/testsuite/MDAnalysisTests/core/test_atomselections.py index 883cf83ab48..b06ea476efb 100644 --- a/testsuite/MDAnalysisTests/core/test_atomselections.py +++ b/testsuite/MDAnalysisTests/core/test_atomselections.py @@ -516,6 +516,28 @@ def test_molnum(self, universe, selection_string, reference): assert_equal(sel.ids, np.array(reference, dtype=np.int32)) +class TestSelectionRDKit(object): + def setup_class(self): + pytest.importorskip("rdkit.Chem") + + @pytest.fixture + def u(self): + smi = "Cc1cNcc1" + u = MDAnalysis.Universe.from_smiles(smi, addHs=False, + generate_coordinates=False) + return u + + @pytest.mark.parametrize("sel_str, n_atoms", [ + ("aromatic", 5), + ("not aromatic", 1), + ("type N and aromatic", 1), + ("type C and aromatic", 4), + ]) + def test_selection(self, u, sel_str, n_atoms): + sel = u.select_atoms(sel_str) + assert sel.n_atoms == n_atoms + + class TestSelectionsNucleicAcids(object): @pytest.fixture(scope='class') def universe(self): diff --git a/testsuite/MDAnalysisTests/core/test_universe.py b/testsuite/MDAnalysisTests/core/test_universe.py index 407fbbab924..630adfa7136 100644 --- a/testsuite/MDAnalysisTests/core/test_universe.py +++ b/testsuite/MDAnalysisTests/core/test_universe.py @@ -214,6 +214,81 @@ def test_universe_topology_class_with_coords(self): assert u2._topology is u._topology +class TestUniverseFromSmiles(object): + def setup_class(self): + pytest.importorskip("rdkit.Chem") + + def test_default(self): + smi = "CN1C=NC2=C1C(=O)N(C(=O)N2C)C" + u = mda.Universe.from_smiles(smi, format='RDKIT') + assert u.atoms.n_atoms == 24 + assert len(u.bonds.indices) == 25 + + def test_from_bad_smiles(self): + with pytest.raises(SyntaxError) as e: + u = mda.Universe.from_smiles("J", format='RDKIT') + assert "Error while parsing SMILES" in str(e.value) + + def test_no_Hs(self): + smi = "CN1C=NC2=C1C(=O)N(C(=O)N2C)C" + u = mda.Universe.from_smiles(smi, addHs=False, + generate_coordinates=False, format='RDKIT') + assert u.atoms.n_atoms == 14 + assert len(u.bonds.indices) == 15 + + def test_gencoords_without_Hs_error(self): + with pytest.raises(ValueError) as e: + u = mda.Universe.from_smiles("CCO", addHs=False, + generate_coordinates=True, format='RDKIT') + assert "requires adding hydrogens" in str (e.value) + + def test_generate_coordinates_numConfs(self): + with pytest.raises(SyntaxError) as e: + u = mda.Universe.from_smiles("CCO", numConfs=0, format='RDKIT') + assert "non-zero positive integer" in str (e.value) + with pytest.raises(SyntaxError) as e: + u = mda.Universe.from_smiles("CCO", numConfs=2.1, format='RDKIT') + assert "non-zero positive integer" in str (e.value) + + def test_rdkit_kwargs(self): + # test for bad kwarg: + # Unfortunately, exceptions from Boost cannot be passed to python, + # we cannot `from Boost.Python import ArgumentError` and use it with + # pytest.raises(ArgumentError), so "this is the way" + try: + u = mda.Universe.from_smiles("CCO", rdkit_kwargs=dict(abc=42)) + except Exception as e: + assert "did not match C++ signature" in str(e) + else: + raise AssertionError("RDKit should have raised an ArgumentError " + "from Boost") + # good kwarg + u1 = mda.Universe.from_smiles("C", rdkit_kwargs=dict(randomSeed=42)) + u2 = mda.Universe.from_smiles("C", rdkit_kwargs=dict(randomSeed=51)) + with pytest.raises(AssertionError) as e: + assert_equal(u1.trajectory.coordinate_array, + u2.trajectory.coordinate_array) + assert "Mismatched elements: 15 / 15 (100%)" in str(e.value) + + + def test_coordinates(self): + u = mda.Universe.from_smiles("C", numConfs=2, + rdkit_kwargs=dict(randomSeed=42)) + assert u.trajectory.n_frames == 2 + expected = np.array([ + [[-0.02209686, 0.00321505, 0.01651974], + [-0.6690088 , 0.8893599 , -0.1009085 ], + [-0.37778795, -0.8577519 , -0.58829606], + [ 0.09642092, -0.3151253 , 1.0637809 ], + [ 0.97247267, 0.28030226, -0.3910961 ]], + [[-0.0077073 , 0.00435363, 0.01834692], + [-0.61228824, -0.83705765, -0.38619974], + [-0.41925883, 0.9689095 , -0.3415968 ], + [ 0.03148226, -0.03256683, 1.1267245 ], + [ 1.0077721 , -0.10363862, -0.41727486]]], dtype=np.float32) + assert_almost_equal(u.trajectory.coordinate_array, expected) + + class TestUniverse(object): # older tests, still useful def test_load_bad_topology(self): diff --git a/testsuite/MDAnalysisTests/data/molecule.sdf b/testsuite/MDAnalysisTests/data/molecule.sdf new file mode 100644 index 00000000000..f8741e6cd8d --- /dev/null +++ b/testsuite/MDAnalysisTests/data/molecule.sdf @@ -0,0 +1,111 @@ + + Mrv1906 06042017153D 65.42000 + + 49 49 0 0 1 0 999 V2000 + 4.1617 -6.6571 -4.2692 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.3732 -6.3648 -2.9716 C 0 0 2 0 0 0 0 0 0 0 0 0 + 2.2915 -5.2684 -3.1684 C 0 0 2 0 0 0 0 0 0 0 0 0 + 1.6144 -4.8365 -1.8398 C 0 0 2 0 0 0 0 0 0 0 0 0 + 0.5462 -3.7266 -2.0410 C 0 0 2 0 0 0 0 0 0 0 0 0 + -0.1543 -3.3758 -0.7697 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.0713 -4.1813 -0.4645 O 0 0 0 0 0 0 0 0 0 0 0 0 + 0.1202 -2.2528 0.1222 N 0 3 2 0 0 0 0 0 0 0 0 0 + 0.3380 -2.7978 1.5409 C 0 0 1 0 0 0 0 0 0 0 0 0 + 1.3703 -3.8157 1.7531 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.2854 -4.7720 2.6926 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.3168 -5.7663 2.8920 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.0847 -1.2926 0.1515 C 0 0 1 0 0 0 0 0 0 0 0 0 + -1.3210 -0.4395 -1.0626 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.1119 0.9626 -1.0056 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.3022 1.7732 -2.1391 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.7066 1.1958 -3.3531 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.9382 -0.1889 -3.4295 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.7519 -0.9955 -2.2911 C 0 0 0 0 0 0 0 0 0 0 0 0 + -2.3387 -1.9286 0.4298 O 0 0 0 0 0 0 0 0 0 0 0 0 + 1.3554 -1.4508 -0.2576 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.4847 -6.9830 -5.0617 H 0 0 0 0 0 0 0 0 0 0 0 0 + 4.6907 -5.7600 -4.6002 H 0 0 0 0 0 0 0 0 0 0 0 0 + 4.8962 -7.4454 -4.0907 H 0 0 0 0 0 0 0 0 0 0 0 0 + 4.0831 -6.0473 -2.2028 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.8964 -7.2888 -2.6319 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.5293 -5.6461 -3.8560 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.7551 -4.3889 -3.6217 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.3872 -4.4651 -1.1634 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.1479 -5.7116 -1.3765 H 0 0 0 0 0 0 0 0 0 0 0 0 + -0.1979 -4.0762 -2.7637 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.0149 -2.8541 -2.4933 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.5939 -1.9790 2.2218 H 0 0 0 0 0 0 0 0 0 0 0 0 + -0.6069 -3.2018 1.9191 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.1878 -3.7852 1.1953 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.4812 -4.8129 3.2750 H 0 0 0 0 0 0 0 0 0 0 0 0 + 3.2887 -5.2853 3.0242 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.3566 -6.4315 2.0274 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.1040 -6.3627 3.7820 H 0 0 0 0 0 0 0 0 0 0 0 0 + -0.9513 -0.6146 1.0035 H 0 0 0 0 0 0 0 0 0 0 0 0 + -0.8146 1.4079 -0.1351 H 0 0 0 0 0 0 0 0 0 0 0 0 + -1.1445 2.7824 -2.0793 H 0 0 0 0 0 0 0 0 0 0 0 0 + -1.8357 1.7843 -4.1783 H 0 0 0 0 0 0 0 0 0 0 0 0 + -2.2428 -0.6076 -4.3112 H 0 0 0 0 0 0 0 0 0 0 0 0 + -1.9401 -1.9949 -2.3692 H 0 0 0 0 0 0 0 0 0 0 0 0 + -2.2461 -2.3218 1.3214 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.5469 -0.6522 0.4639 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.2588 -0.9729 -1.2334 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.2520 -2.0681 -0.3060 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1 2 1 0 0 0 0 + 2 3 1 0 0 0 0 + 3 4 1 0 0 0 0 + 4 5 1 0 0 0 0 + 5 6 1 0 0 0 0 + 6 7 2 0 0 0 0 + 8 6 1 0 0 0 0 + 8 9 1 0 0 0 0 + 9 10 1 0 0 0 0 + 10 11 2 0 0 0 0 + 11 12 1 0 0 0 0 + 8 13 1 0 0 0 0 + 13 14 1 0 0 0 0 + 14 15 2 0 0 0 0 + 15 16 1 0 0 0 0 + 16 17 2 0 0 0 0 + 17 18 1 0 0 0 0 + 18 19 2 0 0 0 0 + 14 19 1 0 0 0 0 + 13 20 1 0 0 0 0 + 8 21 1 0 0 0 0 + 1 22 1 0 0 0 0 + 1 23 1 0 0 0 0 + 1 24 1 0 0 0 0 + 2 25 1 0 0 0 0 + 2 26 1 0 0 0 0 + 3 27 1 0 0 0 0 + 3 28 1 0 0 0 0 + 4 29 1 0 0 0 0 + 4 30 1 0 0 0 0 + 5 31 1 0 0 0 0 + 5 32 1 0 0 0 0 + 9 33 1 0 0 0 0 + 9 34 1 0 0 0 0 + 10 35 1 0 0 0 0 + 11 36 1 0 0 0 0 + 12 37 1 0 0 0 0 + 12 38 1 0 0 0 0 + 12 39 1 0 0 0 0 + 13 40 1 0 0 0 0 + 15 41 1 0 0 0 0 + 16 42 1 0 0 0 0 + 17 43 1 0 0 0 0 + 18 44 1 0 0 0 0 + 19 45 1 0 0 0 0 + 20 46 1 0 0 0 0 + 21 47 1 0 0 0 0 + 21 48 1 0 0 0 0 + 21 49 1 0 0 0 0 +M CHG 1 8 1 +M END +> +N‐[(2E)‐but‐2‐en‐1‐yl]‐N‐[(R)‐hydroxy(phenyl)methyl]‐N‐methylhexanamidium + +> +0.31 + +$$$$ diff --git a/testsuite/MDAnalysisTests/datafiles.py b/testsuite/MDAnalysisTests/datafiles.py index cc12454aa35..6c275939a16 100644 --- a/testsuite/MDAnalysisTests/datafiles.py +++ b/testsuite/MDAnalysisTests/datafiles.py @@ -190,6 +190,7 @@ "PDB_CHECK_RIGHTHAND_PA", # for testing right handedness of principal_axes "MMTF_NOCRYST", # File with meaningless CRYST1 record (Issue #2679, PR #2685) "FHIAIMS", # to test FHIAIMS coordinate files + "SDF_molecule" # MDL SDFile for rdkit ] from pkg_resources import resource_filename @@ -520,5 +521,7 @@ NAMDBIN = resource_filename(__name__, 'data/adk_open.coor') +SDF_molecule = resource_filename(__name__, 'data/molecule.sdf') + # This should be the last line: clean up namespace del resource_filename diff --git a/testsuite/MDAnalysisTests/topology/test_rdkit.py b/testsuite/MDAnalysisTests/topology/test_rdkit.py new file mode 100644 index 00000000000..1a4218e55e2 --- /dev/null +++ b/testsuite/MDAnalysisTests/topology/test_rdkit.py @@ -0,0 +1,189 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 fileencoding=utf-8 +# +# MDAnalysis --- https://www.mdanalysis.org +# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, +# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. +# MDAnalysis: A Python package for the rapid analysis of molecular dynamics +# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th +# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. +# doi: 10.25080/majora-629e541a-00e +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 +# + +import warnings +import pytest +import numpy as np +from numpy.testing import assert_equal + +import MDAnalysis as mda +from MDAnalysisTests.topology.base import ParserBase +from MDAnalysisTests.datafiles import mol2_molecule, PDB_helix, SDF_molecule + +Chem = pytest.importorskip('rdkit.Chem') +AllChem = pytest.importorskip('rdkit.Chem.AllChem') + +class RDKitParserBase(ParserBase): + parser = mda.topology.RDKitParser.RDKitParser + expected_attrs = ['ids', 'names', 'elements', 'masses', 'aromaticities', + 'resids', 'resnums', + 'segids', + 'bonds', + ] + + expected_n_atoms = 0 + expected_n_residues = 1 + expected_n_segments = 1 + expected_n_bonds = 0 + + def test_creates_universe(self, filename): + u = mda.Universe(filename, format='RDKIT') + assert isinstance(u, mda.Universe) + + def test_bonds_total_counts(self, top): + assert len(top.bonds.values) == self.expected_n_bonds + + +class TestRDKitParserMOL2(RDKitParserBase): + ref_filename = mol2_molecule + + expected_attrs = RDKitParserBase.expected_attrs + ['charges'] + + expected_n_atoms = 49 + expected_n_residues = 1 + expected_n_segments = 1 + expected_n_bonds = 51 + + @pytest.fixture + def filename(self): + return Chem.MolFromMol2File(self.ref_filename, removeHs=False) + + def _create_mol_gasteiger_charges(self): + mol = Chem.MolFromMol2File(self.ref_filename, removeHs=False) + AllChem.ComputeGasteigerCharges(mol) + return mol + + def _remove_tripos_charges(self, mol): + for atom in mol.GetAtoms(): + atom.ClearProp("_TriposPartialCharge") + + @pytest.fixture + def top_gas_tripos(self): + mol = self._create_mol_gasteiger_charges() + return self.parser(mol).parse() + + @pytest.fixture + def filename_gasteiger(self): + mol = self._create_mol_gasteiger_charges() + self._remove_tripos_charges(mol) + return mol + + @pytest.fixture + def top_gasteiger(self): + mol = self._create_mol_gasteiger_charges() + self._remove_tripos_charges(mol) + return self.parser(mol).parse() + + + def test_bond_orders(self, top, filename): + expected = [bond.GetBondTypeAsDouble() for bond in filename.GetBonds()] + assert top.bonds.order == expected + + def test_multiple_charge_priority(self, + top_gas_tripos, filename_gasteiger): + expected = np.array([ + a.GetDoubleProp('_GasteigerCharge') for a in + filename_gasteiger.GetAtoms()], dtype=np.float32) + assert_equal(expected, top_gas_tripos.charges.values) + + def test_multiple_charge_props_warning(self): + with warnings.catch_warnings(record=True) as w: + # Cause all warnings to always be triggered. + warnings.simplefilter("always") + mol = self._create_mol_gasteiger_charges() + # Trigger a warning. + top = self.parser(mol).parse() + # Verify the warning + assert len(w) == 1 + assert "_GasteigerCharge and _TriposPartialCharge" in str( + w[-1].message) + + def test_gasteiger_charges(self, top_gasteiger, filename_gasteiger): + expected = np.array([ + a.GetDoubleProp('_GasteigerCharge') for a in + filename_gasteiger.GetAtoms()], dtype=np.float32) + assert_equal(expected, top_gasteiger.charges.values) + + def test_tripos_charges(self, top, filename): + expected = np.array([ + a.GetDoubleProp('_TriposPartialCharge') for a in filename.GetAtoms() + ], dtype=np.float32) + assert_equal(expected, top.charges.values) + + def test_aromaticity(self, top, filename): + expected = np.array([ + atom.GetIsAromatic() for atom in filename.GetAtoms()]) + assert_equal(expected, top.aromaticities.values) + + +class TestRDKitParserPDB(RDKitParserBase): + ref_filename = PDB_helix + + expected_attrs = RDKitParserBase.expected_attrs + ['resnames', 'altLocs', + 'chainIDs', 'occupancies', 'tempfactors'] + guessed_attrs = ['types'] + + expected_n_atoms = 137 + expected_n_residues = 13 + expected_n_segments = 1 + expected_n_bonds = 137 + + @pytest.fixture + def filename(self): + return Chem.MolFromPDBFile(self.ref_filename, removeHs=False) + + +class TestRDKitParserSMILES(RDKitParserBase): + ref_filename = "CN1C=NC2=C1C(=O)N(C(=O)N2C)C" + + guessed_attrs = ['types'] + + expected_n_atoms = 24 + expected_n_residues = 1 + expected_n_segments = 1 + expected_n_bonds = 25 + + @pytest.fixture + def filename(self): + mol = Chem.MolFromSmiles(self.ref_filename) + mol = Chem.AddHs(mol) + return mol + + +class TestRDKitParserSDF(RDKitParserBase): + ref_filename = SDF_molecule + + guessed_attrs = ['types'] + + expected_n_atoms = 49 + expected_n_residues = 1 + expected_n_segments = 1 + expected_n_bonds = 49 + + @pytest.fixture + def filename(self): + return Chem.SDMolSupplier(SDF_molecule, removeHs=False)[0] + + def test_bond_orders(self, top, filename): + expected = [bond.GetBondTypeAsDouble() for bond in filename.GetBonds()] + assert top.bonds.order == expected \ No newline at end of file