From ba1f3e44982aec88373389120845f3fe5825740d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Mon, 1 Jun 2020 19:54:56 +0200 Subject: [PATCH 01/39] barebones topology from RDKitParser --- package/MDAnalysis/topology/RDKitParser.py | 154 +++++++++++++++++++++ 1 file changed, 154 insertions(+) create mode 100644 package/MDAnalysis/topology/RDKitParser.py diff --git a/package/MDAnalysis/topology/RDKitParser.py b/package/MDAnalysis/topology/RDKitParser.py new file mode 100644 index 00000000000..0b57159e375 --- /dev/null +++ b/package/MDAnalysis/topology/RDKitParser.py @@ -0,0 +1,154 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 +# +# MDAnalysis --- https://www.mdanalysis.org +# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, +# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. +# MDAnalysis: A Python package for the rapid analysis of molecular dynamics +# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th +# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. +# doi: 10.25080/majora-629e541a-00e +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 +# + +""" +RDKit topology parser +===================== + +Converts a `RDKit `_ +:class:`rdkit.Chem.rdchem.Mol` into a :class:`MDAnalysis.core.Topology`. + + +Example +------- +TODO + +Classes +------- + +.. autoclass:: RDKitParser + :members: + :inherited-members: + +""" +from __future__ import absolute_import, division +from six.moves import range +from six import raise_from + +import logging +import numpy as np + +from .base import TopologyReaderBase, squash_by +from ..core.topologyattrs import ( + Atomids, + Atomnames, + Elements, + Masses, + Bonds, + Resids, + Resnums, + Resnames, + Segids, +) +from ..core.topology import Topology + +logger = logging.getLogger("MDAnalysis.topology.RDKitParser") + + +class RDKitParser(TopologyReaderBase): + """ + For RDKit structures + """ + format = 'RDKIT' + + @staticmethod + def _format_hint(thing): + """Can this Parser read object *thing*? + + .. versionadded:: 1.0.0 + """ + try: + from rdkit import Chem + except ImportError: # if no rdkit, probably not rdkit + return False + else: + return isinstance(thing, Chem.Mol) + + def parse(self, **kwargs): + """Parse RDKit into Topology + + Returns + ------- + MDAnalysis *Topology* object + """ + + mol = self.filename + + # Atoms + names = [] + residue_numbers = [] + residue_names = [] + elements = [] + masses = [] + ids = [] + for atom in mol.GetAtoms(): + ids.append(atom.GetIdx()) + elements.append(atom.GetSymbol()) + masses.append(atom.GetMass()) + mi = atom.GetMonomerInfo() + if mi: # atom name and residue info are present + names.append(mi.GetName().strip()) + residue_numbers.append(mi.GetResidueNumber()) + residue_names.append(mi.GetResidueName()) + else: + pass + + # make Topology attributes + residx, residue_numbers, (residue_names,) = squash_by( + residue_numbers, np.array(residue_names)) + attrs = [] + n_atoms = len(names) + n_residues = len(residue_numbers) + + for vals, Attr, dtype in ( + (ids, Atomids, np.int32), + (elements, Elements, object), + (names, Atomnames, object), + (masses, Masses, np.float32), + (residue_numbers, Resids, np.int32), + (residue_numbers.copy(), Resnums, np.int32), + (residue_names, Resnames, object) + ): + attrs.append(Attr(np.array(vals, dtype=dtype))) + + # Bonds + bonds = [] + bond_types = [] + bond_orders = [] + for bond in mol.GetBonds(): + bonds.append((bond.GetBeginAtomIdx(), bond.GetEndAtomIdx())) + bond_orders.append(bond.GetBondTypeAsDouble()) + bond_types.append(str(bond.GetBondType())) + + attrs.append(Bonds(bonds, types=bond_types, guessed=False, + order=bond_orders)) + + # Others + #TODO + attrs.append(Segids(np.array(['SYSTEM'], dtype=object))) + + top = Topology(n_atoms, n_residues, 1, + attrs=attrs, + atom_resindex=residx) + + return top \ No newline at end of file From 7e172fcc97ac98eab7f93951ca01ff2598db0c6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Tue, 2 Jun 2020 20:11:14 +0200 Subject: [PATCH 02/39] test for PDB and MOL2 + fix atomtypes and residues --- package/MDAnalysis/topology/RDKitParser.py | 45 +++++++--- package/MDAnalysis/topology/__init__.py | 4 +- .../MDAnalysisTests/topology/test_rdkit.py | 90 +++++++++++++++++++ 3 files changed, 125 insertions(+), 14 deletions(-) create mode 100644 testsuite/MDAnalysisTests/topology/test_rdkit.py diff --git a/package/MDAnalysis/topology/RDKitParser.py b/package/MDAnalysis/topology/RDKitParser.py index 0b57159e375..d4664c29524 100644 --- a/package/MDAnalysis/topology/RDKitParser.py +++ b/package/MDAnalysis/topology/RDKitParser.py @@ -42,16 +42,16 @@ """ from __future__ import absolute_import, division -from six.moves import range -from six import raise_from import logging import numpy as np from .base import TopologyReaderBase, squash_by +from . import guessers from ..core.topologyattrs import ( Atomids, Atomnames, + Atomtypes, Elements, Masses, Bonds, @@ -101,6 +101,7 @@ def parse(self, **kwargs): elements = [] masses = [] ids = [] + atomtypes = [] for atom in mol.GetAtoms(): ids.append(atom.GetIdx()) elements.append(atom.GetSymbol()) @@ -111,26 +112,31 @@ def parse(self, **kwargs): residue_numbers.append(mi.GetResidueNumber()) residue_names.append(mi.GetResidueName()) else: - pass + for prop, value in atom.GetPropsAsDict(True).items(): + if 'atomname' in prop.lower(): # usually _TriposAtomName + names.append(value) + elif 'atomtype' in prop.lower(): # usually _TriposAtomType + atomtypes.append(value) # make Topology attributes - residx, residue_numbers, (residue_names,) = squash_by( - residue_numbers, np.array(residue_names)) attrs = [] - n_atoms = len(names) - n_residues = len(residue_numbers) + n_atoms = len(ids) for vals, Attr, dtype in ( (ids, Atomids, np.int32), (elements, Elements, object), (names, Atomnames, object), (masses, Masses, np.float32), - (residue_numbers, Resids, np.int32), - (residue_numbers.copy(), Resnums, np.int32), - (residue_names, Resnames, object) ): attrs.append(Attr(np.array(vals, dtype=dtype))) + # Optional + if atomtypes: + attrs.append(Atomtypes(np.array(atomtypes, dtype=object))) + else: + atomtypes = guessers.guess_types(names) + attrs.append(Atomtypes(atomtypes, guessed=True)) + # Bonds bonds = [] bond_types = [] @@ -140,11 +146,24 @@ def parse(self, **kwargs): bond_orders.append(bond.GetBondTypeAsDouble()) bond_types.append(str(bond.GetBondType())) - attrs.append(Bonds(bonds, types=bond_types, guessed=False, - order=bond_orders)) + attrs.append(Bonds(bonds, types=bond_types, order=bond_orders)) # Others - #TODO + residx, residue_numbers, (residue_names,) = squash_by( + residue_numbers, np.array(residue_names)) + n_residues = len(residue_numbers) + if n_residues > 0: + for vals, Attr, dtype in ( + (residue_numbers, Resids, np.int32), + (residue_numbers.copy(), Resnums, np.int32), + (residue_names, Resnames, object) + ): + attrs.append(Attr(np.array(vals, dtype=dtype))) + else: + attrs.append(Resids(np.array([1]))) + attrs.append(Resnums(np.array([1]))) + residx = None + n_residues = 1 attrs.append(Segids(np.array(['SYSTEM'], dtype=object))) top = Topology(n_atoms, n_residues, 1, diff --git a/package/MDAnalysis/topology/__init__.py b/package/MDAnalysis/topology/__init__.py index a5a21069306..d3d912090e3 100644 --- a/package/MDAnalysis/topology/__init__.py +++ b/package/MDAnalysis/topology/__init__.py @@ -288,7 +288,8 @@ __all__ = ['core', 'PSFParser', 'PDBParser', 'PQRParser', 'GROParser', 'CRDParser', 'TOPParser', 'PDBQTParser', 'TPRParser', 'LAMMPSParser', 'XYZParser', 'GMSParser', 'DLPolyParser', - 'HoomdXMLParser','GSDParser', 'ITPParser', 'ParmEdParser'] + 'HoomdXMLParser','GSDParser', 'ITPParser', 'ParmEdParser', + 'RDKitParser'] from . import core from . import PSFParser @@ -313,3 +314,4 @@ from . import MinimalParser from . import ITPParser from . import ParmEdParser +from . import RDKitParser diff --git a/testsuite/MDAnalysisTests/topology/test_rdkit.py b/testsuite/MDAnalysisTests/topology/test_rdkit.py new file mode 100644 index 00000000000..d0552ceac89 --- /dev/null +++ b/testsuite/MDAnalysisTests/topology/test_rdkit.py @@ -0,0 +1,90 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 fileencoding=utf-8 +# +# MDAnalysis --- https://www.mdanalysis.org +# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, +# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. +# MDAnalysis: A Python package for the rapid analysis of molecular dynamics +# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th +# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. +# doi: 10.25080/majora-629e541a-00e +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 +# +from __future__ import absolute_import + +import pytest + +import MDAnalysis as mda +from MDAnalysisTests.topology.base import ParserBase +from MDAnalysisTests.datafiles import mol2_molecule, PDB_helix + +Chem = pytest.importorskip('rdkit.Chem') + +class RDKitParserBase(ParserBase): + parser = mda.topology.RDKitParser.RDKitParser + expected_attrs = ['ids', 'names', 'elements', 'masses', + 'resids', 'resnums', + 'segids', + 'bonds', + ] + + expected_n_atoms = 0 + expected_n_residues = 1 + expected_n_segments = 1 + expected_n_bonds = 0 + + def test_creates_universe(self, filename): + u = mda.Universe(filename, format='RDKIT') + assert isinstance(u, mda.Universe) + + def test_bonds_total_counts(self, top): + assert len(top.bonds.values) == self.expected_n_bonds + + +class TestRDKitParserMOL2(RDKitParserBase): + ref_filename = mol2_molecule + + expected_n_atoms = 49 + expected_n_residues = 1 + expected_n_segments = 1 + expected_n_bonds = 51 + + @pytest.fixture + def filename(self): + return Chem.MolFromMol2File(self.ref_filename, removeHs=False) + + def test_bond_orders(self, top, filename): + # not exactly what's written in the mol2 file but the 2 double bonds in + # the thiophene moiety should be written as aromatic, and they should + # be perceived as such by RDKit + expected = [1.5, 1, 1, 2, 1, 2, 1, 1, 1.5, 1, 1, 1.5, 2, 2, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1.5, 1.5, 1.5, 1, 1.5, 1, 1.5, 1, 1.5, 1, + 1, 1, 1.5, 1, 1, 1, 1, 1.5, 1, 1, 2, 1, 1] + expected = [float(i) for i in expected] + assert sorted(top.bonds.order) == sorted(expected) + + +class TestRDKitParserPDB(RDKitParserBase): + ref_filename = PDB_helix + + expected_attrs = RDKitParserBase.expected_attrs + ['resnames'] + guessed_attrs = ['types'] + + expected_n_atoms = 137 + expected_n_residues = 13 + expected_n_segments = 1 + expected_n_bonds = 137 + + @pytest.fixture + def filename(self): + return Chem.MolFromPDBFile(self.ref_filename, removeHs=False) \ No newline at end of file From c4365175d79cb0effbaac11d138abd5b6cd79ebe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Wed, 3 Jun 2020 18:33:04 +0200 Subject: [PATCH 03/39] fix test on bond orders --- .../MDAnalysisTests/topology/test_rdkit.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/testsuite/MDAnalysisTests/topology/test_rdkit.py b/testsuite/MDAnalysisTests/topology/test_rdkit.py index d0552ceac89..e2ed4b17ffb 100644 --- a/testsuite/MDAnalysisTests/topology/test_rdkit.py +++ b/testsuite/MDAnalysisTests/topology/test_rdkit.py @@ -61,17 +61,18 @@ class TestRDKitParserMOL2(RDKitParserBase): @pytest.fixture def filename(self): - return Chem.MolFromMol2File(self.ref_filename, removeHs=False) + return Chem.MolFromMol2File(self.ref_filename, removeHs=False, + sanitize=False) def test_bond_orders(self, top, filename): - # not exactly what's written in the mol2 file but the 2 double bonds in - # the thiophene moiety should be written as aromatic, and they should - # be perceived as such by RDKit - expected = [1.5, 1, 1, 2, 1, 2, 1, 1, 1.5, 1, 1, 1.5, 2, 2, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1.5, 1.5, 1.5, 1, 1.5, 1, 1.5, 1, 1.5, 1, - 1, 1, 1.5, 1, 1, 1, 1, 1.5, 1, 1, 2, 1, 1] + # The 3 first aromatic bonds in the mol2 file are not actually + # aromatic but just part of a conjugated system. RDKit doesn't follow + # the mol2 file and marks them as single, even with `sanitize=False`. + expected = [1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1.5, 1.5, 1.5, 1, 1.5, 1, 1.5, 1, 1.5, 1, 1, 1, + 2, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1] expected = [float(i) for i in expected] - assert sorted(top.bonds.order) == sorted(expected) + assert top.bonds.order == expected class TestRDKitParserPDB(RDKitParserBase): From b25898f9a4096fd950007513184fc757dc261e43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Wed, 3 Jun 2020 18:41:36 +0200 Subject: [PATCH 04/39] add SMILES parser test --- .../MDAnalysisTests/topology/test_rdkit.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/testsuite/MDAnalysisTests/topology/test_rdkit.py b/testsuite/MDAnalysisTests/topology/test_rdkit.py index e2ed4b17ffb..13bbf1680a0 100644 --- a/testsuite/MDAnalysisTests/topology/test_rdkit.py +++ b/testsuite/MDAnalysisTests/topology/test_rdkit.py @@ -88,4 +88,21 @@ class TestRDKitParserPDB(RDKitParserBase): @pytest.fixture def filename(self): - return Chem.MolFromPDBFile(self.ref_filename, removeHs=False) \ No newline at end of file + return Chem.MolFromPDBFile(self.ref_filename, removeHs=False) + + +class TestRDKitParserSMILES(RDKitParserBase): + ref_filename = "CN1C=NC2=C1C(=O)N(C(=O)N2C)C" + + guessed_attrs = ['types'] + + expected_n_atoms = 24 + expected_n_residues = 1 + expected_n_segments = 1 + expected_n_bonds = 25 + + @pytest.fixture + def filename(self): + mol = Chem.MolFromSmiles(self.ref_filename) + mol = Chem.AddHs(mol) + return mol \ No newline at end of file From 60af718a9628f870991d99f8fd9f1b610e9ad583 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Wed, 3 Jun 2020 18:55:25 +0200 Subject: [PATCH 05/39] reorganize code + create name if not present --- package/MDAnalysis/topology/RDKitParser.py | 39 +++++++++++++++------- 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/package/MDAnalysis/topology/RDKitParser.py b/package/MDAnalysis/topology/RDKitParser.py index d4664c29524..871d120433a 100644 --- a/package/MDAnalysis/topology/RDKitParser.py +++ b/package/MDAnalysis/topology/RDKitParser.py @@ -116,27 +116,22 @@ def parse(self, **kwargs): if 'atomname' in prop.lower(): # usually _TriposAtomName names.append(value) elif 'atomtype' in prop.lower(): # usually _TriposAtomType - atomtypes.append(value) - + atomtypes.append(value) + # make Topology attributes attrs = [] n_atoms = len(ids) + # * Attributes always present * + + # Atom index, symbol and mass for vals, Attr, dtype in ( (ids, Atomids, np.int32), (elements, Elements, object), - (names, Atomnames, object), (masses, Masses, np.float32), ): attrs.append(Attr(np.array(vals, dtype=dtype))) - # Optional - if atomtypes: - attrs.append(Atomtypes(np.array(atomtypes, dtype=object))) - else: - atomtypes = guessers.guess_types(names) - attrs.append(Atomtypes(atomtypes, guessed=True)) - # Bonds bonds = [] bond_types = [] @@ -145,10 +140,27 @@ def parse(self, **kwargs): bonds.append((bond.GetBeginAtomIdx(), bond.GetEndAtomIdx())) bond_orders.append(bond.GetBondTypeAsDouble()) bond_types.append(str(bond.GetBondType())) - attrs.append(Bonds(bonds, types=bond_types, order=bond_orders)) - # Others + # * Optional attributes * + + # Atom name + if names: + attrs.append(Atomnames(np.array(names, dtype=object))) + else: + for atom in mol.GetAtoms(): + name = "%s%d" % (atom.GetSymbol(), atom.GetIdx()) + names.append(name) + attrs.append(Atomnames(np.array(names, dtype=object))) + + # Atom type + if atomtypes: + attrs.append(Atomtypes(np.array(atomtypes, dtype=object))) + else: + atomtypes = guessers.guess_types(names) + attrs.append(Atomtypes(atomtypes, guessed=True)) + + # Residue residx, residue_numbers, (residue_names,) = squash_by( residue_numbers, np.array(residue_names)) n_residues = len(residue_numbers) @@ -164,8 +176,11 @@ def parse(self, **kwargs): attrs.append(Resnums(np.array([1]))) residx = None n_residues = 1 + + # Segment attrs.append(Segids(np.array(['SYSTEM'], dtype=object))) + # create topology top = Topology(n_atoms, n_residues, 1, attrs=attrs, atom_resindex=residx) From 94f18b5be57006e4b844f06e76849a8837d22a86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Wed, 3 Jun 2020 19:29:07 +0200 Subject: [PATCH 06/39] added Segids attribute --- package/MDAnalysis/topology/RDKitParser.py | 44 ++++++++++++++-------- 1 file changed, 29 insertions(+), 15 deletions(-) diff --git a/package/MDAnalysis/topology/RDKitParser.py b/package/MDAnalysis/topology/RDKitParser.py index 871d120433a..34b744d8758 100644 --- a/package/MDAnalysis/topology/RDKitParser.py +++ b/package/MDAnalysis/topology/RDKitParser.py @@ -46,7 +46,7 @@ import logging import numpy as np -from .base import TopologyReaderBase, squash_by +from .base import TopologyReaderBase, change_squash from . import guessers from ..core.topologyattrs import ( Atomids, @@ -96,12 +96,13 @@ def parse(self, **kwargs): # Atoms names = [] - residue_numbers = [] - residue_names = [] + resnums = [] + resnames = [] elements = [] masses = [] ids = [] atomtypes = [] + segids = [] for atom in mol.GetAtoms(): ids.append(atom.GetIdx()) elements.append(atom.GetSymbol()) @@ -109,8 +110,9 @@ def parse(self, **kwargs): mi = atom.GetMonomerInfo() if mi: # atom name and residue info are present names.append(mi.GetName().strip()) - residue_numbers.append(mi.GetResidueNumber()) - residue_names.append(mi.GetResidueName()) + resnums.append(mi.GetResidueNumber()) + resnames.append(mi.GetResidueName()) + segids.append(mi.GetSegmentNumber()) else: for prop, value in atom.GetPropsAsDict(True).items(): if 'atomname' in prop.lower(): # usually _TriposAtomName @@ -161,14 +163,18 @@ def parse(self, **kwargs): attrs.append(Atomtypes(atomtypes, guessed=True)) # Residue - residx, residue_numbers, (residue_names,) = squash_by( - residue_numbers, np.array(residue_names)) - n_residues = len(residue_numbers) - if n_residues > 0: + if any(resnums) and not any(val is None for val in resnums): + resnums = np.array(resnums, dtype=np.int32) + resnames = np.array(resnames, dtype=object) + segids = np.array(segids, dtype=object) + residx, (resnums, resnames, segids) = change_squash( + (resnums, resnames, segids), + (resnums, resnames, segids)) + n_residues = len(resnums) for vals, Attr, dtype in ( - (residue_numbers, Resids, np.int32), - (residue_numbers.copy(), Resnums, np.int32), - (residue_names, Resnames, object) + (resnums, Resids, np.int32), + (resnums.copy(), Resnums, np.int32), + (resnames, Resnames, object) ): attrs.append(Attr(np.array(vals, dtype=dtype))) else: @@ -178,11 +184,19 @@ def parse(self, **kwargs): n_residues = 1 # Segment - attrs.append(Segids(np.array(['SYSTEM'], dtype=object))) + if any(segids) and not any(val is None for val in segids): + segidx, (segids,) = change_squash((segids,), (segids,)) + n_segments = len(segids) + attrs.append(Segids(segids)) + else: + n_segments = 1 + attrs.append(Segids(np.array(['SYSTEM'], dtype=object))) + segidx = None # create topology - top = Topology(n_atoms, n_residues, 1, + top = Topology(n_atoms, n_residues, n_segments, attrs=attrs, - atom_resindex=residx) + atom_resindex=residx, + residue_segindex=segidx) return top \ No newline at end of file From fdb6513c2a5ec23a8629f0bb37e65ed6faca9e68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Wed, 3 Jun 2020 20:01:27 +0200 Subject: [PATCH 07/39] added rdkit to appveyor and travis --- .appveyor.yml | 4 ++-- .travis.yml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.appveyor.yml b/.appveyor.yml index b3a24c9b0f7..c9cd30207a1 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -9,8 +9,8 @@ cache: environment: global: - CONDA_CHANNELS: conda-forge - CONDA_DEPENDENCIES: pip setuptools wheel cython mock six biopython networkx joblib matplotlib scipy vs2015_runtime pytest mmtf-python GridDataFormats hypothesis pytest-cov codecov chemfiles tqdm + CONDA_CHANNELS: conda-forge rdkit + CONDA_DEPENDENCIES: pip setuptools wheel cython mock six biopython networkx joblib matplotlib scipy vs2015_runtime pytest mmtf-python GridDataFormats hypothesis pytest-cov codecov chemfiles tqdm rdkit PIP_DEPENDENCIES: gsd==1.9.3 duecredit parmed DEBUG: "False" MINGW_64: C:\mingw-w64\x86_64-6.3.0-posix-seh-rt_v5-rev1\mingw64\bin diff --git a/.travis.yml b/.travis.yml index d526eb12d93..37a37db6c03 100644 --- a/.travis.yml +++ b/.travis.yml @@ -28,8 +28,8 @@ env: - SETUP_CMD="${PYTEST_FLAGS}" - BUILD_CMD="pip install -e package/ && (cd testsuite/ && python setup.py build)" - CONDA_MIN_DEPENDENCIES="mmtf-python mock six biopython networkx cython matplotlib scipy griddataformats hypothesis gsd codecov" - - CONDA_DEPENDENCIES="${CONDA_MIN_DEPENDENCIES} seaborn>=0.7.0 clustalw=2.1 netcdf4 scikit-learn joblib>=0.12 chemfiles tqdm>=4.43.0" - - CONDA_CHANNELS='biobuilds conda-forge' + - CONDA_DEPENDENCIES="${CONDA_MIN_DEPENDENCIES} seaborn>=0.7.0 clustalw=2.1 netcdf4 scikit-learn joblib>=0.12 chemfiles tqdm>=4.43.0 rdkit" + - CONDA_CHANNELS='biobuilds conda-forge rdkit' - CONDA_CHANNEL_PRIORITY=True - PIP_DEPENDENCIES="duecredit parmed" - NUMPY_VERSION=stable From 9caaa5c1d0ee85a128a2c98b32a56db45c8a9250 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Thu, 4 Jun 2020 17:00:04 +0200 Subject: [PATCH 08/39] added SDF file for rdkit tests --- testsuite/MDAnalysisTests/data/molecule.sdf | 55 +++++++++++++++++++++ testsuite/MDAnalysisTests/datafiles.py | 5 +- 2 files changed, 59 insertions(+), 1 deletion(-) create mode 100644 testsuite/MDAnalysisTests/data/molecule.sdf diff --git a/testsuite/MDAnalysisTests/data/molecule.sdf b/testsuite/MDAnalysisTests/data/molecule.sdf new file mode 100644 index 00000000000..bea24264a1b --- /dev/null +++ b/testsuite/MDAnalysisTests/data/molecule.sdf @@ -0,0 +1,55 @@ + + Mrv1906 06042016552D 65.42000 + + 21 21 0 0 1 0 999 V2000 + -1.1690 1.9925 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.1690 1.1675 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.4545 0.7550 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.4545 -0.0700 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.2599 -0.4825 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.2599 -1.3075 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.4545 -1.7200 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 0.9744 -1.7200 0.0000 N 0 3 2 0 0 0 0 0 0 0 0 0 + 0.5619 -2.4345 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.2631 -2.4345 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.6756 -3.1490 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.5006 -3.1490 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.6889 -2.1325 0.0000 C 0 0 1 0 0 0 0 0 0 0 0 0 + 1.6889 -2.9575 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.4033 -3.3700 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.4033 -4.1950 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.6889 -4.6075 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.9744 -4.1950 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.9744 -3.3700 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.4033 -1.7200 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 1.3869 -1.0056 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1 2 1 0 0 0 0 + 2 3 1 0 0 0 0 + 3 4 1 0 0 0 0 + 4 5 1 0 0 0 0 + 5 6 1 0 0 0 0 + 6 7 2 0 0 0 0 + 8 6 1 1 0 0 0 + 8 9 1 0 0 0 0 + 9 10 1 0 0 0 0 + 10 11 2 0 0 0 0 + 11 12 1 0 0 0 0 + 8 13 1 0 0 0 0 + 13 14 1 0 0 0 0 + 14 15 2 0 0 0 0 + 15 16 1 0 0 0 0 + 16 17 2 0 0 0 0 + 17 18 1 0 0 0 0 + 18 19 2 0 0 0 0 + 14 19 1 0 0 0 0 + 13 20 1 6 0 0 0 + 8 21 1 6 0 0 0 +M CHG 1 8 1 +M END +> +N‐[(2E)‐but‐2‐en‐1‐yl]‐N‐[(R)‐hydroxy(phenyl)methyl]‐N‐methylhexanamidium + +> +0.31 + +$$$$ diff --git a/testsuite/MDAnalysisTests/datafiles.py b/testsuite/MDAnalysisTests/datafiles.py index 6611b566322..489a53346f7 100644 --- a/testsuite/MDAnalysisTests/datafiles.py +++ b/testsuite/MDAnalysisTests/datafiles.py @@ -187,7 +187,8 @@ "GMX_TOP_BAD", # file with an #include that doesn't exist "ITP_no_endif", # file missing an #endif "PDB_CRYOEM_BOX", #Issue 2599 - "PDB_CHECK_RIGHTHAND_PA" # for testing right handedness of principal_axes + "PDB_CHECK_RIGHTHAND_PA", # for testing right handedness of principal_axes + "SDF_molecule" # MDL SDFile ] from pkg_resources import resource_filename @@ -514,5 +515,7 @@ NAMDBIN = resource_filename(__name__, 'data/adk_open.coor') +SDF_molecule = resource_filename(__name__, 'data/molecule.sdf') + # This should be the last line: clean up namespace del resource_filename From 88b21307f1ac33af89209d065ba4d2fd5a743c0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Thu, 4 Jun 2020 17:16:31 +0200 Subject: [PATCH 09/39] forgot hydrogens and 3D on SDFile --- testsuite/MDAnalysisTests/data/molecule.sdf | 108 +++++++++++++++----- 1 file changed, 82 insertions(+), 26 deletions(-) diff --git a/testsuite/MDAnalysisTests/data/molecule.sdf b/testsuite/MDAnalysisTests/data/molecule.sdf index bea24264a1b..f8741e6cd8d 100644 --- a/testsuite/MDAnalysisTests/data/molecule.sdf +++ b/testsuite/MDAnalysisTests/data/molecule.sdf @@ -1,35 +1,63 @@ - Mrv1906 06042016552D 65.42000 + Mrv1906 06042017153D 65.42000 - 21 21 0 0 1 0 999 V2000 - -1.1690 1.9925 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - -1.1690 1.1675 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - -0.4545 0.7550 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - -0.4545 -0.0700 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - 0.2599 -0.4825 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - 0.2599 -1.3075 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - -0.4545 -1.7200 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 - 0.9744 -1.7200 0.0000 N 0 3 2 0 0 0 0 0 0 0 0 0 - 0.5619 -2.4345 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - -0.2631 -2.4345 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - -0.6756 -3.1490 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - -1.5006 -3.1490 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - 1.6889 -2.1325 0.0000 C 0 0 1 0 0 0 0 0 0 0 0 0 - 1.6889 -2.9575 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - 2.4033 -3.3700 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - 2.4033 -4.1950 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - 1.6889 -4.6075 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - 0.9744 -4.1950 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - 0.9744 -3.3700 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - 2.4033 -1.7200 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 - 1.3869 -1.0056 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 49 49 0 0 1 0 999 V2000 + 4.1617 -6.6571 -4.2692 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.3732 -6.3648 -2.9716 C 0 0 2 0 0 0 0 0 0 0 0 0 + 2.2915 -5.2684 -3.1684 C 0 0 2 0 0 0 0 0 0 0 0 0 + 1.6144 -4.8365 -1.8398 C 0 0 2 0 0 0 0 0 0 0 0 0 + 0.5462 -3.7266 -2.0410 C 0 0 2 0 0 0 0 0 0 0 0 0 + -0.1543 -3.3758 -0.7697 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.0713 -4.1813 -0.4645 O 0 0 0 0 0 0 0 0 0 0 0 0 + 0.1202 -2.2528 0.1222 N 0 3 2 0 0 0 0 0 0 0 0 0 + 0.3380 -2.7978 1.5409 C 0 0 1 0 0 0 0 0 0 0 0 0 + 1.3703 -3.8157 1.7531 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.2854 -4.7720 2.6926 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.3168 -5.7663 2.8920 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.0847 -1.2926 0.1515 C 0 0 1 0 0 0 0 0 0 0 0 0 + -1.3210 -0.4395 -1.0626 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.1119 0.9626 -1.0056 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.3022 1.7732 -2.1391 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.7066 1.1958 -3.3531 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.9382 -0.1889 -3.4295 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.7519 -0.9955 -2.2911 C 0 0 0 0 0 0 0 0 0 0 0 0 + -2.3387 -1.9286 0.4298 O 0 0 0 0 0 0 0 0 0 0 0 0 + 1.3554 -1.4508 -0.2576 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.4847 -6.9830 -5.0617 H 0 0 0 0 0 0 0 0 0 0 0 0 + 4.6907 -5.7600 -4.6002 H 0 0 0 0 0 0 0 0 0 0 0 0 + 4.8962 -7.4454 -4.0907 H 0 0 0 0 0 0 0 0 0 0 0 0 + 4.0831 -6.0473 -2.2028 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.8964 -7.2888 -2.6319 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.5293 -5.6461 -3.8560 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.7551 -4.3889 -3.6217 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.3872 -4.4651 -1.1634 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.1479 -5.7116 -1.3765 H 0 0 0 0 0 0 0 0 0 0 0 0 + -0.1979 -4.0762 -2.7637 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.0149 -2.8541 -2.4933 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.5939 -1.9790 2.2218 H 0 0 0 0 0 0 0 0 0 0 0 0 + -0.6069 -3.2018 1.9191 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.1878 -3.7852 1.1953 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.4812 -4.8129 3.2750 H 0 0 0 0 0 0 0 0 0 0 0 0 + 3.2887 -5.2853 3.0242 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.3566 -6.4315 2.0274 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.1040 -6.3627 3.7820 H 0 0 0 0 0 0 0 0 0 0 0 0 + -0.9513 -0.6146 1.0035 H 0 0 0 0 0 0 0 0 0 0 0 0 + -0.8146 1.4079 -0.1351 H 0 0 0 0 0 0 0 0 0 0 0 0 + -1.1445 2.7824 -2.0793 H 0 0 0 0 0 0 0 0 0 0 0 0 + -1.8357 1.7843 -4.1783 H 0 0 0 0 0 0 0 0 0 0 0 0 + -2.2428 -0.6076 -4.3112 H 0 0 0 0 0 0 0 0 0 0 0 0 + -1.9401 -1.9949 -2.3692 H 0 0 0 0 0 0 0 0 0 0 0 0 + -2.2461 -2.3218 1.3214 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.5469 -0.6522 0.4639 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.2588 -0.9729 -1.2334 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.2520 -2.0681 -0.3060 H 0 0 0 0 0 0 0 0 0 0 0 0 1 2 1 0 0 0 0 2 3 1 0 0 0 0 3 4 1 0 0 0 0 4 5 1 0 0 0 0 5 6 1 0 0 0 0 6 7 2 0 0 0 0 - 8 6 1 1 0 0 0 + 8 6 1 0 0 0 0 8 9 1 0 0 0 0 9 10 1 0 0 0 0 10 11 2 0 0 0 0 @@ -42,8 +70,36 @@ 17 18 1 0 0 0 0 18 19 2 0 0 0 0 14 19 1 0 0 0 0 - 13 20 1 6 0 0 0 - 8 21 1 6 0 0 0 + 13 20 1 0 0 0 0 + 8 21 1 0 0 0 0 + 1 22 1 0 0 0 0 + 1 23 1 0 0 0 0 + 1 24 1 0 0 0 0 + 2 25 1 0 0 0 0 + 2 26 1 0 0 0 0 + 3 27 1 0 0 0 0 + 3 28 1 0 0 0 0 + 4 29 1 0 0 0 0 + 4 30 1 0 0 0 0 + 5 31 1 0 0 0 0 + 5 32 1 0 0 0 0 + 9 33 1 0 0 0 0 + 9 34 1 0 0 0 0 + 10 35 1 0 0 0 0 + 11 36 1 0 0 0 0 + 12 37 1 0 0 0 0 + 12 38 1 0 0 0 0 + 12 39 1 0 0 0 0 + 13 40 1 0 0 0 0 + 15 41 1 0 0 0 0 + 16 42 1 0 0 0 0 + 17 43 1 0 0 0 0 + 18 44 1 0 0 0 0 + 19 45 1 0 0 0 0 + 20 46 1 0 0 0 0 + 21 47 1 0 0 0 0 + 21 48 1 0 0 0 0 + 21 49 1 0 0 0 0 M CHG 1 8 1 M END > From ffbf1494a9f21f46fce22bf2294bec48e0be02d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Thu, 4 Jun 2020 17:39:24 +0200 Subject: [PATCH 10/39] SDF test --- .../MDAnalysisTests/topology/test_rdkit.py | 27 +++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/testsuite/MDAnalysisTests/topology/test_rdkit.py b/testsuite/MDAnalysisTests/topology/test_rdkit.py index 13bbf1680a0..2b5dcd3d08f 100644 --- a/testsuite/MDAnalysisTests/topology/test_rdkit.py +++ b/testsuite/MDAnalysisTests/topology/test_rdkit.py @@ -26,7 +26,7 @@ import MDAnalysis as mda from MDAnalysisTests.topology.base import ParserBase -from MDAnalysisTests.datafiles import mol2_molecule, PDB_helix +from MDAnalysisTests.datafiles import mol2_molecule, PDB_helix, SDF_molecule Chem = pytest.importorskip('rdkit.Chem') @@ -105,4 +105,27 @@ class TestRDKitParserSMILES(RDKitParserBase): def filename(self): mol = Chem.MolFromSmiles(self.ref_filename) mol = Chem.AddHs(mol) - return mol \ No newline at end of file + return mol + + +class TestRDKitParserSDF(RDKitParserBase): + ref_filename = SDF_molecule + + guessed_attrs = ['types'] + + expected_n_atoms = 49 + expected_n_residues = 1 + expected_n_segments = 1 + expected_n_bonds = 49 + + @pytest.fixture + def filename(self): + return Chem.SDMolSupplier(SDF_molecule, removeHs=False, + sanitize=False)[0] + + def test_bond_orders(self, top, filename): + expected = [1, 1, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 1, 2, 1, 2, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1] + expected = [float(i) for i in expected] + assert top.bonds.order == expected \ No newline at end of file From 8736750d54590514898b7c7c0750523787347dcc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Thu, 4 Jun 2020 18:55:20 +0200 Subject: [PATCH 11/39] added formal charges + attrs specific to PDB files --- package/MDAnalysis/topology/RDKitParser.py | 30 +++++++++++++++++-- .../MDAnalysisTests/topology/test_rdkit.py | 5 ++-- 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/package/MDAnalysis/topology/RDKitParser.py b/package/MDAnalysis/topology/RDKitParser.py index 34b744d8758..c2cc9d7d3ad 100644 --- a/package/MDAnalysis/topology/RDKitParser.py +++ b/package/MDAnalysis/topology/RDKitParser.py @@ -54,11 +54,16 @@ Atomtypes, Elements, Masses, + Charges, Bonds, Resids, Resnums, Resnames, Segids, + AltLocs, + ChainIDs, + Occupancies, + Tempfactors, ) from ..core.topology import Topology @@ -100,25 +105,35 @@ def parse(self, **kwargs): resnames = [] elements = [] masses = [] + charges = [] ids = [] atomtypes = [] segids = [] + altlocs = [] + chainids = [] + occupancies = [] + tempfactors = [] for atom in mol.GetAtoms(): ids.append(atom.GetIdx()) elements.append(atom.GetSymbol()) masses.append(atom.GetMass()) + charges.append(atom.GetFormalCharge()) mi = atom.GetMonomerInfo() if mi: # atom name and residue info are present names.append(mi.GetName().strip()) resnums.append(mi.GetResidueNumber()) resnames.append(mi.GetResidueName()) segids.append(mi.GetSegmentNumber()) + altlocs.append(mi.GetAltLoc().strip()) + chainids.append(mi.GetChainId().strip()) + occupancies.append(mi.GetOccupancy()) + tempfactors.append(mi.GetTempFactor()) else: for prop, value in atom.GetPropsAsDict(True).items(): if 'atomname' in prop.lower(): # usually _TriposAtomName names.append(value) elif 'atomtype' in prop.lower(): # usually _TriposAtomType - atomtypes.append(value) + atomtypes.append(value) # make Topology attributes attrs = [] @@ -126,11 +141,12 @@ def parse(self, **kwargs): # * Attributes always present * - # Atom index, symbol and mass + # Atom attributes for vals, Attr, dtype in ( (ids, Atomids, np.int32), (elements, Elements, object), (masses, Masses, np.float32), + (charges, Charges, np.float32), ): attrs.append(Attr(np.array(vals, dtype=dtype))) @@ -161,6 +177,16 @@ def parse(self, **kwargs): else: atomtypes = guessers.guess_types(names) attrs.append(Atomtypes(atomtypes, guessed=True)) + + # PDB only + for vals, Attr, dtype in ( + (altlocs, AltLocs, object), + (chainids, ChainIDs, object), + (occupancies, Occupancies, np.float32), + (tempfactors, Tempfactors, np.float32), + ): + if vals: + attrs.append(Attr(np.array(vals, dtype=dtype))) # Residue if any(resnums) and not any(val is None for val in resnums): diff --git a/testsuite/MDAnalysisTests/topology/test_rdkit.py b/testsuite/MDAnalysisTests/topology/test_rdkit.py index 2b5dcd3d08f..750b773f3d0 100644 --- a/testsuite/MDAnalysisTests/topology/test_rdkit.py +++ b/testsuite/MDAnalysisTests/topology/test_rdkit.py @@ -32,7 +32,7 @@ class RDKitParserBase(ParserBase): parser = mda.topology.RDKitParser.RDKitParser - expected_attrs = ['ids', 'names', 'elements', 'masses', + expected_attrs = ['ids', 'names', 'elements', 'masses', 'charges', 'resids', 'resnums', 'segids', 'bonds', @@ -78,7 +78,8 @@ def test_bond_orders(self, top, filename): class TestRDKitParserPDB(RDKitParserBase): ref_filename = PDB_helix - expected_attrs = RDKitParserBase.expected_attrs + ['resnames'] + expected_attrs = RDKitParserBase.expected_attrs + ['resnames', 'altLocs', + 'chainIDs', 'occupancies', 'tempfactors'] guessed_attrs = ['types'] expected_n_atoms = 137 From db73687f569f30e4669c5c21bb1043b962e1261e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Thu, 4 Jun 2020 21:22:28 +0200 Subject: [PATCH 12/39] fix charges to partial instead of formal --- package/MDAnalysis/topology/RDKitParser.py | 35 +++++++++++++++---- .../MDAnalysisTests/topology/test_rdkit.py | 4 ++- 2 files changed, 31 insertions(+), 8 deletions(-) diff --git a/package/MDAnalysis/topology/RDKitParser.py b/package/MDAnalysis/topology/RDKitParser.py index c2cc9d7d3ad..cce110c8a09 100644 --- a/package/MDAnalysis/topology/RDKitParser.py +++ b/package/MDAnalysis/topology/RDKitParser.py @@ -117,7 +117,6 @@ def parse(self, **kwargs): ids.append(atom.GetIdx()) elements.append(atom.GetSymbol()) masses.append(atom.GetMass()) - charges.append(atom.GetFormalCharge()) mi = atom.GetMonomerInfo() if mi: # atom name and residue info are present names.append(mi.GetName().strip()) @@ -129,12 +128,29 @@ def parse(self, **kwargs): occupancies.append(mi.GetOccupancy()) tempfactors.append(mi.GetTempFactor()) else: - for prop, value in atom.GetPropsAsDict(True).items(): - if 'atomname' in prop.lower(): # usually _TriposAtomName - names.append(value) - elif 'atomtype' in prop.lower(): # usually _TriposAtomType - atomtypes.append(value) + # atom name (MOL2 only) + try: + names.append(atom.GetProp('_TriposAtomName')) + except KeyError: + pass + # atom type (MOL2 only) + try: + atomtypes.append(atom.GetProp('_TriposAtomType')) + except KeyError: + pass + # gasteiger charge (computed): + # if the user took the time to compute them, make it a priority + # over charges read from a MOL2 file + try: + charges.append(atom.GetProp('_GasteigerCharge')) + except KeyError: + # partial charge (MOL2 only) + try: + charges.append(atom.GetProp('_TriposPartialCharge')) + except KeyError: + pass + # make Topology attributes attrs = [] n_atoms = len(ids) @@ -146,7 +162,6 @@ def parse(self, **kwargs): (ids, Atomids, np.int32), (elements, Elements, object), (masses, Masses, np.float32), - (charges, Charges, np.float32), ): attrs.append(Attr(np.array(vals, dtype=dtype))) @@ -177,6 +192,12 @@ def parse(self, **kwargs): else: atomtypes = guessers.guess_types(names) attrs.append(Atomtypes(atomtypes, guessed=True)) + + # Partial charges + if charges: + attrs.append(Charges(np.array(charges, dtype=np.float32))) + else: + pass # no guesser yet # PDB only for vals, Attr, dtype in ( diff --git a/testsuite/MDAnalysisTests/topology/test_rdkit.py b/testsuite/MDAnalysisTests/topology/test_rdkit.py index 750b773f3d0..64fdc6d3852 100644 --- a/testsuite/MDAnalysisTests/topology/test_rdkit.py +++ b/testsuite/MDAnalysisTests/topology/test_rdkit.py @@ -32,7 +32,7 @@ class RDKitParserBase(ParserBase): parser = mda.topology.RDKitParser.RDKitParser - expected_attrs = ['ids', 'names', 'elements', 'masses', 'charges', + expected_attrs = ['ids', 'names', 'elements', 'masses', 'resids', 'resnums', 'segids', 'bonds', @@ -54,6 +54,8 @@ def test_bonds_total_counts(self, top): class TestRDKitParserMOL2(RDKitParserBase): ref_filename = mol2_molecule + expected_attrs = RDKitParserBase.expected_attrs + ['charges'] + expected_n_atoms = 49 expected_n_residues = 1 expected_n_segments = 1 From 94f1e4dc7d376310c0f8d25b36666461947df804 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Fri, 5 Jun 2020 16:45:15 +0200 Subject: [PATCH 13/39] added warning if multiple partial charges --- package/MDAnalysis/topology/RDKitParser.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/package/MDAnalysis/topology/RDKitParser.py b/package/MDAnalysis/topology/RDKitParser.py index cce110c8a09..38c9a827077 100644 --- a/package/MDAnalysis/topology/RDKitParser.py +++ b/package/MDAnalysis/topology/RDKitParser.py @@ -44,6 +44,7 @@ from __future__ import absolute_import, division import logging +import warnings import numpy as np from .base import TopologyReaderBase, change_squash @@ -113,6 +114,16 @@ def parse(self, **kwargs): chainids = [] occupancies = [] tempfactors = [] + + # check if multiple charges present + atom = mol.GetAtomWithIdx(0) + if atom.HasProp('_GasteigerCharge') and ( + atom.HasProp('_TriposPartialCharge') + ): + warnings.warn( + 'Both _GasteigerCharge and _TriposPartialCharge properties ', + 'are present. Using Gasteiger charges by default.') + for atom in mol.GetAtoms(): ids.append(atom.GetIdx()) elements.append(atom.GetSymbol()) @@ -142,11 +153,11 @@ def parse(self, **kwargs): # if the user took the time to compute them, make it a priority # over charges read from a MOL2 file try: - charges.append(atom.GetProp('_GasteigerCharge')) + charges.append(atom.GetDoubleProp('_GasteigerCharge')) except KeyError: # partial charge (MOL2 only) try: - charges.append(atom.GetProp('_TriposPartialCharge')) + charges.append(atom.GetDoubleProp('_TriposPartialCharge')) except KeyError: pass From b60a50e7a954e74b387e26d648396cf2d8e6fa74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Fri, 5 Jun 2020 18:19:43 +0200 Subject: [PATCH 14/39] updated parser doc --- package/MDAnalysis/topology/RDKitParser.py | 33 ++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/package/MDAnalysis/topology/RDKitParser.py b/package/MDAnalysis/topology/RDKitParser.py index 38c9a827077..f3ee4552b24 100644 --- a/package/MDAnalysis/topology/RDKitParser.py +++ b/package/MDAnalysis/topology/RDKitParser.py @@ -25,14 +25,21 @@ RDKit topology parser ===================== -Converts a `RDKit `_ +Converts an `RDKit `_ :class:`rdkit.Chem.rdchem.Mol` into a :class:`MDAnalysis.core.Topology`. Example ------- + TODO + +See Also +-------- +:mod:`MDAnalysis.coordinates.RDKit` + + Classes ------- @@ -74,6 +81,28 @@ class RDKitParser(TopologyReaderBase): """ For RDKit structures + + Create the following Attributes: + - Atomids + - Atomnames + - Elements + - Masses + - Bonds + - Resids + - Resnums + - Segids + + Guesses the following: + - Atomtypes + + Depending on RDKit's input, the following Attributes might be present: + - Charges + - Resnames + - AltLocs + - ChainIDs + - Occupancies + - Tempfactors + """ format = 'RDKIT' @@ -95,7 +124,7 @@ def parse(self, **kwargs): Returns ------- - MDAnalysis *Topology* object + MDAnalysis Topology object """ mol = self.filename From 73342e2037d7e22adf0828e65b26850286153cbc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Sat, 6 Jun 2020 01:45:33 +0200 Subject: [PATCH 15/39] universe.from_smiles + tests on MOL2 charges --- package/MDAnalysis/core/universe.py | 35 ++++++++++ package/MDAnalysis/topology/RDKitParser.py | 11 +--- .../MDAnalysisTests/core/test_universe.py | 6 ++ .../MDAnalysisTests/topology/test_rdkit.py | 65 ++++++++++++++++++- 4 files changed, 107 insertions(+), 10 deletions(-) diff --git a/package/MDAnalysis/core/universe.py b/package/MDAnalysis/core/universe.py index 160bd6a328b..42cc4b97e9b 100644 --- a/package/MDAnalysis/core/universe.py +++ b/package/MDAnalysis/core/universe.py @@ -1289,6 +1289,41 @@ def _fragdict(self): fragdict[a.ix] = fraginfo(i, f) return fragdict + + @classmethod + def from_smiles(cls, smiles, sanitize=True, addHs=True, **kwargs): + """Create a Universe from a SMILES string with rdkit + + Parameters + ---------- + smiles : str + SMILES string + + sanitize : bool (optional, default True) + Toggle the sanitization of the molecule + + addHs : bool (optional, default True) + Add all necessary hydrogens to the molecule + + kwargs : dict + Parameters passed on Universe creation + + Returns + ------- + :class:`~MDAnalysis.core.Universe` + """ + try: + from rdkit import Chem + except ImportError as e: + raise ImportError( + "Creating a Universe from a SMILES string requires RDKit but " + "it does not appear to be installed: {0}".format(e)) + + mol = Chem.MolFromSmiles(smiles, sanitize=sanitize) + if addHs: + mol = Chem.AddHs(mol) + + return cls(mol, **kwargs) # TODO: what is the point of this function??? diff --git a/package/MDAnalysis/topology/RDKitParser.py b/package/MDAnalysis/topology/RDKitParser.py index f3ee4552b24..6d9aabdf46e 100644 --- a/package/MDAnalysis/topology/RDKitParser.py +++ b/package/MDAnalysis/topology/RDKitParser.py @@ -25,14 +25,7 @@ RDKit topology parser ===================== -Converts an `RDKit `_ -:class:`rdkit.Chem.rdchem.Mol` into a :class:`MDAnalysis.core.Topology`. - - -Example -------- - -TODO +Converts an `RDKit `_ :class:`rdkit.Chem.rdchem.Mol` into a :class:`MDAnalysis.core.Topology`. See Also @@ -150,7 +143,7 @@ def parse(self, **kwargs): atom.HasProp('_TriposPartialCharge') ): warnings.warn( - 'Both _GasteigerCharge and _TriposPartialCharge properties ', + 'Both _GasteigerCharge and _TriposPartialCharge properties ' 'are present. Using Gasteiger charges by default.') for atom in mol.GetAtoms(): diff --git a/testsuite/MDAnalysisTests/core/test_universe.py b/testsuite/MDAnalysisTests/core/test_universe.py index 5f8d73c0264..d5cce8c8643 100644 --- a/testsuite/MDAnalysisTests/core/test_universe.py +++ b/testsuite/MDAnalysisTests/core/test_universe.py @@ -216,6 +216,12 @@ def test_universe_topology_class_with_coords(self): assert_equal(u.trajectory.n_frames, u2.trajectory.n_frames) assert u2._topology is u._topology + def test_universe_from_smiles(self): + smi = "CN1C=NC2=C1C(=O)N(C(=O)N2C)C" + u = mda.Universe.from_smiles(smi, addHs=False, format='RDKIT') + assert u.atoms.n_atoms == 14 + assert len(u.bonds.indices) == 15 + class TestUniverse(object): # older tests, still useful diff --git a/testsuite/MDAnalysisTests/topology/test_rdkit.py b/testsuite/MDAnalysisTests/topology/test_rdkit.py index 64fdc6d3852..369f8e208b5 100644 --- a/testsuite/MDAnalysisTests/topology/test_rdkit.py +++ b/testsuite/MDAnalysisTests/topology/test_rdkit.py @@ -22,13 +22,17 @@ # from __future__ import absolute_import +import warnings import pytest +import numpy as np +from numpy.testing import assert_array_equal import MDAnalysis as mda from MDAnalysisTests.topology.base import ParserBase from MDAnalysisTests.datafiles import mol2_molecule, PDB_helix, SDF_molecule Chem = pytest.importorskip('rdkit.Chem') +AllChem = pytest.importorskip('rdkit.Chem.AllChem') class RDKitParserBase(ParserBase): parser = mda.topology.RDKitParser.RDKitParser @@ -66,6 +70,34 @@ def filename(self): return Chem.MolFromMol2File(self.ref_filename, removeHs=False, sanitize=False) + def _create_mol_gasteiger_charges(self): + mol = Chem.MolFromMol2File(self.ref_filename, removeHs=False, + sanitize=True) + AllChem.ComputeGasteigerCharges(mol) + return mol + + def _remove_tripos_charges(self, mol): + for atom in mol.GetAtoms(): + atom.ClearProp("_TriposPartialCharge") + + @pytest.fixture + def top_gas_tripos(self): + mol = self._create_mol_gasteiger_charges() + return self.parser(mol).parse() + + @pytest.fixture + def filename_gasteiger(self): + mol = self._create_mol_gasteiger_charges() + self._remove_tripos_charges(mol) + return mol + + @pytest.fixture + def top_gasteiger(self): + mol = self._create_mol_gasteiger_charges() + self._remove_tripos_charges(mol) + return self.parser(mol).parse() + + def test_bond_orders(self, top, filename): # The 3 first aromatic bonds in the mol2 file are not actually # aromatic but just part of a conjugated system. RDKit doesn't follow @@ -75,6 +107,37 @@ def test_bond_orders(self, top, filename): 2, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1] expected = [float(i) for i in expected] assert top.bonds.order == expected + + def test_multiple_charge_priority(self, + top_gas_tripos, filename_gasteiger): + expected = np.array([ + a.GetDoubleProp('_GasteigerCharge') for a in + filename_gasteiger.GetAtoms()], dtype=np.float32) + assert_array_equal(expected, top_gas_tripos.charges.values) + + def test_multiple_charge_props_warning(self): + with warnings.catch_warnings(record=True) as w: + # Cause all warnings to always be triggered. + warnings.simplefilter("always") + mol = self._create_mol_gasteiger_charges() + # Trigger a warning. + top = self.parser(mol).parse() + # Verify the warning + assert len(w) == 1 + assert "_GasteigerCharge and _TriposPartialCharge" in str( + w[-1].message) + + def test_gasteiger_charges(self, top_gasteiger, filename_gasteiger): + expected = np.array([ + a.GetDoubleProp('_GasteigerCharge') for a in + filename_gasteiger.GetAtoms()], dtype=np.float32) + assert_array_equal(expected, top_gasteiger.charges.values) + + def test_tripos_charges(self, top, filename): + expected = np.array([ + a.GetDoubleProp('_TriposPartialCharge') for a in filename.GetAtoms() + ], dtype=np.float32) + assert_array_equal(expected, top.charges.values) class TestRDKitParserPDB(RDKitParserBase): @@ -92,7 +155,7 @@ class TestRDKitParserPDB(RDKitParserBase): @pytest.fixture def filename(self): return Chem.MolFromPDBFile(self.ref_filename, removeHs=False) - + class TestRDKitParserSMILES(RDKitParserBase): ref_filename = "CN1C=NC2=C1C(=O)N(C(=O)N2C)C" From 4baae688314bde5744b721716d134f5d6a5791ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Mon, 8 Jun 2020 16:41:14 +0200 Subject: [PATCH 16/39] universe from smiles generate coordinates and tests + doc --- .appveyor.yml | 2 +- .travis.yml | 2 +- package/MDAnalysis/core/universe.py | 28 ++++++++++++++- .../topology/RDKitParser.rst | 1 + .../documentation_pages/topology_modules.rst | 1 + .../MDAnalysisTests/core/test_universe.py | 35 +++++++++++++++++-- 6 files changed, 64 insertions(+), 5 deletions(-) create mode 100644 package/doc/sphinx/source/documentation_pages/topology/RDKitParser.rst diff --git a/.appveyor.yml b/.appveyor.yml index c9cd30207a1..fbeb4a821c0 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -9,7 +9,7 @@ cache: environment: global: - CONDA_CHANNELS: conda-forge rdkit + CONDA_CHANNELS: conda-forge CONDA_DEPENDENCIES: pip setuptools wheel cython mock six biopython networkx joblib matplotlib scipy vs2015_runtime pytest mmtf-python GridDataFormats hypothesis pytest-cov codecov chemfiles tqdm rdkit PIP_DEPENDENCIES: gsd==1.9.3 duecredit parmed DEBUG: "False" diff --git a/.travis.yml b/.travis.yml index 37a37db6c03..1eba1ce9a12 100644 --- a/.travis.yml +++ b/.travis.yml @@ -29,7 +29,7 @@ env: - BUILD_CMD="pip install -e package/ && (cd testsuite/ && python setup.py build)" - CONDA_MIN_DEPENDENCIES="mmtf-python mock six biopython networkx cython matplotlib scipy griddataformats hypothesis gsd codecov" - CONDA_DEPENDENCIES="${CONDA_MIN_DEPENDENCIES} seaborn>=0.7.0 clustalw=2.1 netcdf4 scikit-learn joblib>=0.12 chemfiles tqdm>=4.43.0 rdkit" - - CONDA_CHANNELS='biobuilds conda-forge rdkit' + - CONDA_CHANNELS='biobuilds conda-forge' - CONDA_CHANNEL_PRIORITY=True - PIP_DEPENDENCIES="duecredit parmed" - NUMPY_VERSION=stable diff --git a/package/MDAnalysis/core/universe.py b/package/MDAnalysis/core/universe.py index 42cc4b97e9b..9fb73b51de8 100644 --- a/package/MDAnalysis/core/universe.py +++ b/package/MDAnalysis/core/universe.py @@ -1291,7 +1291,8 @@ def _fragdict(self): return fragdict @classmethod - def from_smiles(cls, smiles, sanitize=True, addHs=True, **kwargs): + def from_smiles(cls, smiles, sanitize=True, addHs=True, + generate_coordinates=True, n_frames=1, **kwargs): """Create a Universe from a SMILES string with rdkit Parameters @@ -1305,23 +1306,48 @@ def from_smiles(cls, smiles, sanitize=True, addHs=True, **kwargs): addHs : bool (optional, default True) Add all necessary hydrogens to the molecule + generate_coordinates : bool (optional, default True) + Generate 3D coordinates using RDKit's `AllChem.EmbedMultipleConfs` + function. Requires adding hydrogens with the `addHs` parameter + + n_frames : int (optional, default 1) + Number of frames to generate coordinates for. Ignored if + `generate_coordinates=False` + kwargs : dict Parameters passed on Universe creation Returns ------- :class:`~MDAnalysis.core.Universe` + + Example + ------- + To create a Universe with 10 conformers of ethanol :: + >>> u = mda.Universe.from_smiles('CCO', n_frames=10) + >>> u + """ try: from rdkit import Chem + from rdkit.Chem import AllChem except ImportError as e: raise ImportError( "Creating a Universe from a SMILES string requires RDKit but " "it does not appear to be installed: {0}".format(e)) mol = Chem.MolFromSmiles(smiles, sanitize=sanitize) + if mol is None: + raise SyntaxError('Error while parsing SMILES {0}'.format(smiles)) if addHs: mol = Chem.AddHs(mol) + if generate_coordinates: + if not addHs: + raise ValueError("Generating coordinates requires adding " + "hydrogens with `addHs=True`") + assert (type(n_frames) is int) and (n_frames > 0), ("n_frames must" + " be a non-zero positive integer instead of {0}".format(n_frames)) + AllChem.EmbedMultipleConfs(mol, n_frames) return cls(mol, **kwargs) diff --git a/package/doc/sphinx/source/documentation_pages/topology/RDKitParser.rst b/package/doc/sphinx/source/documentation_pages/topology/RDKitParser.rst new file mode 100644 index 00000000000..9f6bbd1dac9 --- /dev/null +++ b/package/doc/sphinx/source/documentation_pages/topology/RDKitParser.rst @@ -0,0 +1 @@ +.. automodule:: MDAnalysis.topology.RDKitParser \ No newline at end of file diff --git a/package/doc/sphinx/source/documentation_pages/topology_modules.rst b/package/doc/sphinx/source/documentation_pages/topology_modules.rst index ed8caba8ce6..04ebf433e27 100644 --- a/package/doc/sphinx/source/documentation_pages/topology_modules.rst +++ b/package/doc/sphinx/source/documentation_pages/topology_modules.rst @@ -43,6 +43,7 @@ topology file format in the *topology_format* keyword argument to topology/PDBQTParser topology/PQRParser topology/PSFParser + topology/RDKitParser topology/TOPParser topology/TPRParser topology/TXYZParser diff --git a/testsuite/MDAnalysisTests/core/test_universe.py b/testsuite/MDAnalysisTests/core/test_universe.py index d5cce8c8643..1596f88f80a 100644 --- a/testsuite/MDAnalysisTests/core/test_universe.py +++ b/testsuite/MDAnalysisTests/core/test_universe.py @@ -216,12 +216,43 @@ def test_universe_topology_class_with_coords(self): assert_equal(u.trajectory.n_frames, u2.trajectory.n_frames) assert u2._topology is u._topology - def test_universe_from_smiles(self): + +class TestUniverseFromSmiles(object): + def setup_class(self): + pytest.importorskip("rdkit.Chem") + + def test_default(self): + smi = "CN1C=NC2=C1C(=O)N(C(=O)N2C)C" + u = mda.Universe.from_smiles(smi, format='RDKIT') + assert u.atoms.n_atoms == 24 + assert len(u.bonds.indices) == 25 + + def test_from_bad_smiles(self): + with pytest.raises(SyntaxError) as e: + u = mda.Universe.from_smiles("J", format='RDKIT') + assert "Error while parsing SMILES" in str(e.value) + + def test_no_Hs(self): smi = "CN1C=NC2=C1C(=O)N(C(=O)N2C)C" - u = mda.Universe.from_smiles(smi, addHs=False, format='RDKIT') + u = mda.Universe.from_smiles(smi, addHs=False, + generate_coordinates=False, format='RDKIT') assert u.atoms.n_atoms == 14 assert len(u.bonds.indices) == 15 + def test_gencoords_without_Hs_error(self): + with pytest.raises(ValueError) as e: + u = mda.Universe.from_smiles("CCO", addHs=False, + generate_coordinates=True, format='RDKIT') + assert "requires adding hydrogens" in str (e.value) + + def test_gencoords_n_frames(self): + with pytest.raises(AssertionError) as e: + u = mda.Universe.from_smiles("CCO", n_frames=0, format='RDKIT') + assert "non-zero positive integer" in str (e.value) + with pytest.raises(AssertionError) as e: + u = mda.Universe.from_smiles("CCO", n_frames=2.1, format='RDKIT') + assert "non-zero positive integer" in str (e.value) + class TestUniverse(object): # older tests, still useful From bf5ee1a26db2775b1d5686b370f9a7ec152786ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Mon, 8 Jun 2020 16:52:58 +0200 Subject: [PATCH 17/39] renamed n_frames to numConfs to be in line with RDKit --- package/MDAnalysis/core/universe.py | 12 ++++++------ testsuite/MDAnalysisTests/core/test_universe.py | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/package/MDAnalysis/core/universe.py b/package/MDAnalysis/core/universe.py index 9fb73b51de8..ae8c821e99d 100644 --- a/package/MDAnalysis/core/universe.py +++ b/package/MDAnalysis/core/universe.py @@ -1292,7 +1292,7 @@ def _fragdict(self): @classmethod def from_smiles(cls, smiles, sanitize=True, addHs=True, - generate_coordinates=True, n_frames=1, **kwargs): + generate_coordinates=True, numConfs=1, **kwargs): """Create a Universe from a SMILES string with rdkit Parameters @@ -1310,7 +1310,7 @@ def from_smiles(cls, smiles, sanitize=True, addHs=True, Generate 3D coordinates using RDKit's `AllChem.EmbedMultipleConfs` function. Requires adding hydrogens with the `addHs` parameter - n_frames : int (optional, default 1) + numConfs : int (optional, default 1) Number of frames to generate coordinates for. Ignored if `generate_coordinates=False` @@ -1324,7 +1324,7 @@ def from_smiles(cls, smiles, sanitize=True, addHs=True, Example ------- To create a Universe with 10 conformers of ethanol :: - >>> u = mda.Universe.from_smiles('CCO', n_frames=10) + >>> u = mda.Universe.from_smiles('CCO', numConfs=10) >>> u """ @@ -1345,9 +1345,9 @@ def from_smiles(cls, smiles, sanitize=True, addHs=True, if not addHs: raise ValueError("Generating coordinates requires adding " "hydrogens with `addHs=True`") - assert (type(n_frames) is int) and (n_frames > 0), ("n_frames must" - " be a non-zero positive integer instead of {0}".format(n_frames)) - AllChem.EmbedMultipleConfs(mol, n_frames) + assert (type(numConfs) is int) and (numConfs > 0), ("numConfs must" + " be a non-zero positive integer instead of {0}".format(numConfs)) + AllChem.EmbedMultipleConfs(mol, numConfs) return cls(mol, **kwargs) diff --git a/testsuite/MDAnalysisTests/core/test_universe.py b/testsuite/MDAnalysisTests/core/test_universe.py index 1596f88f80a..240035c65d1 100644 --- a/testsuite/MDAnalysisTests/core/test_universe.py +++ b/testsuite/MDAnalysisTests/core/test_universe.py @@ -245,12 +245,12 @@ def test_gencoords_without_Hs_error(self): generate_coordinates=True, format='RDKIT') assert "requires adding hydrogens" in str (e.value) - def test_gencoords_n_frames(self): + def test_generate_coordinates_numConfs(self): with pytest.raises(AssertionError) as e: - u = mda.Universe.from_smiles("CCO", n_frames=0, format='RDKIT') + u = mda.Universe.from_smiles("CCO", numConfs=0, format='RDKIT') assert "non-zero positive integer" in str (e.value) with pytest.raises(AssertionError) as e: - u = mda.Universe.from_smiles("CCO", n_frames=2.1, format='RDKIT') + u = mda.Universe.from_smiles("CCO", numConfs=2.1, format='RDKIT') assert "non-zero positive integer" in str (e.value) From ecc53b1496749f128fc562bcfe1d7b8028d879ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Mon, 8 Jun 2020 18:36:43 +0200 Subject: [PATCH 18/39] bond order testing from rdkit --- testsuite/MDAnalysisTests/topology/test_rdkit.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/testsuite/MDAnalysisTests/topology/test_rdkit.py b/testsuite/MDAnalysisTests/topology/test_rdkit.py index 369f8e208b5..39b39c60be1 100644 --- a/testsuite/MDAnalysisTests/topology/test_rdkit.py +++ b/testsuite/MDAnalysisTests/topology/test_rdkit.py @@ -102,10 +102,7 @@ def test_bond_orders(self, top, filename): # The 3 first aromatic bonds in the mol2 file are not actually # aromatic but just part of a conjugated system. RDKit doesn't follow # the mol2 file and marks them as single, even with `sanitize=False`. - expected = [1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1.5, 1.5, 1.5, 1, 1.5, 1, 1.5, 1, 1.5, 1, 1, 1, - 2, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1] - expected = [float(i) for i in expected] + expected = [bond.GetBondTypeAsDouble() for bond in filename.GetBonds()] assert top.bonds.order == expected def test_multiple_charge_priority(self, @@ -190,8 +187,5 @@ def filename(self): sanitize=False)[0] def test_bond_orders(self, top, filename): - expected = [1, 1, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 1, 2, 1, 2, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1] - expected = [float(i) for i in expected] + expected = [bond.GetBondTypeAsDouble() for bond in filename.GetBonds()] assert top.bonds.order == expected \ No newline at end of file From 396da83110b6a5adb4dce9a7a0b864b4fee91b97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Mon, 8 Jun 2020 19:16:05 +0200 Subject: [PATCH 19/39] added aromaticity attr --- package/MDAnalysis/core/topologyattrs.py | 12 ++++++++++++ package/MDAnalysis/topology/RDKitParser.py | 4 ++++ .../MDAnalysisTests/topology/test_rdkit.py | 17 +++++++---------- 3 files changed, 23 insertions(+), 10 deletions(-) diff --git a/package/MDAnalysis/core/topologyattrs.py b/package/MDAnalysis/core/topologyattrs.py index 7e66fd82f36..156ed2ea818 100644 --- a/package/MDAnalysis/core/topologyattrs.py +++ b/package/MDAnalysis/core/topologyattrs.py @@ -1665,6 +1665,18 @@ def _gen_initial_values(na, nr, ns): return np.zeros(na) +class Aromaticities(AtomAttr): + """Aromaticity (RDKit)""" + attrname = "aromaticities" + singular = "aromaticity" + per_object = "atom" + dtype = bool + + @staticmethod + def _gen_initial_values(na, nr, ns): + return np.array([False for _ in range(na)], dtype=bool) + + class ResidueAttr(TopologyAttr): attrname = 'residueattrs' singular = 'residueattr' diff --git a/package/MDAnalysis/topology/RDKitParser.py b/package/MDAnalysis/topology/RDKitParser.py index 6d9aabdf46e..681e94d96ea 100644 --- a/package/MDAnalysis/topology/RDKitParser.py +++ b/package/MDAnalysis/topology/RDKitParser.py @@ -56,6 +56,7 @@ Elements, Masses, Charges, + Aromaticities, Bonds, Resids, Resnums, @@ -129,6 +130,7 @@ def parse(self, **kwargs): elements = [] masses = [] charges = [] + aromatics = [] ids = [] atomtypes = [] segids = [] @@ -150,6 +152,7 @@ def parse(self, **kwargs): ids.append(atom.GetIdx()) elements.append(atom.GetSymbol()) masses.append(atom.GetMass()) + aromatics.append(atom.GetIsAromatic()) mi = atom.GetMonomerInfo() if mi: # atom name and residue info are present names.append(mi.GetName().strip()) @@ -195,6 +198,7 @@ def parse(self, **kwargs): (ids, Atomids, np.int32), (elements, Elements, object), (masses, Masses, np.float32), + (aromatics, Aromaticities, bool), ): attrs.append(Attr(np.array(vals, dtype=dtype))) diff --git a/testsuite/MDAnalysisTests/topology/test_rdkit.py b/testsuite/MDAnalysisTests/topology/test_rdkit.py index 39b39c60be1..99ea87046da 100644 --- a/testsuite/MDAnalysisTests/topology/test_rdkit.py +++ b/testsuite/MDAnalysisTests/topology/test_rdkit.py @@ -36,7 +36,7 @@ class RDKitParserBase(ParserBase): parser = mda.topology.RDKitParser.RDKitParser - expected_attrs = ['ids', 'names', 'elements', 'masses', + expected_attrs = ['ids', 'names', 'elements', 'masses', 'aromaticities', 'resids', 'resnums', 'segids', 'bonds', @@ -67,12 +67,10 @@ class TestRDKitParserMOL2(RDKitParserBase): @pytest.fixture def filename(self): - return Chem.MolFromMol2File(self.ref_filename, removeHs=False, - sanitize=False) + return Chem.MolFromMol2File(self.ref_filename, removeHs=False) def _create_mol_gasteiger_charges(self): - mol = Chem.MolFromMol2File(self.ref_filename, removeHs=False, - sanitize=True) + mol = Chem.MolFromMol2File(self.ref_filename, removeHs=False) AllChem.ComputeGasteigerCharges(mol) return mol @@ -99,9 +97,6 @@ def top_gasteiger(self): def test_bond_orders(self, top, filename): - # The 3 first aromatic bonds in the mol2 file are not actually - # aromatic but just part of a conjugated system. RDKit doesn't follow - # the mol2 file and marks them as single, even with `sanitize=False`. expected = [bond.GetBondTypeAsDouble() for bond in filename.GetBonds()] assert top.bonds.order == expected @@ -136,6 +131,9 @@ def test_tripos_charges(self, top, filename): ], dtype=np.float32) assert_array_equal(expected, top.charges.values) + def test_aromaticity(self, top, filename): + expected = np.array([atom.GetIsAromatic() for atom in filename.GetAtoms()]) + assert_array_equal(expected, top.aromaticities.values) class TestRDKitParserPDB(RDKitParserBase): ref_filename = PDB_helix @@ -183,8 +181,7 @@ class TestRDKitParserSDF(RDKitParserBase): @pytest.fixture def filename(self): - return Chem.SDMolSupplier(SDF_molecule, removeHs=False, - sanitize=False)[0] + return Chem.SDMolSupplier(SDF_molecule, removeHs=False)[0] def test_bond_orders(self, top, filename): expected = [bond.GetBondTypeAsDouble() for bond in filename.GetBonds()] From 654026057d9e2f65284819b32b4fe5870b91e2a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Tue, 9 Jun 2020 22:34:16 +0200 Subject: [PATCH 20/39] coordinates reader --- package/MDAnalysis/coordinates/RDKit.py | 85 ++++++++++++++++++++++ package/MDAnalysis/coordinates/__init__.py | 1 + 2 files changed, 86 insertions(+) create mode 100644 package/MDAnalysis/coordinates/RDKit.py diff --git a/package/MDAnalysis/coordinates/RDKit.py b/package/MDAnalysis/coordinates/RDKit.py new file mode 100644 index 00000000000..5a5f3c87694 --- /dev/null +++ b/package/MDAnalysis/coordinates/RDKit.py @@ -0,0 +1,85 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 +# +# MDAnalysis --- https://www.mdanalysis.org +# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, +# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. +# MDAnalysis: A Python package for the rapid analysis of molecular dynamics +# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th +# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. +# doi: 10.25080/majora-629e541a-00e +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 +# + +"""RDKit molecule --- :mod:`MDAnalysis.coordinates.RDKit` +================================================================ + +Read coordinates data from an `RDKit `_ :class:`rdkit.Chem.rdchem.Mol` with :class:`RDKitReader` +into a MDAnalysis Universe. Convert it back to a :class:`rdkit.Chem.rdchem.Mol` with +:class:`RDKitConverter`. + + +Example +------- + +TODO + + +Classes +------- + +.. autoclass:: RDKitReader + :members: + +.. autoclass:: RDKitConverter + :members: + + +""" +from __future__ import absolute_import + +import numpy as np + +from . import memory + + +class RDKitReader(memory.MemoryReader): + """Coordinate reader for RDKit.""" + format = 'RDKIT' + + # Structure.coordinates always in Angstrom + units = {'time': None, 'length': 'Angstrom'} + + @staticmethod + def _format_hint(thing): + """Can this reader read *thing*? + + .. versionadded:: 1.0.0 + """ + try: + from rdkit import Chem + except ImportError: + # if we can't import rdkit, it's probably not rdkit + return False + else: + return isinstance(thing, Chem.Mol) + + def __init__(self, filename, **kwargs): + n_atoms = filename.GetNumAtoms() + coordinates = np.array([ + conf.GetPositions() for conf in filename.GetConformers()], + dtype=np.float32) + if coordinates.size == 0: + coordinates = np.empty((1,n_atoms,3), dtype=np.float32) + coordinates[:] = np.nan + super(RDKitReader, self).__init__(coordinates, order='fac', **kwargs) \ No newline at end of file diff --git a/package/MDAnalysis/coordinates/__init__.py b/package/MDAnalysis/coordinates/__init__.py index d8548f24648..064170976c9 100644 --- a/package/MDAnalysis/coordinates/__init__.py +++ b/package/MDAnalysis/coordinates/__init__.py @@ -745,3 +745,4 @@ class can choose an appropriate reader automatically. from . import null from . import NAMDBIN from . import FHIAIMS +from . import RDKit From d18bb1235cb8b67a1fa30a428a932cce46570363 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Wed, 10 Jun 2020 00:18:41 +0200 Subject: [PATCH 21/39] added tests on coordinates --- .../MDAnalysisTests/coordinates/test_rdkit.py | 51 +++++++++++++++++++ .../MDAnalysisTests/core/test_universe.py | 4 ++ 2 files changed, 55 insertions(+) create mode 100644 testsuite/MDAnalysisTests/coordinates/test_rdkit.py diff --git a/testsuite/MDAnalysisTests/coordinates/test_rdkit.py b/testsuite/MDAnalysisTests/coordinates/test_rdkit.py new file mode 100644 index 00000000000..76520229d32 --- /dev/null +++ b/testsuite/MDAnalysisTests/coordinates/test_rdkit.py @@ -0,0 +1,51 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 fileencoding=utf-8 +# +# MDAnalysis --- https://www.mdanalysis.org +# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, +# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. +# MDAnalysis: A Python package for the rapid analysis of molecular dynamics +# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th +# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. +# doi: 10.25080/majora-629e541a-00e +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 +# +from __future__ import absolute_import + +import pytest +import MDAnalysis as mda +import numpy as np +from numpy.testing import (assert_equal, + assert_almost_equal) + +from MDAnalysisTests.datafiles import mol2_molecule + +Chem = pytest.importorskip("rdkit.Chem") + + +class TestRDKitReader(object): + @pytest.fixture + def mol(self): + return Chem.MolFromMol2File(mol2_molecule, removeHs=False) + + @pytest.fixture + def universe(self, mol): + return mda.Universe(mol) + + def test_coordinates(self, mol, universe): + expected = np.array([ + conf.GetPositions() for conf in mol.GetConformers()], + dtype=np.float32) + assert_equal(expected, universe.trajectory.coordinate_array) + assert universe.trajectory.n_frames == 1 + diff --git a/testsuite/MDAnalysisTests/core/test_universe.py b/testsuite/MDAnalysisTests/core/test_universe.py index 240035c65d1..9d0a0a700e7 100644 --- a/testsuite/MDAnalysisTests/core/test_universe.py +++ b/testsuite/MDAnalysisTests/core/test_universe.py @@ -253,6 +253,10 @@ def test_generate_coordinates_numConfs(self): u = mda.Universe.from_smiles("CCO", numConfs=2.1, format='RDKIT') assert "non-zero positive integer" in str (e.value) + def test_coordinates_SMILES(self): + u = mda.Universe.from_smiles("CCO", numConfs=2) + assert u.trajectory.n_frames == 2 + class TestUniverse(object): # older tests, still useful From 0c8dcc468d2a9a82b282ee16b7943480e8bc9a6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Wed, 10 Jun 2020 01:08:45 +0200 Subject: [PATCH 22/39] aromatic atomselection + test --- package/MDAnalysis/core/selection.py | 14 ++++++++++++++ .../MDAnalysisTests/coordinates/test_rdkit.py | 5 ++++- .../MDAnalysisTests/core/test_atomselections.py | 11 +++++++++++ 3 files changed, 29 insertions(+), 1 deletion(-) diff --git a/package/MDAnalysis/core/selection.py b/package/MDAnalysis/core/selection.py index 2b121ebd708..a953ffdcdf6 100644 --- a/package/MDAnalysis/core/selection.py +++ b/package/MDAnalysis/core/selection.py @@ -573,6 +573,20 @@ class AltlocSelection(StringSelection): field = 'altLocs' +class AromaticSelection(Selection): + """Contains all aromatic atoms""" + token = 'aromatic' + field = 'aromaticities' + + def __init__(self, parser, tokens): + pass + + def apply(self, group): + mask = np.zeros(len(group), dtype=np.bool) + mask = getattr(group, self.field, mask) + return group[mask].unique + + class ResidSelection(Selection): """Select atoms based on numerical fields diff --git a/testsuite/MDAnalysisTests/coordinates/test_rdkit.py b/testsuite/MDAnalysisTests/coordinates/test_rdkit.py index 76520229d32..436b48b8e69 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_rdkit.py +++ b/testsuite/MDAnalysisTests/coordinates/test_rdkit.py @@ -46,6 +46,9 @@ def test_coordinates(self, mol, universe): expected = np.array([ conf.GetPositions() for conf in mol.GetConformers()], dtype=np.float32) - assert_equal(expected, universe.trajectory.coordinate_array) assert universe.trajectory.n_frames == 1 + assert_equal(expected, universe.trajectory.coordinate_array) + def test_no_coordinates(self): + pass + diff --git a/testsuite/MDAnalysisTests/core/test_atomselections.py b/testsuite/MDAnalysisTests/core/test_atomselections.py index cd4b3468172..59d8eb788aa 100644 --- a/testsuite/MDAnalysisTests/core/test_atomselections.py +++ b/testsuite/MDAnalysisTests/core/test_atomselections.py @@ -520,6 +520,17 @@ def test_molnum(self, universe, selection_string, reference): assert_equal(sel.ids, np.array(reference, dtype=np.int32)) +class TestSelectionRDKit(object): + def test_aromatic(self): + smi = "Cc1ccccc1" # toluene + u = MDAnalysis.Universe.from_smiles(smi, addHs=False, + generate_coordinates=False) + arom = u.select_atoms("aromatic") + not_arom = u.select_atoms("not aromatic") + assert arom.n_atoms == 6 + assert not_arom.n_atoms == 1 + + class TestSelectionsNucleicAcids(object): @pytest.fixture(scope='class') def universe(self): From d9dd7473523d756077cb5bef17b71e23861dee5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Wed, 10 Jun 2020 14:36:30 +0200 Subject: [PATCH 23/39] fix minimal dependency --- testsuite/MDAnalysisTests/core/test_atomselections.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/testsuite/MDAnalysisTests/core/test_atomselections.py b/testsuite/MDAnalysisTests/core/test_atomselections.py index 59d8eb788aa..a9753758100 100644 --- a/testsuite/MDAnalysisTests/core/test_atomselections.py +++ b/testsuite/MDAnalysisTests/core/test_atomselections.py @@ -521,6 +521,9 @@ def test_molnum(self, universe, selection_string, reference): class TestSelectionRDKit(object): + def setup_class(self): + pytest.importorskip("rdkit.Chem") + def test_aromatic(self): smi = "Cc1ccccc1" # toluene u = MDAnalysis.Universe.from_smiles(smi, addHs=False, From b75297c75d67e9d7fdd0a539b8d5bae914cccde6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Thu, 11 Jun 2020 20:38:11 +0200 Subject: [PATCH 24/39] updated docs --- package/MDAnalysis/coordinates/RDKit.py | 9 ++++++++- package/MDAnalysis/core/universe.py | 2 ++ package/MDAnalysis/topology/RDKitParser.py | 1 + 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/package/MDAnalysis/coordinates/RDKit.py b/package/MDAnalysis/coordinates/RDKit.py index 5a5f3c87694..b044b2fef8c 100644 --- a/package/MDAnalysis/coordinates/RDKit.py +++ b/package/MDAnalysis/coordinates/RDKit.py @@ -32,7 +32,14 @@ Example ------- -TODO +>>> from rdkit import Chem +>>> import MDAnalysis as mda +>>> mol = Chem.MolFromMol2File("docking_poses.mol2", removeHs=False) +>>> u = mda.Universe(mol) +>>> u + +>>> u.trajectory + Classes diff --git a/package/MDAnalysis/core/universe.py b/package/MDAnalysis/core/universe.py index ae8c821e99d..49942f5ca48 100644 --- a/package/MDAnalysis/core/universe.py +++ b/package/MDAnalysis/core/universe.py @@ -1327,6 +1327,8 @@ def from_smiles(cls, smiles, sanitize=True, addHs=True, >>> u = mda.Universe.from_smiles('CCO', numConfs=10) >>> u + >>> u.trajectory + """ try: from rdkit import Chem diff --git a/package/MDAnalysis/topology/RDKitParser.py b/package/MDAnalysis/topology/RDKitParser.py index 681e94d96ea..b5203563b6c 100644 --- a/package/MDAnalysis/topology/RDKitParser.py +++ b/package/MDAnalysis/topology/RDKitParser.py @@ -79,6 +79,7 @@ class RDKitParser(TopologyReaderBase): Create the following Attributes: - Atomids - Atomnames + - Aromaticities - Elements - Masses - Bonds From 9aafc6a6eba43735121179a41c259eac1c4d6c6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Fri, 12 Jun 2020 20:34:16 +0200 Subject: [PATCH 25/39] updated doc --- package/MDAnalysis/coordinates/RDKit.py | 15 ++++++++++++++- package/MDAnalysis/core/selection.py | 5 ++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/package/MDAnalysis/coordinates/RDKit.py b/package/MDAnalysis/coordinates/RDKit.py index b044b2fef8c..09297103dab 100644 --- a/package/MDAnalysis/coordinates/RDKit.py +++ b/package/MDAnalysis/coordinates/RDKit.py @@ -61,7 +61,10 @@ class RDKitReader(memory.MemoryReader): - """Coordinate reader for RDKit.""" + """Coordinate reader for RDKit. + + .. versionadded:: 1.0.0 + """ format = 'RDKIT' # Structure.coordinates always in Angstrom @@ -82,6 +85,16 @@ def _format_hint(thing): return isinstance(thing, Chem.Mol) def __init__(self, filename, **kwargs): + """Read coordinates from an RDKit molecule. + Each conformer in the original RDKit molecule will be read as a frame + in the resulting universe. + + Parameters + ---------- + + filename : rdkit.Chem.rdchem.Mol + RDKit molecule + """ n_atoms = filename.GetNumAtoms() coordinates = np.array([ conf.GetPositions() for conf in filename.GetConformers()], diff --git a/package/MDAnalysis/core/selection.py b/package/MDAnalysis/core/selection.py index a953ffdcdf6..b38dc7e1c7d 100644 --- a/package/MDAnalysis/core/selection.py +++ b/package/MDAnalysis/core/selection.py @@ -574,7 +574,10 @@ class AltlocSelection(StringSelection): class AromaticSelection(Selection): - """Contains all aromatic atoms""" + """Select aromatic atoms. + + Aromaticity is available in the `aromaticities` attribute and is made + available through RDKit""" token = 'aromatic' field = 'aromaticities' From 77617d27c3956f28dec99cf02f8d975a306d95cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Fri, 12 Jun 2020 20:41:07 +0200 Subject: [PATCH 26/39] fix PEP8 and init array --- package/MDAnalysis/core/topologyattrs.py | 2 +- testsuite/MDAnalysisTests/topology/test_rdkit.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/package/MDAnalysis/core/topologyattrs.py b/package/MDAnalysis/core/topologyattrs.py index 156ed2ea818..1046e67cc2a 100644 --- a/package/MDAnalysis/core/topologyattrs.py +++ b/package/MDAnalysis/core/topologyattrs.py @@ -1674,7 +1674,7 @@ class Aromaticities(AtomAttr): @staticmethod def _gen_initial_values(na, nr, ns): - return np.array([False for _ in range(na)], dtype=bool) + return np.zeros(na, dtype=bool) class ResidueAttr(TopologyAttr): diff --git a/testsuite/MDAnalysisTests/topology/test_rdkit.py b/testsuite/MDAnalysisTests/topology/test_rdkit.py index 99ea87046da..98c0281e929 100644 --- a/testsuite/MDAnalysisTests/topology/test_rdkit.py +++ b/testsuite/MDAnalysisTests/topology/test_rdkit.py @@ -132,9 +132,11 @@ def test_tripos_charges(self, top, filename): assert_array_equal(expected, top.charges.values) def test_aromaticity(self, top, filename): - expected = np.array([atom.GetIsAromatic() for atom in filename.GetAtoms()]) + expected = np.array([ + atom.GetIsAromatic() for atom in filename.GetAtoms()]) assert_array_equal(expected, top.aromaticities.values) + class TestRDKitParserPDB(RDKitParserBase): ref_filename = PDB_helix From d6d7fe3289f5a3ceb49e540f0f9bc501960400ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Fri, 12 Jun 2020 20:42:43 +0200 Subject: [PATCH 27/39] test combine aromatic selection --- .../core/test_atomselections.py | 26 ++++++++++++++----- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/testsuite/MDAnalysisTests/core/test_atomselections.py b/testsuite/MDAnalysisTests/core/test_atomselections.py index a9753758100..fc4e251a9aa 100644 --- a/testsuite/MDAnalysisTests/core/test_atomselections.py +++ b/testsuite/MDAnalysisTests/core/test_atomselections.py @@ -523,15 +523,27 @@ def test_molnum(self, universe, selection_string, reference): class TestSelectionRDKit(object): def setup_class(self): pytest.importorskip("rdkit.Chem") - - def test_aromatic(self): - smi = "Cc1ccccc1" # toluene + + @pytest.fixture + def u(self): + smi = "Cc1cNcc1" u = MDAnalysis.Universe.from_smiles(smi, addHs=False, generate_coordinates=False) - arom = u.select_atoms("aromatic") - not_arom = u.select_atoms("not aromatic") - assert arom.n_atoms == 6 - assert not_arom.n_atoms == 1 + return u + + def test_aromatic(self, u): + sel = u.select_atoms("aromatic") + assert sel.n_atoms == 5 + + def test_not_aromatic(self, u): + sel = u.select_atoms("not aromatic") + assert sel.n_atoms == 1 + + def test_combine_aromatic(self, u): + sel = u.select_atoms("type N and aromatic") + assert sel.n_atoms == 1 + sel = u.select_atoms("type C and aromatic") + assert sel.n_atoms == 4 class TestSelectionsNucleicAcids(object): From dc7ec773dfc74944ff28d8d7cb95ebffcac4522e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Fri, 12 Jun 2020 20:43:11 +0200 Subject: [PATCH 28/39] test compare with mol2reader --- testsuite/MDAnalysisTests/coordinates/test_rdkit.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/testsuite/MDAnalysisTests/coordinates/test_rdkit.py b/testsuite/MDAnalysisTests/coordinates/test_rdkit.py index 436b48b8e69..56f99fa452a 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_rdkit.py +++ b/testsuite/MDAnalysisTests/coordinates/test_rdkit.py @@ -51,4 +51,10 @@ def test_coordinates(self, mol, universe): def test_no_coordinates(self): pass + + def test_compare_mol2reader(self, universe): + mol2 = mda.Universe(mol2_molecule) + assert universe.trajectory.n_frames == mol2.trajectory.n_frames + assert_equal(universe.trajectory.ts.positions, + mol2.trajectory.ts.positions) From 18dc450becd9439b834b8ccc8c931b2414ef0d12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Fri, 12 Jun 2020 21:29:33 +0200 Subject: [PATCH 29/39] added kwargs for EmbedMultipleConfs --- package/MDAnalysis/core/universe.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/package/MDAnalysis/core/universe.py b/package/MDAnalysis/core/universe.py index 49942f5ca48..13bd5661bf2 100644 --- a/package/MDAnalysis/core/universe.py +++ b/package/MDAnalysis/core/universe.py @@ -1292,7 +1292,8 @@ def _fragdict(self): @classmethod def from_smiles(cls, smiles, sanitize=True, addHs=True, - generate_coordinates=True, numConfs=1, **kwargs): + generate_coordinates=True, numConfs=1, + rdkit_kwargs={}, **kwargs): """Create a Universe from a SMILES string with rdkit Parameters @@ -1314,6 +1315,9 @@ def from_smiles(cls, smiles, sanitize=True, addHs=True, Number of frames to generate coordinates for. Ignored if `generate_coordinates=False` + rdkit_kwargs : dict (optional) + Other arguments passed to the RDKit `EmbedMultipleConfs` function + kwargs : dict Parameters passed on Universe creation @@ -1347,9 +1351,11 @@ def from_smiles(cls, smiles, sanitize=True, addHs=True, if not addHs: raise ValueError("Generating coordinates requires adding " "hydrogens with `addHs=True`") + + numConfs = rdkit_kwargs.pop("numConfs", numConfs) assert (type(numConfs) is int) and (numConfs > 0), ("numConfs must" " be a non-zero positive integer instead of {0}".format(numConfs)) - AllChem.EmbedMultipleConfs(mol, numConfs) + AllChem.EmbedMultipleConfs(mol, numConfs, **rdkit_kwargs) return cls(mol, **kwargs) From 0c891f4dfe88565912eb6133519252e3c60d948a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Mon, 15 Jun 2020 14:58:26 +0200 Subject: [PATCH 30/39] example for EmbedMultipleConfs kwargs --- package/MDAnalysis/core/universe.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/package/MDAnalysis/core/universe.py b/package/MDAnalysis/core/universe.py index 13bd5661bf2..1f0e86e4cdc 100644 --- a/package/MDAnalysis/core/universe.py +++ b/package/MDAnalysis/core/universe.py @@ -1325,22 +1325,28 @@ def from_smiles(cls, smiles, sanitize=True, addHs=True, ------- :class:`~MDAnalysis.core.Universe` - Example - ------- + Examples + -------- To create a Universe with 10 conformers of ethanol :: >>> u = mda.Universe.from_smiles('CCO', numConfs=10) >>> u >>> u.trajectory + + To use a different conformer generation algorithm, like ETKDGv3 :: + >>> u = mda.Universe.from_smiles('CCO', rdkit_kwargs=dict( + params=AllChem.ETKDGv3())) + >>> u.trajectory + """ try: from rdkit import Chem from rdkit.Chem import AllChem except ImportError as e: - raise ImportError( + six.raise_from(ImportError( "Creating a Universe from a SMILES string requires RDKit but " - "it does not appear to be installed: {0}".format(e)) + "it does not appear to be installed"), e) mol = Chem.MolFromSmiles(smiles, sanitize=sanitize) if mol is None: From eac10ab134ca91ac79ebdc054d57cc30d8f25c8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Mon, 15 Jun 2020 15:58:52 +0200 Subject: [PATCH 31/39] warning and tests on coordinates --- package/MDAnalysis/coordinates/RDKit.py | 2 ++ .../MDAnalysisTests/coordinates/test_rdkit.py | 27 +++++++++++++++++-- .../MDAnalysisTests/core/test_universe.py | 16 ++++++++++- 3 files changed, 42 insertions(+), 3 deletions(-) diff --git a/package/MDAnalysis/coordinates/RDKit.py b/package/MDAnalysis/coordinates/RDKit.py index 09297103dab..cf3810cf73c 100644 --- a/package/MDAnalysis/coordinates/RDKit.py +++ b/package/MDAnalysis/coordinates/RDKit.py @@ -54,6 +54,7 @@ """ from __future__ import absolute_import +import warnings import numpy as np @@ -100,6 +101,7 @@ def __init__(self, filename, **kwargs): conf.GetPositions() for conf in filename.GetConformers()], dtype=np.float32) if coordinates.size == 0: + warnings.warn("No coordinates found in the RDKit molecule") coordinates = np.empty((1,n_atoms,3), dtype=np.float32) coordinates[:] = np.nan super(RDKitReader, self).__init__(coordinates, order='fac', **kwargs) \ No newline at end of file diff --git a/testsuite/MDAnalysisTests/coordinates/test_rdkit.py b/testsuite/MDAnalysisTests/coordinates/test_rdkit.py index 56f99fa452a..fea8e0dac5f 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_rdkit.py +++ b/testsuite/MDAnalysisTests/coordinates/test_rdkit.py @@ -21,6 +21,7 @@ # J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 # from __future__ import absolute_import +import warnings import pytest import MDAnalysis as mda @@ -31,7 +32,7 @@ from MDAnalysisTests.datafiles import mol2_molecule Chem = pytest.importorskip("rdkit.Chem") - +AllChem = pytest.importorskip("rdkit.Chem.AllChem") class TestRDKitReader(object): @pytest.fixture @@ -49,8 +50,30 @@ def test_coordinates(self, mol, universe): assert universe.trajectory.n_frames == 1 assert_equal(expected, universe.trajectory.coordinate_array) + def test_multi_coordinates(self): + mol = Chem.MolFromSmiles("CCO") + mol = Chem.AddHs(mol) + cids = AllChem.EmbedMultipleConfs(mol, numConfs=3) + umol = mda.Universe(mol) + assert umol.trajectory.n_frames == 3 + expected = np.array([ + conf.GetPositions() for conf in mol.GetConformers()], + dtype=np.float32) + assert_equal(expected, umol.trajectory.coordinate_array) + def test_no_coordinates(self): - pass + with warnings.catch_warnings(record=True) as w: + # Cause all warnings to always be triggered. + warnings.simplefilter("always") + # Trigger a warning. + u = mda.Universe.from_smiles("CCO", generate_coordinates=False) + # Verify the warning + assert len(w) == 1 + assert "No coordinates found" in str( + w[-1].message) + expected = np.empty((1,u.atoms.n_atoms,3), dtype=np.float32) + expected[:] = np.nan + assert_equal(u.trajectory.coordinate_array, expected) def test_compare_mol2reader(self, universe): mol2 = mda.Universe(mol2_molecule) diff --git a/testsuite/MDAnalysisTests/core/test_universe.py b/testsuite/MDAnalysisTests/core/test_universe.py index 9d0a0a700e7..2d2cd4badb9 100644 --- a/testsuite/MDAnalysisTests/core/test_universe.py +++ b/testsuite/MDAnalysisTests/core/test_universe.py @@ -254,8 +254,22 @@ def test_generate_coordinates_numConfs(self): assert "non-zero positive integer" in str (e.value) def test_coordinates_SMILES(self): - u = mda.Universe.from_smiles("CCO", numConfs=2) + u = mda.Universe.from_smiles("C", numConfs=2, + rdkit_kwargs=dict(randomSeed=42)) assert u.trajectory.n_frames == 2 + expected = np.array([ + [[-0.02209686, 0.00321505, 0.01651974], + [-0.6690088 , 0.8893599 , -0.1009085 ], + [-0.37778795, -0.8577519 , -0.58829606], + [ 0.09642092, -0.3151253 , 1.0637809 ], + [ 0.97247267, 0.28030226, -0.3910961 ]], + [[-0.0077073 , 0.00435363, 0.01834692], + [-0.61228824, -0.83705765, -0.38619974], + [-0.41925883, 0.9689095 , -0.3415968 ], + [ 0.03148226, -0.03256683, 1.1267245 ], + [ 1.0077721 , -0.10363862, -0.41727486]]], dtype=np.float32) + assert_almost_equal(u.trajectory.coordinate_array, expected) + class TestUniverse(object): From 5cad0bac52c78d13a2ea36272a7137c50d124c53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Mon, 15 Jun 2020 16:14:22 +0200 Subject: [PATCH 32/39] drop python 2 support --- package/MDAnalysis/coordinates/RDKit.py | 2 +- package/MDAnalysis/topology/RDKitParser.py | 1 - testsuite/MDAnalysisTests/coordinates/test_rdkit.py | 1 - testsuite/MDAnalysisTests/topology/test_rdkit.py | 1 - 4 files changed, 1 insertion(+), 4 deletions(-) diff --git a/package/MDAnalysis/coordinates/RDKit.py b/package/MDAnalysis/coordinates/RDKit.py index cf3810cf73c..0d56d612ea7 100644 --- a/package/MDAnalysis/coordinates/RDKit.py +++ b/package/MDAnalysis/coordinates/RDKit.py @@ -53,7 +53,7 @@ """ -from __future__ import absolute_import + import warnings import numpy as np diff --git a/package/MDAnalysis/topology/RDKitParser.py b/package/MDAnalysis/topology/RDKitParser.py index b5203563b6c..c46e5a856cc 100644 --- a/package/MDAnalysis/topology/RDKitParser.py +++ b/package/MDAnalysis/topology/RDKitParser.py @@ -41,7 +41,6 @@ :inherited-members: """ -from __future__ import absolute_import, division import logging import warnings diff --git a/testsuite/MDAnalysisTests/coordinates/test_rdkit.py b/testsuite/MDAnalysisTests/coordinates/test_rdkit.py index fea8e0dac5f..e09ae616604 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_rdkit.py +++ b/testsuite/MDAnalysisTests/coordinates/test_rdkit.py @@ -20,7 +20,6 @@ # MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. # J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 # -from __future__ import absolute_import import warnings import pytest diff --git a/testsuite/MDAnalysisTests/topology/test_rdkit.py b/testsuite/MDAnalysisTests/topology/test_rdkit.py index 98c0281e929..695cef5100c 100644 --- a/testsuite/MDAnalysisTests/topology/test_rdkit.py +++ b/testsuite/MDAnalysisTests/topology/test_rdkit.py @@ -20,7 +20,6 @@ # MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. # J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 # -from __future__ import absolute_import import warnings import pytest From 3ab565875a3e59ee534f0f8d296b4e42e5aa6a69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Mon, 15 Jun 2020 17:50:10 +0200 Subject: [PATCH 33/39] modified tests --- package/MDAnalysis/core/universe.py | 11 ++++--- .../MDAnalysisTests/coordinates/test_rdkit.py | 32 +++++++++---------- .../core/test_atomselections.py | 22 ++++++------- .../MDAnalysisTests/core/test_universe.py | 27 ++++++++++++++-- 4 files changed, 55 insertions(+), 37 deletions(-) diff --git a/package/MDAnalysis/core/universe.py b/package/MDAnalysis/core/universe.py index 42001e31821..ffffb1e5f02 100644 --- a/package/MDAnalysis/core/universe.py +++ b/package/MDAnalysis/core/universe.py @@ -1335,14 +1335,16 @@ def from_smiles(cls, smiles, sanitize=True, addHs=True, params=AllChem.ETKDGv3())) >>> u.trajectory + + .. versionadded:: 1.0.0 """ try: from rdkit import Chem from rdkit.Chem import AllChem except ImportError as e: - six.raise_from(ImportError( + raise ImportError( "Creating a Universe from a SMILES string requires RDKit but " - "it does not appear to be installed"), e) + "it does not appear to be installed") from e mol = Chem.MolFromSmiles(smiles, sanitize=sanitize) if mol is None: @@ -1355,8 +1357,9 @@ def from_smiles(cls, smiles, sanitize=True, addHs=True, "hydrogens with `addHs=True`") numConfs = rdkit_kwargs.pop("numConfs", numConfs) - assert (type(numConfs) is int) and (numConfs > 0), ("numConfs must" - " be a non-zero positive integer instead of {0}".format(numConfs)) + if not (type(numConfs) is int and numConfs > 0): + raise SyntaxError("numConfs must be a non-zero positive " + "integer instead of {0}".format(numConfs)) AllChem.EmbedMultipleConfs(mol, numConfs, **rdkit_kwargs) return cls(mol, **kwargs) diff --git a/testsuite/MDAnalysisTests/coordinates/test_rdkit.py b/testsuite/MDAnalysisTests/coordinates/test_rdkit.py index e09ae616604..9cfe448d8fa 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_rdkit.py +++ b/testsuite/MDAnalysisTests/coordinates/test_rdkit.py @@ -35,30 +35,27 @@ class TestRDKitReader(object): @pytest.fixture - def mol(self): + def mol2_mol(self): return Chem.MolFromMol2File(mol2_molecule, removeHs=False) @pytest.fixture - def universe(self, mol): - return mda.Universe(mol) - - def test_coordinates(self, mol, universe): - expected = np.array([ - conf.GetPositions() for conf in mol.GetConformers()], - dtype=np.float32) - assert universe.trajectory.n_frames == 1 - assert_equal(expected, universe.trajectory.coordinate_array) - - def test_multi_coordinates(self): + def smiles_mol(self): mol = Chem.MolFromSmiles("CCO") mol = Chem.AddHs(mol) cids = AllChem.EmbedMultipleConfs(mol, numConfs=3) - umol = mda.Universe(mol) - assert umol.trajectory.n_frames == 3 + return mol + + @pytest.mark.parametrize("rdmol, n_frames", [ + (mol2_mol, 1), + (smiles_mol, 3), + ]) + def test_coordinates(self, rdmol, n_frames): + universe = mda.Universe(rdmol) + assert universe.trajectory.n_frames == n_frames expected = np.array([ - conf.GetPositions() for conf in mol.GetConformers()], + conf.GetPositions() for conf in rdmol.GetConformers()], dtype=np.float32) - assert_equal(expected, umol.trajectory.coordinate_array) + assert_equal(expected, universe.trajectory.coordinate_array) def test_no_coordinates(self): with warnings.catch_warnings(record=True) as w: @@ -74,7 +71,8 @@ def test_no_coordinates(self): expected[:] = np.nan assert_equal(u.trajectory.coordinate_array, expected) - def test_compare_mol2reader(self, universe): + def test_compare_mol2reader(self, mol2_mol): + universe = mda.Universe(mol2_mol) mol2 = mda.Universe(mol2_molecule) assert universe.trajectory.n_frames == mol2.trajectory.n_frames assert_equal(universe.trajectory.ts.positions, diff --git a/testsuite/MDAnalysisTests/core/test_atomselections.py b/testsuite/MDAnalysisTests/core/test_atomselections.py index c7779b17ea3..b06ea476efb 100644 --- a/testsuite/MDAnalysisTests/core/test_atomselections.py +++ b/testsuite/MDAnalysisTests/core/test_atomselections.py @@ -527,19 +527,15 @@ def u(self): generate_coordinates=False) return u - def test_aromatic(self, u): - sel = u.select_atoms("aromatic") - assert sel.n_atoms == 5 - - def test_not_aromatic(self, u): - sel = u.select_atoms("not aromatic") - assert sel.n_atoms == 1 - - def test_combine_aromatic(self, u): - sel = u.select_atoms("type N and aromatic") - assert sel.n_atoms == 1 - sel = u.select_atoms("type C and aromatic") - assert sel.n_atoms == 4 + @pytest.mark.parametrize("sel_str, n_atoms", [ + ("aromatic", 5), + ("not aromatic", 1), + ("type N and aromatic", 1), + ("type C and aromatic", 4), + ]) + def test_selection(self, u, sel_str, n_atoms): + sel = u.select_atoms(sel_str) + assert sel.n_atoms == n_atoms class TestSelectionsNucleicAcids(object): diff --git a/testsuite/MDAnalysisTests/core/test_universe.py b/testsuite/MDAnalysisTests/core/test_universe.py index 872d794ba30..fb371f70532 100644 --- a/testsuite/MDAnalysisTests/core/test_universe.py +++ b/testsuite/MDAnalysisTests/core/test_universe.py @@ -243,14 +243,35 @@ def test_gencoords_without_Hs_error(self): assert "requires adding hydrogens" in str (e.value) def test_generate_coordinates_numConfs(self): - with pytest.raises(AssertionError) as e: + with pytest.raises(SyntaxError) as e: u = mda.Universe.from_smiles("CCO", numConfs=0, format='RDKIT') assert "non-zero positive integer" in str (e.value) - with pytest.raises(AssertionError) as e: + with pytest.raises(SyntaxError) as e: u = mda.Universe.from_smiles("CCO", numConfs=2.1, format='RDKIT') assert "non-zero positive integer" in str (e.value) - def test_coordinates_SMILES(self): + def test_rdkit_kwargs(self): + # test for bad kwarg: + # Unfortunately, exceptions from Boost cannot be passed to python, + # we cannot `from Boost.Python import ArgumentError` and use it with + # pytest.raises(ArgumentError), so "this is the way" + try: + u = mda.Universe.from_smiles("CCO", rdkit_kwargs=dict(abc=42)) + except Exception as e: + assert "did not match C++ signature" in str(e) + else: + raise AssertionError("RDKit should have raised an ArgumentError " + "from Boost") + # good kwarg + u1 = mda.Universe.from_smiles("C", rdkit_kwargs=dict(randomSeed=42)) + u2 = mda.Universe.from_smiles("C", rdkit_kwargs=dict(randomSeed=51)) + with pytest.raises(AssertionError) as e: + assert_equal(u1.trajectory.coordinate_array, + u2.trajectory.coordinate_array) + assert "Mismatched elements: 15 / 15 (100%)" in str(e.value) + + + def test_coordinates(self): u = mda.Universe.from_smiles("C", numConfs=2, rdkit_kwargs=dict(randomSeed=42)) assert u.trajectory.n_frames == 2 From 0aff0f6263312a8f2500645cadc3362662bd7d3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Mon, 15 Jun 2020 19:21:58 +0200 Subject: [PATCH 34/39] fix using fixtures in parametrize --- testsuite/MDAnalysisTests/coordinates/test_rdkit.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/testsuite/MDAnalysisTests/coordinates/test_rdkit.py b/testsuite/MDAnalysisTests/coordinates/test_rdkit.py index 9cfe448d8fa..3513519b1db 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_rdkit.py +++ b/testsuite/MDAnalysisTests/coordinates/test_rdkit.py @@ -46,8 +46,8 @@ def smiles_mol(self): return mol @pytest.mark.parametrize("rdmol, n_frames", [ - (mol2_mol, 1), - (smiles_mol, 3), + (mol2_mol(), 1), + (smiles_mol(), 3), ]) def test_coordinates(self, rdmol, n_frames): universe = mda.Universe(rdmol) From f0497bfdf5f6af9039a29107c592c2a317b1032f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Tue, 16 Jun 2020 11:22:23 +0200 Subject: [PATCH 35/39] actual fix for coordinate tests --- .../MDAnalysisTests/coordinates/test_rdkit.py | 22 +++++++++---------- 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/testsuite/MDAnalysisTests/coordinates/test_rdkit.py b/testsuite/MDAnalysisTests/coordinates/test_rdkit.py index 3513519b1db..7bdb845f95a 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_rdkit.py +++ b/testsuite/MDAnalysisTests/coordinates/test_rdkit.py @@ -33,18 +33,16 @@ Chem = pytest.importorskip("rdkit.Chem") AllChem = pytest.importorskip("rdkit.Chem.AllChem") -class TestRDKitReader(object): - @pytest.fixture - def mol2_mol(self): - return Chem.MolFromMol2File(mol2_molecule, removeHs=False) +def mol2_mol(): + return Chem.MolFromMol2File(mol2_molecule, removeHs=False) - @pytest.fixture - def smiles_mol(self): - mol = Chem.MolFromSmiles("CCO") - mol = Chem.AddHs(mol) - cids = AllChem.EmbedMultipleConfs(mol, numConfs=3) - return mol +def smiles_mol(): + mol = Chem.MolFromSmiles("CCO") + mol = Chem.AddHs(mol) + cids = AllChem.EmbedMultipleConfs(mol, numConfs=3) + return mol +class TestRDKitReader(object): @pytest.mark.parametrize("rdmol, n_frames", [ (mol2_mol(), 1), (smiles_mol(), 3), @@ -71,8 +69,8 @@ def test_no_coordinates(self): expected[:] = np.nan assert_equal(u.trajectory.coordinate_array, expected) - def test_compare_mol2reader(self, mol2_mol): - universe = mda.Universe(mol2_mol) + def test_compare_mol2reader(self): + universe = mda.Universe(mol2_mol()) mol2 = mda.Universe(mol2_molecule) assert universe.trajectory.n_frames == mol2.trajectory.n_frames assert_equal(universe.trajectory.ts.positions, From 27558538a29b0ef01578a13c91c6607ec8f9e485 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Thu, 18 Jun 2020 20:15:52 +0200 Subject: [PATCH 36/39] fix the docs and style guide --- package/MDAnalysis/core/universe.py | 7 +++++-- package/MDAnalysis/topology/RDKitParser.py | 2 +- testsuite/MDAnalysisTests/core/test_universe.py | 1 - testsuite/MDAnalysisTests/topology/test_rdkit.py | 10 +++++----- 4 files changed, 11 insertions(+), 9 deletions(-) diff --git a/package/MDAnalysis/core/universe.py b/package/MDAnalysis/core/universe.py index ffffb1e5f02..3790232933a 100644 --- a/package/MDAnalysis/core/universe.py +++ b/package/MDAnalysis/core/universe.py @@ -1323,19 +1323,22 @@ def from_smiles(cls, smiles, sanitize=True, addHs=True, Examples -------- - To create a Universe with 10 conformers of ethanol :: + To create a Universe with 10 conformers of ethanol: + >>> u = mda.Universe.from_smiles('CCO', numConfs=10) >>> u >>> u.trajectory - To use a different conformer generation algorithm, like ETKDGv3 :: + To use a different conformer generation algorithm, like ETKDGv3: + >>> u = mda.Universe.from_smiles('CCO', rdkit_kwargs=dict( params=AllChem.ETKDGv3())) >>> u.trajectory + .. versionadded:: 1.0.0 """ try: diff --git a/package/MDAnalysis/topology/RDKitParser.py b/package/MDAnalysis/topology/RDKitParser.py index c46e5a856cc..dde92aede5d 100644 --- a/package/MDAnalysis/topology/RDKitParser.py +++ b/package/MDAnalysis/topology/RDKitParser.py @@ -75,7 +75,7 @@ class RDKitParser(TopologyReaderBase): """ For RDKit structures - Create the following Attributes: + Creates the following Attributes: - Atomids - Atomnames - Aromaticities diff --git a/testsuite/MDAnalysisTests/core/test_universe.py b/testsuite/MDAnalysisTests/core/test_universe.py index fb371f70532..630adfa7136 100644 --- a/testsuite/MDAnalysisTests/core/test_universe.py +++ b/testsuite/MDAnalysisTests/core/test_universe.py @@ -288,7 +288,6 @@ def test_coordinates(self): [ 1.0077721 , -0.10363862, -0.41727486]]], dtype=np.float32) assert_almost_equal(u.trajectory.coordinate_array, expected) - class TestUniverse(object): # older tests, still useful diff --git a/testsuite/MDAnalysisTests/topology/test_rdkit.py b/testsuite/MDAnalysisTests/topology/test_rdkit.py index 695cef5100c..1a4218e55e2 100644 --- a/testsuite/MDAnalysisTests/topology/test_rdkit.py +++ b/testsuite/MDAnalysisTests/topology/test_rdkit.py @@ -24,7 +24,7 @@ import warnings import pytest import numpy as np -from numpy.testing import assert_array_equal +from numpy.testing import assert_equal import MDAnalysis as mda from MDAnalysisTests.topology.base import ParserBase @@ -104,7 +104,7 @@ def test_multiple_charge_priority(self, expected = np.array([ a.GetDoubleProp('_GasteigerCharge') for a in filename_gasteiger.GetAtoms()], dtype=np.float32) - assert_array_equal(expected, top_gas_tripos.charges.values) + assert_equal(expected, top_gas_tripos.charges.values) def test_multiple_charge_props_warning(self): with warnings.catch_warnings(record=True) as w: @@ -122,18 +122,18 @@ def test_gasteiger_charges(self, top_gasteiger, filename_gasteiger): expected = np.array([ a.GetDoubleProp('_GasteigerCharge') for a in filename_gasteiger.GetAtoms()], dtype=np.float32) - assert_array_equal(expected, top_gasteiger.charges.values) + assert_equal(expected, top_gasteiger.charges.values) def test_tripos_charges(self, top, filename): expected = np.array([ a.GetDoubleProp('_TriposPartialCharge') for a in filename.GetAtoms() ], dtype=np.float32) - assert_array_equal(expected, top.charges.values) + assert_equal(expected, top.charges.values) def test_aromaticity(self, top, filename): expected = np.array([ atom.GetIsAromatic() for atom in filename.GetAtoms()]) - assert_array_equal(expected, top.aromaticities.values) + assert_equal(expected, top.aromaticities.values) class TestRDKitParserPDB(RDKitParserBase): From b611915eb8e07d6c04840967ab0384982357e409 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Fri, 19 Jun 2020 11:58:59 +0200 Subject: [PATCH 37/39] update docs and changelog for 2.0.0 --- package/CHANGELOG | 8 +++++++- package/MDAnalysis/coordinates/RDKit.py | 7 ++----- package/MDAnalysis/core/universe.py | 2 +- package/MDAnalysis/topology/RDKitParser.py | 9 ++++----- 4 files changed, 14 insertions(+), 12 deletions(-) diff --git a/package/CHANGELOG b/package/CHANGELOG index 82321851083..7149d7d8958 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -13,7 +13,7 @@ The rules for this file: * release numbers follow "Semantic Versioning" http://semver.org ------------------------------------------------------------------------------ -??/??/?? richardjgowers, IAlibay, hmacdope +??/??/?? richardjgowers, IAlibay, hmacdope, cbouy * 2.0.0 @@ -22,6 +22,12 @@ Fixes (Issues #2449, #2651) Enhancements + * Added the RDKitParser which creates a `core.topology.Topology` object from + an `rdkit.Chem.rdchem.Mol` object, and the RDKitReader to read coordinates + from RDKit conformers (Issue #2468, PR #2707) + * Added the `Aromaticities` topology attribute, and the `aromatic` selection + token (Issue #2468, PR #2707) + * Added the `from_smiles` classmethod to the Universe (Issue #2468, PR #2707) * Added computation of Mean Squared Displacements (#2438, PR #2619) Changes diff --git a/package/MDAnalysis/coordinates/RDKit.py b/package/MDAnalysis/coordinates/RDKit.py index 0d56d612ea7..3c64241d109 100644 --- a/package/MDAnalysis/coordinates/RDKit.py +++ b/package/MDAnalysis/coordinates/RDKit.py @@ -64,7 +64,7 @@ class RDKitReader(memory.MemoryReader): """Coordinate reader for RDKit. - .. versionadded:: 1.0.0 + .. versionadded:: 2.0.0 """ format = 'RDKIT' @@ -73,10 +73,7 @@ class RDKitReader(memory.MemoryReader): @staticmethod def _format_hint(thing): - """Can this reader read *thing*? - - .. versionadded:: 1.0.0 - """ + """Can this reader read *thing*?""" try: from rdkit import Chem except ImportError: diff --git a/package/MDAnalysis/core/universe.py b/package/MDAnalysis/core/universe.py index 3790232933a..b2562effb97 100644 --- a/package/MDAnalysis/core/universe.py +++ b/package/MDAnalysis/core/universe.py @@ -1339,7 +1339,7 @@ def from_smiles(cls, smiles, sanitize=True, addHs=True, - .. versionadded:: 1.0.0 + .. versionadded:: 2.0.0 """ try: from rdkit import Chem diff --git a/package/MDAnalysis/topology/RDKitParser.py b/package/MDAnalysis/topology/RDKitParser.py index dde92aede5d..e483ba667ea 100644 --- a/package/MDAnalysis/topology/RDKitParser.py +++ b/package/MDAnalysis/topology/RDKitParser.py @@ -96,16 +96,15 @@ class RDKitParser(TopologyReaderBase): - ChainIDs - Occupancies - Tempfactors - + + + .. versionadded:: 2.0.0 """ format = 'RDKIT' @staticmethod def _format_hint(thing): - """Can this Parser read object *thing*? - - .. versionadded:: 1.0.0 - """ + """Can this Parser read object *thing*?""" try: from rdkit import Chem except ImportError: # if no rdkit, probably not rdkit From 76fb209a88cd6e00be05a3c1b9e2658a2243f1ce Mon Sep 17 00:00:00 2001 From: Richard Gowers Date: Fri, 19 Jun 2020 07:27:31 -0700 Subject: [PATCH 38/39] Update selection.py --- package/MDAnalysis/core/selection.py | 1 - 1 file changed, 1 deletion(-) diff --git a/package/MDAnalysis/core/selection.py b/package/MDAnalysis/core/selection.py index 53d4663d7ef..2ade842c6eb 100644 --- a/package/MDAnalysis/core/selection.py +++ b/package/MDAnalysis/core/selection.py @@ -580,7 +580,6 @@ def __init__(self, parser, tokens): pass def apply(self, group): - mask = np.zeros(len(group), dtype=np.bool) mask = getattr(group, self.field, mask) return group[mask].unique From 13168ba0c21b2647d9864d9339d8f566922f4c8d Mon Sep 17 00:00:00 2001 From: Richard Gowers Date: Fri, 19 Jun 2020 15:43:41 +0100 Subject: [PATCH 39/39] Update selection.py --- package/MDAnalysis/core/selection.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/package/MDAnalysis/core/selection.py b/package/MDAnalysis/core/selection.py index 2ade842c6eb..ced2b91763a 100644 --- a/package/MDAnalysis/core/selection.py +++ b/package/MDAnalysis/core/selection.py @@ -580,8 +580,7 @@ def __init__(self, parser, tokens): pass def apply(self, group): - mask = getattr(group, self.field, mask) - return group[mask].unique + return group[group.aromaticities].unique class ResidSelection(Selection):