From 8ef63acbc1c6bade0b00ef0836e266ce9146ca64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Fri, 31 Jul 2020 17:38:24 +0200 Subject: [PATCH 1/9] add SMARTS selection --- package/MDAnalysis/core/groups.py | 9 +++++++ package/MDAnalysis/core/selection.py | 26 +++++++++++++++++++ .../core/test_atomselections.py | 23 ++++++++++++++-- 3 files changed, 56 insertions(+), 2 deletions(-) diff --git a/package/MDAnalysis/core/groups.py b/package/MDAnalysis/core/groups.py index ddcf9b792ee..8777928de2b 100644 --- a/package/MDAnalysis/core/groups.py +++ b/package/MDAnalysis/core/groups.py @@ -2574,6 +2574,8 @@ def select_atoms(self, sel, *othersel, **selgroups): force the selection to be re evaluated each time the Timestep of the trajectory is changed. See section on **Dynamic selections** below. [``True``] + smarts : bool (optional) + specify if the selection is a SMARTS query **selgroups : keyword arguments of str: AtomGroup (optional) when using the "group" keyword in selections, groups are defined by passing them as keyword arguments. See section on **preexisting @@ -2835,6 +2837,8 @@ def select_atoms(self, sel, *othersel, **selgroups): equivalent ``global group`` selection. Removed flags affecting default behaviour for periodic selections; periodic are now on by default (as with default flags) + .. versionchanged:: 2.0.0 + Added smarts kwarg (default False) """ if not sel: @@ -2845,6 +2849,8 @@ def select_atoms(self, sel, *othersel, **selgroups): periodic = selgroups.pop('periodic', True) updating = selgroups.pop('updating', False) + + smarts = selgroups.pop('smarts', False) sel_strs = (sel,) + othersel for group, thing in selgroups.items(): @@ -2853,6 +2859,9 @@ def select_atoms(self, sel, *othersel, **selgroups): "You provided {} for group '{}'".format( thing.__class__.__name__, group)) + if smarts: + return selection.SmartsSelection(sel_strs).apply(self) + selections = tuple((selection.Parser.parse(s, selgroups, periodic=periodic) for s in sel_strs)) if updating: diff --git a/package/MDAnalysis/core/selection.py b/package/MDAnalysis/core/selection.py index bdb156ff249..f55abc239dd 100644 --- a/package/MDAnalysis/core/selection.py +++ b/package/MDAnalysis/core/selection.py @@ -600,6 +600,32 @@ def apply(self, group): return group[group.aromaticities].unique +class SmartsSelection(Selection): + """Select atoms based on SMARTS queries""" + def __init__(self, sel_strs): + self.sel_strs = sel_strs + + def apply(self, group): + try: + from rdkit import Chem + except ImportError: + raise ImportError("RDKit is required for SMARTS-based atom " + "selection but it's not installed. Try " + "installing it with \n" + "conda install -c conda-forge rdkit") + mol = group.convert_to("RDKIT") + indices = [] + for pattern in self.sel_strs: + pattern = Chem.MolFromSmarts(pattern) + matches = mol.GetSubstructMatches(pattern) + if matches: + indices.extend([idx for match in matches for idx in match]) + indices = [mol.GetAtomWithIdx(i).GetIntProp("_MDAnalysis_index") + for i in indices] + mask = np.in1d(range(group.n_atoms), np.unique(indices)) + return group[mask] + + class ResidSelection(Selection): """Select atoms based on numerical fields diff --git a/testsuite/MDAnalysisTests/core/test_atomselections.py b/testsuite/MDAnalysisTests/core/test_atomselections.py index b7d0f515b7f..700b6116615 100644 --- a/testsuite/MDAnalysisTests/core/test_atomselections.py +++ b/testsuite/MDAnalysisTests/core/test_atomselections.py @@ -42,6 +42,7 @@ TRZ_psf, TRZ, PDB_icodes, PDB_HOLE, + PDB_helix, ) from MDAnalysisTests import make_Universe @@ -526,17 +527,35 @@ def u(self): u = MDAnalysis.Universe.from_smiles(smi, addHs=False, generate_coordinates=False) return u - + + @pytest.fixture + def u2(self): + u = MDAnalysis.Universe.from_smiles("[O-]C(C=O)Cc1cNc(N)c1", + generate_coordinates=False) + return u + @pytest.mark.parametrize("sel_str, n_atoms", [ ("aromatic", 5), ("not aromatic", 1), ("type N and aromatic", 1), ("type C and aromatic", 4), ]) - def test_selection(self, u, sel_str, n_atoms): + def test_aromatic_selection(self, u, sel_str, n_atoms): sel = u.select_atoms(sel_str) assert sel.n_atoms == n_atoms + @pytest.mark.parametrize("sel_str, n_atoms", [ + ("n", 1), + ("[#7]", 2), + ("a", 5), + ("c", 4), + ("[*-]", 1), + ("[$([!#1]);$([!R][R])]", 2), + ]) + def test_smarts_selection(self, u2, sel_str, n_atoms): + sel = u2.select_atoms(sel_str, smarts=True) + assert sel.n_atoms == n_atoms + class TestSelectionsNucleicAcids(object): @pytest.fixture(scope='class') From ecd95e59870dba53146fe8dadcda875f50da78ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Fri, 31 Jul 2020 17:48:31 +0200 Subject: [PATCH 2/9] pep8 --- testsuite/MDAnalysisTests/core/test_atomselections.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testsuite/MDAnalysisTests/core/test_atomselections.py b/testsuite/MDAnalysisTests/core/test_atomselections.py index 700b6116615..0257557b59b 100644 --- a/testsuite/MDAnalysisTests/core/test_atomselections.py +++ b/testsuite/MDAnalysisTests/core/test_atomselections.py @@ -530,7 +530,7 @@ def u(self): @pytest.fixture def u2(self): - u = MDAnalysis.Universe.from_smiles("[O-]C(C=O)Cc1cNc(N)c1", + u = MDAnalysis.Universe.from_smiles("[O-]C(C=O)Cc1cNc(N)c1", generate_coordinates=False) return u From 517f9a5b422c96b6be1b8f847fe2a2ba71b3479b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Fri, 31 Jul 2020 19:54:46 +0200 Subject: [PATCH 3/9] add 'smarts' token to selections --- package/MDAnalysis/core/groups.py | 54 +++---------------- package/MDAnalysis/core/selection.py | 22 +++++--- .../core/test_atomselections.py | 16 +++--- 3 files changed, 30 insertions(+), 62 deletions(-) diff --git a/package/MDAnalysis/core/groups.py b/package/MDAnalysis/core/groups.py index 8777928de2b..ae14ed1674a 100644 --- a/package/MDAnalysis/core/groups.py +++ b/package/MDAnalysis/core/groups.py @@ -103,6 +103,7 @@ from ..lib import distances from ..lib import transformations from ..lib import mdamath +from ..converters.accessors import Accessor, ConverterWrapper from ..selections import get_writer as get_selection_writer_for from . import selection from ..exceptions import NoDataError @@ -2574,8 +2575,6 @@ def select_atoms(self, sel, *othersel, **selgroups): force the selection to be re evaluated each time the Timestep of the trajectory is changed. See section on **Dynamic selections** below. [``True``] - smarts : bool (optional) - specify if the selection is a SMARTS query **selgroups : keyword arguments of str: AtomGroup (optional) when using the "group" keyword in selections, groups are defined by passing them as keyword arguments. See section on **preexisting @@ -2691,6 +2690,10 @@ def select_atoms(self, sel, *othersel, **selgroups): record_type *record_type* for selecting either ATOM or HETATM from PDB-like files. e.g. ``select_atoms('name CA and not record_type HETATM')`` + smarts *SMARTS-query* + select atoms using Daylight's SMARTS queries, e.g. ``smarts + [#7;R]`` to find nitrogen atoms in rings. Restricted to 1000 + matches. **Boolean** @@ -2838,7 +2841,7 @@ def select_atoms(self, sel, *othersel, **selgroups): Removed flags affecting default behaviour for periodic selections; periodic are now on by default (as with default flags) .. versionchanged:: 2.0.0 - Added smarts kwarg (default False) + Added the *smarts* selection. """ if not sel: @@ -2850,7 +2853,6 @@ def select_atoms(self, sel, *othersel, **selgroups): updating = selgroups.pop('updating', False) - smarts = selgroups.pop('smarts', False) sel_strs = (sel,) + othersel for group, thing in selgroups.items(): @@ -2859,9 +2861,6 @@ def select_atoms(self, sel, *othersel, **selgroups): "You provided {} for group '{}'".format( thing.__class__.__name__, group)) - if smarts: - return selection.SmartsSelection(sel_strs).apply(self) - selections = tuple((selection.Parser.parse(s, selgroups, periodic=periodic) for s in sel_strs)) if updating: @@ -3060,46 +3059,7 @@ def cmap(self): "cmap only makes sense for a group with exactly 5 atoms") return topologyobjects.CMap(self.ix, self.universe) - def convert_to(self, package): - """ - Convert :class:`AtomGroup` to a structure from another Python package. - - Example - ------- - - The code below converts a Universe to a :class:`parmed.structure.Structure`. - - .. code-block:: python - - >>> import MDAnalysis as mda - >>> from MDAnalysis.tests.datafiles import GRO - >>> u = mda.Universe(GRO) - >>> parmed_structure = u.atoms.convert_to('PARMED') - >>> parmed_structure - - - - Parameters - ---------- - package: str - The name of the package to convert to, e.g. ``"PARMED"`` - - - Returns - ------- - output: - An instance of the structure type from another package. - - Raises - ------ - TypeError: - No converter was found for the required package - - - .. versionadded:: 1.0.0 - """ - converter = get_converter_for(package) - return converter().convert(self.atoms) + convert_to = Accessor(ConverterWrapper) def write(self, filename=None, file_format=None, filenamefmt="{trjname}_{frame}", frames=None, **kwargs): diff --git a/package/MDAnalysis/core/selection.py b/package/MDAnalysis/core/selection.py index f55abc239dd..44a70fa84ca 100644 --- a/package/MDAnalysis/core/selection.py +++ b/package/MDAnalysis/core/selection.py @@ -602,8 +602,14 @@ def apply(self, group): class SmartsSelection(Selection): """Select atoms based on SMARTS queries""" - def __init__(self, sel_strs): - self.sel_strs = sel_strs + token = 'smarts' + + def __init__(self, parser, tokens): + pattern = [] + while not is_keyword(tokens[0]) or tokens[0] in ["(", ")"]: + val = tokens.popleft() + pattern.append(val) + self.pattern = "".join(pattern) def apply(self, group): try: @@ -613,13 +619,13 @@ def apply(self, group): "selection but it's not installed. Try " "installing it with \n" "conda install -c conda-forge rdkit") + pattern = Chem.MolFromSmarts(self.pattern) + if not pattern: + raise ValueError("{!r} is not a valid SMARTS query".format( + self.pattern)) mol = group.convert_to("RDKIT") - indices = [] - for pattern in self.sel_strs: - pattern = Chem.MolFromSmarts(pattern) - matches = mol.GetSubstructMatches(pattern) - if matches: - indices.extend([idx for match in matches for idx in match]) + matches = mol.GetSubstructMatches(pattern, useChirality=True) + indices = [idx for match in matches for idx in match] indices = [mol.GetAtomWithIdx(i).GetIntProp("_MDAnalysis_index") for i in indices] mask = np.in1d(range(group.n_atoms), np.unique(indices)) diff --git a/testsuite/MDAnalysisTests/core/test_atomselections.py b/testsuite/MDAnalysisTests/core/test_atomselections.py index 0257557b59b..d7a7899e8e7 100644 --- a/testsuite/MDAnalysisTests/core/test_atomselections.py +++ b/testsuite/MDAnalysisTests/core/test_atomselections.py @@ -545,15 +545,17 @@ def test_aromatic_selection(self, u, sel_str, n_atoms): assert sel.n_atoms == n_atoms @pytest.mark.parametrize("sel_str, n_atoms", [ - ("n", 1), - ("[#7]", 2), - ("a", 5), - ("c", 4), - ("[*-]", 1), - ("[$([!#1]);$([!R][R])]", 2), + ("smarts n", 1), + ("smarts [#7]", 2), + ("smarts a", 5), + ("smarts c", 4), + ("smarts [*-]", 1), + ("smarts [$([!#1]);$([!R][R])]", 2), + ("smarts a and type C", 4), + ("smarts a and type N", 1), ]) def test_smarts_selection(self, u2, sel_str, n_atoms): - sel = u2.select_atoms(sel_str, smarts=True) + sel = u2.select_atoms(sel_str) assert sel.n_atoms == n_atoms From 51ed0590363d71f0db7cfeadd5b713f88ac639bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Fri, 31 Jul 2020 20:03:03 +0200 Subject: [PATCH 4/9] revert convert_to (oops) --- package/MDAnalysis/core/groups.py | 42 +++++++++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 2 deletions(-) diff --git a/package/MDAnalysis/core/groups.py b/package/MDAnalysis/core/groups.py index ae14ed1674a..495cf780c23 100644 --- a/package/MDAnalysis/core/groups.py +++ b/package/MDAnalysis/core/groups.py @@ -103,7 +103,6 @@ from ..lib import distances from ..lib import transformations from ..lib import mdamath -from ..converters.accessors import Accessor, ConverterWrapper from ..selections import get_writer as get_selection_writer_for from . import selection from ..exceptions import NoDataError @@ -3059,7 +3058,46 @@ def cmap(self): "cmap only makes sense for a group with exactly 5 atoms") return topologyobjects.CMap(self.ix, self.universe) - convert_to = Accessor(ConverterWrapper) + def convert_to(self, package): + """ + Convert :class:`AtomGroup` to a structure from another Python package. + + Example + ------- + + The code below converts a Universe to a :class:`parmed.structure.Structure`. + + .. code-block:: python + + >>> import MDAnalysis as mda + >>> from MDAnalysis.tests.datafiles import GRO + >>> u = mda.Universe(GRO) + >>> parmed_structure = u.atoms.convert_to('PARMED') + >>> parmed_structure + + + + Parameters + ---------- + package: str + The name of the package to convert to, e.g. ``"PARMED"`` + + + Returns + ------- + output: + An instance of the structure type from another package. + + Raises + ------ + TypeError: + No converter was found for the required package + + + .. versionadded:: 1.0.0 + """ + converter = get_converter_for(package) + return converter().convert(self.atoms) def write(self, filename=None, file_format=None, filenamefmt="{trjname}_{frame}", frames=None, **kwargs): From b9325124a7ee49cc9dd0b81e526cd81d04335386 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Tue, 4 Aug 2020 22:15:11 +0200 Subject: [PATCH 5/9] compatibility with grouping parentheses + documentation --- package/MDAnalysis/core/selection.py | 35 ++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/package/MDAnalysis/core/selection.py b/package/MDAnalysis/core/selection.py index 44a70fa84ca..555e503fe47 100644 --- a/package/MDAnalysis/core/selection.py +++ b/package/MDAnalysis/core/selection.py @@ -601,12 +601,35 @@ def apply(self, group): class SmartsSelection(Selection): - """Select atoms based on SMARTS queries""" + """Select atoms based on SMARTS queries. + + Uses RDKit to run the query and converts the result to MDAnalysis. + Supports chirality. + """ token = 'smarts' def __init__(self, parser, tokens): + # The parser will add spaces around parentheses and then split the + # selection based on spaces to create the tokens + # If the input SMARTS query contained parentheses, the query will be + # split because of that and we need to reconstruct it + # We also need to keep the parentheses that are not part of the smarts + # query intact pattern = [] - while not is_keyword(tokens[0]) or tokens[0] in ["(", ")"]: + counter = {"(": 0, ")": 0} + # loop until keyword but ignore parentheses as a keyword + while tokens[0] in ["(", ")"] or not is_keyword(tokens[0]): + # keep track of the number of open and closed parentheses + if tokens[0] in ["(", ")"]: + counter[tokens[0]] += 1 + # if the next char is a closing ")" but there's no corresponding + # open "(" then we've reached then end of the smarts query and + # the ")" is part of a grouping parenthesis + if tokens[1] == ")" and counter["("] != (counter[")"] + 1): + val = tokens.popleft() + pattern.append(val) + break + # add the token to the pattern and remove it from the tokens val = tokens.popleft() pattern.append(val) self.pattern = "".join(pattern) @@ -625,9 +648,11 @@ def apply(self, group): self.pattern)) mol = group.convert_to("RDKIT") matches = mol.GetSubstructMatches(pattern, useChirality=True) - indices = [idx for match in matches for idx in match] - indices = [mol.GetAtomWithIdx(i).GetIntProp("_MDAnalysis_index") - for i in indices] + # convert rdkit indices to mdanalysis' + indices = [ + mol.GetAtomWithIdx(idx).GetIntProp("_MDAnalysis_index") + for match in matches for idx in match] + # create boolean mask for atoms based on index mask = np.in1d(range(group.n_atoms), np.unique(indices)) return group[mask] From b1596b38f68a87d1d3bdff8fc29f43495472b258 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Tue, 4 Aug 2020 22:15:34 +0200 Subject: [PATCH 6/9] more tests --- testsuite/MDAnalysisTests/core/test_atomselections.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/testsuite/MDAnalysisTests/core/test_atomselections.py b/testsuite/MDAnalysisTests/core/test_atomselections.py index d7a7899e8e7..47407ee5846 100644 --- a/testsuite/MDAnalysisTests/core/test_atomselections.py +++ b/testsuite/MDAnalysisTests/core/test_atomselections.py @@ -530,8 +530,7 @@ def u(self): @pytest.fixture def u2(self): - u = MDAnalysis.Universe.from_smiles("[O-]C(C=O)Cc1cNc(N)c1", - generate_coordinates=False) + u = MDAnalysis.Universe.from_smiles("Nc1cc(C[C@H]([O-])C=O)c[nH]1") return u @pytest.mark.parametrize("sel_str, n_atoms", [ @@ -551,12 +550,19 @@ def test_aromatic_selection(self, u, sel_str, n_atoms): ("smarts c", 4), ("smarts [*-]", 1), ("smarts [$([!#1]);$([!R][R])]", 2), + ("smarts [$([C@H](-[CH2])(-[O-])-C=O)]", 1), + ("smarts [$([C@@H](-[CH2])(-[O-])-C=O)]", 0), ("smarts a and type C", 4), + ("(smarts a) and (type C)", 4), ("smarts a and type N", 1), ]) def test_smarts_selection(self, u2, sel_str, n_atoms): sel = u2.select_atoms(sel_str) assert sel.n_atoms == n_atoms + + def test_invalid_smarts_sel_raises_error(self, u2): + with pytest.raises(ValueError, match="not a valid SMARTS"): + u2.select_atoms("smarts foo") class TestSelectionsNucleicAcids(object): From 4c1bbfbcc3fc05da5ae1e0cdc4f3e3e3d1a61f59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Mon, 10 Aug 2020 16:28:32 +0200 Subject: [PATCH 7/9] fix smarts query from tokens --- package/MDAnalysis/core/selection.py | 4 ++-- testsuite/MDAnalysisTests/core/test_atomselections.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/package/MDAnalysis/core/selection.py b/package/MDAnalysis/core/selection.py index 555e503fe47..2f315adf01c 100644 --- a/package/MDAnalysis/core/selection.py +++ b/package/MDAnalysis/core/selection.py @@ -602,7 +602,7 @@ def apply(self, group): class SmartsSelection(Selection): """Select atoms based on SMARTS queries. - + Uses RDKit to run the query and converts the result to MDAnalysis. Supports chirality. """ @@ -625,7 +625,7 @@ def __init__(self, parser, tokens): # if the next char is a closing ")" but there's no corresponding # open "(" then we've reached then end of the smarts query and # the ")" is part of a grouping parenthesis - if tokens[1] == ")" and counter["("] != (counter[")"] + 1): + if tokens[1] == ")" and counter["("] < (counter[")"] + 1): val = tokens.popleft() pattern.append(val) break diff --git a/testsuite/MDAnalysisTests/core/test_atomselections.py b/testsuite/MDAnalysisTests/core/test_atomselections.py index 47407ee5846..f1080f9a1d3 100644 --- a/testsuite/MDAnalysisTests/core/test_atomselections.py +++ b/testsuite/MDAnalysisTests/core/test_atomselections.py @@ -559,7 +559,7 @@ def test_aromatic_selection(self, u, sel_str, n_atoms): def test_smarts_selection(self, u2, sel_str, n_atoms): sel = u2.select_atoms(sel_str) assert sel.n_atoms == n_atoms - + def test_invalid_smarts_sel_raises_error(self, u2): with pytest.raises(ValueError, match="not a valid SMARTS"): u2.select_atoms("smarts foo") From 55691558811a1091bd8ff7a1868bbde140f63d72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Tue, 18 Aug 2020 20:10:16 +0200 Subject: [PATCH 8/9] add doc in .rst --- package/doc/sphinx/source/documentation_pages/selections.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/package/doc/sphinx/source/documentation_pages/selections.rst b/package/doc/sphinx/source/documentation_pages/selections.rst index 80dc3aa8b60..e54a27ff592 100644 --- a/package/doc/sphinx/source/documentation_pages/selections.rst +++ b/package/doc/sphinx/source/documentation_pages/selections.rst @@ -99,6 +99,11 @@ moltype *molecule-type* select by molecule type, e.g. ``moltype Protein_A``. At the moment, only the TPR format defines the molecule type. +smarts *SMARTS-query* + select atoms using Daylight's SMARTS queries, e.g. ``smarts [#7;R]`` to + find nitrogen atoms in rings. Restricted to 1000 matches. + + Pattern matching ---------------- From 1d2bb91c088d43effbcc10f95abb5d8e246ea1f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Wed, 26 Aug 2020 18:51:53 +0200 Subject: [PATCH 9/9] code review --- package/MDAnalysis/core/groups.py | 5 ++-- package/MDAnalysis/core/selection.py | 15 ++++------ .../source/documentation_pages/selections.rst | 3 +- .../core/test_atomselections.py | 28 +++++++++---------- 4 files changed, 24 insertions(+), 27 deletions(-) diff --git a/package/MDAnalysis/core/groups.py b/package/MDAnalysis/core/groups.py index 495cf780c23..66414fa8f17 100644 --- a/package/MDAnalysis/core/groups.py +++ b/package/MDAnalysis/core/groups.py @@ -2691,8 +2691,8 @@ def select_atoms(self, sel, *othersel, **selgroups): e.g. ``select_atoms('name CA and not record_type HETATM')`` smarts *SMARTS-query* select atoms using Daylight's SMARTS queries, e.g. ``smarts - [#7;R]`` to find nitrogen atoms in rings. Restricted to 1000 - matches. + [#7;R]`` to find nitrogen atoms in rings. Requires RDKit. + All matches (max 1000) are combined as a unique match **Boolean** @@ -2851,7 +2851,6 @@ def select_atoms(self, sel, *othersel, **selgroups): periodic = selgroups.pop('periodic', True) updating = selgroups.pop('updating', False) - sel_strs = (sel,) + othersel for group, thing in selgroups.items(): diff --git a/package/MDAnalysis/core/selection.py b/package/MDAnalysis/core/selection.py index 2f315adf01c..c9de1a25862 100644 --- a/package/MDAnalysis/core/selection.py +++ b/package/MDAnalysis/core/selection.py @@ -622,13 +622,11 @@ def __init__(self, parser, tokens): # keep track of the number of open and closed parentheses if tokens[0] in ["(", ")"]: counter[tokens[0]] += 1 - # if the next char is a closing ")" but there's no corresponding - # open "(" then we've reached then end of the smarts query and - # the ")" is part of a grouping parenthesis - if tokens[1] == ")" and counter["("] < (counter[")"] + 1): - val = tokens.popleft() - pattern.append(val) - break + # if the char is a closing ")" but there's no corresponding + # open "(" then we've reached then end of the smarts query and + # the current token ")" is part of a grouping parenthesis + if tokens[0] == ")" and counter["("] < (counter[")"]): + break # add the token to the pattern and remove it from the tokens val = tokens.popleft() pattern.append(val) @@ -644,8 +642,7 @@ def apply(self, group): "conda install -c conda-forge rdkit") pattern = Chem.MolFromSmarts(self.pattern) if not pattern: - raise ValueError("{!r} is not a valid SMARTS query".format( - self.pattern)) + raise ValueError(f"{self.pattern!r} is not a valid SMARTS query") mol = group.convert_to("RDKIT") matches = mol.GetSubstructMatches(pattern, useChirality=True) # convert rdkit indices to mdanalysis' diff --git a/package/doc/sphinx/source/documentation_pages/selections.rst b/package/doc/sphinx/source/documentation_pages/selections.rst index e54a27ff592..88a06924998 100644 --- a/package/doc/sphinx/source/documentation_pages/selections.rst +++ b/package/doc/sphinx/source/documentation_pages/selections.rst @@ -101,7 +101,8 @@ moltype *molecule-type* smarts *SMARTS-query* select atoms using Daylight's SMARTS queries, e.g. ``smarts [#7;R]`` to - find nitrogen atoms in rings. Restricted to 1000 matches. + find nitrogen atoms in rings. Requires RDKit. All matches (max 1000) are + combined as a unique match. Pattern matching diff --git a/testsuite/MDAnalysisTests/core/test_atomselections.py b/testsuite/MDAnalysisTests/core/test_atomselections.py index f1080f9a1d3..cee4d63f315 100644 --- a/testsuite/MDAnalysisTests/core/test_atomselections.py +++ b/testsuite/MDAnalysisTests/core/test_atomselections.py @@ -543,22 +543,22 @@ def test_aromatic_selection(self, u, sel_str, n_atoms): sel = u.select_atoms(sel_str) assert sel.n_atoms == n_atoms - @pytest.mark.parametrize("sel_str, n_atoms", [ - ("smarts n", 1), - ("smarts [#7]", 2), - ("smarts a", 5), - ("smarts c", 4), - ("smarts [*-]", 1), - ("smarts [$([!#1]);$([!R][R])]", 2), - ("smarts [$([C@H](-[CH2])(-[O-])-C=O)]", 1), - ("smarts [$([C@@H](-[CH2])(-[O-])-C=O)]", 0), - ("smarts a and type C", 4), - ("(smarts a) and (type C)", 4), - ("smarts a and type N", 1), + @pytest.mark.parametrize("sel_str, indices", [ + ("smarts n", [10]), + ("smarts [#7]", [0, 10]), + ("smarts a", [1, 2, 3, 9, 10]), + ("smarts c", [1, 2, 3, 9]), + ("smarts [*-]", [6]), + ("smarts [$([!#1]);$([!R][R])]", [0, 4]), + ("smarts [$([C@H](-[CH2])(-[O-])-C=O)]", [5]), + ("smarts [$([C@@H](-[CH2])(-[O-])-C=O)]", []), + ("smarts a and type C", [1, 2, 3, 9]), + ("(smarts a) and (type C)", [1, 2, 3, 9]), + ("smarts a and type N", [10]), ]) - def test_smarts_selection(self, u2, sel_str, n_atoms): + def test_smarts_selection(self, u2, sel_str, indices): sel = u2.select_atoms(sel_str) - assert sel.n_atoms == n_atoms + assert_equal(sel.indices, indices) def test_invalid_smarts_sel_raises_error(self, u2): with pytest.raises(ValueError, match="not a valid SMARTS"):