From 8ef63acbc1c6bade0b00ef0836e266ce9146ca64 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= <cedric.bouysset@unice.fr>
Date: Fri, 31 Jul 2020 17:38:24 +0200
Subject: [PATCH 1/9] add SMARTS selection

---
 package/MDAnalysis/core/groups.py             |  9 +++++++
 package/MDAnalysis/core/selection.py          | 26 +++++++++++++++++++
 .../core/test_atomselections.py               | 23 ++++++++++++++--
 3 files changed, 56 insertions(+), 2 deletions(-)

diff --git a/package/MDAnalysis/core/groups.py b/package/MDAnalysis/core/groups.py
index ddcf9b792ee..8777928de2b 100644
--- a/package/MDAnalysis/core/groups.py
+++ b/package/MDAnalysis/core/groups.py
@@ -2574,6 +2574,8 @@ def select_atoms(self, sel, *othersel, **selgroups):
           force the selection to be re evaluated each time the Timestep of the
           trajectory is changed.  See section on **Dynamic selections** below.
           [``True``]
+        smarts : bool (optional)
+            specify if the selection is a SMARTS query
         **selgroups : keyword arguments of str: AtomGroup (optional)
           when using the "group" keyword in selections, groups are defined by
           passing them as keyword arguments.  See section on **preexisting
@@ -2835,6 +2837,8 @@ def select_atoms(self, sel, *othersel, **selgroups):
            equivalent ``global group`` selection.
            Removed flags affecting default behaviour for periodic selections;
            periodic are now on by default (as with default flags)
+        .. versionchanged:: 2.0.0
+            Added smarts kwarg (default False)
         """
 
         if not sel:
@@ -2845,6 +2849,8 @@ def select_atoms(self, sel, *othersel, **selgroups):
         periodic = selgroups.pop('periodic', True)
 
         updating = selgroups.pop('updating', False)
+
+        smarts = selgroups.pop('smarts', False)
         sel_strs = (sel,) + othersel
 
         for group, thing in selgroups.items():
@@ -2853,6 +2859,9 @@ def select_atoms(self, sel, *othersel, **selgroups):
                                 "You provided {} for group '{}'".format(
                                     thing.__class__.__name__, group))
 
+        if smarts:
+            return selection.SmartsSelection(sel_strs).apply(self)
+
         selections = tuple((selection.Parser.parse(s, selgroups, periodic=periodic)
                             for s in sel_strs))
         if updating:
diff --git a/package/MDAnalysis/core/selection.py b/package/MDAnalysis/core/selection.py
index bdb156ff249..f55abc239dd 100644
--- a/package/MDAnalysis/core/selection.py
+++ b/package/MDAnalysis/core/selection.py
@@ -600,6 +600,32 @@ def apply(self, group):
         return group[group.aromaticities].unique
 
 
+class SmartsSelection(Selection):
+    """Select atoms based on SMARTS queries"""
+    def __init__(self, sel_strs):
+        self.sel_strs = sel_strs
+
+    def apply(self, group):
+        try:
+            from rdkit import Chem
+        except ImportError:
+            raise ImportError("RDKit is required for SMARTS-based atom "
+                              "selection but it's not installed. Try "
+                              "installing it with \n"
+                              "conda install -c conda-forge rdkit")
+        mol = group.convert_to("RDKIT")
+        indices = []
+        for pattern in self.sel_strs:
+            pattern = Chem.MolFromSmarts(pattern)
+            matches = mol.GetSubstructMatches(pattern)
+            if matches:
+                indices.extend([idx for match in matches for idx in match])
+        indices = [mol.GetAtomWithIdx(i).GetIntProp("_MDAnalysis_index")
+                   for i in indices]
+        mask = np.in1d(range(group.n_atoms), np.unique(indices))
+        return group[mask]
+
+
 class ResidSelection(Selection):
     """Select atoms based on numerical fields
 
diff --git a/testsuite/MDAnalysisTests/core/test_atomselections.py b/testsuite/MDAnalysisTests/core/test_atomselections.py
index b7d0f515b7f..700b6116615 100644
--- a/testsuite/MDAnalysisTests/core/test_atomselections.py
+++ b/testsuite/MDAnalysisTests/core/test_atomselections.py
@@ -42,6 +42,7 @@
     TRZ_psf, TRZ,
     PDB_icodes,
     PDB_HOLE,
+    PDB_helix,
 )
 from MDAnalysisTests import make_Universe
 
@@ -526,17 +527,35 @@ def u(self):
         u = MDAnalysis.Universe.from_smiles(smi, addHs=False,
                                             generate_coordinates=False)
         return u
-        
+
+    @pytest.fixture
+    def u2(self):
+        u = MDAnalysis.Universe.from_smiles("[O-]C(C=O)Cc1cNc(N)c1", 
+                                            generate_coordinates=False)
+        return u
+
     @pytest.mark.parametrize("sel_str, n_atoms", [
         ("aromatic", 5),
         ("not aromatic", 1),
         ("type N and aromatic", 1),
         ("type C and aromatic", 4),
     ])
-    def test_selection(self, u, sel_str, n_atoms):
+    def test_aromatic_selection(self, u, sel_str, n_atoms):
         sel = u.select_atoms(sel_str)
         assert sel.n_atoms == n_atoms
 
+    @pytest.mark.parametrize("sel_str, n_atoms", [
+        ("n", 1),
+        ("[#7]", 2),
+        ("a", 5),
+        ("c", 4),
+        ("[*-]", 1),
+        ("[$([!#1]);$([!R][R])]", 2),
+    ])
+    def test_smarts_selection(self, u2, sel_str, n_atoms):
+        sel = u2.select_atoms(sel_str, smarts=True)
+        assert sel.n_atoms == n_atoms
+
 
 class TestSelectionsNucleicAcids(object):
     @pytest.fixture(scope='class')

From ecd95e59870dba53146fe8dadcda875f50da78ad Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= <cedric.bouysset@unice.fr>
Date: Fri, 31 Jul 2020 17:48:31 +0200
Subject: [PATCH 2/9] pep8

---
 testsuite/MDAnalysisTests/core/test_atomselections.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/testsuite/MDAnalysisTests/core/test_atomselections.py b/testsuite/MDAnalysisTests/core/test_atomselections.py
index 700b6116615..0257557b59b 100644
--- a/testsuite/MDAnalysisTests/core/test_atomselections.py
+++ b/testsuite/MDAnalysisTests/core/test_atomselections.py
@@ -530,7 +530,7 @@ def u(self):
 
     @pytest.fixture
     def u2(self):
-        u = MDAnalysis.Universe.from_smiles("[O-]C(C=O)Cc1cNc(N)c1", 
+        u = MDAnalysis.Universe.from_smiles("[O-]C(C=O)Cc1cNc(N)c1",
                                             generate_coordinates=False)
         return u
 

From 517f9a5b422c96b6be1b8f847fe2a2ba71b3479b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= <cedric.bouysset@unice.fr>
Date: Fri, 31 Jul 2020 19:54:46 +0200
Subject: [PATCH 3/9] add 'smarts' token to selections

---
 package/MDAnalysis/core/groups.py             | 54 +++----------------
 package/MDAnalysis/core/selection.py          | 22 +++++---
 .../core/test_atomselections.py               | 16 +++---
 3 files changed, 30 insertions(+), 62 deletions(-)

diff --git a/package/MDAnalysis/core/groups.py b/package/MDAnalysis/core/groups.py
index 8777928de2b..ae14ed1674a 100644
--- a/package/MDAnalysis/core/groups.py
+++ b/package/MDAnalysis/core/groups.py
@@ -103,6 +103,7 @@
 from ..lib import distances
 from ..lib import transformations
 from ..lib import mdamath
+from ..converters.accessors import Accessor, ConverterWrapper
 from ..selections import get_writer as get_selection_writer_for
 from . import selection
 from ..exceptions import NoDataError
@@ -2574,8 +2575,6 @@ def select_atoms(self, sel, *othersel, **selgroups):
           force the selection to be re evaluated each time the Timestep of the
           trajectory is changed.  See section on **Dynamic selections** below.
           [``True``]
-        smarts : bool (optional)
-            specify if the selection is a SMARTS query
         **selgroups : keyword arguments of str: AtomGroup (optional)
           when using the "group" keyword in selections, groups are defined by
           passing them as keyword arguments.  See section on **preexisting
@@ -2691,6 +2690,10 @@ def select_atoms(self, sel, *othersel, **selgroups):
             record_type *record_type*
                 for selecting either ATOM or HETATM from PDB-like files.
                 e.g. ``select_atoms('name CA and not record_type HETATM')``
+            smarts *SMARTS-query*
+                select atoms using Daylight's SMARTS queries, e.g. ``smarts
+                [#7;R]`` to find nitrogen atoms in rings. Restricted to 1000
+                matches.
 
         **Boolean**
 
@@ -2838,7 +2841,7 @@ def select_atoms(self, sel, *othersel, **selgroups):
            Removed flags affecting default behaviour for periodic selections;
            periodic are now on by default (as with default flags)
         .. versionchanged:: 2.0.0
-            Added smarts kwarg (default False)
+            Added the *smarts* selection.
         """
 
         if not sel:
@@ -2850,7 +2853,6 @@ def select_atoms(self, sel, *othersel, **selgroups):
 
         updating = selgroups.pop('updating', False)
 
-        smarts = selgroups.pop('smarts', False)
         sel_strs = (sel,) + othersel
 
         for group, thing in selgroups.items():
@@ -2859,9 +2861,6 @@ def select_atoms(self, sel, *othersel, **selgroups):
                                 "You provided {} for group '{}'".format(
                                     thing.__class__.__name__, group))
 
-        if smarts:
-            return selection.SmartsSelection(sel_strs).apply(self)
-
         selections = tuple((selection.Parser.parse(s, selgroups, periodic=periodic)
                             for s in sel_strs))
         if updating:
@@ -3060,46 +3059,7 @@ def cmap(self):
                 "cmap only makes sense for a group with exactly 5 atoms")
         return topologyobjects.CMap(self.ix, self.universe)
 
-    def convert_to(self, package):
-        """
-        Convert :class:`AtomGroup` to a structure from another Python package.
-
-        Example
-        -------
-
-        The code below converts a Universe to a :class:`parmed.structure.Structure`.
-
-        .. code-block:: python
-
-            >>> import MDAnalysis as mda
-            >>> from MDAnalysis.tests.datafiles import GRO
-            >>> u = mda.Universe(GRO)
-            >>> parmed_structure = u.atoms.convert_to('PARMED')
-            >>> parmed_structure
-            <Structure 47681 atoms; 11302 residues; 0 bonds; PBC (triclinic); NOT parametrized>
-
-
-        Parameters
-        ----------
-        package: str
-            The name of the package to convert to, e.g. ``"PARMED"``
-
-
-        Returns
-        -------
-        output:
-            An instance of the structure type from another package.
-
-        Raises
-        ------
-        TypeError:
-            No converter was found for the required package
-
-
-        .. versionadded:: 1.0.0
-        """
-        converter = get_converter_for(package)
-        return converter().convert(self.atoms)
+    convert_to = Accessor(ConverterWrapper)
 
     def write(self, filename=None, file_format=None,
               filenamefmt="{trjname}_{frame}", frames=None, **kwargs):
diff --git a/package/MDAnalysis/core/selection.py b/package/MDAnalysis/core/selection.py
index f55abc239dd..44a70fa84ca 100644
--- a/package/MDAnalysis/core/selection.py
+++ b/package/MDAnalysis/core/selection.py
@@ -602,8 +602,14 @@ def apply(self, group):
 
 class SmartsSelection(Selection):
     """Select atoms based on SMARTS queries"""
-    def __init__(self, sel_strs):
-        self.sel_strs = sel_strs
+    token = 'smarts'
+
+    def __init__(self, parser, tokens):
+        pattern = []
+        while not is_keyword(tokens[0]) or tokens[0] in ["(", ")"]:
+            val = tokens.popleft()
+            pattern.append(val)
+        self.pattern = "".join(pattern)
 
     def apply(self, group):
         try:
@@ -613,13 +619,13 @@ def apply(self, group):
                               "selection but it's not installed. Try "
                               "installing it with \n"
                               "conda install -c conda-forge rdkit")
+        pattern = Chem.MolFromSmarts(self.pattern)
+        if not pattern:
+            raise ValueError("{!r} is not a valid SMARTS query".format(
+                self.pattern))
         mol = group.convert_to("RDKIT")
-        indices = []
-        for pattern in self.sel_strs:
-            pattern = Chem.MolFromSmarts(pattern)
-            matches = mol.GetSubstructMatches(pattern)
-            if matches:
-                indices.extend([idx for match in matches for idx in match])
+        matches = mol.GetSubstructMatches(pattern, useChirality=True)
+        indices = [idx for match in matches for idx in match]
         indices = [mol.GetAtomWithIdx(i).GetIntProp("_MDAnalysis_index")
                    for i in indices]
         mask = np.in1d(range(group.n_atoms), np.unique(indices))
diff --git a/testsuite/MDAnalysisTests/core/test_atomselections.py b/testsuite/MDAnalysisTests/core/test_atomselections.py
index 0257557b59b..d7a7899e8e7 100644
--- a/testsuite/MDAnalysisTests/core/test_atomselections.py
+++ b/testsuite/MDAnalysisTests/core/test_atomselections.py
@@ -545,15 +545,17 @@ def test_aromatic_selection(self, u, sel_str, n_atoms):
         assert sel.n_atoms == n_atoms
 
     @pytest.mark.parametrize("sel_str, n_atoms", [
-        ("n", 1),
-        ("[#7]", 2),
-        ("a", 5),
-        ("c", 4),
-        ("[*-]", 1),
-        ("[$([!#1]);$([!R][R])]", 2),
+        ("smarts n", 1),
+        ("smarts [#7]", 2),
+        ("smarts a", 5),
+        ("smarts c", 4),
+        ("smarts [*-]", 1),
+        ("smarts [$([!#1]);$([!R][R])]", 2),
+        ("smarts a and type C", 4),
+        ("smarts a and type N", 1),
     ])
     def test_smarts_selection(self, u2, sel_str, n_atoms):
-        sel = u2.select_atoms(sel_str, smarts=True)
+        sel = u2.select_atoms(sel_str)
         assert sel.n_atoms == n_atoms
 
 

From 51ed0590363d71f0db7cfeadd5b713f88ac639bb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= <cedric.bouysset@unice.fr>
Date: Fri, 31 Jul 2020 20:03:03 +0200
Subject: [PATCH 4/9] revert convert_to (oops)

---
 package/MDAnalysis/core/groups.py | 42 +++++++++++++++++++++++++++++--
 1 file changed, 40 insertions(+), 2 deletions(-)

diff --git a/package/MDAnalysis/core/groups.py b/package/MDAnalysis/core/groups.py
index ae14ed1674a..495cf780c23 100644
--- a/package/MDAnalysis/core/groups.py
+++ b/package/MDAnalysis/core/groups.py
@@ -103,7 +103,6 @@
 from ..lib import distances
 from ..lib import transformations
 from ..lib import mdamath
-from ..converters.accessors import Accessor, ConverterWrapper
 from ..selections import get_writer as get_selection_writer_for
 from . import selection
 from ..exceptions import NoDataError
@@ -3059,7 +3058,46 @@ def cmap(self):
                 "cmap only makes sense for a group with exactly 5 atoms")
         return topologyobjects.CMap(self.ix, self.universe)
 
-    convert_to = Accessor(ConverterWrapper)
+    def convert_to(self, package):
+        """
+        Convert :class:`AtomGroup` to a structure from another Python package.
+
+        Example
+        -------
+
+        The code below converts a Universe to a :class:`parmed.structure.Structure`.
+
+        .. code-block:: python
+
+            >>> import MDAnalysis as mda
+            >>> from MDAnalysis.tests.datafiles import GRO
+            >>> u = mda.Universe(GRO)
+            >>> parmed_structure = u.atoms.convert_to('PARMED')
+            >>> parmed_structure
+            <Structure 47681 atoms; 11302 residues; 0 bonds; PBC (triclinic); NOT parametrized>
+
+
+        Parameters
+        ----------
+        package: str
+            The name of the package to convert to, e.g. ``"PARMED"``
+
+
+        Returns
+        -------
+        output:
+            An instance of the structure type from another package.
+
+        Raises
+        ------
+        TypeError:
+            No converter was found for the required package
+
+
+        .. versionadded:: 1.0.0
+        """
+        converter = get_converter_for(package)
+        return converter().convert(self.atoms)
 
     def write(self, filename=None, file_format=None,
               filenamefmt="{trjname}_{frame}", frames=None, **kwargs):

From b9325124a7ee49cc9dd0b81e526cd81d04335386 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= <cedric.bouysset@unice.fr>
Date: Tue, 4 Aug 2020 22:15:11 +0200
Subject: [PATCH 5/9] compatibility with grouping parentheses + documentation

---
 package/MDAnalysis/core/selection.py | 35 ++++++++++++++++++++++++----
 1 file changed, 30 insertions(+), 5 deletions(-)

diff --git a/package/MDAnalysis/core/selection.py b/package/MDAnalysis/core/selection.py
index 44a70fa84ca..555e503fe47 100644
--- a/package/MDAnalysis/core/selection.py
+++ b/package/MDAnalysis/core/selection.py
@@ -601,12 +601,35 @@ def apply(self, group):
 
 
 class SmartsSelection(Selection):
-    """Select atoms based on SMARTS queries"""
+    """Select atoms based on SMARTS queries.
+    
+    Uses RDKit to run the query and converts the result to MDAnalysis.
+    Supports chirality.
+    """
     token = 'smarts'
 
     def __init__(self, parser, tokens):
+        # The parser will add spaces around parentheses and then split the
+        # selection based on spaces to create the tokens
+        # If the input SMARTS query contained parentheses, the query will be
+        # split because of that and we need to reconstruct it
+        # We also need to keep the parentheses that are not part of the smarts
+        # query intact
         pattern = []
-        while not is_keyword(tokens[0]) or tokens[0] in ["(", ")"]:
+        counter = {"(": 0, ")": 0}
+        # loop until keyword but ignore parentheses as a keyword
+        while tokens[0] in ["(", ")"] or not is_keyword(tokens[0]):
+            # keep track of the number of open and closed parentheses
+            if tokens[0] in ["(", ")"]:
+                counter[tokens[0]] += 1
+            # if the next char is a closing ")" but there's no corresponding
+            # open "(" then we've reached then end of the smarts query and
+            # the ")" is part of a grouping parenthesis
+            if tokens[1] == ")" and counter["("] != (counter[")"] + 1):
+                val = tokens.popleft()
+                pattern.append(val)
+                break
+            # add the token to the pattern and remove it from the tokens
             val = tokens.popleft()
             pattern.append(val)
         self.pattern = "".join(pattern)
@@ -625,9 +648,11 @@ def apply(self, group):
                 self.pattern))
         mol = group.convert_to("RDKIT")
         matches = mol.GetSubstructMatches(pattern, useChirality=True)
-        indices = [idx for match in matches for idx in match]
-        indices = [mol.GetAtomWithIdx(i).GetIntProp("_MDAnalysis_index")
-                   for i in indices]
+        # convert rdkit indices to mdanalysis'
+        indices = [
+            mol.GetAtomWithIdx(idx).GetIntProp("_MDAnalysis_index")
+            for match in matches for idx in match]
+        # create boolean mask for atoms based on index
         mask = np.in1d(range(group.n_atoms), np.unique(indices))
         return group[mask]
 

From b1596b38f68a87d1d3bdff8fc29f43495472b258 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= <cedric.bouysset@unice.fr>
Date: Tue, 4 Aug 2020 22:15:34 +0200
Subject: [PATCH 6/9] more tests

---
 testsuite/MDAnalysisTests/core/test_atomselections.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/testsuite/MDAnalysisTests/core/test_atomselections.py b/testsuite/MDAnalysisTests/core/test_atomselections.py
index d7a7899e8e7..47407ee5846 100644
--- a/testsuite/MDAnalysisTests/core/test_atomselections.py
+++ b/testsuite/MDAnalysisTests/core/test_atomselections.py
@@ -530,8 +530,7 @@ def u(self):
 
     @pytest.fixture
     def u2(self):
-        u = MDAnalysis.Universe.from_smiles("[O-]C(C=O)Cc1cNc(N)c1",
-                                            generate_coordinates=False)
+        u = MDAnalysis.Universe.from_smiles("Nc1cc(C[C@H]([O-])C=O)c[nH]1")
         return u
 
     @pytest.mark.parametrize("sel_str, n_atoms", [
@@ -551,12 +550,19 @@ def test_aromatic_selection(self, u, sel_str, n_atoms):
         ("smarts c", 4),
         ("smarts [*-]", 1),
         ("smarts [$([!#1]);$([!R][R])]", 2),
+        ("smarts [$([C@H](-[CH2])(-[O-])-C=O)]", 1),
+        ("smarts [$([C@@H](-[CH2])(-[O-])-C=O)]", 0),
         ("smarts a and type C", 4),
+        ("(smarts a) and (type C)", 4),
         ("smarts a and type N", 1),
     ])
     def test_smarts_selection(self, u2, sel_str, n_atoms):
         sel = u2.select_atoms(sel_str)
         assert sel.n_atoms == n_atoms
+    
+    def test_invalid_smarts_sel_raises_error(self, u2):
+        with pytest.raises(ValueError, match="not a valid SMARTS"):
+            u2.select_atoms("smarts foo")
 
 
 class TestSelectionsNucleicAcids(object):

From 4c1bbfbcc3fc05da5ae1e0cdc4f3e3e3d1a61f59 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= <cedric.bouysset@unice.fr>
Date: Mon, 10 Aug 2020 16:28:32 +0200
Subject: [PATCH 7/9] fix smarts query from tokens

---
 package/MDAnalysis/core/selection.py                  | 4 ++--
 testsuite/MDAnalysisTests/core/test_atomselections.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/package/MDAnalysis/core/selection.py b/package/MDAnalysis/core/selection.py
index 555e503fe47..2f315adf01c 100644
--- a/package/MDAnalysis/core/selection.py
+++ b/package/MDAnalysis/core/selection.py
@@ -602,7 +602,7 @@ def apply(self, group):
 
 class SmartsSelection(Selection):
     """Select atoms based on SMARTS queries.
-    
+
     Uses RDKit to run the query and converts the result to MDAnalysis.
     Supports chirality.
     """
@@ -625,7 +625,7 @@ def __init__(self, parser, tokens):
             # if the next char is a closing ")" but there's no corresponding
             # open "(" then we've reached then end of the smarts query and
             # the ")" is part of a grouping parenthesis
-            if tokens[1] == ")" and counter["("] != (counter[")"] + 1):
+            if tokens[1] == ")" and counter["("] < (counter[")"] + 1):
                 val = tokens.popleft()
                 pattern.append(val)
                 break
diff --git a/testsuite/MDAnalysisTests/core/test_atomselections.py b/testsuite/MDAnalysisTests/core/test_atomselections.py
index 47407ee5846..f1080f9a1d3 100644
--- a/testsuite/MDAnalysisTests/core/test_atomselections.py
+++ b/testsuite/MDAnalysisTests/core/test_atomselections.py
@@ -559,7 +559,7 @@ def test_aromatic_selection(self, u, sel_str, n_atoms):
     def test_smarts_selection(self, u2, sel_str, n_atoms):
         sel = u2.select_atoms(sel_str)
         assert sel.n_atoms == n_atoms
-    
+
     def test_invalid_smarts_sel_raises_error(self, u2):
         with pytest.raises(ValueError, match="not a valid SMARTS"):
             u2.select_atoms("smarts foo")

From 55691558811a1091bd8ff7a1868bbde140f63d72 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= <cedric.bouysset@unice.fr>
Date: Tue, 18 Aug 2020 20:10:16 +0200
Subject: [PATCH 8/9] add doc in .rst

---
 package/doc/sphinx/source/documentation_pages/selections.rst | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/package/doc/sphinx/source/documentation_pages/selections.rst b/package/doc/sphinx/source/documentation_pages/selections.rst
index 80dc3aa8b60..e54a27ff592 100644
--- a/package/doc/sphinx/source/documentation_pages/selections.rst
+++ b/package/doc/sphinx/source/documentation_pages/selections.rst
@@ -99,6 +99,11 @@ moltype *molecule-type*
     select by molecule type, e.g. ``moltype Protein_A``. At the moment, only
     the TPR format defines the molecule type.
 
+smarts *SMARTS-query*
+    select atoms using Daylight's SMARTS queries, e.g. ``smarts [#7;R]`` to
+    find nitrogen atoms in rings. Restricted to 1000 matches.
+
+
 Pattern matching
 ----------------
 

From 1d2bb91c088d43effbcc10f95abb5d8e246ea1f5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= <cedric.bouysset@unice.fr>
Date: Wed, 26 Aug 2020 18:51:53 +0200
Subject: [PATCH 9/9] code review

---
 package/MDAnalysis/core/groups.py             |  5 ++--
 package/MDAnalysis/core/selection.py          | 15 ++++------
 .../source/documentation_pages/selections.rst |  3 +-
 .../core/test_atomselections.py               | 28 +++++++++----------
 4 files changed, 24 insertions(+), 27 deletions(-)

diff --git a/package/MDAnalysis/core/groups.py b/package/MDAnalysis/core/groups.py
index 495cf780c23..66414fa8f17 100644
--- a/package/MDAnalysis/core/groups.py
+++ b/package/MDAnalysis/core/groups.py
@@ -2691,8 +2691,8 @@ def select_atoms(self, sel, *othersel, **selgroups):
                 e.g. ``select_atoms('name CA and not record_type HETATM')``
             smarts *SMARTS-query*
                 select atoms using Daylight's SMARTS queries, e.g. ``smarts
-                [#7;R]`` to find nitrogen atoms in rings. Restricted to 1000
-                matches.
+                [#7;R]`` to find nitrogen atoms in rings. Requires RDKit.
+                All matches (max 1000) are combined as a unique match
 
         **Boolean**
 
@@ -2851,7 +2851,6 @@ def select_atoms(self, sel, *othersel, **selgroups):
         periodic = selgroups.pop('periodic', True)
 
         updating = selgroups.pop('updating', False)
-
         sel_strs = (sel,) + othersel
 
         for group, thing in selgroups.items():
diff --git a/package/MDAnalysis/core/selection.py b/package/MDAnalysis/core/selection.py
index 2f315adf01c..c9de1a25862 100644
--- a/package/MDAnalysis/core/selection.py
+++ b/package/MDAnalysis/core/selection.py
@@ -622,13 +622,11 @@ def __init__(self, parser, tokens):
             # keep track of the number of open and closed parentheses
             if tokens[0] in ["(", ")"]:
                 counter[tokens[0]] += 1
-            # if the next char is a closing ")" but there's no corresponding
-            # open "(" then we've reached then end of the smarts query and
-            # the ")" is part of a grouping parenthesis
-            if tokens[1] == ")" and counter["("] < (counter[")"] + 1):
-                val = tokens.popleft()
-                pattern.append(val)
-                break
+                # if the char is a closing ")" but there's no corresponding
+                # open "(" then we've reached then end of the smarts query and
+                # the current token ")" is part of a grouping parenthesis
+                if tokens[0] == ")" and counter["("] < (counter[")"]):
+                    break
             # add the token to the pattern and remove it from the tokens
             val = tokens.popleft()
             pattern.append(val)
@@ -644,8 +642,7 @@ def apply(self, group):
                               "conda install -c conda-forge rdkit")
         pattern = Chem.MolFromSmarts(self.pattern)
         if not pattern:
-            raise ValueError("{!r} is not a valid SMARTS query".format(
-                self.pattern))
+            raise ValueError(f"{self.pattern!r} is not a valid SMARTS query")
         mol = group.convert_to("RDKIT")
         matches = mol.GetSubstructMatches(pattern, useChirality=True)
         # convert rdkit indices to mdanalysis'
diff --git a/package/doc/sphinx/source/documentation_pages/selections.rst b/package/doc/sphinx/source/documentation_pages/selections.rst
index e54a27ff592..88a06924998 100644
--- a/package/doc/sphinx/source/documentation_pages/selections.rst
+++ b/package/doc/sphinx/source/documentation_pages/selections.rst
@@ -101,7 +101,8 @@ moltype *molecule-type*
 
 smarts *SMARTS-query*
     select atoms using Daylight's SMARTS queries, e.g. ``smarts [#7;R]`` to
-    find nitrogen atoms in rings. Restricted to 1000 matches.
+    find nitrogen atoms in rings. Requires RDKit. All matches (max 1000) are
+    combined as a unique match.
 
 
 Pattern matching
diff --git a/testsuite/MDAnalysisTests/core/test_atomselections.py b/testsuite/MDAnalysisTests/core/test_atomselections.py
index f1080f9a1d3..cee4d63f315 100644
--- a/testsuite/MDAnalysisTests/core/test_atomselections.py
+++ b/testsuite/MDAnalysisTests/core/test_atomselections.py
@@ -543,22 +543,22 @@ def test_aromatic_selection(self, u, sel_str, n_atoms):
         sel = u.select_atoms(sel_str)
         assert sel.n_atoms == n_atoms
 
-    @pytest.mark.parametrize("sel_str, n_atoms", [
-        ("smarts n", 1),
-        ("smarts [#7]", 2),
-        ("smarts a", 5),
-        ("smarts c", 4),
-        ("smarts [*-]", 1),
-        ("smarts [$([!#1]);$([!R][R])]", 2),
-        ("smarts [$([C@H](-[CH2])(-[O-])-C=O)]", 1),
-        ("smarts [$([C@@H](-[CH2])(-[O-])-C=O)]", 0),
-        ("smarts a and type C", 4),
-        ("(smarts a) and (type C)", 4),
-        ("smarts a and type N", 1),
+    @pytest.mark.parametrize("sel_str, indices", [
+        ("smarts n", [10]),
+        ("smarts [#7]", [0, 10]),
+        ("smarts a", [1, 2, 3, 9, 10]),
+        ("smarts c", [1, 2, 3, 9]),
+        ("smarts [*-]", [6]),
+        ("smarts [$([!#1]);$([!R][R])]", [0, 4]),
+        ("smarts [$([C@H](-[CH2])(-[O-])-C=O)]", [5]),
+        ("smarts [$([C@@H](-[CH2])(-[O-])-C=O)]", []),
+        ("smarts a and type C", [1, 2, 3, 9]),
+        ("(smarts a) and (type C)", [1, 2, 3, 9]),
+        ("smarts a and type N", [10]),
     ])
-    def test_smarts_selection(self, u2, sel_str, n_atoms):
+    def test_smarts_selection(self, u2, sel_str, indices):
         sel = u2.select_atoms(sel_str)
-        assert sel.n_atoms == n_atoms
+        assert_equal(sel.indices, indices)
 
     def test_invalid_smarts_sel_raises_error(self, u2):
         with pytest.raises(ValueError, match="not a valid SMARTS"):