From 7a57af161b6a636cea8a778ca6a5a8927f483d45 Mon Sep 17 00:00:00 2001 From: orioncohen Date: Fri, 2 Apr 2021 16:49:23 -0700 Subject: [PATCH 01/38] Fixed issue #2915 and added a pytest to demonstrate the fix. Sphzone operating on an empty selection now returns an empty atom group. --- package/MDAnalysis/core/selection.py | 4 +++- testsuite/MDAnalysisTests/core/test_atomselections.py | 5 +++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/package/MDAnalysis/core/selection.py b/package/MDAnalysis/core/selection.py index adff7ab3eeb..feeb2144396 100644 --- a/package/MDAnalysis/core/selection.py +++ b/package/MDAnalysis/core/selection.py @@ -375,11 +375,13 @@ def __init__(self, parser, tokens): self.sel = parser.parse_expression(self.precedence) @return_empty_on_apply - def apply(self, group): + def apply(self, group): # TODO fix atom selection indices = [] sel = self.sel.apply(group) box = self.validate_dimensions(group.dimensions) periodic = box is not None + if len(sel) == 0: + return group[[]] ref = sel.center_of_geometry().reshape(1, 3).astype(np.float32) pairs = distances.capped_distance(ref, group.positions, self.cutoff, box=box, diff --git a/testsuite/MDAnalysisTests/core/test_atomselections.py b/testsuite/MDAnalysisTests/core/test_atomselections.py index 6d52ccf0a8f..8c04925e02d 100644 --- a/testsuite/MDAnalysisTests/core/test_atomselections.py +++ b/testsuite/MDAnalysisTests/core/test_atomselections.py @@ -730,6 +730,11 @@ def test_sphzone(self, u, periodic, expected): assert len(sel) == expected + @pytest.mark.parametrize('periodic,expected', ([True, 33], [False, 25])) + def test_sphzone(self, u, periodic, expected): + sel = u.select_atoms('sphzone 5.0 name NOT_A_NAME', periodic=periodic) + assert len(sel) == 0 + class TestTriclinicDistanceSelections(BaseDistanceSelection): @pytest.fixture() From 89a38d341d8e8d545f62ee963a34bfb8f1abd63d Mon Sep 17 00:00:00 2001 From: orioncohen Date: Fri, 2 Apr 2021 17:18:29 -0700 Subject: [PATCH 02/38] removed a unncesessary TODO statement --- package/MDAnalysis/core/selection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package/MDAnalysis/core/selection.py b/package/MDAnalysis/core/selection.py index feeb2144396..31e536e6ede 100644 --- a/package/MDAnalysis/core/selection.py +++ b/package/MDAnalysis/core/selection.py @@ -375,7 +375,7 @@ def __init__(self, parser, tokens): self.sel = parser.parse_expression(self.precedence) @return_empty_on_apply - def apply(self, group): # TODO fix atom selection + def apply(self, group): indices = [] sel = self.sel.apply(group) box = self.validate_dimensions(group.dimensions) From a9194a3f6e0fca8365098724c37beef41078d0df Mon Sep 17 00:00:00 2001 From: orioncohen Date: Fri, 2 Apr 2021 17:22:04 -0700 Subject: [PATCH 03/38] added issue #2915 fix to CHANGELOG --- package/CHANGELOG | 1 + 1 file changed, 1 insertion(+) diff --git a/package/CHANGELOG b/package/CHANGELOG index bf10ef51bc4..163d32afc9e 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -22,6 +22,7 @@ The rules for this file: * 2.0.0 Fixes + * Fixed 'sphzone' behavior to be consistent with 'around' (Issue #2915) * A Universe created from an ROMol with no atoms returns now a Universe with 0 atoms (Issue #3142) * ValueError raised when empty atomgroup is given to DensityAnalysis From dcb23b54eef029759d99f355b2a8b2f3843cdc35 Mon Sep 17 00:00:00 2001 From: orioncohen Date: Fri, 2 Apr 2021 21:52:19 -0700 Subject: [PATCH 04/38] added self to AUTHORS and CHANGELOG, moved lines in selection --- package/AUTHORS | 1 + package/CHANGELOG | 2 +- package/MDAnalysis/core/selection.py | 4 ++-- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/package/AUTHORS b/package/AUTHORS index 6a32bc9ce33..00076883aa4 100644 --- a/package/AUTHORS +++ b/package/AUTHORS @@ -157,6 +157,7 @@ Chronological list of authors - Leonardo Barneschi - Henrik Jäger - Jan Stevens + - Orion Cohen External code ------------- diff --git a/package/CHANGELOG b/package/CHANGELOG index 163d32afc9e..79ba91d1eed 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -17,7 +17,7 @@ The rules for this file: lilyminium, daveminh, jbarnoud, yuxuanzhuang, VOD555, ianmkenney, calcraven,xiki-tempula, mieczyslaw, manuel.nuno.melo, PicoCentauri, hanatok, rmeli, aditya-kamath, tirkarthi, LeonardoBarneschi, hejamu, - biogen98 + biogen98, orioncohen * 2.0.0 diff --git a/package/MDAnalysis/core/selection.py b/package/MDAnalysis/core/selection.py index 31e536e6ede..d0d0fc93bf3 100644 --- a/package/MDAnalysis/core/selection.py +++ b/package/MDAnalysis/core/selection.py @@ -378,10 +378,10 @@ def __init__(self, parser, tokens): def apply(self, group): indices = [] sel = self.sel.apply(group) - box = self.validate_dimensions(group.dimensions) - periodic = box is not None if len(sel) == 0: return group[[]] + box = self.validate_dimensions(group.dimensions) + periodic = box is not None ref = sel.center_of_geometry().reshape(1, 3).astype(np.float32) pairs = distances.capped_distance(ref, group.positions, self.cutoff, box=box, From 889b28d395717330e7d38f7e1bf87a3e0361edd0 Mon Sep 17 00:00:00 2001 From: orioncohen Date: Sat, 3 Apr 2021 08:13:06 -0700 Subject: [PATCH 05/38] fixed issue #2915 for cylayer, cyzone, and sphlayer. added testing to confirm. --- package/MDAnalysis/core/selection.py | 5 ++++- testsuite/MDAnalysisTests/core/test_atomselections.py | 7 +++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/package/MDAnalysis/core/selection.py b/package/MDAnalysis/core/selection.py index d0d0fc93bf3..93dc56e8f3a 100644 --- a/package/MDAnalysis/core/selection.py +++ b/package/MDAnalysis/core/selection.py @@ -352,6 +352,8 @@ def __init__(self, parser, tokens): def apply(self, group): indices = [] sel = self.sel.apply(group) + if len(sel) == 0: + return group[[]] box = self.validate_dimensions(group.dimensions) periodic = box is not None ref = sel.center_of_geometry().reshape(1, 3).astype(np.float32) @@ -396,7 +398,8 @@ class CylindricalSelection(Selection): @return_empty_on_apply def apply(self, group): sel = self.sel.apply(group) - + if len(sel) == 0: + return group[[]] # Calculate vectors between point of interest and our group vecs = group.positions - sel.center_of_geometry() diff --git a/testsuite/MDAnalysisTests/core/test_atomselections.py b/testsuite/MDAnalysisTests/core/test_atomselections.py index 8c04925e02d..b5120ac8328 100644 --- a/testsuite/MDAnalysisTests/core/test_atomselections.py +++ b/testsuite/MDAnalysisTests/core/test_atomselections.py @@ -209,6 +209,8 @@ def test_sphzone(self, universe, selstr): def test_cylayer(self, universe, selstr): sel = universe.select_atoms(selstr) assert_equal(len(sel), 88) + empty = universe.select_atoms('cylayer 4.0 6.0 10 -10 name NOT_A_NAME') + assert_equal(len(empty), 0) @pytest.mark.parametrize('selstr', [ 'cyzone 6.0 10 -10 bynum 1281', @@ -217,6 +219,8 @@ def test_cylayer(self, universe, selstr): def test_cyzone(self, universe, selstr): sel = universe.select_atoms(selstr) assert_equal(len(sel), 166) + empty = universe.select_atoms('cyzone 6.0 10 -10 name NOT_A_NAME') + assert_equal(len(empty), 0) def test_point(self, universe): ag = universe.select_atoms('point 5.0 5.0 5.0 3.5') @@ -775,6 +779,9 @@ def test_sphlayer(self, u): assert idx == set(ag.indices) + empty = u.select_atoms('sphlayer 2.4 6.0 name NOT_A_NAME') + assert len(empty) == 0 + def test_sphzone(self, u): r1 = u.select_atoms('resid 1') cog = r1.center_of_geometry().reshape(1, 3) From 96b63e3e47c9508753e06024f7928e1d4803a4a6 Mon Sep 17 00:00:00 2001 From: orioncohen Date: Sat, 3 Apr 2021 09:15:33 -0700 Subject: [PATCH 06/38] created decorator to test for empty selections. moved around testing to be more consistent. --- package/MDAnalysis/core/selection.py | 26 ++++++++++++------- .../core/test_atomselections.py | 8 +++--- 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/package/MDAnalysis/core/selection.py b/package/MDAnalysis/core/selection.py index 93dc56e8f3a..f883229b547 100644 --- a/package/MDAnalysis/core/selection.py +++ b/package/MDAnalysis/core/selection.py @@ -217,6 +217,20 @@ def apply(self, group): return func(self, group) return apply +def return_empty_on_empty_selection(func): + """ + Decorator to return empty AtomGroups from the apply() function + if entire the selection is empty or a sub-expression is empty. + """ + @functools.wraps(func) + def apply(self, group): + if len(group) == 0: + return group + if len(self.sel.apply(group)) == 0: + return group[[]] + return func(self, group) + return apply + class _Selectionmeta(type): def __init__(cls, name, bases, classdict): type.__init__(type, name, bases, classdict) @@ -348,12 +362,10 @@ def __init__(self, parser, tokens): self.exRadius = float(tokens.popleft()) self.sel = parser.parse_expression(self.precedence) - @return_empty_on_apply + @return_empty_on_empty_selection def apply(self, group): indices = [] sel = self.sel.apply(group) - if len(sel) == 0: - return group[[]] box = self.validate_dimensions(group.dimensions) periodic = box is not None ref = sel.center_of_geometry().reshape(1, 3).astype(np.float32) @@ -376,12 +388,10 @@ def __init__(self, parser, tokens): self.cutoff = float(tokens.popleft()) self.sel = parser.parse_expression(self.precedence) - @return_empty_on_apply + @return_empty_on_empty_selection def apply(self, group): indices = [] sel = self.sel.apply(group) - if len(sel) == 0: - return group[[]] box = self.validate_dimensions(group.dimensions) periodic = box is not None ref = sel.center_of_geometry().reshape(1, 3).astype(np.float32) @@ -395,11 +405,9 @@ def apply(self, group): class CylindricalSelection(Selection): - @return_empty_on_apply + @return_empty_on_empty_selection def apply(self, group): sel = self.sel.apply(group) - if len(sel) == 0: - return group[[]] # Calculate vectors between point of interest and our group vecs = group.positions - sel.center_of_geometry() diff --git a/testsuite/MDAnalysisTests/core/test_atomselections.py b/testsuite/MDAnalysisTests/core/test_atomselections.py index b5120ac8328..0b8ba83d2b9 100644 --- a/testsuite/MDAnalysisTests/core/test_atomselections.py +++ b/testsuite/MDAnalysisTests/core/test_atomselections.py @@ -734,11 +734,6 @@ def test_sphzone(self, u, periodic, expected): assert len(sel) == expected - @pytest.mark.parametrize('periodic,expected', ([True, 33], [False, 25])) - def test_sphzone(self, u, periodic, expected): - sel = u.select_atoms('sphzone 5.0 name NOT_A_NAME', periodic=periodic) - assert len(sel) == 0 - class TestTriclinicDistanceSelections(BaseDistanceSelection): @pytest.fixture() @@ -793,6 +788,9 @@ def test_sphzone(self, u): assert idx == set(ag.indices) + empty = u.select_atoms('sphzone 5.0 name NOT_A_NAME') + assert len(empty) == 0 + def test_point_1(self, u): # The example selection ag = u.select_atoms('point 5.0 5.0 5.0 3.5') From d5fde7d90c3f7cc7dff2917bb257b71ba76ddba2 Mon Sep 17 00:00:00 2001 From: orioncohen Date: Sat, 3 Apr 2021 09:20:15 -0700 Subject: [PATCH 07/38] added another blank line to be consistent with PEP-8 --- package/MDAnalysis/core/selection.py | 1 + 1 file changed, 1 insertion(+) diff --git a/package/MDAnalysis/core/selection.py b/package/MDAnalysis/core/selection.py index f883229b547..756d7c3ebf6 100644 --- a/package/MDAnalysis/core/selection.py +++ b/package/MDAnalysis/core/selection.py @@ -217,6 +217,7 @@ def apply(self, group): return func(self, group) return apply + def return_empty_on_empty_selection(func): """ Decorator to return empty AtomGroups from the apply() function From 8a81f5485d7709b439a0de1d60749be8b6740c59 Mon Sep 17 00:00:00 2001 From: orioncohen Date: Sat, 3 Apr 2021 17:06:40 -0700 Subject: [PATCH 08/38] seperated testing for empty atom selection for sph* and cy* selection tokens --- testsuite/MDAnalysisTests/core/test_atomselections.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/testsuite/MDAnalysisTests/core/test_atomselections.py b/testsuite/MDAnalysisTests/core/test_atomselections.py index 0b8ba83d2b9..eb02538e7b0 100644 --- a/testsuite/MDAnalysisTests/core/test_atomselections.py +++ b/testsuite/MDAnalysisTests/core/test_atomselections.py @@ -209,6 +209,8 @@ def test_sphzone(self, universe, selstr): def test_cylayer(self, universe, selstr): sel = universe.select_atoms(selstr) assert_equal(len(sel), 88) + + def test_empty_cylayer(self, universe): empty = universe.select_atoms('cylayer 4.0 6.0 10 -10 name NOT_A_NAME') assert_equal(len(empty), 0) @@ -219,6 +221,8 @@ def test_cylayer(self, universe, selstr): def test_cyzone(self, universe, selstr): sel = universe.select_atoms(selstr) assert_equal(len(sel), 166) + + def test_empty_cyzone(self, universe): empty = universe.select_atoms('cyzone 6.0 10 -10 name NOT_A_NAME') assert_equal(len(empty), 0) @@ -774,6 +778,7 @@ def test_sphlayer(self, u): assert idx == set(ag.indices) + def test_empty_sphlayer(self, u): empty = u.select_atoms('sphlayer 2.4 6.0 name NOT_A_NAME') assert len(empty) == 0 @@ -788,6 +793,7 @@ def test_sphzone(self, u): assert idx == set(ag.indices) + def test_empty_sphzone(self, u): empty = u.select_atoms('sphzone 5.0 name NOT_A_NAME') assert len(empty) == 0 From a4c7e41ea868f710b3e25005d1f25c26458d09c3 Mon Sep 17 00:00:00 2001 From: orioncohen Date: Sat, 3 Apr 2021 17:14:55 -0700 Subject: [PATCH 09/38] added stacked decorators instead of repeated functionality in one decorator --- package/MDAnalysis/core/selection.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/package/MDAnalysis/core/selection.py b/package/MDAnalysis/core/selection.py index 756d7c3ebf6..7035743fe50 100644 --- a/package/MDAnalysis/core/selection.py +++ b/package/MDAnalysis/core/selection.py @@ -225,8 +225,6 @@ def return_empty_on_empty_selection(func): """ @functools.wraps(func) def apply(self, group): - if len(group) == 0: - return group if len(self.sel.apply(group)) == 0: return group[[]] return func(self, group) @@ -363,6 +361,7 @@ def __init__(self, parser, tokens): self.exRadius = float(tokens.popleft()) self.sel = parser.parse_expression(self.precedence) + @return_empty_on_apply @return_empty_on_empty_selection def apply(self, group): indices = [] @@ -389,6 +388,7 @@ def __init__(self, parser, tokens): self.cutoff = float(tokens.popleft()) self.sel = parser.parse_expression(self.precedence) + @return_empty_on_apply @return_empty_on_empty_selection def apply(self, group): indices = [] @@ -406,6 +406,7 @@ def apply(self, group): class CylindricalSelection(Selection): + @return_empty_on_apply @return_empty_on_empty_selection def apply(self, group): sel = self.sel.apply(group) From b6dfd9a64a12c97ceaa525e95cf73ae3f8da8151 Mon Sep 17 00:00:00 2001 From: orioncohen Date: Sun, 4 Apr 2021 12:24:43 -0700 Subject: [PATCH 10/38] removed the decorator implementation and added code directly into the functions --- package/MDAnalysis/core/selection.py | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/package/MDAnalysis/core/selection.py b/package/MDAnalysis/core/selection.py index 7035743fe50..d72ec4f6c03 100644 --- a/package/MDAnalysis/core/selection.py +++ b/package/MDAnalysis/core/selection.py @@ -218,18 +218,6 @@ def apply(self, group): return apply -def return_empty_on_empty_selection(func): - """ - Decorator to return empty AtomGroups from the apply() function - if entire the selection is empty or a sub-expression is empty. - """ - @functools.wraps(func) - def apply(self, group): - if len(self.sel.apply(group)) == 0: - return group[[]] - return func(self, group) - return apply - class _Selectionmeta(type): def __init__(cls, name, bases, classdict): type.__init__(type, name, bases, classdict) @@ -362,10 +350,11 @@ def __init__(self, parser, tokens): self.sel = parser.parse_expression(self.precedence) @return_empty_on_apply - @return_empty_on_empty_selection def apply(self, group): indices = [] sel = self.sel.apply(group) + if len(sel) == 0: + return group[[]] box = self.validate_dimensions(group.dimensions) periodic = box is not None ref = sel.center_of_geometry().reshape(1, 3).astype(np.float32) @@ -389,10 +378,11 @@ def __init__(self, parser, tokens): self.sel = parser.parse_expression(self.precedence) @return_empty_on_apply - @return_empty_on_empty_selection def apply(self, group): indices = [] sel = self.sel.apply(group) + if len(sel) == 0: + return group[[]] box = self.validate_dimensions(group.dimensions) periodic = box is not None ref = sel.center_of_geometry().reshape(1, 3).astype(np.float32) @@ -407,9 +397,10 @@ def apply(self, group): class CylindricalSelection(Selection): @return_empty_on_apply - @return_empty_on_empty_selection def apply(self, group): sel = self.sel.apply(group) + if len(sel) == 0: + return group[[]] # Calculate vectors between point of interest and our group vecs = group.positions - sel.center_of_geometry() From 0cc38ad9c51c9b50a482c91c9054f6386db72157 Mon Sep 17 00:00:00 2001 From: orioncohen Date: Mon, 5 Apr 2021 12:46:56 -0700 Subject: [PATCH 11/38] updated CHANGELOG with more detail --- package/CHANGELOG | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/package/CHANGELOG b/package/CHANGELOG index 79ba91d1eed..0de5b4a35a4 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -22,7 +22,8 @@ The rules for this file: * 2.0.0 Fixes - * Fixed 'sphzone' behavior to be consistent with 'around' (Issue #2915) + * Fixed 'sphzone', 'sphlayer', 'cyzone', and 'cylayer' to return empty if the + zone/layer is empty, consistent with 'around' (Issue #2915) * A Universe created from an ROMol with no atoms returns now a Universe with 0 atoms (Issue #3142) * ValueError raised when empty atomgroup is given to DensityAnalysis From d0198e125e3034d6c77b2e562779a721b15e1185 Mon Sep 17 00:00:00 2001 From: orioncohen Date: Mon, 29 Nov 2021 12:22:07 -0800 Subject: [PATCH 12/38] increased the maximum number of matches from an rdkit smarts query --- package/MDAnalysis/core/selection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package/MDAnalysis/core/selection.py b/package/MDAnalysis/core/selection.py index 269dae2a5db..3513d1c5672 100644 --- a/package/MDAnalysis/core/selection.py +++ b/package/MDAnalysis/core/selection.py @@ -656,7 +656,7 @@ def _apply(self, group): if not pattern: raise ValueError(f"{self.pattern!r} is not a valid SMARTS query") mol = group.convert_to("RDKIT", **self.rdkit_kwargs) - matches = mol.GetSubstructMatches(pattern, useChirality=True) + matches = mol.GetSubstructMatches(pattern, useChirality=True, maxMatches=10000) # convert rdkit indices to mdanalysis' indices = [ mol.GetAtomWithIdx(idx).GetIntProp("_MDAnalysis_index") From 3ac808ce8841a3046e4256af523bb9e323c2ea63 Mon Sep 17 00:00:00 2001 From: orioncohen Date: Mon, 29 Nov 2021 12:32:35 -0800 Subject: [PATCH 13/38] update changelog --- package/CHANGELOG | 1 + 1 file changed, 1 insertion(+) diff --git a/package/CHANGELOG b/package/CHANGELOG index 851b401f9f3..9981724d66c 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -31,6 +31,7 @@ Enhancements option from setup.cfg (Issue #3428, PR #3429). Changes + * increase smarts atom selection limit from 1000 -> 10000 (#3469) * Dropped python 3.6 support and raised minimum numpy version to 1.18.0 (NEP29) From ad874f0e94375bcf765ab585029bdf25f703b6ff Mon Sep 17 00:00:00 2001 From: orioncohen Date: Mon, 29 Nov 2021 16:36:22 -0800 Subject: [PATCH 14/38] set default max_matches to largest unsigned int and add a configurable parameters --- package/MDAnalysis/core/selection.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/package/MDAnalysis/core/selection.py b/package/MDAnalysis/core/selection.py index 3513d1c5672..3f69fe9a41f 100644 --- a/package/MDAnalysis/core/selection.py +++ b/package/MDAnalysis/core/selection.py @@ -655,8 +655,16 @@ def _apply(self, group): pattern = Chem.MolFromSmarts(self.pattern) if not pattern: raise ValueError(f"{self.pattern!r} is not a valid SMARTS query") + if "max_matches" in self.rdkit_kwargs: + max_matches = self.rdkit_kwargs.pop("max_matches") + else: + max_matches = np.iinfo(np.uintc).max mol = group.convert_to("RDKIT", **self.rdkit_kwargs) - matches = mol.GetSubstructMatches(pattern, useChirality=True, maxMatches=10000) + matches = mol.GetSubstructMatches( + pattern, + useChirality=True, + maxMatches=max_matches + ) # convert rdkit indices to mdanalysis' indices = [ mol.GetAtomWithIdx(idx).GetIntProp("_MDAnalysis_index") From b2a9446291c00ca32c1e29ce83caba9d919a65ab Mon Sep 17 00:00:00 2001 From: orioncohen Date: Mon, 29 Nov 2021 16:36:55 -0800 Subject: [PATCH 15/38] add better documentation to the smarts selection query --- package/MDAnalysis/core/groups.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/package/MDAnalysis/core/groups.py b/package/MDAnalysis/core/groups.py index 3d70e13b720..53ed991e5ba 100644 --- a/package/MDAnalysis/core/groups.py +++ b/package/MDAnalysis/core/groups.py @@ -2977,7 +2977,12 @@ def select_atoms(self, sel, *othersel, periodic=True, rtol=1e-05, smarts *SMARTS-query* select atoms using Daylight's SMARTS queries, e.g. ``smarts [#7;R]`` to find nitrogen atoms in rings. Requires RDKit. - All matches (max 1000) are combined as a unique match + All matches are combined as a unique match. Max matches can + be set by adding a "max_matches" key to the rdkit_kwargs + argument of select_atoms. + + >>> universe.select_atoms("C", rdkit_kwargs={"max_matches": 100}) + **Boolean** From da317ed2c58d188fe9c70321326def694bcbef29 Mon Sep 17 00:00:00 2001 From: orioncohen Date: Mon, 29 Nov 2021 16:46:16 -0800 Subject: [PATCH 16/38] added a test for max_matches behavior --- testsuite/MDAnalysisTests/core/test_atomselections.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/testsuite/MDAnalysisTests/core/test_atomselections.py b/testsuite/MDAnalysisTests/core/test_atomselections.py index 3204cda219e..cb753fda058 100644 --- a/testsuite/MDAnalysisTests/core/test_atomselections.py +++ b/testsuite/MDAnalysisTests/core/test_atomselections.py @@ -580,6 +580,12 @@ def test_passing_args_to_converter(self): sel = u.select_atoms("smarts [$(O=C)]", rdkit_kwargs=dict(force=True)) assert sel.n_atoms == 2 + def test_test_test(self, u2): + sel = u2.select_atoms("smarts C", rdkit_kwargs=dict(max_matches=2)) + assert sel.n_atoms == 2 + sel2 = u2.select_atoms("smarts c") + assert sel2.n_atoms == 4 + class TestSelectionsNucleicAcids(object): @pytest.fixture(scope='class') From bef38a9b630502a120cf69e31c6bb1d427dce956 Mon Sep 17 00:00:00 2001 From: orioncohen Date: Tue, 30 Nov 2021 09:15:04 -0800 Subject: [PATCH 17/38] change max_matches -> maxMatches and set default max to group.n_atoms --- package/MDAnalysis/core/groups.py | 4 ++-- package/MDAnalysis/core/selection.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/package/MDAnalysis/core/groups.py b/package/MDAnalysis/core/groups.py index 53ed991e5ba..cebe92da691 100644 --- a/package/MDAnalysis/core/groups.py +++ b/package/MDAnalysis/core/groups.py @@ -2978,10 +2978,10 @@ def select_atoms(self, sel, *othersel, periodic=True, rtol=1e-05, select atoms using Daylight's SMARTS queries, e.g. ``smarts [#7;R]`` to find nitrogen atoms in rings. Requires RDKit. All matches are combined as a unique match. Max matches can - be set by adding a "max_matches" key to the rdkit_kwargs + be set by adding a "maxMatches" key to the rdkit_kwargs argument of select_atoms. - >>> universe.select_atoms("C", rdkit_kwargs={"max_matches": 100}) + >>> universe.select_atoms("C", rdkit_kwargs={"maxMatches": 100}) **Boolean** diff --git a/package/MDAnalysis/core/selection.py b/package/MDAnalysis/core/selection.py index 3f69fe9a41f..01aba48ac15 100644 --- a/package/MDAnalysis/core/selection.py +++ b/package/MDAnalysis/core/selection.py @@ -655,10 +655,10 @@ def _apply(self, group): pattern = Chem.MolFromSmarts(self.pattern) if not pattern: raise ValueError(f"{self.pattern!r} is not a valid SMARTS query") - if "max_matches" in self.rdkit_kwargs: - max_matches = self.rdkit_kwargs.pop("max_matches") + if "maxMatches" in self.rdkit_kwargs: + max_matches = self.rdkit_kwargs.pop("maxMatches") else: - max_matches = np.iinfo(np.uintc).max + max_matches = group.n_atoms mol = group.convert_to("RDKIT", **self.rdkit_kwargs) matches = mol.GetSubstructMatches( pattern, From 547d8f6a6add366f06c6b68e522ea8ad27522920 Mon Sep 17 00:00:00 2001 From: orioncohen Date: Tue, 30 Nov 2021 09:18:42 -0800 Subject: [PATCH 18/38] update CHANGELOG to reflect recent changes --- package/CHANGELOG | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/package/CHANGELOG b/package/CHANGELOG index 9981724d66c..6a362cbc90e 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -13,7 +13,8 @@ The rules for this file: * release numbers follow "Semantic Versioning" http://semver.org ------------------------------------------------------------------------------ -??/??/?? IAlibay, melomcr, mdd31, ianmkenney, richardjgowers, hmacdope, orbeckst, scal444 +??/??/?? IAlibay, melomcr, mdd31, ianmkenney, richardjgowers, hmacdope, + orbeckst, scal444, orioncohen * 2.1.0 Fixes @@ -31,7 +32,8 @@ Enhancements option from setup.cfg (Issue #3428, PR #3429). Changes - * increase smarts atom selection limit from 1000 -> 10000 (#3469) + * increase smarts atom selection limit from 1000 -> n_atoms. Add + maxMatches kwarg to rdkit_kwargs. (Issue #3469, PR #3470) * Dropped python 3.6 support and raised minimum numpy version to 1.18.0 (NEP29) From 937e580c61681694e03e9cb7f58ffd17b4a7e874 Mon Sep 17 00:00:00 2001 From: orioncohen Date: Tue, 30 Nov 2021 09:38:26 -0800 Subject: [PATCH 19/38] clean up syntax and expose useChirality kwarg --- package/CHANGELOG | 4 ++-- package/MDAnalysis/core/groups.py | 2 +- package/MDAnalysis/core/selection.py | 8 +++----- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/package/CHANGELOG b/package/CHANGELOG index 6a362cbc90e..73097e89d2c 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -30,10 +30,10 @@ Fixes Enhancements * Add option for custom compiler flags for C/C++ on build and remove march option from setup.cfg (Issue #3428, PR #3429). + * increase smarts atom selection limit from 1000 -> n_atoms. Add + maxMatches and useChirality to rdkit_kwargs. (Issue #3469, PR #3470) Changes - * increase smarts atom selection limit from 1000 -> n_atoms. Add - maxMatches kwarg to rdkit_kwargs. (Issue #3469, PR #3470) * Dropped python 3.6 support and raised minimum numpy version to 1.18.0 (NEP29) diff --git a/package/MDAnalysis/core/groups.py b/package/MDAnalysis/core/groups.py index cebe92da691..7112ffdd7a6 100644 --- a/package/MDAnalysis/core/groups.py +++ b/package/MDAnalysis/core/groups.py @@ -2979,7 +2979,7 @@ def select_atoms(self, sel, *othersel, periodic=True, rtol=1e-05, [#7;R]`` to find nitrogen atoms in rings. Requires RDKit. All matches are combined as a unique match. Max matches can be set by adding a "maxMatches" key to the rdkit_kwargs - argument of select_atoms. + argument of select_atoms. "useChirality" can be set similarly. >>> universe.select_atoms("C", rdkit_kwargs={"maxMatches": 100}) diff --git a/package/MDAnalysis/core/selection.py b/package/MDAnalysis/core/selection.py index 01aba48ac15..2bf830b81c1 100644 --- a/package/MDAnalysis/core/selection.py +++ b/package/MDAnalysis/core/selection.py @@ -655,14 +655,12 @@ def _apply(self, group): pattern = Chem.MolFromSmarts(self.pattern) if not pattern: raise ValueError(f"{self.pattern!r} is not a valid SMARTS query") - if "maxMatches" in self.rdkit_kwargs: - max_matches = self.rdkit_kwargs.pop("maxMatches") - else: - max_matches = group.n_atoms + use_chirality = self.rdkit_kwargs.pop("useChirality", True) + max_matches = self.rdkit_kwargs.pop("maxMatches", group.n_atoms) mol = group.convert_to("RDKIT", **self.rdkit_kwargs) matches = mol.GetSubstructMatches( pattern, - useChirality=True, + useChirality=use_chirality, maxMatches=max_matches ) # convert rdkit indices to mdanalysis' From 4e126d110eb2c56535f4649461c1e108d798682d Mon Sep 17 00:00:00 2001 From: orioncohen Date: Tue, 30 Nov 2021 09:43:13 -0800 Subject: [PATCH 20/38] add better documentation for rdkit_kwargs --- package/MDAnalysis/core/groups.py | 1 + 1 file changed, 1 insertion(+) diff --git a/package/MDAnalysis/core/groups.py b/package/MDAnalysis/core/groups.py index 7112ffdd7a6..3e3d03c3d3a 100644 --- a/package/MDAnalysis/core/groups.py +++ b/package/MDAnalysis/core/groups.py @@ -2980,6 +2980,7 @@ def select_atoms(self, sel, *othersel, periodic=True, rtol=1e-05, All matches are combined as a unique match. Max matches can be set by adding a "maxMatches" key to the rdkit_kwargs argument of select_atoms. "useChirality" can be set similarly. + All other rdkit_kwargs will be passed RDKitConverter.convert(). >>> universe.select_atoms("C", rdkit_kwargs={"maxMatches": 100}) From 086e0506c5c180f7858a8be50388fcc336dcb845 Mon Sep 17 00:00:00 2001 From: orioncohen Date: Thu, 2 Dec 2021 13:07:06 -0800 Subject: [PATCH 21/38] use two kwargs variables to pass kwargs to smarts queries --- package/MDAnalysis/core/groups.py | 19 +++++++++++-------- package/MDAnalysis/core/selection.py | 17 +++++++++-------- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/package/MDAnalysis/core/groups.py b/package/MDAnalysis/core/groups.py index 3e3d03c3d3a..a86d5d469cf 100644 --- a/package/MDAnalysis/core/groups.py +++ b/package/MDAnalysis/core/groups.py @@ -2826,7 +2826,7 @@ def ts(self): def select_atoms(self, sel, *othersel, periodic=True, rtol=1e-05, atol=1e-08, updating=False, sorted=True, - rdkit_kwargs=None, **selgroups): + rdkit_kwargs=None, smarts_kwargs=None, **selgroups): """Select atoms from within this Group using a selection string. Returns an :class:`AtomGroup` sorted according to their index in the @@ -2977,13 +2977,15 @@ def select_atoms(self, sel, *othersel, periodic=True, rtol=1e-05, smarts *SMARTS-query* select atoms using Daylight's SMARTS queries, e.g. ``smarts [#7;R]`` to find nitrogen atoms in rings. Requires RDKit. - All matches are combined as a unique match. Max matches can - be set by adding a "maxMatches" key to the rdkit_kwargs - argument of select_atoms. "useChirality" can be set similarly. - All other rdkit_kwargs will be passed RDKitConverter.convert(). + All matches are combined as a unique match. Uses two sets of + kwargs: rdkit_kwargs is passed to `RDKitConverter.convert()` + and smarts_kwargs is passed to RDKit's [GetSubstructMatches]( + https://www.rdkit.org/docs/source/ + rdkit.Chem.rdchem.html#rdkit.Chem.rdchem.Mol.GetSubstructMatches). + The useChirality kwarg is True by default. - >>> universe.select_atoms("C", rdkit_kwargs={"maxMatches": 100}) - + >>> universe.select_atoms("C", smarts_kwargs={"maxMatches": 100}) + **Boolean** @@ -3153,7 +3155,8 @@ def select_atoms(self, sel, *othersel, periodic=True, rtol=1e-05, periodic=periodic, atol=atol, rtol=rtol, sorted=sorted, - rdkit_kwargs=rdkit_kwargs) + rdkit_kwargs=rdkit_kwargs, + smarts_kwargs=smarts_kwargs) for s in sel_strs)) if updating: atomgrp = UpdatingAtomGroup(self, selections, sel_strs) diff --git a/package/MDAnalysis/core/selection.py b/package/MDAnalysis/core/selection.py index 2bf830b81c1..5d33cb8d3b4 100644 --- a/package/MDAnalysis/core/selection.py +++ b/package/MDAnalysis/core/selection.py @@ -643,6 +643,7 @@ def __init__(self, parser, tokens): pattern.append(val) self.pattern = "".join(pattern) self.rdkit_kwargs = parser.rdkit_kwargs + self.smarts_kwargs = parser.smarts_kwargs def _apply(self, group): try: @@ -655,14 +656,13 @@ def _apply(self, group): pattern = Chem.MolFromSmarts(self.pattern) if not pattern: raise ValueError(f"{self.pattern!r} is not a valid SMARTS query") - use_chirality = self.rdkit_kwargs.pop("useChirality", True) - max_matches = self.rdkit_kwargs.pop("maxMatches", group.n_atoms) mol = group.convert_to("RDKIT", **self.rdkit_kwargs) - matches = mol.GetSubstructMatches( - pattern, - useChirality=use_chirality, - maxMatches=max_matches - ) + # override GetSubstructMatches default values + if "useChirality" not in self.smarts_kwargs: + self.smarts_kwargs["useChirality"] = True + if "maxMatches" not in self.smarts_kwargs: + self.smarts_kwargs["useChirality"] = group.n_atoms + matches = mol.GetSubstructMatches(pattern, **self.smarts_kwargs) # convert rdkit indices to mdanalysis' indices = [ mol.GetAtomWithIdx(idx).GetIntProp("_MDAnalysis_index") @@ -1394,7 +1394,7 @@ def expect(self, token): "".format(self.tokens[0], token)) def parse(self, selectstr, selgroups, periodic=None, atol=1e-08, - rtol=1e-05, sorted=True, rdkit_kwargs=None): + rtol=1e-05, sorted=True, rdkit_kwargs=None, smarts_kwargs=None): """Create a Selection object from a string. Parameters @@ -1439,6 +1439,7 @@ def parse(self, selectstr, selgroups, periodic=None, atol=1e-08, self.rtol = rtol self.sorted = sorted self.rdkit_kwargs = rdkit_kwargs or {} + self.smarts_kwargs = smarts_kwargs or {} self.selectstr = selectstr self.selgroups = selgroups From 14d163a600dbdfbf75e32778b3c15aef796c3c56 Mon Sep 17 00:00:00 2001 From: orioncohen Date: Thu, 2 Dec 2021 13:07:22 -0800 Subject: [PATCH 22/38] update testing to use smarts_kwargs --- testsuite/MDAnalysisTests/core/test_atomselections.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/testsuite/MDAnalysisTests/core/test_atomselections.py b/testsuite/MDAnalysisTests/core/test_atomselections.py index cb753fda058..8db3e6536ec 100644 --- a/testsuite/MDAnalysisTests/core/test_atomselections.py +++ b/testsuite/MDAnalysisTests/core/test_atomselections.py @@ -575,13 +575,13 @@ def test_invalid_smarts_sel_raises_error(self, u2): with pytest.raises(ValueError, match="not a valid SMARTS"): u2.select_atoms("smarts foo") - def test_passing_args_to_converter(self): + def test_passing_rdkit_kwargs_to_converter(self): u = mda.Universe.from_smiles("O=C=O") sel = u.select_atoms("smarts [$(O=C)]", rdkit_kwargs=dict(force=True)) assert sel.n_atoms == 2 - def test_test_test(self, u2): - sel = u2.select_atoms("smarts C", rdkit_kwargs=dict(max_matches=2)) + def test_passing_smarts_kwargs_to_converter(self, u2): + sel = u2.select_atoms("smarts C", smarts_kwargs=dict(maxMatches=2)) assert sel.n_atoms == 2 sel2 = u2.select_atoms("smarts c") assert sel2.n_atoms == 4 From d89b1310ff4832dfbb7af43990352df7685bd6eb Mon Sep 17 00:00:00 2001 From: orioncohen Date: Fri, 3 Dec 2021 12:16:07 -0800 Subject: [PATCH 23/38] small bug fix --- package/MDAnalysis/core/selection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package/MDAnalysis/core/selection.py b/package/MDAnalysis/core/selection.py index 5d33cb8d3b4..b509ebc53a2 100644 --- a/package/MDAnalysis/core/selection.py +++ b/package/MDAnalysis/core/selection.py @@ -661,7 +661,7 @@ def _apply(self, group): if "useChirality" not in self.smarts_kwargs: self.smarts_kwargs["useChirality"] = True if "maxMatches" not in self.smarts_kwargs: - self.smarts_kwargs["useChirality"] = group.n_atoms + self.smarts_kwargs["maxMatches"] = group.n_atoms matches = mol.GetSubstructMatches(pattern, **self.smarts_kwargs) # convert rdkit indices to mdanalysis' indices = [ From 1d7d8986b5bababe6d011f14e43db277b1bbcfbc Mon Sep 17 00:00:00 2001 From: orioncohen Date: Fri, 3 Dec 2021 12:16:44 -0800 Subject: [PATCH 24/38] updated testing to increase code coverage --- testsuite/MDAnalysisTests/core/test_atomselections.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/testsuite/MDAnalysisTests/core/test_atomselections.py b/testsuite/MDAnalysisTests/core/test_atomselections.py index 8db3e6536ec..3ef5dce923c 100644 --- a/testsuite/MDAnalysisTests/core/test_atomselections.py +++ b/testsuite/MDAnalysisTests/core/test_atomselections.py @@ -585,6 +585,13 @@ def test_passing_smarts_kwargs_to_converter(self, u2): assert sel.n_atoms == 2 sel2 = u2.select_atoms("smarts c") assert sel2.n_atoms == 4 + isomer_1 = "CCC@HO" + isomer_2 = "CCC@@HO" + u = mda.Universe.from_smiles("CC[C@H](C)O") + sel3 = u.select_atoms("byres smarts CC[C@@H](C)O") + assert sel3.n_atoms == 0 + sel4 = u.select_atoms("byres smarts CC[C@@H](C)O", smarts_kwargs={"useChirality": False}) + assert sel4.n_atoms == 15 class TestSelectionsNucleicAcids(object): From 207e858a9357b61eea87f2e8c3230c31f4721fd3 Mon Sep 17 00:00:00 2001 From: orioncohen Date: Fri, 3 Dec 2021 12:20:55 -0800 Subject: [PATCH 25/38] cleaned up testing for smarts kwargs --- testsuite/MDAnalysisTests/core/test_atomselections.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/testsuite/MDAnalysisTests/core/test_atomselections.py b/testsuite/MDAnalysisTests/core/test_atomselections.py index 3ef5dce923c..e602e991385 100644 --- a/testsuite/MDAnalysisTests/core/test_atomselections.py +++ b/testsuite/MDAnalysisTests/core/test_atomselections.py @@ -580,13 +580,13 @@ def test_passing_rdkit_kwargs_to_converter(self): sel = u.select_atoms("smarts [$(O=C)]", rdkit_kwargs=dict(force=True)) assert sel.n_atoms == 2 - def test_passing_smarts_kwargs_to_converter(self, u2): + def test_passing_max_matches_to_converter(self, u2): sel = u2.select_atoms("smarts C", smarts_kwargs=dict(maxMatches=2)) assert sel.n_atoms == 2 sel2 = u2.select_atoms("smarts c") assert sel2.n_atoms == 4 - isomer_1 = "CCC@HO" - isomer_2 = "CCC@@HO" + + def test_passing_use_chirality_to_converter(self): u = mda.Universe.from_smiles("CC[C@H](C)O") sel3 = u.select_atoms("byres smarts CC[C@@H](C)O") assert sel3.n_atoms == 0 From a210d43f1ceb80967ca6d502e75d60cf119653fc Mon Sep 17 00:00:00 2001 From: orioncohen Date: Fri, 10 Dec 2021 09:39:01 -0800 Subject: [PATCH 26/38] updated and clarified documentation --- package/CHANGELOG | 4 ++-- package/MDAnalysis/core/groups.py | 17 +++++++++++------ 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/package/CHANGELOG b/package/CHANGELOG index 73097e89d2c..0e1835a8f86 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -28,10 +28,10 @@ Fixes created using add_Residue and add_Segment (Issue #3437) Enhancements - * Add option for custom compiler flags for C/C++ on build and remove march - option from setup.cfg (Issue #3428, PR #3429). * increase smarts atom selection limit from 1000 -> n_atoms. Add maxMatches and useChirality to rdkit_kwargs. (Issue #3469, PR #3470) + * Add option for custom compiler flags for C/C++ on build and remove march + option from setup.cfg (Issue #3428, PR #3429). Changes * Dropped python 3.6 support and raised minimum numpy version to 1.18.0 diff --git a/package/MDAnalysis/core/groups.py b/package/MDAnalysis/core/groups.py index a86d5d469cf..9ca012a7604 100644 --- a/package/MDAnalysis/core/groups.py +++ b/package/MDAnalysis/core/groups.py @@ -2859,6 +2859,10 @@ def select_atoms(self, sel, *othersel, periodic=True, rtol=1e-05, Arguments passed to the :class:`~MDAnalysis.converters.RDKit.RDKitConverter` when using selection based on SMARTS queries + smarts_kwargs : dict (optional) + Arguments passed internally to RDKit's `GetSubstructMatches + `_. + **selgroups : keyword arguments of str: AtomGroup (optional) when using the "group" keyword in selections, groups are defined by passing them as keyword arguments. See section on **preexisting @@ -2977,12 +2981,13 @@ def select_atoms(self, sel, *othersel, periodic=True, rtol=1e-05, smarts *SMARTS-query* select atoms using Daylight's SMARTS queries, e.g. ``smarts [#7;R]`` to find nitrogen atoms in rings. Requires RDKit. - All matches are combined as a unique match. Uses two sets of - kwargs: rdkit_kwargs is passed to `RDKitConverter.convert()` - and smarts_kwargs is passed to RDKit's [GetSubstructMatches]( - https://www.rdkit.org/docs/source/ - rdkit.Chem.rdchem.html#rdkit.Chem.rdchem.Mol.GetSubstructMatches). - The useChirality kwarg is True by default. + All matches are combined as a single unique match. The `smarts` + selection accepts two sets of key word arguments from + `select_atoms()`: the rdkit_kwargs are passed internally to + `RDKitConverter.convert()` and the smarts_kwargs are passed to + RDKit's `GetSubstructMatches + `_. + By default, the `useChirality` kwarg in rdkit_kwargs is set to true. >>> universe.select_atoms("C", smarts_kwargs={"maxMatches": 100}) From 8a67c76fb5cd09adaf6f2a8e102ae51e954d6a1a Mon Sep 17 00:00:00 2001 From: orioncohen Date: Fri, 10 Dec 2021 09:39:27 -0800 Subject: [PATCH 27/38] cleaned up code style and used setdefaults rather than if statement --- package/MDAnalysis/core/selection.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/package/MDAnalysis/core/selection.py b/package/MDAnalysis/core/selection.py index b509ebc53a2..907145d5fd9 100644 --- a/package/MDAnalysis/core/selection.py +++ b/package/MDAnalysis/core/selection.py @@ -658,12 +658,10 @@ def _apply(self, group): raise ValueError(f"{self.pattern!r} is not a valid SMARTS query") mol = group.convert_to("RDKIT", **self.rdkit_kwargs) # override GetSubstructMatches default values - if "useChirality" not in self.smarts_kwargs: - self.smarts_kwargs["useChirality"] = True - if "maxMatches" not in self.smarts_kwargs: - self.smarts_kwargs["maxMatches"] = group.n_atoms + self.smarts_kwargs.setdefault("useChirality", True) + self.smarts_kwargs.setdefault("maxMatches", group.n_atoms) matches = mol.GetSubstructMatches(pattern, **self.smarts_kwargs) - # convert rdkit indices to mdanalysis' + # convert rdkit indices to mdanalysis indices = [ mol.GetAtomWithIdx(idx).GetIntProp("_MDAnalysis_index") for match in matches for idx in match] From dc5a70451e52abe64898d6a26af8489e1c6b1499 Mon Sep 17 00:00:00 2001 From: orioncohen Date: Fri, 10 Dec 2021 09:51:28 -0800 Subject: [PATCH 28/38] added versionchaged info at 2.0.1 --- package/MDAnalysis/core/groups.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/package/MDAnalysis/core/groups.py b/package/MDAnalysis/core/groups.py index 9ca012a7604..d30f60668cb 100644 --- a/package/MDAnalysis/core/groups.py +++ b/package/MDAnalysis/core/groups.py @@ -3141,6 +3141,9 @@ def select_atoms(self, sel, *othersel, periodic=True, rtol=1e-05, Added the *smarts* selection. Added `atol` and `rtol` keywords to select float values. Added the ``sort`` keyword. Added `rdkit_kwargs` to pass parameters to the RDKitConverter. + .. versionchanged:: 2.0.1 + Added `smarts_kwargs` to pass parameters to the RDKit + GetSubstructMatch for *smarts* selection. """ if not sel: From e984be0731cd40db67771c6b22ba25403acd19cf Mon Sep 17 00:00:00 2001 From: orioncohen Date: Fri, 14 Jan 2022 12:30:18 -0800 Subject: [PATCH 29/38] returned to default to 1000 --- package/MDAnalysis/core/groups.py | 3 ++- package/MDAnalysis/core/selection.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/package/MDAnalysis/core/groups.py b/package/MDAnalysis/core/groups.py index d30f60668cb..3afb735a208 100644 --- a/package/MDAnalysis/core/groups.py +++ b/package/MDAnalysis/core/groups.py @@ -2987,7 +2987,8 @@ def select_atoms(self, sel, *othersel, periodic=True, rtol=1e-05, `RDKitConverter.convert()` and the smarts_kwargs are passed to RDKit's `GetSubstructMatches `_. - By default, the `useChirality` kwarg in rdkit_kwargs is set to true. + By default, the `useChirality` kwarg in rdkit_kwargs is set to true + and maxMatches in smarts_kwargs is 1000. >>> universe.select_atoms("C", smarts_kwargs={"maxMatches": 100}) diff --git a/package/MDAnalysis/core/selection.py b/package/MDAnalysis/core/selection.py index 907145d5fd9..dcb215a43fb 100644 --- a/package/MDAnalysis/core/selection.py +++ b/package/MDAnalysis/core/selection.py @@ -659,7 +659,7 @@ def _apply(self, group): mol = group.convert_to("RDKIT", **self.rdkit_kwargs) # override GetSubstructMatches default values self.smarts_kwargs.setdefault("useChirality", True) - self.smarts_kwargs.setdefault("maxMatches", group.n_atoms) + self.smarts_kwargs.setdefault("maxMatches", 1000) matches = mol.GetSubstructMatches(pattern, **self.smarts_kwargs) # convert rdkit indices to mdanalysis indices = [ From cbcba0fa5b5cc7c64df9d97d6586070c4868cc64 Mon Sep 17 00:00:00 2001 From: orioncohen Date: Fri, 14 Jan 2022 12:36:27 -0800 Subject: [PATCH 30/38] update changelog to reflect no change to defaults --- package/CHANGELOG | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/package/CHANGELOG b/package/CHANGELOG index 0e1835a8f86..22e73e93e3b 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -28,8 +28,8 @@ Fixes created using add_Residue and add_Segment (Issue #3437) Enhancements - * increase smarts atom selection limit from 1000 -> n_atoms. Add - maxMatches and useChirality to rdkit_kwargs. (Issue #3469, PR #3470) + * Add smarts_kwargs to allow greater flexiblity with smarts atom selection + (Issue #3469, PR #3470). * Add option for custom compiler flags for C/C++ on build and remove march option from setup.cfg (Issue #3428, PR #3429). From e57695aa4d4b91734a3046fe84c4365b83951f59 Mon Sep 17 00:00:00 2001 From: orioncohen Date: Wed, 18 May 2022 15:55:15 -0700 Subject: [PATCH 31/38] added docstring parameter for smarts_kwargs --- package/MDAnalysis/core/selection.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/package/MDAnalysis/core/selection.py b/package/MDAnalysis/core/selection.py index 83786b52311..d595fea2ed4 100644 --- a/package/MDAnalysis/core/selection.py +++ b/package/MDAnalysis/core/selection.py @@ -1434,7 +1434,9 @@ def parse(self, selectstr, selgroups, periodic=None, atol=1e-08, rdkit_kwargs : dict, optional Arguments passed to the RDKitConverter when using selection based on SMARTS queries - + smarts_kwargs : dict, optional + Arguments passed internally to RDKit's `GetSubstructMatches + `_. Returns ------- From 9eabb4c4727e45cc03a965041fb447505a65a1fe Mon Sep 17 00:00:00 2001 From: orioncohen Date: Wed, 18 May 2022 15:56:44 -0700 Subject: [PATCH 32/38] updated documentation description for smarts query and duplicated in sphinx docs --- package/MDAnalysis/core/groups.py | 4 ++-- .../source/documentation_pages/selections.rst | 13 ++++++++++--- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/package/MDAnalysis/core/groups.py b/package/MDAnalysis/core/groups.py index 040b5ba30a2..66a696220fe 100644 --- a/package/MDAnalysis/core/groups.py +++ b/package/MDAnalysis/core/groups.py @@ -3016,8 +3016,8 @@ def select_atoms(self, sel, *othersel, periodic=True, rtol=1e-05, [#7;R]`` to find nitrogen atoms in rings. Requires RDKit. All matches are combined as a single unique match. The `smarts` selection accepts two sets of key word arguments from - `select_atoms()`: the rdkit_kwargs are passed internally to - `RDKitConverter.convert()` and the smarts_kwargs are passed to + `select_atoms()`: the ``rdkit_kwargs`` are passed internally to + `RDKitConverter.convert()` and the ``smarts_kwargs`` are passed to RDKit's `GetSubstructMatches `_. By default, the `useChirality` kwarg in rdkit_kwargs is set to true diff --git a/package/doc/sphinx/source/documentation_pages/selections.rst b/package/doc/sphinx/source/documentation_pages/selections.rst index 9431f0ef4b8..8c3f030980a 100644 --- a/package/doc/sphinx/source/documentation_pages/selections.rst +++ b/package/doc/sphinx/source/documentation_pages/selections.rst @@ -164,9 +164,16 @@ moltype *molecule-type* the TPR format defines the molecule type. smarts *SMARTS-query* - select atoms using Daylight's SMARTS queries, e.g. ``smarts [#7;R]`` to - find nitrogen atoms in rings. Requires RDKit. All matches (max 1000) are - combined as a unique match. + select atoms using Daylight's SMARTS queries, e.g. ``smarts + [#7;R]`` to find nitrogen atoms in rings. Requires RDKit. + All matches are combined as a single unique match. The `smarts` + selection accepts two sets of key word arguments from + `select_atoms()`: the ``rdkit_kwargs`` are passed internally to + `RDKitConverter.convert()` and the ``smarts_kwargs`` are passed to + RDKit's `GetSubstructMatches + `_. + By default, the `useChirality` kwarg in rdkit_kwargs is set to true + and maxMatches in smarts_kwargs is 1000. chiral *R | S* select a particular stereocenter. e.g. ``name C and chirality S`` From ae5c5e62a61b5c6b913744045d5e3263a9b32185 Mon Sep 17 00:00:00 2001 From: orioncohen Date: Thu, 19 May 2022 10:30:07 -0700 Subject: [PATCH 33/38] change default maxMatches to 10 * n_atoms and add test --- package/MDAnalysis/core/selection.py | 10 +++++++++- testsuite/MDAnalysisTests/core/test_atomselections.py | 4 ++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/package/MDAnalysis/core/selection.py b/package/MDAnalysis/core/selection.py index d595fea2ed4..7c8c530697b 100644 --- a/package/MDAnalysis/core/selection.py +++ b/package/MDAnalysis/core/selection.py @@ -680,8 +680,16 @@ def _apply(self, group): raise ValueError(f"{self.pattern!r} is not a valid SMARTS query") mol = group.convert_to("RDKIT", **self.rdkit_kwargs) self.smarts_kwargs.setdefault("useChirality", True) - self.smarts_kwargs.setdefault("maxMatches", 1000) + self.smarts_kwargs.setdefault("maxMatches", len(group) * 10) matches = mol.GetSubstructMatches(pattern, **self.smarts_kwargs) + if len(matches) == self.smarts_kwargs["maxMatches"]: + warnings.warn("Your smarts-based atom selection returned the max" + "number of matches. This indicates that not all" + "matching atoms were selected. When calling" + "atom_group.select_atoms(), the default value" + "of maxMatches is len(atom_group * 10). To fix, " + "add the following argument to select_atoms: \n" + "smarts_kwargs={maxMatches: }") # flatten all matches and remove duplicated indices indices = np.unique([idx for match in matches for idx in match]) # create boolean mask for atoms based on index diff --git a/testsuite/MDAnalysisTests/core/test_atomselections.py b/testsuite/MDAnalysisTests/core/test_atomselections.py index 9958d199b39..58a72a687e8 100644 --- a/testsuite/MDAnalysisTests/core/test_atomselections.py +++ b/testsuite/MDAnalysisTests/core/test_atomselections.py @@ -585,6 +585,10 @@ def test_passing_max_matches_to_converter(self, u2): assert sel.n_atoms == 2 sel2 = u2.select_atoms("smarts c") assert sel2.n_atoms == 4 + with pytest.warns(UserWarning) as warnings: + u2.select_atoms("smarts C", smarts_kwargs=dict(maxMatches=2)) + u2.select_atoms("smarts C", smarts_kwargs=dict(maxMatches=1000)) + assert len(warnings) == 1 def test_passing_use_chirality_to_converter(self): u = mda.Universe.from_smiles("CC[C@H](C)O") From 1e6eba0de67852019a5befc1d942a56340396af5 Mon Sep 17 00:00:00 2001 From: orioncohen Date: Thu, 19 May 2022 10:30:28 -0700 Subject: [PATCH 34/38] update documentation for smarts kwargs --- package/MDAnalysis/core/groups.py | 5 +++-- package/doc/sphinx/source/documentation_pages/selections.rst | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/package/MDAnalysis/core/groups.py b/package/MDAnalysis/core/groups.py index 66a696220fe..9db27397c66 100644 --- a/package/MDAnalysis/core/groups.py +++ b/package/MDAnalysis/core/groups.py @@ -3020,8 +3020,9 @@ def select_atoms(self, sel, *othersel, periodic=True, rtol=1e-05, `RDKitConverter.convert()` and the ``smarts_kwargs`` are passed to RDKit's `GetSubstructMatches `_. - By default, the `useChirality` kwarg in rdkit_kwargs is set to true - and maxMatches in smarts_kwargs is 1000. + By default, the `useChirality` kwarg in ``rdkit_kwargs`` is set to true + and maxMatches in ``smarts_kwargs`` is ``10 * len(AtomGroup)`` or + ``10 * len(Universe.atoms)``, whichever is applicable. >>> universe.select_atoms("C", smarts_kwargs={"maxMatches": 100}) diff --git a/package/doc/sphinx/source/documentation_pages/selections.rst b/package/doc/sphinx/source/documentation_pages/selections.rst index 8c3f030980a..895deacb577 100644 --- a/package/doc/sphinx/source/documentation_pages/selections.rst +++ b/package/doc/sphinx/source/documentation_pages/selections.rst @@ -172,8 +172,9 @@ smarts *SMARTS-query* `RDKitConverter.convert()` and the ``smarts_kwargs`` are passed to RDKit's `GetSubstructMatches `_. - By default, the `useChirality` kwarg in rdkit_kwargs is set to true - and maxMatches in smarts_kwargs is 1000. + By default, the `useChirality` kwarg in ``rdkit_kwargs`` is set to true + and maxMatches in ``smarts_kwargs`` is ``10 * len(AtomGroup)`` or + ``10 * len(Universe.atoms)``, whichever is applicable. chiral *R | S* select a particular stereocenter. e.g. ``name C and chirality S`` From e0e28a36e0c7c15750a4b9e87cd3205145c3d2f0 Mon Sep 17 00:00:00 2001 From: orioncohen Date: Thu, 19 May 2022 10:34:08 -0700 Subject: [PATCH 35/38] add smarts kwargs warning to docs --- package/MDAnalysis/core/groups.py | 4 ++++ package/doc/sphinx/source/documentation_pages/selections.rst | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/package/MDAnalysis/core/groups.py b/package/MDAnalysis/core/groups.py index 9db27397c66..a8e753811da 100644 --- a/package/MDAnalysis/core/groups.py +++ b/package/MDAnalysis/core/groups.py @@ -3023,6 +3023,10 @@ def select_atoms(self, sel, *othersel, periodic=True, rtol=1e-05, By default, the `useChirality` kwarg in ``rdkit_kwargs`` is set to true and maxMatches in ``smarts_kwargs`` is ``10 * len(AtomGroup)`` or ``10 * len(Universe.atoms)``, whichever is applicable. + Note that the number of matches can occasionally exceed the default + value of maxMatches, causing too few atoms to be returned. If this + occurs, a warning will be issued. The problem can be fixed by increasing + the value of maxMatches. This behavior may be updated in the future. >>> universe.select_atoms("C", smarts_kwargs={"maxMatches": 100}) diff --git a/package/doc/sphinx/source/documentation_pages/selections.rst b/package/doc/sphinx/source/documentation_pages/selections.rst index 895deacb577..4c653c3566c 100644 --- a/package/doc/sphinx/source/documentation_pages/selections.rst +++ b/package/doc/sphinx/source/documentation_pages/selections.rst @@ -175,6 +175,10 @@ smarts *SMARTS-query* By default, the `useChirality` kwarg in ``rdkit_kwargs`` is set to true and maxMatches in ``smarts_kwargs`` is ``10 * len(AtomGroup)`` or ``10 * len(Universe.atoms)``, whichever is applicable. + Note that the number of matches can occasionally exceed the default + value of maxMatches, causing too few atoms to be returned. If this + occurs, a warning will be issued. The problem can be fixed by increasing + the value of maxMatches. This behavior may be updated in the future. chiral *R | S* select a particular stereocenter. e.g. ``name C and chirality S`` From e38e219416c233d1bcfeb7e80e4a78c751dd1f77 Mon Sep 17 00:00:00 2001 From: IAlibay Date: Wed, 1 Jun 2022 13:01:06 +0100 Subject: [PATCH 36/38] various doc improvements and maxMatches fix --- package/CHANGELOG | 6 ++++-- package/MDAnalysis/core/groups.py | 16 +++++++++------- package/MDAnalysis/core/selection.py | 19 ++++++++++++++++--- .../core/test_atomselections.py | 16 ++++++++-------- 4 files changed, 37 insertions(+), 20 deletions(-) diff --git a/package/CHANGELOG b/package/CHANGELOG index 2acbb42a8ca..76a759eaeca 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -58,6 +58,8 @@ Fixes * Fixed BAT method Cartesian modifies input data. (Issue #3501) Enhancements + * Add smarts_kwargs to allow greater flexiblity with smarts atom selection + (Issue #3469, PR #3470). * Added `frames` argument to AnalysisBase.run to allow analysis to run on * Added equations for `center_of_mass` and `center_of_geometry`to documentation (PR #3671) @@ -81,10 +83,10 @@ Enhancements * CRD extended format can now be explicitly requested with the `extended` keyword (Issue #3605) * Added benchmarks in lib.distances for enhancement (Issue #3205, PR #3641) - * Add smarts_kwargs to allow greater flexiblity with smarts atom selection - (Issue #3469, PR #3470). Changes + * smarts selection's `maxMatches` now defaults to max(1000, n_atoms*10) + instead of the standard RDKit default of 1000 (Issue #3469, PR #3470). * `fasteners` package is now a core dependency (Issues #3230, #1988, PR #3375) * The minimm Python, NumPy, and SciPy versions (following NEP29) are now 3.8, 1.19, and 1.5.0; GSD now also has a minimum version of 1.9.3 and networkx diff --git a/package/MDAnalysis/core/groups.py b/package/MDAnalysis/core/groups.py index 7edf3881895..1082cd527c3 100644 --- a/package/MDAnalysis/core/groups.py +++ b/package/MDAnalysis/core/groups.py @@ -3023,12 +3023,14 @@ def select_atoms(self, sel, *othersel, periodic=True, rtol=1e-05, RDKit's `GetSubstructMatches `_. By default, the `useChirality` kwarg in ``rdkit_kwargs`` is set to true - and maxMatches in ``smarts_kwargs`` is ``10 * len(AtomGroup)`` or - ``10 * len(Universe.atoms)``, whichever is applicable. - Note that the number of matches can occasionally exceed the default - value of maxMatches, causing too few atoms to be returned. If this - occurs, a warning will be issued. The problem can be fixed by increasing - the value of maxMatches. This behavior may be updated in the future. + and maxMatches in ``smarts_kwargs`` is + ``max(1000, 10 * n_atoms)``, where ``n_atoms`` is either + ``len(AtomGroup)`` or ``len(Universe.atoms)``, whichever is + applicable. Note that the number of matches can occasionally + exceed the default value of maxMatches, causing too few atoms + to be returned. If this occurs, a warning will be issued. The + problem can be fixed by increasing the value of maxMatches. + This behavior may be updated in the future. >>> universe.select_atoms("C", smarts_kwargs={"maxMatches": 100}) @@ -3188,7 +3190,7 @@ def select_atoms(self, sel, *othersel, periodic=True, rtol=1e-05, Added the *smarts* selection. Added `atol` and `rtol` keywords to select float values. Added the ``sort`` keyword. Added `rdkit_kwargs` to pass parameters to the RDKitConverter. - .. versionchanged:: 2.0.1 + .. versionchanged:: 2.2.0 Added `smarts_kwargs` to pass parameters to the RDKit GetSubstructMatch for *smarts* selection. """ diff --git a/package/MDAnalysis/core/selection.py b/package/MDAnalysis/core/selection.py index 7c8c530697b..9692fc66395 100644 --- a/package/MDAnalysis/core/selection.py +++ b/package/MDAnalysis/core/selection.py @@ -637,6 +637,15 @@ class SmartsSelection(Selection): Uses RDKit to run the query and converts the result to MDAnalysis. Supports chirality. + + .. versionchanged:: 2.2.0 + ``rdkit_wargs`` and ``smarts_kwargs`` can now be passed to control + the behaviour of the RDKit converter and RDKit's ``GetSubstructMatches`` + respectively. + The default ``maxMatches`` value passed to ``GetSubstructMatches`` has + been changed from ``1000`` to ``max(1000, n_atoms * 10)`` in order to + limit cases where too few matches were generated. A warning is now also + thrown if ``maxMatches`` has been reached. """ token = 'smarts' @@ -680,15 +689,16 @@ def _apply(self, group): raise ValueError(f"{self.pattern!r} is not a valid SMARTS query") mol = group.convert_to("RDKIT", **self.rdkit_kwargs) self.smarts_kwargs.setdefault("useChirality", True) - self.smarts_kwargs.setdefault("maxMatches", len(group) * 10) + self.smarts_kwargs.setdefault("maxMatches", max(1000, len(group) * 10)) matches = mol.GetSubstructMatches(pattern, **self.smarts_kwargs) if len(matches) == self.smarts_kwargs["maxMatches"]: warnings.warn("Your smarts-based atom selection returned the max" "number of matches. This indicates that not all" "matching atoms were selected. When calling" "atom_group.select_atoms(), the default value" - "of maxMatches is len(atom_group * 10). To fix, " - "add the following argument to select_atoms: \n" + "of maxMatches is max(100, len(atom_group * 10)). " + "To fix this, add the following argument to " + "select_atoms: \n" "smarts_kwargs={maxMatches: }") # flatten all matches and remove duplicated indices indices = np.unique([idx for match in matches for idx in match]) @@ -1460,6 +1470,9 @@ def parse(self, selectstr, selgroups, periodic=None, atol=1e-08, .. versionchanged:: 2.0.0 Added `atol` and `rtol` keywords to select float values. Added `rdkit_kwargs` to pass arguments to the RDKitConverter + .. versionchanged:: 2.2.0 + Added ``smarts_kwargs`` argument, allowing users to pass a + a dictionary of arguments to RDKit's ``GetSubstructMatches``. """ self.periodic = periodic self.atol = atol diff --git a/testsuite/MDAnalysisTests/core/test_atomselections.py b/testsuite/MDAnalysisTests/core/test_atomselections.py index 58a72a687e8..f24911f9f4d 100644 --- a/testsuite/MDAnalysisTests/core/test_atomselections.py +++ b/testsuite/MDAnalysisTests/core/test_atomselections.py @@ -581,14 +581,14 @@ def test_passing_rdkit_kwargs_to_converter(self): assert sel.n_atoms == 2 def test_passing_max_matches_to_converter(self, u2): - sel = u2.select_atoms("smarts C", smarts_kwargs=dict(maxMatches=2)) - assert sel.n_atoms == 2 - sel2 = u2.select_atoms("smarts c") - assert sel2.n_atoms == 4 - with pytest.warns(UserWarning) as warnings: - u2.select_atoms("smarts C", smarts_kwargs=dict(maxMatches=2)) - u2.select_atoms("smarts C", smarts_kwargs=dict(maxMatches=1000)) - assert len(warnings) == 1 + with pytest.warns(UserWarning, match="Your smarts-based") as wsmg: + sel = u2.select_atoms("smarts C", smarts_kwargs=dict(maxMatches=2)) + sel2 = u2.select_atoms( + "smarts C", smarts_kwargs=dict(maxMatches=1000)) + assert sel.n_atoms == sel2.n_atoms == 2 + + sel3 = u2.select_atoms("smarts c") + assert sel3.n_atoms == 4 def test_passing_use_chirality_to_converter(self): u = mda.Universe.from_smiles("CC[C@H](C)O") From e7d6a68bf1a4930b042b2696fbe9bab8fcceb72f Mon Sep 17 00:00:00 2001 From: IAlibay Date: Wed, 1 Jun 2022 13:04:57 +0100 Subject: [PATCH 37/38] update selections.rst --- .../source/documentation_pages/selections.rst | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/package/doc/sphinx/source/documentation_pages/selections.rst b/package/doc/sphinx/source/documentation_pages/selections.rst index 4c653c3566c..986cde2527d 100644 --- a/package/doc/sphinx/source/documentation_pages/selections.rst +++ b/package/doc/sphinx/source/documentation_pages/selections.rst @@ -173,12 +173,13 @@ smarts *SMARTS-query* RDKit's `GetSubstructMatches `_. By default, the `useChirality` kwarg in ``rdkit_kwargs`` is set to true - and maxMatches in ``smarts_kwargs`` is ``10 * len(AtomGroup)`` or - ``10 * len(Universe.atoms)``, whichever is applicable. - Note that the number of matches can occasionally exceed the default - value of maxMatches, causing too few atoms to be returned. If this - occurs, a warning will be issued. The problem can be fixed by increasing - the value of maxMatches. This behavior may be updated in the future. + and maxMatches in ``smarts_kwargs`` is ``max(1000, 10 * n_atoms)``, where + ``n_atoms`` is either ``len(AtomGroup)`` or ``len(Universe.atoms)``, + whichever is applicable. Note that the number of matches can occasionally + exceed the default value of maxMatches, causing too few atoms to be + returned. If this occurs, a warning will be issued. The problem can be + fixed by increasing the value of maxMatches. This behavior may be updated + in the future chiral *R | S* select a particular stereocenter. e.g. ``name C and chirality S`` From c739404e31b59620e440ee1ed010bcbbd62dc4d2 Mon Sep 17 00:00:00 2001 From: IAlibay Date: Wed, 1 Jun 2022 13:56:43 +0100 Subject: [PATCH 38/38] fix tests --- testsuite/MDAnalysisTests/core/test_atomselections.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/testsuite/MDAnalysisTests/core/test_atomselections.py b/testsuite/MDAnalysisTests/core/test_atomselections.py index f24911f9f4d..8f27c99d794 100644 --- a/testsuite/MDAnalysisTests/core/test_atomselections.py +++ b/testsuite/MDAnalysisTests/core/test_atomselections.py @@ -585,7 +585,8 @@ def test_passing_max_matches_to_converter(self, u2): sel = u2.select_atoms("smarts C", smarts_kwargs=dict(maxMatches=2)) sel2 = u2.select_atoms( "smarts C", smarts_kwargs=dict(maxMatches=1000)) - assert sel.n_atoms == sel2.n_atoms == 2 + assert sel.n_atoms == 2 + assert sel2.n_atoms == 3 sel3 = u2.select_atoms("smarts c") assert sel3.n_atoms == 4