-
Notifications
You must be signed in to change notification settings - Fork 823
Faster name selections #2755
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Faster name selections #2755
Changes from all commits
Commits
Show all changes
18 commits
Select commit
Hold shift + click to select a range
7af1162
modified AtomNames topologyattr to include lookup table index
richardjgowers b45d165
cheeky little optimisation
richardjgowers 76135eb
rework atom name selection to use lookup tables
richardjgowers 17fe547
Update topologyattrs.py
richardjgowers a4364e2
fixed test supplying integer as atom name
richardjgowers b7b20b3
Update test_topologyattrs.py
richardjgowers 5611f96
use dict-lookup string attrs EVERYWHERERE
richardjgowers 6a66361
removed some code duplication
richardjgowers 3c4dc32
improved nucleic/backbone selections
richardjgowers 07757a5
Added explicit tests for Resnames topologyattr
richardjgowers 95f4d15
use fnmatchcase to be case sensitive
richardjgowers 59f5cde
Merge branch 'develop' into faster_name_selections
richardjgowers 498c84f
Merge branch 'develop' into faster_name_selections
orbeckst 5ae8edd
Update package/MDAnalysis/core/selection.py
orbeckst 3cf5bcb
Merge branch 'develop' into faster_name_selections
orbeckst 640c411
apply suggestions from code review
orbeckst 7493155
Merge branch 'develop' into faster_name_selections
orbeckst ee64c32
added test for setting multiple segids at once
richardjgowers File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -515,7 +515,7 @@ def apply(self, group): | |
| return group[mask] | ||
|
|
||
|
|
||
| class StringSelection(Selection): | ||
| class _ProtoStringSelection(Selection): | ||
| """Selections based on text attributes | ||
|
|
||
| .. versionchanged:: 1.0.0 | ||
|
|
@@ -530,11 +530,23 @@ def __init__(self, parser, tokens): | |
|
|
||
| @return_empty_on_apply | ||
| def apply(self, group): | ||
| mask = np.zeros(len(group), dtype=bool) | ||
| for val in self.values: | ||
| values = getattr(group, self.field) | ||
| mask |= [fnmatch.fnmatch(x, val) for x in values] | ||
| return group[mask].unique | ||
| # rather than work on group.names, cheat and look at the lookup table | ||
| nmattr = getattr(group.universe._topology, self.field) | ||
|
|
||
| matches = [] # list of passing indices | ||
richardjgowers marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| # iterate through set of known atom names, check which pass | ||
| for nm, ix in nmattr.namedict.items(): | ||
| if any(fnmatch.fnmatchcase(nm, val) for val in self.values): | ||
| matches.append(ix) | ||
|
|
||
| # atomname indices for members of this group | ||
| nmidx = nmattr.nmidx[getattr(group, self.level)] | ||
|
|
||
| return group[np.in1d(nmidx, matches)].unique | ||
|
|
||
|
|
||
| class StringSelection(_ProtoStringSelection): | ||
| level = 'ix' # operates on atom level attribute, i.e. '.ix' | ||
|
|
||
|
|
||
| class AtomNameSelection(StringSelection): | ||
|
|
@@ -561,22 +573,27 @@ class AtomICodeSelection(StringSelection): | |
| field = 'icodes' | ||
|
|
||
|
|
||
| class ResidueNameSelection(StringSelection): | ||
| class _ResidueStringSelection(_ProtoStringSelection): | ||
| level= 'resindices' | ||
|
|
||
|
|
||
| class ResidueNameSelection(_ResidueStringSelection): | ||
| """Select atoms based on 'resnames' attribute""" | ||
| token = 'resname' | ||
| field = 'resnames' | ||
|
|
||
|
|
||
| class MoleculeTypeSelection(StringSelection): | ||
| class MoleculeTypeSelection(_ResidueStringSelection): | ||
| """Select atoms based on 'moltypes' attribute""" | ||
| token = 'moltype' | ||
| field = 'moltypes' | ||
|
|
||
|
|
||
| class SegmentNameSelection(StringSelection): | ||
| class SegmentNameSelection(_ProtoStringSelection): | ||
| """Select atoms based on 'segids' attribute""" | ||
| token = 'segid' | ||
| field = 'segids' | ||
| level = 'segindices' | ||
|
|
||
|
|
||
| class AltlocSelection(StringSelection): | ||
|
|
@@ -802,10 +819,15 @@ class ProteinSelection(Selection): | |
| See Also | ||
| -------- | ||
| :func:`MDAnalysis.lib.util.convert_aa_code` | ||
|
|
||
|
|
||
| .. versionchanged:: 2.0.0 | ||
orbeckst marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| prot_res changed to set (from numpy array) | ||
| performance improved by ~100x on larger systems | ||
| """ | ||
| token = 'protein' | ||
|
|
||
| prot_res = np.array([ | ||
| prot_res = { | ||
| # CHARMM top_all27_prot_lipid.rtf | ||
| 'ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLN', 'GLU', 'GLY', 'HSD', | ||
| 'HSE', 'HSP', 'ILE', 'LEU', 'LYS', 'MET', 'PHE', 'PRO', 'SER', 'THR', | ||
|
|
@@ -828,14 +850,20 @@ class ProteinSelection(Selection): | |
| 'CLEU', 'CILE', 'CVAL', 'CASF', 'CASN', 'CGLN', 'CARG', 'CHID', 'CHIE', | ||
| 'CHIP', 'CTRP', 'CPHE', 'CTYR', 'CGLU', 'CASP', 'CLYS', 'CPRO', 'CCYS', | ||
| 'CCYX', 'CMET', 'CME', 'ASF', | ||
| ]) | ||
| } | ||
|
|
||
| def __init__(self, parser, tokens): | ||
| pass | ||
|
|
||
| def apply(self, group): | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
| mask = np.in1d(group.resnames, self.prot_res) | ||
| return group[mask].unique | ||
| resname_attr = group.universe._topology.resnames | ||
| # which values in resname attr are in prot_res? | ||
| matches = [ix for (nm, ix) in resname_attr.namedict.items() | ||
| if nm in self.prot_res] | ||
| # index of each atom's resname | ||
| nmidx = resname_attr.nmidx[group.resindices] | ||
| # intersect atom's resname index and matches to prot_res | ||
| return group[np.in1d(nmidx, matches)].unique | ||
|
|
||
|
|
||
| class NucleicSelection(Selection): | ||
|
|
@@ -850,23 +878,32 @@ class NucleicSelection(Selection): | |
|
|
||
| .. versionchanged:: 0.8 | ||
| additional Gromacs selections | ||
| .. versionchanged:: 2.0.0 | ||
| nucl_res changed to set (from numpy array) | ||
| performance improved by ~100x on larger systems | ||
| """ | ||
| token = 'nucleic' | ||
|
|
||
| nucl_res = np.array([ | ||
| nucl_res = { | ||
| 'ADE', 'URA', 'CYT', 'GUA', 'THY', 'DA', 'DC', 'DG', 'DT', 'RA', | ||
| 'RU', 'RG', 'RC', 'A', 'T', 'U', 'C', 'G', | ||
| 'DA5', 'DC5', 'DG5', 'DT5', | ||
| 'DA3', 'DC3', 'DG3', 'DT3', | ||
| 'RA5', 'RU5', 'RG5', 'RC5', | ||
| 'RA3', 'RU3', 'RG3', 'RC3' | ||
| ]) | ||
| } | ||
|
|
||
| def __init__(self, parser, tokens): | ||
| pass | ||
|
|
||
| def apply(self, group): | ||
| mask = np.in1d(group.resnames, self.nucl_res) | ||
| resnames = group.universe._topology.resnames | ||
| nmidx = resnames.nmidx[group.resindices] | ||
|
|
||
| matches = [ix for (nm, ix) in resnames.namedict.items() | ||
| if nm in self.nucl_res] | ||
| mask = np.in1d(nmidx, matches) | ||
|
|
||
| return group[mask].unique | ||
|
|
||
|
|
||
|
|
@@ -875,29 +912,65 @@ class BackboneSelection(ProteinSelection): | |
|
|
||
| This excludes OT* on C-termini | ||
| (which are included by, eg VMD's backbone selection). | ||
|
|
||
orbeckst marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| .. versionchanged:: 2.0.0 | ||
| bb_atoms changed to set (from numpy array) | ||
| performance improved by ~100x on larger systems | ||
| """ | ||
| token = 'backbone' | ||
| bb_atoms = np.array(['N', 'CA', 'C', 'O']) | ||
| bb_atoms = {'N', 'CA', 'C', 'O'} | ||
|
|
||
| def apply(self, group): | ||
| mask = np.in1d(group.names, self.bb_atoms) | ||
| mask &= np.in1d(group.resnames, self.prot_res) | ||
| return group[mask].unique | ||
| atomnames = group.universe._topology.names | ||
| resnames = group.universe._topology.resnames | ||
|
|
||
| # filter by atom names | ||
| name_matches = [ix for (nm, ix) in atomnames.namedict.items() | ||
| if nm in self.bb_atoms] | ||
| nmidx = atomnames.nmidx[group.ix] | ||
| group = group[np.in1d(nmidx, name_matches)] | ||
|
|
||
| # filter by resnames | ||
| resname_matches = [ix for (nm, ix) in resnames.namedict.items() | ||
| if nm in self.prot_res] | ||
| nmidx = resnames.nmidx[group.resindices] | ||
| group = group[np.in1d(nmidx, resname_matches)] | ||
|
|
||
| return group.unique | ||
|
|
||
|
|
||
| class NucleicBackboneSelection(NucleicSelection): | ||
| """Contains all atoms with name "P", "C5'", "C3'", "O3'", "O5'". | ||
|
|
||
| These atoms are only recognized if they are in a residue matched | ||
| by the :class:`NucleicSelection`. | ||
|
|
||
orbeckst marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| .. versionchanged:: 2.0.0 | ||
| bb_atoms changed to set (from numpy array) | ||
| performance improved by ~100x on larger systems | ||
| """ | ||
| token = 'nucleicbackbone' | ||
| bb_atoms = np.array(["P", "C5'", "C3'", "O3'", "O5'"]) | ||
| bb_atoms = {"P", "C5'", "C3'", "O3'", "O5'"} | ||
|
|
||
| def apply(self, group): | ||
| mask = np.in1d(group.names, self.bb_atoms) | ||
| mask &= np.in1d(group.resnames, self.nucl_res) | ||
| return group[mask].unique | ||
| atomnames = group.universe._topology.names | ||
| resnames = group.universe._topology.resnames | ||
|
|
||
| # filter by atom names | ||
| name_matches = [ix for (nm, ix) in atomnames.namedict.items() | ||
| if nm in self.bb_atoms] | ||
| nmidx = atomnames.nmidx[group.ix] | ||
| group = group[np.in1d(nmidx, name_matches)] | ||
|
|
||
| # filter by resnames | ||
| resname_matches = [ix for (nm, ix) in resnames.namedict.items() | ||
| if nm in self.nucl_res] | ||
| nmidx = resnames.nmidx[group.resindices] | ||
| group = group[np.in1d(nmidx, resname_matches)] | ||
|
|
||
| return group.unique | ||
|
|
||
|
|
||
| class BaseSelection(NucleicSelection): | ||
|
|
@@ -907,29 +980,65 @@ class BaseSelection(NucleicSelection): | |
|
|
||
| 'N9', 'N7', 'C8', 'C5', 'C4', 'N3', 'C2', 'N1', 'C6', | ||
| 'O6','N2','N6', 'O2','N4','O4','C5M' | ||
|
|
||
orbeckst marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| .. versionchanged:: 2.0.0 | ||
| base_atoms changed to set (from numpy array) | ||
| performance improved by ~100x on larger systems | ||
| """ | ||
| token = 'nucleicbase' | ||
| base_atoms = np.array([ | ||
| base_atoms = { | ||
| 'N9', 'N7', 'C8', 'C5', 'C4', 'N3', 'C2', 'N1', 'C6', | ||
| 'O6', 'N2', 'N6', | ||
| 'O2', 'N4', 'O4', 'C5M']) | ||
| 'O2', 'N4', 'O4', 'C5M'} | ||
|
|
||
| def apply(self, group): | ||
| mask = np.in1d(group.names, self.base_atoms) | ||
| mask &= np.in1d(group.resnames, self.nucl_res) | ||
| return group[mask].unique | ||
| atomnames = group.universe._topology.names | ||
| resnames = group.universe._topology.resnames | ||
|
|
||
| # filter by atom names | ||
| name_matches = [ix for (nm, ix) in atomnames.namedict.items() | ||
| if nm in self.base_atoms] | ||
| nmidx = atomnames.nmidx[group.ix] | ||
| group = group[np.in1d(nmidx, name_matches)] | ||
|
|
||
| # filter by resnames | ||
| resname_matches = [ix for (nm, ix) in resnames.namedict.items() | ||
| if nm in self.nucl_res] | ||
| nmidx = resnames.nmidx[group.resindices] | ||
| group = group[np.in1d(nmidx, resname_matches)] | ||
|
|
||
| return group.unique | ||
|
|
||
|
|
||
| class NucleicSugarSelection(NucleicSelection): | ||
| """Contains all atoms with name C1', C2', C3', C4', O2', O4', O3'. | ||
|
|
||
orbeckst marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| .. versionchanged:: 2.0.0 | ||
| sug_atoms changed to set (from numpy array) | ||
| performance improved by ~100x on larger systems | ||
| """ | ||
| token = 'nucleicsugar' | ||
| sug_atoms = np.array(["C1'", "C2'", "C3'", "C4'", "O4'"]) | ||
| sug_atoms = {"C1'", "C2'", "C3'", "C4'", "O4'"} | ||
|
|
||
| def apply(self, group): | ||
| mask = np.in1d(group.names, self.sug_atoms) | ||
| mask &= np.in1d(group.resnames, self.nucl_res) | ||
| return group[mask].unique | ||
| atomnames = group.universe._topology.names | ||
| resnames = group.universe._topology.resnames | ||
|
|
||
| # filter by atom names | ||
| name_matches = [ix for (nm, ix) in atomnames.namedict.items() | ||
| if nm in self.sug_atoms] | ||
| nmidx = atomnames.nmidx[group.ix] | ||
| group = group[np.in1d(nmidx, name_matches)] | ||
|
|
||
| # filter by resnames | ||
| resname_matches = [ix for (nm, ix) in resnames.namedict.items() | ||
| if nm in self.nucl_res] | ||
| nmidx = resnames.nmidx[group.resindices] | ||
| group = group[np.in1d(nmidx, resname_matches)] | ||
|
|
||
| return group.unique | ||
|
|
||
|
|
||
| class PropertySelection(Selection): | ||
|
|
||
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.