diff --git a/package/AUTHORS b/package/AUTHORS index 79e2ddc5643..355371703a6 100644 --- a/package/AUTHORS +++ b/package/AUTHORS @@ -122,6 +122,7 @@ Chronological list of authors - Luís Pedro Borges Araújo - Abhishek A. Kognole - Rocco Meli + - Lily Wang External code ------------- diff --git a/package/CHANGELOG b/package/CHANGELOG index ceaa559a408..2814d88f8cc 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -16,11 +16,13 @@ The rules for this file: mm/dd/yy micaela-matta, xiki-tempula, zemanj, mattwthompson, orbeckst, aliehlen, dpadula85, jbarnoud, manuel.nuno.melo, richardjgowers, mattwthompson, ayushsuhane, picocentauri, NinadBhat, bieniekmateusz, p-j-smith, Lp0lp, - IAlibay, tyler.je.reddy, aakognole, RMeli + IAlibay, tyler.je.reddy, aakognole, RMeli, lilyminium * 0.20.0 Enhancements + * improved atom element guessing in topology.guessers to also check for + element symbols beyond the first character of the atom name (#2313) * added the zero-based index selection keyword (Issue #1959) * added position averaging transformation that makes use of the transformations API (PR #2208) diff --git a/package/MDAnalysis/topology/guessers.py b/package/MDAnalysis/topology/guessers.py index 748ed429c14..a9b52c36198 100644 --- a/package/MDAnalysis/topology/guessers.py +++ b/package/MDAnalysis/topology/guessers.py @@ -32,6 +32,7 @@ import numpy as np import warnings +import re from ..lib import distances from . import tables @@ -88,7 +89,6 @@ def guess_types(atom_names): return np.array([guess_atom_element(name) for name in atom_names], dtype=object) - def guess_atom_type(atomname): """Guess atom type from the name. @@ -106,6 +106,9 @@ def guess_atom_type(atomname): return guess_atom_element(atomname) +NUMBERS = re.compile(r'[0-9]') # match numbers +SYMBOLS = re.compile(r'[\*\+\-]') # match *, +, - + def guess_atom_element(atomname): """Guess the element of the atom from the name. 
@@ -127,13 +130,22 @@ def guess_atom_element(atomname): try: return tables.atomelements[atomname] except KeyError: - if atomname[0].isdigit(): - # catch 1HH etc - try: - return atomname[1] - except IndexError: - pass - return atomname[0] + # strip symbols and numbers + no_symbols = re.sub(SYMBOLS, '', atomname) + name = re.sub(NUMBERS, '', no_symbols).upper() + while name: + if name in tables.elements: + return name + if name[:-1] in tables.elements: + return name[:-1] + if name[1:] in tables.elements: + return name[1:] + if len(name) <= 2: + return name[0] + name = name[:-1] # probably element is on left not right + + # if it's numbers + return no_symbols def guess_bonds(atoms, coords, box=None, **kwargs): diff --git a/package/MDAnalysis/topology/tables.py b/package/MDAnalysis/topology/tables.py index fa68fa83e77..62886ee6823 100644 --- a/package/MDAnalysis/topology/tables.py +++ b/package/MDAnalysis/topology/tables.py @@ -77,6 +77,7 @@ def kv2dict(s, convertor=str): d[values[0]] = convertor(values[1]) return d + #: Table with hard-coded special atom names, used for guessing atom types #: with :func:`MDAnalysis.topology.core.guess_atom_element`. TABLE_ATOMELEMENTS = """ @@ -173,6 +174,11 @@ def kv2dict(s, convertor=str): #: with :func:`MDAnalysis.topology.core.guess_atom_type`. atomelements = kv2dict(TABLE_ATOMELEMENTS) +elements = ['H', + 'LI', 'BE', 'B', 'C', 'N', 'O', 'F', + 'NA', 'MG', 'AL', 'P', 'SI', 'S', 'CL', + 'K'] + #: Plain-text table with atomic masses in u. 
TABLE_MASSES = """ # masses for elements in atomic units (u) diff --git a/testsuite/MDAnalysisTests/topology/test_guessers.py b/testsuite/MDAnalysisTests/topology/test_guessers.py index 3a5fa05c6ef..cf7c69e8c1e 100644 --- a/testsuite/MDAnalysisTests/topology/test_guessers.py +++ b/testsuite/MDAnalysisTests/topology/test_guessers.py @@ -79,6 +79,20 @@ def test_guess_atom_element_singledigit(self): def test_guess_atom_element_1H(self): assert guessers.guess_atom_element('1H') == 'H' assert guessers.guess_atom_element('2H') == 'H' + + @pytest.mark.parametrize('name, element', ( + ('AO5*', 'O'), + ('F-', 'F'), + ('HB1', 'H'), + ('OC2', 'O'), + ('1he2', 'H'), + ('3hg2', 'H'), + ('OH-', 'O'), + ('HO', 'H'), + ('he', 'H') + )) + def test_guess_element_from_name(self, name, element): + assert guessers.guess_atom_element(name) == element def test_guess_charge():