From 0f78b7caebab2c1eb3459ec9854cf1d88ad480f6 Mon Sep 17 00:00:00 2001 From: Lily Wang Date: Mon, 5 Aug 2019 11:50:27 +1000 Subject: [PATCH 01/13] updated guessing --- package/MDAnalysis/topology/guessers.py | 25 +++++++++++++------ package/MDAnalysis/topology/tables.py | 5 ++++ .../MDAnalysisTests/topology/test_guessers.py | 4 +++ 3 files changed, 27 insertions(+), 7 deletions(-) diff --git a/package/MDAnalysis/topology/guessers.py b/package/MDAnalysis/topology/guessers.py index 748ed429c14..362e21c96ff 100644 --- a/package/MDAnalysis/topology/guessers.py +++ b/package/MDAnalysis/topology/guessers.py @@ -32,6 +32,7 @@ import numpy as np import warnings +import re from ..lib import distances from . import tables @@ -105,6 +106,7 @@ def guess_atom_type(atomname): """ return guess_atom_element(atomname) +SYMBOLS = re.compile(r'[0-9\*\+\-]') def guess_atom_element(atomname): """Guess the element of the atom from the name. @@ -127,13 +129,22 @@ def guess_atom_element(atomname): try: return tables.atomelements[atomname] except KeyError: - if atomname[0].isdigit(): - # catch 1HH etc - try: - return atomname[1] - except IndexError: - pass - return atomname[0] + # strip symbols + name = re.sub(SYMBOLS, '', atomname) + while name: + if name in tables.elements: + return name + elif name[:-1] in tables.elements: + return name[:-1] + elif name[1:] in tables.elements: + return name[1:] + if len(name) <= 2: + return name[0] + name = name[1:-1] + + # if it's numbers + return atomname + def guess_bonds(atoms, coords, box=None, **kwargs): diff --git a/package/MDAnalysis/topology/tables.py b/package/MDAnalysis/topology/tables.py index fa68fa83e77..ba39a217119 100644 --- a/package/MDAnalysis/topology/tables.py +++ b/package/MDAnalysis/topology/tables.py @@ -173,6 +173,11 @@ def kv2dict(s, convertor=str): #: with :func:`MDAnalysis.topology.core.guess_atom_type`. atomelements = kv2dict(TABLE_ATOMELEMENTS) +elements = ['H', 'He', + 'Li', 'Be', 'B', 'C', 'N', 'O', 'F', 'Ne', + 'Na', 'Mg', 'Al', 'Si', 'P', 'S', 'O', 'Ar', + 'K', 'Ca', 'Sc', 'Ti'] + #: Plain-text table with atomic masses in u. TABLE_MASSES = """ # masses for elements in atomic units (u) diff --git a/testsuite/MDAnalysisTests/topology/test_guessers.py b/testsuite/MDAnalysisTests/topology/test_guessers.py index 3a5fa05c6ef..5fff5113461 100644 --- a/testsuite/MDAnalysisTests/topology/test_guessers.py +++ b/testsuite/MDAnalysisTests/topology/test_guessers.py @@ -79,6 +79,10 @@ def test_guess_atom_element_singledigit(self): def test_guess_atom_element_1H(self): assert guessers.guess_atom_element('1H') == 'H' assert guessers.guess_atom_element('2H') == 'H' + + def test_guess_element_symbols(self): + assert guessers.guess_atom_element('AO5*') == 'O' + assert guessers.guess_atom_element('F-') == 'F' def test_guess_charge(): From 08c0fa99c8c2242201d1ab3efe2c46ee73780708 Mon Sep 17 00:00:00 2001 From: Lily Wang Date: Mon, 5 Aug 2019 12:27:03 +1000 Subject: [PATCH 02/13] updated guessers --- package/MDAnalysis/topology/guessers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package/MDAnalysis/topology/guessers.py b/package/MDAnalysis/topology/guessers.py index 362e21c96ff..da6dd07c8e5 100644 --- a/package/MDAnalysis/topology/guessers.py +++ b/package/MDAnalysis/topology/guessers.py @@ -141,7 +141,7 @@ def guess_atom_element(atomname): if len(name) <= 2: return name[0] name = name[1:-1] - + # if it's numbers return atomname From de5ebbd2e0a34b22d0763778564f6a599a88faa9 Mon Sep 17 00:00:00 2001 From: Lily Wang Date: Mon, 5 Aug 2019 14:35:26 +1000 Subject: [PATCH 03/13] makes element guessing left to right --- package/MDAnalysis/topology/guessers.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/package/MDAnalysis/topology/guessers.py b/package/MDAnalysis/topology/guessers.py index da6dd07c8e5..69c327e49e8 100644 --- a/package/MDAnalysis/topology/guessers.py +++ b/package/MDAnalysis/topology/guessers.py @@ -51,7 +51,8 @@ def guess_masses(atom_types): atom_masses : np.ndarray dtype float64 """ validate_atom_types(atom_types) - masses = np.array([get_atom_mass(atom_t) for atom_t in atom_types], dtype=np.float64) + masses = np.array([get_atom_mass(atom_t) + for atom_t in atom_types], dtype=np.float64) return masses @@ -71,7 +72,8 @@ def validate_atom_types(atom_types): try: tables.masses[atom_type] except KeyError: - warnings.warn("Failed to guess the mass for the following atom types: {}".format(atom_type)) + warnings.warn( + "Failed to guess the mass for the following atom types: {}".format(atom_type)) def guess_types(atom_names): @@ -89,7 +91,6 @@ def guess_types(atom_names): return np.array([guess_atom_element(name) for name in atom_names], dtype=object) - def guess_atom_type(atomname): """Guess atom type from the name. @@ -106,8 +107,10 @@ def guess_atom_type(atomname): """ return guess_atom_element(atomname) + SYMBOLS = re.compile(r'[0-9\*\+\-]') + def guess_atom_element(atomname): """Guess the element of the atom from the name. @@ -140,13 +143,12 @@ def guess_atom_element(atomname): return name[1:] if len(name) <= 2: return name[0] - name = name[1:-1] - + name = name[:-1] # probably element is on left not right + # if it's numbers return atomname - def guess_bonds(atoms, coords, box=None, **kwargs): r"""Guess if bonds exist between two atoms based on their distance. From acb2f2bf31ae8429e5e0fec50ceb986e98912d77 Mon Sep 17 00:00:00 2001 From: Lily Wang Date: Mon, 5 Aug 2019 14:36:17 +1000 Subject: [PATCH 04/13] updated tests --- testsuite/MDAnalysisTests/coordinates/test_pdb.py | 1 + 1 file changed, 1 insertion(+) diff --git a/testsuite/MDAnalysisTests/coordinates/test_pdb.py b/testsuite/MDAnalysisTests/coordinates/test_pdb.py index fb7a8460b26..1d695494c9a 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_pdb.py +++ b/testsuite/MDAnalysisTests/coordinates/test_pdb.py @@ -413,6 +413,7 @@ def test_slice_iteration(self, multiverse): err_msg="slicing did not produce the expected frames") def test_conect_bonds_conect(self, tmpdir, conect): + print(conect.atoms) assert_equal(len(conect.atoms), 1890) assert_equal(len(conect.bonds), 1922) From 827c6c17314954ae64d28b31b9e0ab0b90c1fcab Mon Sep 17 00:00:00 2001 From: Lily Wang Date: Mon, 5 Aug 2019 14:48:34 +1000 Subject: [PATCH 05/13] updated --- package/CHANGELOG | 2 ++ 1 file changed, 2 insertions(+) diff --git a/package/CHANGELOG b/package/CHANGELOG index f081c13d853..ed1ec132ca2 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -21,6 +21,8 @@ mm/dd/yy micaela-matta, xiki-tempula, zemanj, mattwthompson, orbeckst, aliehlen, * 0.20.0 Enhancements + * improved atom element guessing in topology.guessers to check for elements + after the first element (#2313) * added position averaging transformation that makes use of the transformations API (PR #2208) * added find_hydrogen_donors to analysis.bonds.hbond_autocorrel to From 65b45954e0cc04cc0efc2ea2666c98b3cb3779c5 Mon Sep 17 00:00:00 2001 From: Lily Wang Date: Mon, 5 Aug 2019 14:57:20 +1000 Subject: [PATCH 06/13] changed elif to if --- package/MDAnalysis/topology/guessers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/package/MDAnalysis/topology/guessers.py b/package/MDAnalysis/topology/guessers.py index 69c327e49e8..9fb92d20eaf 100644 --- a/package/MDAnalysis/topology/guessers.py +++ b/package/MDAnalysis/topology/guessers.py @@ -137,9 +137,9 @@ def guess_atom_element(atomname): while name: if name in tables.elements: return name - elif name[:-1] in tables.elements: + if name[:-1] in tables.elements: return name[:-1] - elif name[1:] in tables.elements: + if name[1:] in tables.elements: return name[1:] if len(name) <= 2: return name[0] From c0ba255ad325546e03333e3250b0f0566e6e3f06 Mon Sep 17 00:00:00 2001 From: Lily Wang <31115101+lilyminium@users.noreply.github.com> Date: Mon, 5 Aug 2019 15:00:17 +1000 Subject: [PATCH 07/13] removed auto formatting --- package/MDAnalysis/topology/guessers.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/package/MDAnalysis/topology/guessers.py b/package/MDAnalysis/topology/guessers.py index 9fb92d20eaf..eafb5bdc6c1 100644 --- a/package/MDAnalysis/topology/guessers.py +++ b/package/MDAnalysis/topology/guessers.py @@ -51,8 +51,7 @@ def guess_masses(atom_types): atom_masses : np.ndarray dtype float64 """ validate_atom_types(atom_types) - masses = np.array([get_atom_mass(atom_t) - for atom_t in atom_types], dtype=np.float64) + masses = np.array([get_atom_mass(atom_t) for atom_t in atom_types], dtype=np.float64) return masses @@ -72,8 +71,7 @@ def validate_atom_types(atom_types): try: tables.masses[atom_type] except KeyError: - warnings.warn( - "Failed to guess the mass for the following atom types: {}".format(atom_type)) + warnings.warn("Failed to guess the mass for the following atom types: {}".format(atom_type)) def guess_types(atom_names): @@ -110,7 +108,6 @@ def guess_atom_type(atomname): SYMBOLS = re.compile(r'[0-9\*\+\-]') - def guess_atom_element(atomname): """Guess the element of the atom from the name. From 94fbdf9080971157ccb090080de20b822f3afe25 Mon Sep 17 00:00:00 2001 From: Lily Wang <31115101+lilyminium@users.noreply.github.com> Date: Mon, 5 Aug 2019 15:00:44 +1000 Subject: [PATCH 08/13] Update test_pdb.py --- testsuite/MDAnalysisTests/coordinates/test_pdb.py | 1 - 1 file changed, 1 deletion(-) diff --git a/testsuite/MDAnalysisTests/coordinates/test_pdb.py b/testsuite/MDAnalysisTests/coordinates/test_pdb.py index 1d695494c9a..fb7a8460b26 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_pdb.py +++ b/testsuite/MDAnalysisTests/coordinates/test_pdb.py @@ -413,7 +413,6 @@ def test_slice_iteration(self, multiverse): err_msg="slicing did not produce the expected frames") def test_conect_bonds_conect(self, tmpdir, conect): - print(conect.atoms) assert_equal(len(conect.atoms), 1890) assert_equal(len(conect.bonds), 1922) From 0df4964e47916c83abb5ae579f75dffc2aa4399d Mon Sep 17 00:00:00 2001 From: Lily Wang <31115101+lilyminium@users.noreply.github.com> Date: Tue, 6 Aug 2019 20:58:46 +1000 Subject: [PATCH 09/13] uppercased elements --- package/MDAnalysis/topology/tables.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/package/MDAnalysis/topology/tables.py b/package/MDAnalysis/topology/tables.py index ba39a217119..0719386e4c5 100644 --- a/package/MDAnalysis/topology/tables.py +++ b/package/MDAnalysis/topology/tables.py @@ -173,10 +173,10 @@ def kv2dict(s, convertor=str): #: with :func:`MDAnalysis.topology.core.guess_atom_type`. atomelements = kv2dict(TABLE_ATOMELEMENTS) -elements = ['H', 'He', - 'Li', 'Be', 'B', 'C', 'N', 'O', 'F', 'Ne', - 'Na', 'Mg', 'Al', 'Si', 'P', 'S', 'O', 'Ar', - 'K', 'Ca', 'Sc', 'Ti'] +elements = ['H', 'HE', + 'Li', 'BE', 'B', 'C', 'N', 'O', 'F', 'NE', + 'NA', 'MG', 'AL', 'SI', 'P', 'S', 'O', 'AR', + 'K', 'CA', 'SC', 'TI'] #: Plain-text table with atomic masses in u. TABLE_MASSES = """ From 8fe5708b166256b1c13a4a6440c9c625a79e5b12 Mon Sep 17 00:00:00 2001 From: Lily Wang Date: Wed, 7 Aug 2019 09:22:20 +1000 Subject: [PATCH 10/13] updated and sorted elements --- package/MDAnalysis/topology/tables.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/package/MDAnalysis/topology/tables.py b/package/MDAnalysis/topology/tables.py index 0719386e4c5..62886ee6823 100644 --- a/package/MDAnalysis/topology/tables.py +++ b/package/MDAnalysis/topology/tables.py @@ -77,6 +77,7 @@ def kv2dict(s, convertor=str): d[values[0]] = convertor(values[1]) return d + #: Table with hard-coded special atom names, used for guessing atom types #: with :func:`MDAnalysis.topology.core.guess_atom_element`. TABLE_ATOMELEMENTS = """ @@ -173,10 +174,10 @@ def kv2dict(s, convertor=str): #: with :func:`MDAnalysis.topology.core.guess_atom_type`. atomelements = kv2dict(TABLE_ATOMELEMENTS) -elements = ['H', 'HE', - 'Li', 'BE', 'B', 'C', 'N', 'O', 'F', 'NE', - 'NA', 'MG', 'AL', 'SI', 'P', 'S', 'O', 'AR', - 'K', 'CA', 'SC', 'TI'] +elements = ['H', + 'LI', 'BE', 'B', 'C', 'N', 'O', 'F', + 'NA', 'MG', 'AL', 'P', 'SI', 'S', 'CL', + 'K'] #: Plain-text table with atomic masses in u. TABLE_MASSES = """ From aff2c444447b9edc1bc52d6c51eab2ce593475b5 Mon Sep 17 00:00:00 2001 From: Lily Wang Date: Tue, 13 Aug 2019 11:42:40 +1000 Subject: [PATCH 11/13] refined element guessing --- package/MDAnalysis/topology/guessers.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/package/MDAnalysis/topology/guessers.py b/package/MDAnalysis/topology/guessers.py index eafb5bdc6c1..a9b52c36198 100644 --- a/package/MDAnalysis/topology/guessers.py +++ b/package/MDAnalysis/topology/guessers.py @@ -106,7 +106,8 @@ def guess_atom_type(atomname): return guess_atom_element(atomname) -SYMBOLS = re.compile(r'[0-9\*\+\-]') +NUMBERS = re.compile(r'[0-9]') # match numbers +SYMBOLS = re.compile(r'[\*\+\-]') # match *, +, - def guess_atom_element(atomname): """Guess the element of the atom from the name. @@ -129,8 +130,9 @@ def guess_atom_element(atomname): try: return tables.atomelements[atomname] except KeyError: - # strip symbols - name = re.sub(SYMBOLS, '', atomname) + # strip symbols and numbers + no_symbols = re.sub(SYMBOLS, '', atomname) + name = re.sub(NUMBERS, '', no_symbols).upper() while name: if name in tables.elements: return name @@ -143,7 +145,7 @@ def guess_atom_element(atomname): name = name[:-1] # probably element is on left not right # if it's numbers - return atomname + return no_symbols def guess_bonds(atoms, coords, box=None, **kwargs): From 99a132e259feb53eef8476f6da1f29fd2fff6dbd Mon Sep 17 00:00:00 2001 From: Lily Wang Date: Tue, 13 Aug 2019 11:43:11 +1000 Subject: [PATCH 12/13] added more tests --- .../MDAnalysisTests/topology/test_guessers.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/testsuite/MDAnalysisTests/topology/test_guessers.py b/testsuite/MDAnalysisTests/topology/test_guessers.py index 5fff5113461..cf7c69e8c1e 100644 --- a/testsuite/MDAnalysisTests/topology/test_guessers.py +++ b/testsuite/MDAnalysisTests/topology/test_guessers.py @@ -80,9 +80,19 @@ def test_guess_atom_element_1H(self): assert guessers.guess_atom_element('1H') == 'H' assert guessers.guess_atom_element('2H') == 'H' - def test_guess_element_symbols(self): - assert guessers.guess_atom_element('AO5*') == 'O' - assert guessers.guess_atom_element('F-') == 'F' + @pytest.mark.parametrize('name, element', ( + ('AO5*', 'O'), + ('F-', 'F'), + ('HB1', 'H'), + ('OC2', 'O'), + ('1he2', 'H'), + ('3hg2', 'H'), + ('OH-', 'O'), + ('HO', 'H'), + ('he', 'H') + )) + def test_guess_element_from_name(self, name, element): + assert guessers.guess_atom_element(name) == element def test_guess_charge(): From 834825b485c3a2687e82e3f4e7c3585ff3961960 Mon Sep 17 00:00:00 2001 From: Lily Wang Date: Mon, 19 Aug 2019 12:32:50 +1000 Subject: [PATCH 13/13] added name --- package/AUTHORS | 1 + package/CHANGELOG | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/package/AUTHORS b/package/AUTHORS index e412dac67fe..861814a0a1f 100644 --- a/package/AUTHORS +++ b/package/AUTHORS @@ -121,6 +121,7 @@ Chronological list of authors - Yibo Zhang - Luís Pedro Borges Araújo - Abhishek A. Kognole + - Lily Wang External code ------------- diff --git a/package/CHANGELOG b/package/CHANGELOG index ed1ec132ca2..ead1512c07a 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -16,7 +16,7 @@ The rules for this file: mm/dd/yy micaela-matta, xiki-tempula, zemanj, mattwthompson, orbeckst, aliehlen, dpadula85, jbarnoud, manuel.nuno.melo, richardjgowers, mattwthompson, ayushsuhane, picocentauri, NinadBhat, bieniekmateusz, p-j-smith, Lp0lp, - IAlibay, tyler.je.reddy, aakognole + IAlibay, tyler.je.reddy, aakognole, lilyminium * 0.20.0