From 6fa9bbc84012bd776e3f1be206943819272a82b3 Mon Sep 17 00:00:00 2001 From: Jonathan Barnoud Date: Sun, 5 Jan 2020 19:43:13 +0000 Subject: [PATCH 01/13] Add tests for elements attribute from PDB See #2422 --- .../MDAnalysisTests/topology/test_pdb.py | 98 +++++++++++++++++++ 1 file changed, 98 insertions(+) diff --git a/testsuite/MDAnalysisTests/topology/test_pdb.py b/testsuite/MDAnalysisTests/topology/test_pdb.py index fe2490b5c42..bcf3c0f2b6e 100644 --- a/testsuite/MDAnalysisTests/topology/test_pdb.py +++ b/testsuite/MDAnalysisTests/topology/test_pdb.py @@ -25,6 +25,7 @@ from six.moves import StringIO import pytest +import numpy as np from numpy.testing import assert_equal import MDAnalysis as mda @@ -224,6 +225,7 @@ def test_PDB_no_resid(): END """ + def test_PDB_hex(): u = mda.Universe(StringIO(PDB_hex), format='PDB') assert len(u.atoms) == 5 @@ -233,6 +235,7 @@ def test_PDB_hex(): assert u.atoms[3].id == 100002 assert u.atoms[4].id == 100003 + @pytest.mark.filterwarnings("error") def test_PDB_metals(): from MDAnalysis.topology import tables @@ -244,3 +247,98 @@ def test_PDB_metals(): assert u.atoms[1].mass == pytest.approx(tables.masses["FE"]) assert u.atoms[2].mass == pytest.approx(tables.masses["CA"]) assert u.atoms[3].mass == pytest.approx(tables.masses["MG"]) + + +# This is a protein residue and a ligand from PDB_full and the metals +# from PDB_metals +PDB_elements = """\ +ATOM 1 N PRO A 1 0.401 40.138 17.790 1.00 23.44 N +ATOM 2 CA PRO A 1 -0.540 39.114 18.241 1.00 23.00 C +ATOM 3 C PRO A 1 -0.028 38.397 19.491 1.00 22.34 C +ATOM 4 O PRO A 1 1.136 38.550 19.843 1.00 22.20 O +ATOM 5 CB PRO A 1 -0.602 38.136 17.053 1.00 23.21 C +ATOM 6 CG PRO A 1 0.504 38.548 16.115 1.00 24.23 C +ATOM 7 CD PRO A 1 0.728 39.999 16.363 1.00 23.31 C +TER 8 +HETATM 9 CU CU A 9 00.000 00.000 00.000 1.00 00.00 Cu +HETATM 10 FE FE A 10 03.000 03.000 03.000 1.00 00.00 Fe +HETATM 11 Ca Ca A 11 03.000 03.000 03.000 1.00 00.00 Ca +HETATM 12 Mg Mg A 12 03.000 03.000 03.000 1.00 00.00 Mg +TER 13 +HETATM 1609 S DMS A 101 19.762 39.489 18.350 1.00 25.99 S +HETATM 1610 O DMS A 101 19.279 37.904 18.777 1.00 23.69 O +HETATM 1611 C1 DMS A 101 21.344 39.260 17.532 1.00 24.07 C +HETATM 1612 C2 DMS A 101 18.750 40.066 17.029 1.00 20.50 C +HETATM 1613 S DMS A 102 22.157 39.211 12.217 1.00 27.26 S +HETATM 1614 O DMS A 102 20.622 38.811 11.702 1.00 25.51 O +HETATM 1615 C1 DMS A 102 22.715 40.764 11.522 1.00 26.00 C +HETATM 1616 C2 DMS A 102 22.343 39.515 13.971 1.00 25.46 C +TER 1617 +""" + + +def test_read_atom_elements(): + """ + If element symbols are provided, they are exposed as the 'elements' + topology attribute. + + `Issue 2422 `_ + """ + u = mda.Universe(StringIO(PDB_elements), format='PDB') + expected = np.array([ + 'N', 'C', 'C', 'O', 'C', 'C', 'C', + 'Cu', 'Fe', 'Ca', 'Mg', + 'S', 'O', 'C', 'C', 'S', 'O', 'C', 'C', + ], dtype=object) + assert np.all(u.atoms.elements == expected) + + +PDB_elements_partial = """\ +ATOM 1 N PRO A 1 0.401 40.138 17.790 1.00 23.44 +ATOM 2 CA PRO A 1 -0.540 39.114 18.241 1.00 23.00 C +ATOM 3 C PRO A 1 -0.028 38.397 19.491 1.00 22.34 +ATOM 4 O PRO A 1 1.136 38.550 19.843 1.00 22.20 O +""" + + +def test_read_partial_atom_elements(): + """ + If element symbols are provided but some are missing, the symbols are + exposed as the 'elements' topology attribute, the missing symbols are + defaulted to an empty string, and a warning is issued. + """ + with pytest.warns(UserWarning) as records: + u = mda.Universe(StringIO(PDB_elements_partial), format='PDB') + # We expect only one warning because of the missing element symbols, + # though there may be other warnings that are unrelated. + assert len(records) >= 1 + warning_is_about_elements = [ + 'element' in str(record.message) for record in records + ] + print([str(record.message) for record in records]) + assert any(warning_is_about_elements) + expected = np.array(['', 'C', '', 'O'], dtype=object) + assert np.all(u.atoms.elements == expected) + + +def test_read_no_atom_elements(): + """ + If element symbols are not provided, the 'elements' topology attribute + is not created. + + `Issue 2422 `_ + """ + u = mda.Universe(PDB_small) + with pytest.raises(AttributeError): + _ = u.atoms.elements + + +def test_read_no_atom_elements_no_warning(recwarn): + """ + If no element symbols are provided, no waring is issued. + """ + u = mda.Universe(PDB_small) + warning_is_about_elements = [ + 'element' in str(record.message) for record in recwarn + ] + assert not any(warning_is_about_elements) From daf6af244d9a81339169e77ec383837a6c2c5e1c Mon Sep 17 00:00:00 2001 From: Jonathan Barnoud Date: Mon, 6 Jan 2020 18:54:49 +0000 Subject: [PATCH 02/13] Populate the 'elements' topology attribute when reading PDB --- package/MDAnalysis/topology/PDBParser.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/package/MDAnalysis/topology/PDBParser.py b/package/MDAnalysis/topology/PDBParser.py index 4eaf20e29c6..154756a4b79 100644 --- a/package/MDAnalysis/topology/PDBParser.py +++ b/package/MDAnalysis/topology/PDBParser.py @@ -72,6 +72,7 @@ Bonds, ChainIDs, Atomtypes, + Elements, ICodes, Masses, Occupancies, @@ -94,6 +95,7 @@ def float_or_default(val, default): DIGITS_UPPER_VALUES = dict([pair for pair in zip(DIGITS_UPPER, range(36))]) DIGITS_LOWER_VALUES = dict([pair for pair in zip(DIGITS_LOWER, range(36))]) + def decode_pure(digits_values, s): """Decodes the string s using the digit, value associations for each character. @@ -299,6 +301,13 @@ def _parseatoms(self): attrs.append(Atomtypes(atomtypes, guessed=True)) else: attrs.append(Atomtypes(np.array(atomtypes, dtype=object))) + attrs.append(Elements(np.array(atomtypes, dtype=object))) + if not all(atomtypes): + n_undefined = sum(not atom_type for atom_type in atomtypes) + warnings.warn("The element symbol for some but not all atoms " + "are missing, the symbol is missing for " + "{}/{} atoms." + .format(n_undefined, len(atomtypes))) masses = guess_masses(atomtypes) attrs.append(Masses(masses, guessed=True)) From 2cc3a7ea718b66793e7d90b1f0c7e6c31268fd42 Mon Sep 17 00:00:00 2001 From: Jonathan Barnoud Date: Mon, 6 Jan 2020 19:42:23 +0000 Subject: [PATCH 03/13] Add tests for #2423 --- .../MDAnalysisTests/coordinates/test_pdb.py | 47 +++++++++++++++++++ .../MDAnalysisTests/topology/test_pdb.py | 2 +- 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/testsuite/MDAnalysisTests/coordinates/test_pdb.py b/testsuite/MDAnalysisTests/coordinates/test_pdb.py index fb7a8460b26..ffff55cc0b3 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_pdb.py +++ b/testsuite/MDAnalysisTests/coordinates/test_pdb.py @@ -44,6 +44,15 @@ assert_almost_equal) +@pytest.fixture +def dummy_universe_without_elements(): + n_atoms = 4 + u = mda.Universe.empty(n_atoms=n_atoms, trajectory=True) + u.add_TopologyAttr('resnames', ['RES']) + u.add_TopologyAttr('names', ['C1', 'O2', 'N3', 'S4']) + return u + + class TestPDBReader(_SingleFrameReader): __test__ = True def setUp(self): @@ -930,3 +939,41 @@ def test_partially_missing_cryst(): assert len(u.atoms) == 3 assert len(u.trajectory) == 2 assert_array_almost_equal(u.dimensions, 0.0) + + +def test_write_no_atoms_elements(dummy_universe_without_elements): + """ + If no element symbols are provided, the PDB writer guesses. + """ + destination = StringIO() + with mda.coordinates.PDB.PDBWriter(destination) as writer: + writer.write(dummy_universe_without_elements.atoms) + content = destination.getvalue() + element_symbols = [ + line[76:78].strip() + for line in content.splitlines() + if line[:6] == 'ATOM ' + ] + expectation = ['C', 'O', 'N', 'S'] + assert element_symbols == expectation + + +def test_write_atom_elements(dummy_universe_without_elements): + """ + If element symbols are provided, they are used when writing the file. + + See `Issue 2423 `_. + """ + expectation = ['S', 'O', '', 'C'] + dummy_universe_with_elements = dummy_universe_without_elements + dummy_universe_with_elements.add_TopologyAttr('elements', expectation) + destination = StringIO() + with mda.coordinates.PDB.PDBWriter(destination) as writer: + writer.write(dummy_universe_without_elements.atoms) + content = destination.getvalue() + element_symbols = [ + line[76:78].strip() + for line in content.splitlines() + if line[:6] == 'ATOM ' + ] + assert element_symbols == expectation diff --git a/testsuite/MDAnalysisTests/topology/test_pdb.py b/testsuite/MDAnalysisTests/topology/test_pdb.py index bcf3c0f2b6e..61665942976 100644 --- a/testsuite/MDAnalysisTests/topology/test_pdb.py +++ b/testsuite/MDAnalysisTests/topology/test_pdb.py @@ -335,7 +335,7 @@ def test_read_no_atom_elements(): def test_read_no_atom_elements_no_warning(recwarn): """ - If no element symbols are provided, no waring is issued. + If no element symbols are provided, no warning is issued. """ u = mda.Universe(PDB_small) warning_is_about_elements = [ From 60088bd8561ac9ebf51d1665f537db97c2281f93 Mon Sep 17 00:00:00 2001 From: Jonathan Barnoud Date: Thu, 9 Jan 2020 18:30:01 +0000 Subject: [PATCH 04/13] Use elements topology attr in PDB writer when available --- package/MDAnalysis/coordinates/PDB.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/package/MDAnalysis/coordinates/PDB.py b/package/MDAnalysis/coordinates/PDB.py index ef530e8b4f7..1bd981a50d9 100644 --- a/package/MDAnalysis/coordinates/PDB.py +++ b/package/MDAnalysis/coordinates/PDB.py @@ -927,6 +927,13 @@ def get_attr(attrname, default): occupancies = get_attr('occupancies', 1.0) tempfactors = get_attr('tempfactors', 0.0) atomnames = get_attr('names', 'X') + try: + elements = getattr(atoms, 'elements') + except AttributeError: + elements = [ + guess_atom_element(name.strip()) + for name in atomnames + ] for i, atom in enumerate(atoms): vals = {} @@ -941,7 +948,7 @@ def get_attr(attrname, default): vals['occupancy'] = occupancies[i] vals['tempFactor'] = tempfactors[i] vals['segID'] = segids[i][:4] - vals['element'] = guess_atom_element(atomnames[i].strip())[:2] + vals['element'] = elements[i][:2] # .. _ATOM: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#ATOM self.pdbfile.write(self.fmt['ATOM'].format(**vals)) From c8717c58549e0f715d543b7c18bbfecdff099534 Mon Sep 17 00:00:00 2001 From: Jonathan Barnoud Date: Thu, 9 Jan 2020 19:08:21 +0000 Subject: [PATCH 05/13] Small simplification --- package/MDAnalysis/topology/PDBParser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package/MDAnalysis/topology/PDBParser.py b/package/MDAnalysis/topology/PDBParser.py index 154756a4b79..432ac169c4b 100644 --- a/package/MDAnalysis/topology/PDBParser.py +++ b/package/MDAnalysis/topology/PDBParser.py @@ -303,7 +303,7 @@ def _parseatoms(self): attrs.append(Atomtypes(np.array(atomtypes, dtype=object))) attrs.append(Elements(np.array(atomtypes, dtype=object))) if not all(atomtypes): - n_undefined = sum(not atom_type for atom_type in atomtypes) + n_undefined = not all(atomtypes) warnings.warn("The element symbol for some but not all atoms " "are missing, the symbol is missing for " "{}/{} atoms." From 59e26076b869040473d063c95e5657f1763478c6 Mon Sep 17 00:00:00 2001 From: IAlibay Date: Tue, 20 Oct 2020 20:43:07 +0100 Subject: [PATCH 06/13] Updates PDB element parsing to avoid guessing --- package/MDAnalysis/coordinates/PDB.py | 1 + package/MDAnalysis/topology/PDBParser.py | 44 +++++++-------- .../MDAnalysisTests/coordinates/test_pdb.py | 56 ++++++++++++++++++- .../MDAnalysisTests/topology/test_pdb.py | 43 +++++++------- 4 files changed, 95 insertions(+), 49 deletions(-) diff --git a/package/MDAnalysis/coordinates/PDB.py b/package/MDAnalysis/coordinates/PDB.py index c42c96efa19..4adc41a2f27 100644 --- a/package/MDAnalysis/coordinates/PDB.py +++ b/package/MDAnalysis/coordinates/PDB.py @@ -1040,6 +1040,7 @@ def get_attr(attrname, default): occupancies = get_attr('occupancies', 1.0) tempfactors = get_attr('tempfactors', 0.0) atomnames = get_attr('names', 'X') + elements = get_attr('elements', ' ') record_types = get_attr('record_types', 'ATOM') for i, atom in enumerate(atoms): diff --git a/package/MDAnalysis/topology/PDBParser.py b/package/MDAnalysis/topology/PDBParser.py index 8b715ab5478..4c1b822a6cb 100644 --- a/package/MDAnalysis/topology/PDBParser.py +++ b/package/MDAnalysis/topology/PDBParser.py @@ -220,7 +220,6 @@ def _parseatoms(self): icodes = [] tempfactors = [] occupancies = [] - atomtypes = [] resids = [] resnames = [] @@ -283,7 +282,6 @@ def _parseatoms(self): tempfactors.append(float_or_default(line[60:66], 1.0)) # AKA bfactor segids.append(line[66:76].strip()) - atomtypes.append(line[76:78].strip()) # Warn about wrapped serials if self._wrapped_serials: @@ -311,35 +309,33 @@ def _parseatoms(self): # Guessed attributes # masses from types if they exist # OPT: We do this check twice, maybe could refactor to avoid this - if not any(atomtypes): + if not any(elements): atomtypes = guess_types(names) attrs.append(Atomtypes(atomtypes, guessed=True)) + warnings.warn("Element information is missing, elements attribute " + "will not be populated. If needed these can be " + "guessed using MDAnalysis.topology.guessers.") else: - attrs.append(Atomtypes(np.array(atomtypes, dtype=object))) - attrs.append(Elements(np.array(atomtypes, dtype=object))) - if not all(atomtypes): - n_undefined = not all(atomtypes) - warnings.warn("The element symbol for some but not all atoms " - "are missing, the symbol is missing for " - "{}/{} atoms." - .format(n_undefined, len(atomtypes))) + # Feed atomtypes as raw element column, but validate elements + atomtypes = elements + attrs.append(Atomtypes(np.array(elements, dtype=object))) + + validated_elements = [] + for elem in elements: + if elem.capitalize() in SYMB2Z: + validated_elements.append(elem.capitalize()) + else: + wmsg = (f"Unknown element {elem} found for some atoms. " + f"These have been given an empty element record. " + f"If needed they can be guessed using " + f"MDAnalysis.topology.guessers.") + warnings.warn(wmsg) + validated_elements.append('') + attrs.append(Elements(np.array(validated_elements, dtype=object))) masses = guess_masses(atomtypes) attrs.append(Masses(masses, guessed=True)) - # Getting element information from element column. - if all(elements): - element_set = set(i.capitalize() for i in set(elements)) - if all(element in SYMB2Z for element in element_set): - element_list = [i.capitalize() for i in elements] - attrs.append(Elements(np.array(element_list, dtype=object))) - else: - warnings.warn("Invalid elements found in the PDB file, " - "elements attributes will not be populated.") - else: - warnings.warn("Element information is absent or missing for a few " - "atoms. Elements attributes will not be populated.") - # Residue level stuff from here resids = np.array(resids, dtype=np.int32) resnames = np.array(resnames, dtype=object) diff --git a/testsuite/MDAnalysisTests/coordinates/test_pdb.py b/testsuite/MDAnalysisTests/coordinates/test_pdb.py index 5859525066c..de5e968eefc 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_pdb.py +++ b/testsuite/MDAnalysisTests/coordinates/test_pdb.py @@ -970,7 +970,7 @@ def test_atomname_alignment(self, writtenstuff): def test_atomtype_alignment(self, writtenstuff): result_line = ("ATOM 1 H5T GUA A 1 7.974 6.430 9.561" - " 1.00 0.00 RNAA H\n") + " 1.00 0.00 RNAA \n") assert_equal(writtenstuff[9], result_line) @@ -1103,6 +1103,60 @@ def test_partially_missing_cryst(): assert_array_almost_equal(u.dimensions, 0.0) +def test_write_no_atoms_elements(dummy_universe_without_elements): + """ + If no element symbols are provided, the PDB writer guesses. + """ + destination = StringIO() + with mda.coordinates.PDB.PDBWriter(destination) as writer: + writer.write(dummy_universe_without_elements.atoms) + content = destination.getvalue() + element_symbols = [ + line[76:78].strip() + for line in content.splitlines() + if line[:6] == 'ATOM ' + ] + expectation = ['', '', '', ''] + assert element_symbols == expectation + + +def test_write_atom_elements(dummy_universe_without_elements): + """ + If element symbols are provided, they are used when writing the file. + + See `Issue 2423 `_. + """ + expectation = ['S', 'O', '', 'C'] + dummy_universe_with_elements = dummy_universe_without_elements + dummy_universe_with_elements.add_TopologyAttr('elements', expectation) + destination = StringIO() + with mda.coordinates.PDB.PDBWriter(destination) as writer: + writer.write(dummy_universe_without_elements.atoms) + content = destination.getvalue() + element_symbols = [ + line[76:78].strip() + for line in content.splitlines() + if line[:6] == 'ATOM ' + ] + assert element_symbols == expectation + + +def test_elements_roundtrip(tmpdir): + """ + Roundtrip test for PDB elements reading/writing. + """ + u = mda.Universe(CONECT) + elements = u.atoms.elements + + outfile = os.path.join(str(tmpdir), 'elements.pdb') + with mda.coordinates.PDB.PDBWriter(outfile) as writer: + writer.write(u.atoms) + + u_written = mda.Universe(outfile) + + assert_equal(elements, u_written.atoms.elements) + + def test_cryst_meaningless_warning(): # issue 2599 # FIXME: This message might change with Issue #2698 diff --git a/testsuite/MDAnalysisTests/topology/test_pdb.py b/testsuite/MDAnalysisTests/topology/test_pdb.py index f794364e77d..de498a5dad0 100644 --- a/testsuite/MDAnalysisTests/topology/test_pdb.py +++ b/testsuite/MDAnalysisTests/topology/test_pdb.py @@ -299,30 +299,26 @@ def test_PDB_elements(): assert_equal(u.atoms.elements, element_list) -PDB_missing_ele = """\ -REMARK For testing warnings in case of absent element information -ATOM 1 N ASN A 1 -8.901 4.127 -0.555 1.00 0.00 -ATOM 2 CA ASN A 1 -8.608 3.135 -1.618 1.00 0.00 -ATOM 3 C ASN A 1 -7.117 2.964 -1.897 1.00 0.00 -ATOM 4 O ASN A 1 -6.634 1.849 -1.758 1.00 0.00 -TER 5 -HETATM 6 CU CU A 2 03.000 00.000 00.000 1.00 00.00 -HETATM 7 FE FE A 3 00.000 03.000 00.000 1.00 00.00 -HETATM 8 Mg Mg A 4 03.000 03.000 03.000 1.00 00.00 -TER 9 +PDB_elements_partial = """\ +ATOM 1 N PRO A 1 0.401 40.138 17.790 1.00 23.44 +ATOM 2 CA PRO A 1 -0.540 39.114 18.241 1.00 23.00 C +ATOM 3 C PRO A 1 -0.028 38.397 19.491 1.00 22.34 +ATOM 4 O PRO A 1 1.136 38.550 19.843 1.00 22.20 O """ -def test_missing_elements_warnings(): - """The test checks whether it returns the appropriate warning on - encountering missing elements. - """ - with pytest.warns(UserWarning) as record: - u = mda.Universe(StringIO(PDB_missing_ele), format='PDB') +def test_missing_elements_noattribute(): + """Check that: - assert len(record) == 1 - assert record[0].message.args[0] == "Element information is absent or "\ - "missing for a few atoms. Elements attributes will not be populated." + 1) a warning is raised if elements are missing + 2) the elements attribute is not set + """ + wmsg = ("Element information is missing, elements attribute will not be " + "populated") + with pytest.warns(UserWarning, match=wmsg): + u = mda.Universe(PDB_small) + with pytest.raises(AttributeError): + _ = u.atoms.elements PDB_wrong_ele = """\ @@ -345,12 +341,11 @@ def test_wrong_elements_warnings(): """The test checks whether there are invalid elements in the elements column which have been parsed and returns an appropriate warning. """ - with pytest.warns(UserWarning) as record: + with pytest.warns(UserWarning, match='Unknown element XX found'): u = mda.Universe(StringIO(PDB_wrong_ele), format='PDB') - assert len(record) == 2 - assert record[1].message.args[0] == "Invalid elements found in the PDB "\ - "file, elements attributes will not be populated." + expected = np.array(['N', 'C', 'C', 'O', '', 'Cu', 'Fe', 'Mg'], dtype=object) + assert_equal(u.atoms.elements, expected) def test_nobonds_error(): From 8e38a9ad5b5048f9d5d23afd17ea98cde00984dd Mon Sep 17 00:00:00 2001 From: IAlibay Date: Tue, 20 Oct 2020 21:07:04 +0100 Subject: [PATCH 07/13] Updates PDB elements docs and changelog --- package/CHANGELOG | 4 ++++ package/MDAnalysis/coordinates/PDB.py | 3 +++ package/MDAnalysis/topology/PDBParser.py | 8 ++++---- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/package/CHANGELOG b/package/CHANGELOG index 81308c30d99..2cd70570b72 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -20,6 +20,10 @@ The rules for this file: * 2.0.0 Fixes + * PDB parser now assigns empty records for partially missing/invalid elements + instead of not assigning the elements attribute at all (Issue #2422) + * PDB writer now uses elements attribute instead of guessing elements from + atom name (Issue #2423) * AtomGroup.center now works correctly for compounds + unwrapping (Issue #2984) * Avoid using deprecated array indexing in topology attributes diff --git a/package/MDAnalysis/coordinates/PDB.py b/package/MDAnalysis/coordinates/PDB.py index 4adc41a2f27..cc9d8b459ae 100644 --- a/package/MDAnalysis/coordinates/PDB.py +++ b/package/MDAnalysis/coordinates/PDB.py @@ -1006,6 +1006,9 @@ def _write_timestep(self, ts, multiframe=False): When only :attr:`record_types` attribute is present, instead of using ATOM_ for both ATOM_ and HETATM_, HETATM_ record types are properly written out (Issue #1753). + Writing now only uses the contents of the elements attribute + instead of guessing by default. If the elements are missing, + empty records are written out (Issues #2423). """ atoms = self.obj.atoms diff --git a/package/MDAnalysis/topology/PDBParser.py b/package/MDAnalysis/topology/PDBParser.py index 4c1b822a6cb..9c13ebaf669 100644 --- a/package/MDAnalysis/topology/PDBParser.py +++ b/package/MDAnalysis/topology/PDBParser.py @@ -35,15 +35,13 @@ :mod:`~MDAnalysis.topology.ExtendedPDBParser`) that can handle residue numbers up to 99,999. -TODO: - Add attributes to guess elements for non-physical or missing elements .. Note:: The parser processes atoms and their names. Masses are guessed and set to 0 if unknown. Partial charges are not set. Elements are parsed if they are - valid. + valid, if partially missing or incorrect empty records are assigned. See Also -------- @@ -183,7 +181,9 @@ class PDBParser(TopologyReaderBase): .. versionchanged:: 1.0.0 Added parsing of valid Elements .. versionchanged:: 2.0.0 - Bonds attribute is not added if no bonds are present in PDB file + Bonds attribute is not added if no bonds are present in PDB file. + If elements are invalid or partially missing, empty elements records + are now assigned (Issue #2422). """ format = ['PDB', 'ENT'] From f174321bddedfc6ef58674c90ea269e7be77c0a1 Mon Sep 17 00:00:00 2001 From: IAlibay Date: Tue, 20 Oct 2020 21:20:36 +0100 Subject: [PATCH 08/13] PEP8 fixes --- testsuite/MDAnalysisTests/topology/test_pdb.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/testsuite/MDAnalysisTests/topology/test_pdb.py b/testsuite/MDAnalysisTests/topology/test_pdb.py index de498a5dad0..0f65e98e3b3 100644 --- a/testsuite/MDAnalysisTests/topology/test_pdb.py +++ b/testsuite/MDAnalysisTests/topology/test_pdb.py @@ -300,9 +300,9 @@ def test_PDB_elements(): PDB_elements_partial = """\ -ATOM 1 N PRO A 1 0.401 40.138 17.790 1.00 23.44 +ATOM 1 N PRO A 1 0.401 40.138 17.790 1.00 23.44 ATOM 2 CA PRO A 1 -0.540 39.114 18.241 1.00 23.00 C -ATOM 3 C PRO A 1 -0.028 38.397 19.491 1.00 22.34 +ATOM 3 C PRO A 1 -0.028 38.397 19.491 1.00 22.34 ATOM 4 O PRO A 1 1.136 38.550 19.843 1.00 22.20 O """ @@ -344,7 +344,8 @@ def test_wrong_elements_warnings(): with pytest.warns(UserWarning, match='Unknown element XX found'): u = mda.Universe(StringIO(PDB_wrong_ele), format='PDB') - expected = np.array(['N', 'C', 'C', 'O', '', 'Cu', 'Fe', 'Mg'], dtype=object) + expected = np.array(['N', 'C', 'C', 'O', '', 'Cu', 'Fe', 'Mg'], + dtype=object) assert_equal(u.atoms.elements, expected) From 3d97d564957926da107bea6208990207bf2d33c8 Mon Sep 17 00:00:00 2001 From: IAlibay Date: Tue, 20 Oct 2020 22:27:58 +0100 Subject: [PATCH 09/13] Fixes failing hole2 warning test --- testsuite/MDAnalysisTests/analysis/test_hole2.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/testsuite/MDAnalysisTests/analysis/test_hole2.py b/testsuite/MDAnalysisTests/analysis/test_hole2.py index c4c63d161cd..380eceba95e 100644 --- a/testsuite/MDAnalysisTests/analysis/test_hole2.py +++ b/testsuite/MDAnalysisTests/analysis/test_hole2.py @@ -301,16 +301,11 @@ def test_context_manager(self, universe, tmpdir): def test_output_level(self, tmpdir, universe): with tmpdir.as_cwd(): - with pytest.warns(UserWarning) as rec: + with pytest.warns(UserWarning, match='needs to be < 3'): h = hole2.HoleAnalysis(universe, output_level=100) h.run(start=self.start, stop=self.stop, random_seed=self.random_seed) - assert len(rec) == 5 - - assert any('needs to be < 3' in r.message.args[0] for r in rec) - assert any('has no dt information' in r.message.args[0] for r in rec) # 2x - assert any('Unit cell dimensions not found.' in r.message.args[0] for r in rec) # 2x # no profiles assert len(h.profiles) == 0 From 7223867fa2a83f97dfc9a54f0ac9ba823f01634f Mon Sep 17 00:00:00 2001 From: Irfan Alibay Date: Thu, 22 Oct 2020 00:16:30 +0100 Subject: [PATCH 10/13] Apply suggestions from code review Co-authored-by: Lily Wang <31115101+lilyminium@users.noreply.github.com> --- package/MDAnalysis/coordinates/PDB.py | 2 +- package/MDAnalysis/topology/PDBParser.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/package/MDAnalysis/coordinates/PDB.py b/package/MDAnalysis/coordinates/PDB.py index e9e7fb1b95e..65eb144d22a 100644 --- a/package/MDAnalysis/coordinates/PDB.py +++ b/package/MDAnalysis/coordinates/PDB.py @@ -1035,7 +1035,7 @@ def _write_timestep(self, ts, multiframe=False): types are properly written out (Issue #1753). Writing now only uses the contents of the elements attribute instead of guessing by default. If the elements are missing, - empty records are written out (Issues #2423). + empty records are written out (Issue #2423). """ atoms = self.obj.atoms diff --git a/package/MDAnalysis/topology/PDBParser.py b/package/MDAnalysis/topology/PDBParser.py index 9c13ebaf669..468d04dc985 100644 --- a/package/MDAnalysis/topology/PDBParser.py +++ b/package/MDAnalysis/topology/PDBParser.py @@ -41,7 +41,7 @@ The parser processes atoms and their names. Masses are guessed and set to 0 if unknown. Partial charges are not set. Elements are parsed if they are - valid, if partially missing or incorrect empty records are assigned. + valid. If partially missing or incorrect, empty records are assigned. See Also -------- From 357fda1a890d1f95fcab8845240df91e336b8ccd Mon Sep 17 00:00:00 2001 From: IAlibay Date: Thu, 22 Oct 2020 23:18:15 +0100 Subject: [PATCH 11/13] get rid of non-necessary warnings test_pdb coordinates --- testsuite/MDAnalysisTests/coordinates/test_pdb.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/testsuite/MDAnalysisTests/coordinates/test_pdb.py b/testsuite/MDAnalysisTests/coordinates/test_pdb.py index 5e5987df61b..68d380d0b11 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_pdb.py +++ b/testsuite/MDAnalysisTests/coordinates/test_pdb.py @@ -48,9 +48,10 @@ @pytest.fixture def dummy_universe_without_elements(): n_atoms = 4 - u = mda.Universe.empty(n_atoms=n_atoms, trajectory=True) + u = make_Universe(size=(n_atoms, 1, 1), trajectory=True) u.add_TopologyAttr('resnames', ['RES']) u.add_TopologyAttr('names', ['C1', 'O2', 'N3', 'S4']) + u.dimensions = [42, 42, 42, 90, 90, 90] return u @@ -1170,6 +1171,7 @@ def test_partially_missing_cryst(): assert_array_almost_equal(u.dimensions, 0.0) +@pytest.mark.filterwarnings(IGNORE_NO_INFORMATION_WARNING) def test_write_no_atoms_elements(dummy_universe_without_elements): """ If no element symbols are provided, the PDB writer guesses. @@ -1187,6 +1189,7 @@ def test_write_no_atoms_elements(dummy_universe_without_elements): assert element_symbols == expectation +@pytest.mark.filterwarnings(IGNORE_NO_INFORMATION_WARNING) def test_write_atom_elements(dummy_universe_without_elements): """ If element symbols are provided, they are used when writing the file. From 3d7857da495a999920bc053fd36f5d87d47659fc Mon Sep 17 00:00:00 2001 From: IAlibay Date: Thu, 22 Oct 2020 23:32:15 +0100 Subject: [PATCH 12/13] fix pdb topology tests and quick PEP8 fixes --- testsuite/MDAnalysisTests/topology/test_pdb.py | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/testsuite/MDAnalysisTests/topology/test_pdb.py b/testsuite/MDAnalysisTests/topology/test_pdb.py index 0f65e98e3b3..28e2abfaf29 100644 --- a/testsuite/MDAnalysisTests/topology/test_pdb.py +++ b/testsuite/MDAnalysisTests/topology/test_pdb.py @@ -23,7 +23,6 @@ from io import StringIO import pytest -import warnings import numpy as np from numpy.testing import assert_equal import MDAnalysis as mda @@ -66,10 +65,12 @@ ("10267", 10267) ] + @pytest.mark.parametrize('hybrid, integer', hybrid36) def test_hy36decode(hybrid, integer): assert mda.topology.PDBParser.hy36decode(5, hybrid) == integer + class PDBBase(ParserBase): expected_attrs = ['ids', 'names', 'record_types', 'resids', 'resnames', 'altLocs', 'icodes', 'occupancies', @@ -205,6 +206,7 @@ def test_PDB_record_types(): ENDMDL """ + def test_PDB_no_resid(): u = mda.Universe(StringIO(PDB_noresid), format='PDB') @@ -213,6 +215,7 @@ def test_PDB_no_resid(): # should have default resid of 1 assert u.residues[0].resid == 1 + PDB_hex = """\ REMARK For testing reading of hex atom numbers REMARK This has MODELs then hex atom numbers entries @@ -299,14 +302,6 @@ def test_PDB_elements(): assert_equal(u.atoms.elements, element_list) -PDB_elements_partial = """\ -ATOM 1 N PRO A 1 0.401 40.138 17.790 1.00 23.44 -ATOM 2 CA PRO A 1 -0.540 39.114 18.241 1.00 23.00 C -ATOM 3 C PRO A 1 -0.028 38.397 19.491 1.00 22.34 -ATOM 4 O PRO A 1 1.136 38.550 19.843 1.00 22.20 O -""" - - def test_missing_elements_noattribute(): """Check that: @@ -325,7 +320,7 @@ def test_missing_elements_noattribute(): REMARK For testing warnings of wrong elements REMARK This file represent invalid elements in the elements column ATOM 1 N ASN A 1 -8.901 4.127 -0.555 1.00 0.00 N -ATOM 2 CA ASN A 1 -8.608 3.135 -1.618 1.00 0.00 C +ATOM 2 CA ASN A 1 -8.608 3.135 -1.618 1.00 0.00 ATOM 3 C ASN A 1 -7.117 2.964 -1.897 1.00 0.00 C ATOM 4 O ASN A 1 -6.634 1.849 -1.758 1.00 0.00 O ATOM 5 X ASN A 1 -9.437 3.396 -2.889 1.00 0.00 XX @@ -344,7 +339,7 @@ def test_wrong_elements_warnings(): with pytest.warns(UserWarning, match='Unknown element XX found'): u = mda.Universe(StringIO(PDB_wrong_ele), format='PDB') - expected = np.array(['N', 'C', 'C', 'O', '', 'Cu', 'Fe', 'Mg'], + expected = np.array(['N', '', 'C', 'O', '', 'Cu', 'Fe', 'Mg'], dtype=object) assert_equal(u.atoms.elements, expected) From bd220dcb54ea15aefa5407c176b7b14199a5c792 Mon Sep 17 00:00:00 2001 From: IAlibay Date: Fri, 23 Oct 2020 00:08:49 +0100 Subject: [PATCH 13/13] Enfore upper elements in PDB files --- package/MDAnalysis/coordinates/PDB.py | 2 +- testsuite/MDAnalysisTests/coordinates/test_pdb.py | 11 ++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/package/MDAnalysis/coordinates/PDB.py b/package/MDAnalysis/coordinates/PDB.py index 65eb144d22a..a9a377b7e86 100644 --- a/package/MDAnalysis/coordinates/PDB.py +++ b/package/MDAnalysis/coordinates/PDB.py @@ -1099,7 +1099,7 @@ def get_attr(attrname, default): vals['occupancy'] = occupancies[i] vals['tempFactor'] = tempfactors[i] vals['segID'] = segids[i][:4] - vals['element'] = elements[i][:2] + vals['element'] = elements[i][:2].upper() # record_type attribute, if exists, can be ATOM or HETATM try: diff --git a/testsuite/MDAnalysisTests/coordinates/test_pdb.py b/testsuite/MDAnalysisTests/coordinates/test_pdb.py index 68d380d0b11..445d8ec35a0 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_pdb.py +++ b/testsuite/MDAnalysisTests/coordinates/test_pdb.py @@ -47,10 +47,10 @@ @pytest.fixture def dummy_universe_without_elements(): - n_atoms = 4 + n_atoms = 5 u = make_Universe(size=(n_atoms, 1, 1), trajectory=True) u.add_TopologyAttr('resnames', ['RES']) - u.add_TopologyAttr('names', ['C1', 'O2', 'N3', 'S4']) + u.add_TopologyAttr('names', ['C1', 'O2', 'N3', 'S4', 'NA']) u.dimensions = [42, 42, 42, 90, 90, 90] return u @@ -1185,7 +1185,7 @@ def test_write_no_atoms_elements(dummy_universe_without_elements): for line in content.splitlines() if line[:6] == 'ATOM ' ] - expectation = ['', '', '', ''] + expectation = ['', '', '', '', ''] assert element_symbols == expectation @@ -1196,9 +1196,10 @@ def test_write_atom_elements(dummy_universe_without_elements): See `Issue 2423 `_. """ - expectation = ['S', 'O', '', 'C'] + elems = ['S', 'O', '', 'C', 'Na'] + expectation = ['S', 'O', '', 'C', 'NA'] dummy_universe_with_elements = dummy_universe_without_elements - dummy_universe_with_elements.add_TopologyAttr('elements', expectation) + dummy_universe_with_elements.add_TopologyAttr('elements', elems) destination = StringIO() with mda.coordinates.PDB.PDBWriter(destination) as writer: writer.write(dummy_universe_without_elements.atoms)