MDAnalysis · lilyminium · Nov 16, 2020 · Jan 5, 2020 · Jan 6, 2020 · Jan 6, 2020
diff --git a/package/CHANGELOG b/package/CHANGELOG
@@ -20,6 +20,10 @@ The rules for this file:
   * 2.0.0
 
 Fixes
+  * PDB parser now assigns empty records for partially missing/invalid elements
+    instead of not assigning the elements attribute at all (Issue #2422)
+  * PDB writer now uses elements attribute instead of guessing elements from
+    atom name (Issue #2423)
   * Cleanup and parametrization of test_atomgroup.py (Issue #2995)
   * The methods provided by topology attributes now appear in the
     documentation (Issue #1845)

diff --git a/package/MDAnalysis/coordinates/PDB.py b/package/MDAnalysis/coordinates/PDB.py
@@ -1033,6 +1033,9 @@ def _write_timestep(self, ts, multiframe=False):
            When only :attr:`record_types` attribute is present, instead of
            using ATOM_ for both ATOM_ and HETATM_, HETATM_ record
            types are properly written out (Issue #1753).
+           Writing now only uses the contents of the elements attribute
+           instead of guessing by default. If the elements are missing,
+           empty records are written out (Issue #2423).
 
         """
         atoms = self.obj.atoms
@@ -1067,6 +1070,7 @@ def get_attr(attrname, default):
         occupancies = get_attr('occupancies', 1.0)
         tempfactors = get_attr('tempfactors', 0.0)
         atomnames = get_attr('names', 'X')
+        elements = get_attr('elements', ' ')
         record_types = get_attr('record_types', 'ATOM')
 
         # If reindex == False, we use the atom ids for the serial. We do not
@@ -1095,7 +1099,7 @@ def get_attr(attrname, default):
             vals['occupancy'] = occupancies[i]
             vals['tempFactor'] = tempfactors[i]
             vals['segID'] = segids[i][:4]
-            vals['element'] = guess_atom_element(atomnames[i].strip())[:2]
+            vals['element'] = elements[i][:2].upper()
 
             # record_type attribute, if exists, can be ATOM or HETATM
             try:

diff --git a/package/MDAnalysis/topology/PDBParser.py b/package/MDAnalysis/topology/PDBParser.py
@@ -35,15 +35,13 @@
 :mod:`~MDAnalysis.topology.ExtendedPDBParser`) that can handle residue
 numbers up to 99,999.
 
-TODO:
-    Add attributes to guess elements for non-physical or missing elements
 
 
 .. Note::
 
    The parser processes atoms and their names. Masses are guessed and set to 0
    if unknown. Partial charges are not set. Elements are parsed if they are
-   valid.
+   valid. If partially missing or incorrect, empty records are assigned.
 
 See Also
 --------
@@ -183,7 +181,9 @@ class PDBParser(TopologyReaderBase):
     .. versionchanged:: 1.0.0
        Added parsing of valid Elements
     .. versionchanged:: 2.0.0
-       Bonds attribute is not added if no bonds are present in PDB file
+       Bonds attribute is not added if no bonds are present in PDB file.
+       If elements are invalid or partially missing, empty elements records
+       are now assigned (Issue #2422).
     """
     format = ['PDB', 'ENT']
 
@@ -220,7 +220,6 @@ def _parseatoms(self):
         icodes = []
         tempfactors = []
         occupancies = []
-        atomtypes = []
 
         resids = []
         resnames = []
@@ -283,7 +282,6 @@ def _parseatoms(self):
                 tempfactors.append(float_or_default(line[60:66], 1.0))  # AKA bfactor
 
                 segids.append(line[66:76].strip())
-                atomtypes.append(line[76:78].strip())
 
         # Warn about wrapped serials
         if self._wrapped_serials:
@@ -311,28 +309,33 @@ def _parseatoms(self):
         # Guessed attributes
         # masses from types if they exist
         # OPT: We do this check twice, maybe could refactor to avoid this
-        if not any(atomtypes):
+        if not any(elements):
             atomtypes = guess_types(names)
             attrs.append(Atomtypes(atomtypes, guessed=True))
+            warnings.warn("Element information is missing, elements attribute "
+                          "will not be populated. If needed these can be "
+                          "guessed using MDAnalysis.topology.guessers.")
         else:
-            attrs.append(Atomtypes(np.array(atomtypes, dtype=object)))
+            # Feed atomtypes as raw element column, but validate elements
+            atomtypes = elements
+            attrs.append(Atomtypes(np.array(elements, dtype=object)))
+
+            validated_elements = []
+            for elem in elements:
+                if elem.capitalize() in SYMB2Z:
+                    validated_elements.append(elem.capitalize())
+                else:
+                    wmsg = (f"Unknown element {elem} found for some atoms. "
+                            f"These have been given an empty element record. "
+                            f"If needed they can be guessed using "
+                            f"MDAnalysis.topology.guessers.")
+                    warnings.warn(wmsg)
+                    validated_elements.append('')
+            attrs.append(Elements(np.array(validated_elements, dtype=object)))
 
         masses = guess_masses(atomtypes)
         attrs.append(Masses(masses, guessed=True))
 
-        # Getting element information from element column.
-        if all(elements):
-            element_set = set(i.capitalize() for i in set(elements))
-            if all(element in SYMB2Z for element in element_set):
-                element_list = [i.capitalize() for i in elements]
-                attrs.append(Elements(np.array(element_list, dtype=object)))
-            else:
-                warnings.warn("Invalid elements found in the PDB file, "
-                              "elements attributes will not be populated.")
-        else:
-            warnings.warn("Element information is absent or missing for a few "
-                          "atoms. Elements attributes will not be populated.")
-
         # Residue level stuff from here
         resids = np.array(resids, dtype=np.int32)
         resnames = np.array(resnames, dtype=object)

diff --git a/testsuite/MDAnalysisTests/analysis/test_hole2.py b/testsuite/MDAnalysisTests/analysis/test_hole2.py
@@ -301,16 +301,11 @@ def test_context_manager(self, universe, tmpdir):
 
     def test_output_level(self, tmpdir, universe):
         with tmpdir.as_cwd():
-            with pytest.warns(UserWarning) as rec:
+            with pytest.warns(UserWarning, match='needs to be < 3'):
                 h = hole2.HoleAnalysis(universe,
                                        output_level=100)
                 h.run(start=self.start,
                       stop=self.stop, random_seed=self.random_seed)
-            assert len(rec) == 5
-
-            assert any('needs to be < 3' in r.message.args[0] for r in rec)
-            assert any('has no dt information' in r.message.args[0] for r in rec)  # 2x
-            assert any('Unit cell dimensions not found.' in r.message.args[0] for r in rec)  # 2x
 
             # no profiles
             assert len(h.profiles) == 0

diff --git a/testsuite/MDAnalysisTests/coordinates/test_pdb.py b/testsuite/MDAnalysisTests/coordinates/test_pdb.py
@@ -45,6 +45,16 @@
 IGNORE_NO_INFORMATION_WARNING = 'ignore:Found no information for attr:UserWarning'
 
 
+@pytest.fixture
+def dummy_universe_without_elements():
+    n_atoms = 5
+    u = make_Universe(size=(n_atoms, 1, 1), trajectory=True)
+    u.add_TopologyAttr('resnames', ['RES'])
+    u.add_TopologyAttr('names', ['C1', 'O2', 'N3', 'S4', 'NA'])
+    u.dimensions = [42, 42, 42, 90, 90, 90]
+    return u
+
+
 class TestPDBReader(_SingleFrameReader):
     __test__ = True
 
@@ -1028,7 +1038,7 @@ def test_atomname_alignment(self, writtenstuff):
 
     def test_atomtype_alignment(self, writtenstuff):
         result_line = ("ATOM      1  H5T GUA A   1       7.974   6.430   9.561"
-                       "  1.00  0.00      RNAA H\n")
+                       "  1.00  0.00      RNAA  \n")
         assert_equal(writtenstuff[9], result_line)
 
 
@@ -1161,6 +1171,63 @@ def test_partially_missing_cryst():
     assert_array_almost_equal(u.dimensions, 0.0)
 
 
+@pytest.mark.filterwarnings(IGNORE_NO_INFORMATION_WARNING)
+def test_write_no_atoms_elements(dummy_universe_without_elements):
+    """
+    If no element symbols are provided, the PDB writer guesses.
+    """
+    destination = StringIO()
+    with mda.coordinates.PDB.PDBWriter(destination) as writer:
+        writer.write(dummy_universe_without_elements.atoms)
+        content = destination.getvalue()
+    element_symbols = [
+        line[76:78].strip()
+        for line in content.splitlines()
+        if line[:6] == 'ATOM  '
+    ]
+    expectation = ['', '', '', '', '']
+    assert element_symbols == expectation
+
+
+@pytest.mark.filterwarnings(IGNORE_NO_INFORMATION_WARNING)
+def test_write_atom_elements(dummy_universe_without_elements):
+    """
+    If element symbols are provided, they are used when writing the file.
+
+    See `Issue 2423 <https://github.com/MDAnalysis/mdanalysis/issues/2423>`_.
+    """
+    elems = ['S', 'O', '', 'C', 'Na']
+    expectation = ['S', 'O', '', 'C', 'NA']
+    dummy_universe_with_elements = dummy_universe_without_elements
+    dummy_universe_with_elements.add_TopologyAttr('elements', elems)
+    destination = StringIO()
+    with mda.coordinates.PDB.PDBWriter(destination) as writer:
+        writer.write(dummy_universe_without_elements.atoms)
+        content = destination.getvalue()
+    element_symbols = [
+        line[76:78].strip()
+        for line in content.splitlines()
+        if line[:6] == 'ATOM  '
+    ]
+    assert element_symbols == expectation
+
+
+def test_elements_roundtrip(tmpdir):
+    """
+    Roundtrip test for PDB elements reading/writing.
+    """
+    u = mda.Universe(CONECT)
+    elements = u.atoms.elements
+
+    outfile = os.path.join(str(tmpdir), 'elements.pdb')
+    with mda.coordinates.PDB.PDBWriter(outfile) as writer:
+        writer.write(u.atoms)
+
+    u_written = mda.Universe(outfile)
+
+    assert_equal(elements, u_written.atoms.elements)
+
+
 def test_cryst_meaningless_warning():
     # issue 2599
     # FIXME: This message might change with Issue #2698

diff --git a/testsuite/MDAnalysisTests/topology/test_pdb.py b/testsuite/MDAnalysisTests/topology/test_pdb.py
@@ -23,7 +23,6 @@
 from io import StringIO
 
 import pytest
-import warnings
 import numpy as np
 from numpy.testing import assert_equal
 import MDAnalysis as mda
@@ -66,10 +65,12 @@
     ("10267", 10267)
 ]
 
+
 @pytest.mark.parametrize('hybrid, integer', hybrid36)
 def test_hy36decode(hybrid, integer):
     assert mda.topology.PDBParser.hy36decode(5, hybrid) == integer
 
+
 class PDBBase(ParserBase):
     expected_attrs = ['ids', 'names', 'record_types', 'resids',
                       'resnames', 'altLocs', 'icodes', 'occupancies',
@@ -205,6 +206,7 @@ def test_PDB_record_types():
 ENDMDL
 """
 
+
 def test_PDB_no_resid():
     u = mda.Universe(StringIO(PDB_noresid), format='PDB')
 
@@ -213,6 +215,7 @@ def test_PDB_no_resid():
     # should have default resid of 1
     assert u.residues[0].resid == 1
 
+
 PDB_hex = """\
 REMARK For testing reading of hex atom numbers
 REMARK This has MODELs then hex atom numbers entries
@@ -226,6 +229,7 @@ def test_PDB_no_resid():
 END
 """
 
+
 def test_PDB_hex():
     u = mda.Universe(StringIO(PDB_hex), format='PDB')
     assert len(u.atoms) == 5
@@ -298,37 +302,25 @@ def test_PDB_elements():
     assert_equal(u.atoms.elements, element_list)
 
 
-PDB_missing_ele = """\
-REMARK For testing warnings in case of absent element information
-ATOM      1  N   ASN A   1      -8.901   4.127  -0.555  1.00  0.00
-ATOM      2  CA  ASN A   1      -8.608   3.135  -1.618  1.00  0.00
-ATOM      3  C   ASN A   1      -7.117   2.964  -1.897  1.00  0.00
-ATOM      4  O   ASN A   1      -6.634   1.849  -1.758  1.00  0.00
-TER       5
-HETATM    6 CU    CU A   2      03.000  00.000  00.000  1.00 00.00
-HETATM    7 FE    FE A   3      00.000  03.000  00.000  1.00 00.00
-HETATM    8 Mg    Mg A   4      03.000  03.000  03.000  1.00 00.00
-TER       9
-"""
-
+def test_missing_elements_noattribute():
+    """Check that:
 
-def test_missing_elements_warnings():
-    """The test checks whether it returns the appropriate warning on
-    encountering missing elements.
+    1) a warning is raised if elements are missing
+    2) the elements attribute is not set
     """
-    with pytest.warns(UserWarning) as record:
-        u = mda.Universe(StringIO(PDB_missing_ele), format='PDB')
-
-    assert len(record) == 1
-    assert record[0].message.args[0] == "Element information is absent or "\
-        "missing for a few atoms. Elements attributes will not be populated."
+    wmsg = ("Element information is missing, elements attribute will not be "
+            "populated")
+    with pytest.warns(UserWarning, match=wmsg):
+        u = mda.Universe(PDB_small)
+    with pytest.raises(AttributeError):
+        _ = u.atoms.elements
 
 
 PDB_wrong_ele = """\
 REMARK For testing warnings of wrong elements
 REMARK This file represent invalid elements in the elements column
 ATOM      1  N   ASN A   1      -8.901   4.127  -0.555  1.00  0.00           N
-ATOM      2  CA  ASN A   1      -8.608   3.135  -1.618  1.00  0.00           C
+ATOM      2  CA  ASN A   1      -8.608   3.135  -1.618  1.00  0.00
 ATOM      3  C   ASN A   1      -7.117   2.964  -1.897  1.00  0.00           C
 ATOM      4  O   ASN A   1      -6.634   1.849  -1.758  1.00  0.00           O
 ATOM      5  X   ASN A   1      -9.437   3.396  -2.889  1.00  0.00          XX
@@ -344,12 +336,12 @@ def test_wrong_elements_warnings():
     """The test checks whether there are invalid elements in the elements
     column which have been parsed and returns an appropriate warning.
     """
-    with pytest.warns(UserWarning) as record:
+    with pytest.warns(UserWarning, match='Unknown element XX found'):
         u = mda.Universe(StringIO(PDB_wrong_ele), format='PDB')
-    
-    assert len(record) == 2
-    assert record[1].message.args[0] == "Invalid elements found in the PDB "\
-        "file, elements attributes will not be populated."
+
+    expected = np.array(['N', '', 'C', 'O', '', 'Cu', 'Fe', 'Mg'],
+                        dtype=object)
+    assert_equal(u.atoms.elements, expected)
 
 
 def test_nobonds_error():