Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions package/CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ The rules for this file:
* 2.0.0

Fixes
* PDB parser now assigns empty records for partially missing/invalid elements
instead of not assigning the elements attribute at all (Issue #2422)
* PDB writer now uses elements attribute instead of guessing elements from
atom name (Issue #2423)
* Cleanup and parametrization of test_atomgroup.py (Issue #2995)
* The methods provided by topology attributes now appear in the
documentation (Issue #1845)
Expand Down
6 changes: 5 additions & 1 deletion package/MDAnalysis/coordinates/PDB.py
Original file line number Diff line number Diff line change
Expand Up @@ -1033,6 +1033,9 @@ def _write_timestep(self, ts, multiframe=False):
When only :attr:`record_types` attribute is present, instead of
using ATOM_ for both ATOM_ and HETATM_, HETATM_ record
types are properly written out (Issue #1753).
Writing now only uses the contents of the elements attribute
instead of guessing by default. If the elements are missing,
empty records are written out (Issue #2423).

"""
atoms = self.obj.atoms
Expand Down Expand Up @@ -1067,6 +1070,7 @@ def get_attr(attrname, default):
occupancies = get_attr('occupancies', 1.0)
tempfactors = get_attr('tempfactors', 0.0)
atomnames = get_attr('names', 'X')
elements = get_attr('elements', ' ')
record_types = get_attr('record_types', 'ATOM')

# If reindex == False, we use the atom ids for the serial. We do not
Expand Down Expand Up @@ -1095,7 +1099,7 @@ def get_attr(attrname, default):
vals['occupancy'] = occupancies[i]
vals['tempFactor'] = tempfactors[i]
vals['segID'] = segids[i][:4]
vals['element'] = guess_atom_element(atomnames[i].strip())[:2]
vals['element'] = elements[i][:2].upper()

# record_type attribute, if exists, can be ATOM or HETATM
try:
Expand Down
45 changes: 24 additions & 21 deletions package/MDAnalysis/topology/PDBParser.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,15 +35,13 @@
:mod:`~MDAnalysis.topology.ExtendedPDBParser`) that can handle residue
numbers up to 99,999.

TODO:
Add attributes to guess elements for non-physical or missing elements


.. Note::

The parser processes atoms and their names. Masses are guessed and set to 0
if unknown. Partial charges are not set. Elements are parsed if they are
valid.
valid. If partially missing or incorrect, empty records are assigned.

See Also
--------
Expand Down Expand Up @@ -183,7 +181,9 @@ class PDBParser(TopologyReaderBase):
.. versionchanged:: 1.0.0
Added parsing of valid Elements
.. versionchanged:: 2.0.0
Bonds attribute is not added if no bonds are present in PDB file
Bonds attribute is not added if no bonds are present in PDB file.
If elements are invalid or partially missing, empty elements records
are now assigned (Issue #2422).
"""
format = ['PDB', 'ENT']

Expand Down Expand Up @@ -220,7 +220,6 @@ def _parseatoms(self):
icodes = []
tempfactors = []
occupancies = []
atomtypes = []

resids = []
resnames = []
Expand Down Expand Up @@ -283,7 +282,6 @@ def _parseatoms(self):
tempfactors.append(float_or_default(line[60:66], 1.0)) # AKA bfactor

segids.append(line[66:76].strip())
atomtypes.append(line[76:78].strip())

# Warn about wrapped serials
if self._wrapped_serials:
Expand Down Expand Up @@ -311,28 +309,33 @@ def _parseatoms(self):
# Guessed attributes
# masses from types if they exist
# OPT: We do this check twice, maybe could refactor to avoid this
if not any(atomtypes):
if not any(elements):
atomtypes = guess_types(names)
attrs.append(Atomtypes(atomtypes, guessed=True))
warnings.warn("Element information is missing, elements attribute "
"will not be populated. If needed these can be "
"guessed using MDAnalysis.topology.guessers.")
else:
attrs.append(Atomtypes(np.array(atomtypes, dtype=object)))
# Feed atomtypes as raw element column, but validate elements
atomtypes = elements
attrs.append(Atomtypes(np.array(elements, dtype=object)))

validated_elements = []
for elem in elements:
if elem.capitalize() in SYMB2Z:
validated_elements.append(elem.capitalize())
else:
wmsg = (f"Unknown element {elem} found for some atoms. "
f"These have been given an empty element record. "
f"If needed they can be guessed using "
f"MDAnalysis.topology.guessers.")
warnings.warn(wmsg)
validated_elements.append('')
attrs.append(Elements(np.array(validated_elements, dtype=object)))

masses = guess_masses(atomtypes)
attrs.append(Masses(masses, guessed=True))

# Getting element information from element column.
if all(elements):
element_set = set(i.capitalize() for i in set(elements))
if all(element in SYMB2Z for element in element_set):
element_list = [i.capitalize() for i in elements]
attrs.append(Elements(np.array(element_list, dtype=object)))
else:
warnings.warn("Invalid elements found in the PDB file, "
"elements attributes will not be populated.")
else:
warnings.warn("Element information is absent or missing for a few "
"atoms. Elements attributes will not be populated.")

# Residue level stuff from here
resids = np.array(resids, dtype=np.int32)
resnames = np.array(resnames, dtype=object)
Expand Down
7 changes: 1 addition & 6 deletions testsuite/MDAnalysisTests/analysis/test_hole2.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,16 +301,11 @@ def test_context_manager(self, universe, tmpdir):

def test_output_level(self, tmpdir, universe):
with tmpdir.as_cwd():
with pytest.warns(UserWarning) as rec:
with pytest.warns(UserWarning, match='needs to be < 3'):
h = hole2.HoleAnalysis(universe,
output_level=100)
h.run(start=self.start,
stop=self.stop, random_seed=self.random_seed)
assert len(rec) == 5

assert any('needs to be < 3' in r.message.args[0] for r in rec)
assert any('has no dt information' in r.message.args[0] for r in rec) # 2x
assert any('Unit cell dimensions not found.' in r.message.args[0] for r in rec) # 2x

# no profiles
assert len(h.profiles) == 0
Expand Down
69 changes: 68 additions & 1 deletion testsuite/MDAnalysisTests/coordinates/test_pdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,16 @@
IGNORE_NO_INFORMATION_WARNING = 'ignore:Found no information for attr:UserWarning'


@pytest.fixture
def dummy_universe_without_elements():
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you look at minimizing the number of warnings that gets issued by your new tests? I used 2 different approaches for that in #2886: filling all the required attributes in the fixture or filtering the warnings in the tests.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

3d7857d should have done the trick, only warnings left are parmed's ABCs from collections import.

n_atoms = 5
u = make_Universe(size=(n_atoms, 1, 1), trajectory=True)
u.add_TopologyAttr('resnames', ['RES'])
u.add_TopologyAttr('names', ['C1', 'O2', 'N3', 'S4', 'NA'])
u.dimensions = [42, 42, 42, 90, 90, 90]
return u


class TestPDBReader(_SingleFrameReader):
__test__ = True

Expand Down Expand Up @@ -1028,7 +1038,7 @@ def test_atomname_alignment(self, writtenstuff):

def test_atomtype_alignment(self, writtenstuff):
result_line = ("ATOM 1 H5T GUA A 1 7.974 6.430 9.561"
" 1.00 0.00 RNAA H\n")
" 1.00 0.00 RNAA \n")
assert_equal(writtenstuff[9], result_line)


Expand Down Expand Up @@ -1161,6 +1171,63 @@ def test_partially_missing_cryst():
assert_array_almost_equal(u.dimensions, 0.0)


@pytest.mark.filterwarnings(IGNORE_NO_INFORMATION_WARNING)
def test_write_no_atoms_elements(dummy_universe_without_elements):
"""
If no element symbols are provided, the PDB writer guesses.
"""
destination = StringIO()
with mda.coordinates.PDB.PDBWriter(destination) as writer:
writer.write(dummy_universe_without_elements.atoms)
content = destination.getvalue()
element_symbols = [
line[76:78].strip()
for line in content.splitlines()
if line[:6] == 'ATOM '
]
expectation = ['', '', '', '', '']
assert element_symbols == expectation


@pytest.mark.filterwarnings(IGNORE_NO_INFORMATION_WARNING)
def test_write_atom_elements(dummy_universe_without_elements):
"""
If element symbols are provided, they are used when writing the file.

See `Issue 2423 <https://github.com/MDAnalysis/mdanalysis/issues/2423>`_.
"""
elems = ['S', 'O', '', 'C', 'Na']
expectation = ['S', 'O', '', 'C', 'NA']
dummy_universe_with_elements = dummy_universe_without_elements
dummy_universe_with_elements.add_TopologyAttr('elements', elems)
destination = StringIO()
with mda.coordinates.PDB.PDBWriter(destination) as writer:
writer.write(dummy_universe_without_elements.atoms)
content = destination.getvalue()
element_symbols = [
line[76:78].strip()
for line in content.splitlines()
if line[:6] == 'ATOM '
]
assert element_symbols == expectation


def test_elements_roundtrip(tmpdir):
"""
Roundtrip test for PDB elements reading/writing.
"""
u = mda.Universe(CONECT)
elements = u.atoms.elements

outfile = os.path.join(str(tmpdir), 'elements.pdb')
with mda.coordinates.PDB.PDBWriter(outfile) as writer:
writer.write(u.atoms)

u_written = mda.Universe(outfile)

assert_equal(elements, u_written.atoms.elements)


def test_cryst_meaningless_warning():
# issue 2599
# FIXME: This message might change with Issue #2698
Expand Down
50 changes: 21 additions & 29 deletions testsuite/MDAnalysisTests/topology/test_pdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
from io import StringIO

import pytest
import warnings
import numpy as np
from numpy.testing import assert_equal
import MDAnalysis as mda
Expand Down Expand Up @@ -66,10 +65,12 @@
("10267", 10267)
]


@pytest.mark.parametrize('hybrid, integer', hybrid36)
def test_hy36decode(hybrid, integer):
assert mda.topology.PDBParser.hy36decode(5, hybrid) == integer


class PDBBase(ParserBase):
expected_attrs = ['ids', 'names', 'record_types', 'resids',
'resnames', 'altLocs', 'icodes', 'occupancies',
Expand Down Expand Up @@ -205,6 +206,7 @@ def test_PDB_record_types():
ENDMDL
"""


def test_PDB_no_resid():
u = mda.Universe(StringIO(PDB_noresid), format='PDB')

Expand All @@ -213,6 +215,7 @@ def test_PDB_no_resid():
# should have default resid of 1
assert u.residues[0].resid == 1


PDB_hex = """\
REMARK For testing reading of hex atom numbers
REMARK This has MODELs then hex atom numbers entries
Expand All @@ -226,6 +229,7 @@ def test_PDB_no_resid():
END
"""


def test_PDB_hex():
u = mda.Universe(StringIO(PDB_hex), format='PDB')
assert len(u.atoms) == 5
Expand Down Expand Up @@ -298,37 +302,25 @@ def test_PDB_elements():
assert_equal(u.atoms.elements, element_list)


PDB_missing_ele = """\
REMARK For testing warnings in case of absent element information
ATOM 1 N ASN A 1 -8.901 4.127 -0.555 1.00 0.00
ATOM 2 CA ASN A 1 -8.608 3.135 -1.618 1.00 0.00
ATOM 3 C ASN A 1 -7.117 2.964 -1.897 1.00 0.00
ATOM 4 O ASN A 1 -6.634 1.849 -1.758 1.00 0.00
TER 5
HETATM 6 CU CU A 2 03.000 00.000 00.000 1.00 00.00
HETATM 7 FE FE A 3 00.000 03.000 00.000 1.00 00.00
HETATM 8 Mg Mg A 4 03.000 03.000 03.000 1.00 00.00
TER 9
"""

def test_missing_elements_noattribute():
"""Check that:

def test_missing_elements_warnings():
"""The test checks whether it returns the appropriate warning on
encountering missing elements.
1) a warning is raised if elements are missing
2) the elements attribute is not set
"""
with pytest.warns(UserWarning) as record:
u = mda.Universe(StringIO(PDB_missing_ele), format='PDB')

assert len(record) == 1
assert record[0].message.args[0] == "Element information is absent or "\
"missing for a few atoms. Elements attributes will not be populated."
wmsg = ("Element information is missing, elements attribute will not be "
"populated")
with pytest.warns(UserWarning, match=wmsg):
u = mda.Universe(PDB_small)
with pytest.raises(AttributeError):
_ = u.atoms.elements


PDB_wrong_ele = """\
REMARK For testing warnings of wrong elements
REMARK This file represent invalid elements in the elements column
ATOM 1 N ASN A 1 -8.901 4.127 -0.555 1.00 0.00 N
ATOM 2 CA ASN A 1 -8.608 3.135 -1.618 1.00 0.00 C
ATOM 2 CA ASN A 1 -8.608 3.135 -1.618 1.00 0.00
ATOM 3 C ASN A 1 -7.117 2.964 -1.897 1.00 0.00 C
ATOM 4 O ASN A 1 -6.634 1.849 -1.758 1.00 0.00 O
ATOM 5 X ASN A 1 -9.437 3.396 -2.889 1.00 0.00 XX
Expand All @@ -344,12 +336,12 @@ def test_wrong_elements_warnings():
"""The test checks whether there are invalid elements in the elements
column which have been parsed and returns an appropriate warning.
"""
with pytest.warns(UserWarning) as record:
with pytest.warns(UserWarning, match='Unknown element XX found'):
u = mda.Universe(StringIO(PDB_wrong_ele), format='PDB')
assert len(record) == 2
assert record[1].message.args[0] == "Invalid elements found in the PDB "\
"file, elements attributes will not be populated."

expected = np.array(['N', '', 'C', 'O', '', 'Cu', 'Fe', 'Mg'],
dtype=object)
assert_equal(u.atoms.elements, expected)


def test_nobonds_error():
Expand Down