Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions package/CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ Fixes
would create a test artifact (Issue #2979, PR #2981)

Enhancements
* The PDB writer gives more control over how to write the atom ids
(Issue #1072, PR #2886)
* Preliminary support for the Linux ARM64 platform with minimal
dependencies (PR #2956)
* Refactored analysis.helanal into analysis.helix_analysis
Expand Down
58 changes: 49 additions & 9 deletions package/MDAnalysis/coordinates/PDB.py
Original file line number Diff line number Diff line change
Expand Up @@ -533,6 +533,10 @@ class PDBWriter(base.WriterBase):
ChainID now comes from the last character of segid, as stated in the documentation.
An indexing issue meant it previously used the first charater (Issue #2224)

.. versionchanged:: 2.0.0
Add the `redindex` argument. Setting this keyword to ``True``
(the default) preserves the behavior in earlier versions of MDAnalysis.

"""
fmt = {
'ATOM': (
Expand Down Expand Up @@ -596,7 +600,7 @@ class PDBWriter(base.WriterBase):

def __init__(self, filename, bonds="conect", n_atoms=None, start=0, step=1,
remarks="Created by PDBWriter",
convert_units=True, multiframe=None):
convert_units=True, multiframe=None, reindex=True):
"""Create a new PDBWriter

Parameters
Expand Down Expand Up @@ -624,6 +628,9 @@ def __init__(self, filename, bonds="conect", n_atoms=None, start=0, step=1,
``False``: write a single frame to the file; ``True``: create a
multi frame PDB file in which frames are written as MODEL_ ... ENDMDL_
records. If ``None``, then the class default is chosen. [``None``]
reindex: bool (optional)
If ``True`` (default), the atom serial is set to be consecutive
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. Which value preserves compatibility with 1.x?
  2. Have we ever been able to rountrip a PDB when the serials were not consecutively numbered starting from 1? Is this something we would want to be able to do? Is this something a user would generally expect?

numbers starting at 1. Else, use the atom id.

"""
# n_atoms = None : dummy keyword argument
Expand All @@ -637,6 +644,7 @@ def __init__(self, filename, bonds="conect", n_atoms=None, start=0, step=1,
self.convert_units = convert_units
self._multiframe = self.multiframe if multiframe is None else multiframe
self.bonds = bonds
self._reindex = reindex

if start < 0:
raise ValueError("'Start' must be a positive value")
Expand Down Expand Up @@ -792,14 +800,35 @@ def _write_pdb_bonds(self):
if not self.obj or not hasattr(self.obj.universe, 'bonds'):
return

mapping = {index: i for i, index in enumerate(self.obj.atoms.indices)}

bondset = set(itertools.chain(*(a.bonds for a in self.obj.atoms)))
if self._reindex:
index_attribute = 'index'
mapping = {
index: i
for i, index in enumerate(self.obj.atoms.indices, start=1)
}
atoms = np.sort(self.obj.atoms.indices)
else:
index_attribute = 'id'
mapping = {id_: id_ for id_ in self.obj.atoms.ids}
atoms = np.sort(self.obj.atoms.ids)
if self.bonds == "conect":
# Write out only the bonds that were defined in CONECT records
bonds = ((bond[0].index, bond[1].index) for bond in bondset if not bond.is_guessed)
bonds = (
(
getattr(bond[0], index_attribute),
getattr(bond[1], index_attribute),
)
for bond in bondset if not bond.is_guessed
)
elif self.bonds == "all":
bonds = ((bond[0].index, bond[1].index) for bond in bondset)
bonds = (
(
getattr(bond[0], index_attribute),
getattr(bond[1], index_attribute),
)
for bond in bondset
)
else:
raise ValueError("bonds has to be either None, 'conect' or 'all'")

Expand All @@ -810,8 +839,6 @@ def _write_pdb_bonds(self):
con[a2].append(a1)
con[a1].append(a2)

atoms = np.sort(self.obj.atoms.indices)

conect = ([mapping[a]] + sorted([mapping[at] for at in con[a]])
for a in atoms if a in con)

Expand Down Expand Up @@ -1042,9 +1069,22 @@ def get_attr(attrname, default):
atomnames = get_attr('names', 'X')
record_types = get_attr('record_types', 'ATOM')

# If reindex == False, we use the atom ids for the serial. We do not
# want to use a fallback here.
if not self._reindex:
try:
atom_ids = atoms.ids
except AttributeError:
raise NoDataError(
'The "id" topology attribute is not set. '
'Either set the attribute or use reindex=True.'
)
else:
atom_ids = np.arange(len(atoms)) + 1

for i, atom in enumerate(atoms):
vals = {}
vals['serial'] = util.ltruncate_int(i + 1, 5) # check for overflow here?
vals['serial'] = util.ltruncate_int(atom_ids[i], 5) # check for overflow here?
vals['name'] = self._deduce_PDB_atom_name(atomnames[i], resnames[i])
vals['altLoc'] = altlocs[i][:1]
vals['resName'] = resnames[i][:4]
Expand Down Expand Up @@ -1153,7 +1193,7 @@ def CONECT(self, conect):
"""Write CONECT_ record.

"""
conect = ["{0:5d}".format(entry + 1) for entry in conect]
conect = ["{0:5d}".format(entry) for entry in conect]
conect = "".join(conect)
self.pdbfile.write(self.fmt['CONECT'].format(conect))

Expand Down
67 changes: 67 additions & 0 deletions testsuite/MDAnalysisTests/coordinates/test_pdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@
assert_array_almost_equal,
assert_almost_equal)

IGNORE_NO_INFORMATION_WARNING = 'ignore:Found no information for attr:UserWarning'


class TestPDBReader(_SingleFrameReader):
__test__ = True
Expand Down Expand Up @@ -218,6 +220,22 @@ def universe_and_expected_dims(self, request):
def outfile(self, tmpdir):
return str(tmpdir.mkdir("PDBWriter").join('primitive-pdb-writer' + self.ext))

@pytest.fixture
def u_no_ids(self):
# The test universe does not have atom ids, but it has everything
# else the PDB writer expects to avoid issuing warnings.
universe = make_Universe(
[
'names', 'resids', 'resnames', 'altLocs',
'segids', 'occupancies', 'tempfactors',
],
trajectory=True,
)
universe.add_TopologyAttr('icodes', [' '] * len(universe.residues))
universe.add_TopologyAttr('record_types', ['ATOM'] * len(universe.atoms))
universe.dimensions = [10, 10, 10, 90, 90, 90]
return universe

@pytest.fixture
def u_no_resnames(self):
return make_Universe(['names', 'resids'], trajectory=True)
Expand Down Expand Up @@ -513,6 +531,55 @@ def test_abnormal_record_type(self, universe5, tmpdir, outfile):
with pytest.raises(ValueError, match=expected_msg):
u.atoms.write(outfile)

@pytest.mark.filterwarnings(IGNORE_NO_INFORMATION_WARNING)
def test_no_reindex(self, universe, outfile):
"""
When setting the `reindex` keyword to False, the atom are
not reindexed.
"""
universe.atoms.ids = universe.atoms.ids + 23
universe.atoms.write(outfile, reindex=False)
read_universe = mda.Universe(outfile)
assert np.all(read_universe.atoms.ids == universe.atoms.ids)

@pytest.mark.filterwarnings(IGNORE_NO_INFORMATION_WARNING)
def test_no_reindex_bonds(self, universe, outfile):
"""
When setting the `reindex` keyword to False, the connect
record match the non-reindexed atoms.
"""
universe.atoms.ids = universe.atoms.ids + 23
universe.atoms.write(outfile, reindex=False, bonds='all')
with open(outfile) as infile:
for line in infile:
if line.startswith('CONECT'):
assert line.strip() == "CONECT 23 24 25 26 27"
break
else:
raise AssertError('No CONECT record fond in the output.')

@pytest.mark.filterwarnings(IGNORE_NO_INFORMATION_WARNING)
def test_reindex(self, universe, outfile):
"""
When setting the `reindex` keyword to True, the atom are
reindexed.
"""
universe.atoms.ids = universe.atoms.ids + 23
universe.atoms.write(outfile, reindex=True)
read_universe = mda.Universe(outfile)
# AG.ids is 1-based, while AG.indices is 0-based, hence the +1
assert np.all(read_universe.atoms.ids == universe.atoms.indices + 1)

def test_no_reindex_missing_ids(self, u_no_ids, outfile):
"""
When setting `reindex` to False, if there is no AG.ids,
then an exception is raised.
"""
# Making sure AG.ids is indeed missing
assert not hasattr(u_no_ids.atoms, 'ids')
with pytest.raises(mda.exceptions.NoDataError):
u_no_ids.atoms.write(outfile, reindex=False)


class TestMultiPDBReader(object):
@staticmethod
Expand Down