MDAnalysis · jbarnoud · Oct 21, 2020 · Aug 6, 2020 · Oct 16, 2020 · Oct 16, 2020
diff --git a/package/CHANGELOG b/package/CHANGELOG
@@ -66,6 +66,8 @@ Fixes
     would create a test artifact (Issue #2979, PR #2981)
 
 Enhancements
+  * The PDB writer gives more control over how to write the atom ids
+    (Issue #1072, PR #2886)
   * Preliminary support for the Linux ARM64 platform with minimal
     dependencies (PR #2956)
   * Refactored analysis.helanal into analysis.helix_analysis

diff --git a/package/MDAnalysis/coordinates/PDB.py b/package/MDAnalysis/coordinates/PDB.py
@@ -533,6 +533,10 @@ class PDBWriter(base.WriterBase):
        ChainID now comes from the last character of segid, as stated in the documentation.
        An indexing issue meant it previously used the first charater (Issue #2224)
 
+    .. versionchanged:: 2.0.0
+        Add the `redindex` argument. Setting this keyword to ``True``
+        (the default) preserves the behavior in earlier versions of MDAnalysis.
+
     """
     fmt = {
         'ATOM': (
@@ -596,7 +600,7 @@ class PDBWriter(base.WriterBase):
 
     def __init__(self, filename, bonds="conect", n_atoms=None, start=0, step=1,
                  remarks="Created by PDBWriter",
-                 convert_units=True, multiframe=None):
+                 convert_units=True, multiframe=None, reindex=True):
         """Create a new PDBWriter
 
         Parameters
@@ -624,6 +628,9 @@ def __init__(self, filename, bonds="conect", n_atoms=None, start=0, step=1,
            ``False``: write a single frame to the file; ``True``: create a
            multi frame PDB file in which frames are written as MODEL_ ... ENDMDL_
            records. If ``None``, then the class default is chosen.    [``None``]
+        reindex: bool (optional)
+            If ``True`` (default), the atom serial is set to be consecutive
+            numbers starting at 1. Else, use the atom id.
 
         """
         # n_atoms = None : dummy keyword argument
@@ -637,6 +644,7 @@ def __init__(self, filename, bonds="conect", n_atoms=None, start=0, step=1,
         self.convert_units = convert_units
         self._multiframe = self.multiframe if multiframe is None else multiframe
         self.bonds = bonds
+        self._reindex = reindex
 
         if start < 0:
             raise ValueError("'Start' must be a positive value")
@@ -792,14 +800,35 @@ def _write_pdb_bonds(self):
         if not self.obj or not hasattr(self.obj.universe, 'bonds'):
             return
 
-        mapping = {index: i for i, index in enumerate(self.obj.atoms.indices)}
-
         bondset = set(itertools.chain(*(a.bonds for a in self.obj.atoms)))
+        if self._reindex:
+            index_attribute = 'index'
+            mapping = {
+                index: i
+                for i, index in enumerate(self.obj.atoms.indices, start=1)
+            }
+            atoms = np.sort(self.obj.atoms.indices)
+        else:
+            index_attribute = 'id'
+            mapping = {id_: id_ for id_ in self.obj.atoms.ids}
+            atoms = np.sort(self.obj.atoms.ids)
         if self.bonds == "conect":
             # Write out only the bonds that were defined in CONECT records
-            bonds = ((bond[0].index, bond[1].index) for bond in bondset if not bond.is_guessed)
+            bonds = (
+                (
+                    getattr(bond[0], index_attribute),
+                    getattr(bond[1], index_attribute),
+                )
+                for bond in bondset if not bond.is_guessed
+            )
         elif self.bonds == "all":
-            bonds = ((bond[0].index, bond[1].index) for bond in bondset)
+            bonds = (
+                (
+                    getattr(bond[0], index_attribute),
+                    getattr(bond[1], index_attribute),
+                )
+                for bond in bondset
+            )
         else:
             raise ValueError("bonds has to be either None, 'conect' or 'all'")
 
@@ -810,8 +839,6 @@ def _write_pdb_bonds(self):
             con[a2].append(a1)
             con[a1].append(a2)
 
-        atoms = np.sort(self.obj.atoms.indices)
-
         conect = ([mapping[a]] + sorted([mapping[at] for at in con[a]])
                   for a in atoms if a in con)
 
@@ -1042,9 +1069,22 @@ def get_attr(attrname, default):
         atomnames = get_attr('names', 'X')
         record_types = get_attr('record_types', 'ATOM')
 
+        # If reindex == False, we use the atom ids for the serial. We do not
+        # want to use a fallback here.
+        if not self._reindex:
+            try:
+                atom_ids = atoms.ids
+            except AttributeError:
+                raise NoDataError(
+                    'The "id" topology attribute is not set. '
+                    'Either set the attribute or use reindex=True.'
+                )
+        else:
+            atom_ids = np.arange(len(atoms)) + 1
+
         for i, atom in enumerate(atoms):
             vals = {}
-            vals['serial'] = util.ltruncate_int(i + 1, 5)  # check for overflow here?
+            vals['serial'] = util.ltruncate_int(atom_ids[i], 5)  # check for overflow here?
             vals['name'] = self._deduce_PDB_atom_name(atomnames[i], resnames[i])
             vals['altLoc'] = altlocs[i][:1]
             vals['resName'] = resnames[i][:4]
@@ -1153,7 +1193,7 @@ def CONECT(self, conect):
         """Write CONECT_ record.
 
         """
-        conect = ["{0:5d}".format(entry + 1) for entry in conect]
+        conect = ["{0:5d}".format(entry) for entry in conect]
         conect = "".join(conect)
         self.pdbfile.write(self.fmt['CONECT'].format(conect))
 

diff --git a/testsuite/MDAnalysisTests/coordinates/test_pdb.py b/testsuite/MDAnalysisTests/coordinates/test_pdb.py
@@ -42,6 +42,8 @@
                            assert_array_almost_equal,
                            assert_almost_equal)
 
+IGNORE_NO_INFORMATION_WARNING = 'ignore:Found no information for attr:UserWarning'
+
 
 class TestPDBReader(_SingleFrameReader):
     __test__ = True
@@ -218,6 +220,22 @@ def universe_and_expected_dims(self, request):
     def outfile(self, tmpdir):
         return str(tmpdir.mkdir("PDBWriter").join('primitive-pdb-writer' + self.ext))
 
+    @pytest.fixture
+    def u_no_ids(self):
+        # The test universe does not have atom ids, but it has everything
+        # else the PDB writer expects to avoid issuing warnings.
+        universe = make_Universe(
+            [
+                'names', 'resids', 'resnames', 'altLocs',
+                'segids', 'occupancies', 'tempfactors',
+            ],
+            trajectory=True,
+        )
+        universe.add_TopologyAttr('icodes', [' '] * len(universe.residues))
+        universe.add_TopologyAttr('record_types', ['ATOM'] * len(universe.atoms))
+        universe.dimensions = [10, 10, 10, 90, 90, 90]
+        return universe
+
     @pytest.fixture
     def u_no_resnames(self):
         return make_Universe(['names', 'resids'], trajectory=True)
@@ -513,6 +531,55 @@ def test_abnormal_record_type(self, universe5, tmpdir, outfile):
         with pytest.raises(ValueError, match=expected_msg):
             u.atoms.write(outfile)
 
+    @pytest.mark.filterwarnings(IGNORE_NO_INFORMATION_WARNING)
+    def test_no_reindex(self, universe, outfile):
+        """
+        When setting the `reindex` keyword to False, the atom are
+        not reindexed.
+        """
+        universe.atoms.ids = universe.atoms.ids + 23
+        universe.atoms.write(outfile, reindex=False)
+        read_universe = mda.Universe(outfile)
+        assert np.all(read_universe.atoms.ids == universe.atoms.ids)
+
+    @pytest.mark.filterwarnings(IGNORE_NO_INFORMATION_WARNING)
+    def test_no_reindex_bonds(self, universe, outfile):
+        """
+        When setting the `reindex` keyword to False, the connect
+        record match the non-reindexed atoms.
+        """
+        universe.atoms.ids = universe.atoms.ids + 23
+        universe.atoms.write(outfile, reindex=False, bonds='all')
+        with open(outfile) as infile:
+            for line in infile:
+                if line.startswith('CONECT'):
+                    assert line.strip() == "CONECT   23   24   25   26   27"
+                    break
+            else:
+                raise AssertError('No CONECT record fond in the output.')
+
+    @pytest.mark.filterwarnings(IGNORE_NO_INFORMATION_WARNING)
+    def test_reindex(self, universe, outfile):
+        """
+        When setting the `reindex` keyword to True, the atom are
+        reindexed.
+        """
+        universe.atoms.ids = universe.atoms.ids + 23
+        universe.atoms.write(outfile, reindex=True)
+        read_universe = mda.Universe(outfile)
+        # AG.ids is 1-based, while AG.indices is 0-based, hence the +1
+        assert np.all(read_universe.atoms.ids == universe.atoms.indices + 1)
+
+    def test_no_reindex_missing_ids(self, u_no_ids, outfile):
+        """
+        When setting `reindex` to False, if there is no AG.ids,
+        then an exception is raised.
+        """
+        # Making sure AG.ids is indeed missing
+        assert not hasattr(u_no_ids.atoms, 'ids')
+        with pytest.raises(mda.exceptions.NoDataError):
+            u_no_ids.atoms.write(outfile, reindex=False)
+
 
 class TestMultiPDBReader(object):
     @staticmethod