From 4046c55327a8e8bbcdadfaf523d61208940ec0d3 Mon Sep 17 00:00:00 2001
From: Henry Kobin <henry.kobin@gmail.com>
Date: Sat, 13 Mar 2021 17:03:48 -0800
Subject: [PATCH 01/21] stopped overwrite of chainIDs by segids but allow
 segids to be chainIDs if chainID is blank

---
 package/MDAnalysis/coordinates/PDB.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/package/MDAnalysis/coordinates/PDB.py b/package/MDAnalysis/coordinates/PDB.py
index 8a6a944ca53..e516b5c1cf2 100644
--- a/package/MDAnalysis/coordinates/PDB.py
+++ b/package/MDAnalysis/coordinates/PDB.py
@@ -1051,6 +1051,8 @@ def _write_timestep(self, ts, multiframe=False):
            Writing now only uses the contents of the elements attribute
            instead of guessing by default. If the elements are missing,
            empty records are written out (Issue #2423).
+           Atoms are now checked for a chainID instead of being overwritten
+           by the last letter of the SegmentID (Issue #3144).
 
         """
         atoms = self.obj.atoms
@@ -1080,6 +1082,7 @@ def get_attr(attrname, default):
         altlocs = get_attr('altLocs', ' ')
         resnames = get_attr('resnames', 'UNK')
         icodes = get_attr('icodes', ' ')
+        chainids = get_attr('chainIDs', ' ')
         segids = get_attr('segids', ' ')
         resids = get_attr('resids', 1)
         occupancies = get_attr('occupancies', 1.0)
@@ -1087,6 +1090,10 @@ def get_attr(attrname, default):
         atomnames = get_attr('names', 'X')
         elements = get_attr('elements', ' ')
         record_types = get_attr('record_types', 'ATOM')
+        chainids = (
+            [id[:4] if (chainids[index] != '') else segids[index][-1:]
+            for index, id in enumerate(chainids)]
+            ) # check for a chainID, if no chainID default to segid
 
         # If reindex == False, we use the atom ids for the serial. We do not
         # want to use a fallback here.
@@ -1107,13 +1114,15 @@ def get_attr(attrname, default):
             vals['name'] = self._deduce_PDB_atom_name(atomnames[i], resnames[i])
             vals['altLoc'] = altlocs[i][:1]
             vals['resName'] = resnames[i][:4]
-            vals['chainID'] = segids[i][-1:]
             vals['resSeq'] = util.ltruncate_int(resids[i], 4)
             vals['iCode'] = icodes[i][:1]
             vals['pos'] = pos[i]  # don't take off atom so conversion works
             vals['occupancy'] = occupancies[i]
             vals['tempFactor'] = tempfactors[i]
             vals['segID'] = segids[i][:4]
+            vals['chainID'] = (
+                chainids[i][:4] if (chainids[i][:4] != ' ') else segids[i][-1:]
+                )  # check to see if there is a valid chainID else segID
             vals['element'] = elements[i][:2].upper()
 
             # record_type attribute, if exists, can be ATOM or HETATM

From cbc5b6e0ea7ec76a330d3452d579466a830a48f3 Mon Sep 17 00:00:00 2001
From: Henry Kobin <henry.kobin@gmail.com>
Date: Sat, 13 Mar 2021 17:04:38 -0800
Subject: [PATCH 02/21] added test for chainID overwrite by segids

---
 testsuite/MDAnalysisTests/coordinates/test_pdb.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/testsuite/MDAnalysisTests/coordinates/test_pdb.py b/testsuite/MDAnalysisTests/coordinates/test_pdb.py
index 5f2b18c439f..8813114805c 100644
--- a/testsuite/MDAnalysisTests/coordinates/test_pdb.py
+++ b/testsuite/MDAnalysisTests/coordinates/test_pdb.py
@@ -463,6 +463,15 @@ def test_segid_chainid(self, universe2, outfile):
         u_pdb = mda.Universe(outfile)
         assert u_pdb.segments.chainIDs[0][0] == ref_id
 
+    def test_segid_chainid_overwrite(self, universe3, outfile):
+        """check if chainID gets round-tripped after file write (issue #3144)"""
+        ref_id = 'x'
+        u = universe3
+        u.atoms.chainIDs = ref_id
+        u.atoms.write(outfile)
+        u_pdb = mda.Universe(outfile)
+        assert u_pdb.atoms.chainIDs[0] == ref_id
+
     def test_stringio_outofrange(self, universe3):
         """
         Check that when StringIO is used, the correct out-of-range error for

From a1447baac40b9e74e1e66b9f46ae6bdc40ccfb4e Mon Sep 17 00:00:00 2001
From: Henry Kobin <henry.kobin@gmail.com>
Date: Sat, 13 Mar 2021 17:05:05 -0800
Subject: [PATCH 03/21] updated changelog

---
 package/CHANGELOG | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/package/CHANGELOG b/package/CHANGELOG
index 7fa27bb5178..4609b31347d 100644
--- a/package/CHANGELOG
+++ b/package/CHANGELOG
@@ -16,16 +16,17 @@ The rules for this file:
 ??/??/?? tylerjereddy, richardjgowers, IAlibay, hmacdope, orbeckst, cbouy,
          lilyminium, daveminh, jbarnoud, yuxuanzhuang, VOD555, ianmkenney,
          calcraven，xiki-tempula, mieczyslaw, manuel.nuno.melo, PicoCentauri,
-         hanatok, rmeli, aditya-kamath, tirkarthi
+         hanatok, rmeli, aditya-kamath, tirkarthi, Henry Kobin
 
   * 2.0.0
 
 Fixes
+  * Fixed chainID being overwritten by segid during file write. (Issue #3144)
   * ValueError raised when empty atomgroup is given to DensityAnalysis
-    without a user defined grid. UserWarning displayed when user defined 
+    without a user defined grid. UserWarning displayed when user defined
     grid is provided. (Issue #3055)
   * Fixed import of several `__all__` lists (PR #3013)
-  * atommethods (_get_prev/next_residues_by_resid) returns empty residue group 
+  * atommethods (_get_prev/next_residues_by_resid) returns empty residue group
     when given empty residue group (Issue #3089)
   * MOL2 files without bonds can now be read and written (Issue #3057)
   * Write `CONECT` record only once in multi-model PDB files (Issue #3018)
@@ -97,7 +98,7 @@ Enhancements
     whitespace for ranges (Issue #3054, PR #2927)
   * Added `atol` and `rtol` keywords for float comparison (PR #2927)
   * Improved performance of the ParmEd converter (Issue #3028, PR #3029)
-  * Improved analysis class docstrings, and added missing classes to the 
+  * Improved analysis class docstrings, and added missing classes to the
     `__all__` list (PR #2998)
   * The PDB writer gives more control over how to write the atom ids
     (Issue #1072, PR #2886)
@@ -131,7 +132,7 @@ Enhancements
     'protein' selection (#2751 PR #2755)
   * Added an RDKit converter that works for any input with all hydrogens
     explicit in the topology (Issue #2468, PR #2775)
-  
+
 Changes
   * Introduces encore specific C compiler arguments to allow for lowering of
     optimisations on non-x86 platforms (Issue #1389, PR #3149)
@@ -164,7 +165,7 @@ Changes
   * Sets the minimal RDKit version for CI to 2020.03.1 (Issue #2827, PR #2831)
   * Removes deprecated waterdynamics.HydrogenBondLifetimes (PR #2842)
   * Make NeighborSearch return empty atomgroup, residue, segments instead of list (Issue #2892, PR #2907)
-  * Updated Universe creation function signatures to named arguments (Issue #2921)  
+  * Updated Universe creation function signatures to named arguments (Issue #2921)
   * The transformation was changed from a function/closure to a class with
     `__call__` (Issue #2860, PR #2859)
   * deprecated ``analysis.helanal`` module has been removed in favour of

From 41d14aedb51d228d5864c345e8569c1b93854a61 Mon Sep 17 00:00:00 2001
From: Henry Kobin <henry.kobin@gmail.com>
Date: Sat, 13 Mar 2021 17:14:01 -0800
Subject: [PATCH 04/21] pep8 corrections

---
 package/MDAnalysis/coordinates/PDB.py             | 4 ++--
 testsuite/MDAnalysisTests/coordinates/test_pdb.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/package/MDAnalysis/coordinates/PDB.py b/package/MDAnalysis/coordinates/PDB.py
index e516b5c1cf2..8b37dc9ec26 100644
--- a/package/MDAnalysis/coordinates/PDB.py
+++ b/package/MDAnalysis/coordinates/PDB.py
@@ -1092,8 +1092,8 @@ def get_attr(attrname, default):
         record_types = get_attr('record_types', 'ATOM')
         chainids = (
             [id[:4] if (chainids[index] != '') else segids[index][-1:]
-            for index, id in enumerate(chainids)]
-            ) # check for a chainID, if no chainID default to segid
+                for index, id in enumerate(chainids)]
+            )  # check for a chainID, if no chainID default to segid
 
         # If reindex == False, we use the atom ids for the serial. We do not
         # want to use a fallback here.
diff --git a/testsuite/MDAnalysisTests/coordinates/test_pdb.py b/testsuite/MDAnalysisTests/coordinates/test_pdb.py
index 8813114805c..54a46ccee9a 100644
--- a/testsuite/MDAnalysisTests/coordinates/test_pdb.py
+++ b/testsuite/MDAnalysisTests/coordinates/test_pdb.py
@@ -456,7 +456,7 @@ def get_MODEL_lines(filename):
             assert int(line[10:14]) == model % 10000
 
     def test_segid_chainid(self, universe2, outfile):
-        """check whether chainID comes from last character of segid (issue #2224)"""
+        """check if chainID comes from last character of segid (issue #2224)"""
         ref_id = 'E'
         u = universe2
         u.atoms.write(outfile)

From d1c94fd39e2382555e355578a603b026119ba609 Mon Sep 17 00:00:00 2001
From: Henry Kobin <henry.kobin@gmail.com>
Date: Sat, 13 Mar 2021 17:21:07 -0800
Subject: [PATCH 05/21] another pep8 fix

---
 testsuite/MDAnalysisTests/coordinates/test_pdb.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/testsuite/MDAnalysisTests/coordinates/test_pdb.py b/testsuite/MDAnalysisTests/coordinates/test_pdb.py
index 54a46ccee9a..4b3bfa504b2 100644
--- a/testsuite/MDAnalysisTests/coordinates/test_pdb.py
+++ b/testsuite/MDAnalysisTests/coordinates/test_pdb.py
@@ -464,7 +464,7 @@ def test_segid_chainid(self, universe2, outfile):
         assert u_pdb.segments.chainIDs[0][0] == ref_id
 
     def test_segid_chainid_overwrite(self, universe3, outfile):
-        """check if chainID gets round-tripped after file write (issue #3144)"""
+        """check if chainID gets round-tripped after write (issue #3144)"""
         ref_id = 'x'
         u = universe3
         u.atoms.chainIDs = ref_id

From 48bf64d777405dc4bcc486727ba63b9b303c0fa5 Mon Sep 17 00:00:00 2001
From: Henry Kobin <henry.kobin@gmail.com>
Date: Sat, 13 Mar 2021 17:24:25 -0800
Subject: [PATCH 06/21] Update package/CHANGELOG

Co-authored-by: Lily Wang <31115101+lilyminium@users.noreply.github.com>
---
 package/CHANGELOG | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package/CHANGELOG b/package/CHANGELOG
index 4609b31347d..0b0184cb64c 100644
--- a/package/CHANGELOG
+++ b/package/CHANGELOG
@@ -16,7 +16,7 @@ The rules for this file:
 ??/??/?? tylerjereddy, richardjgowers, IAlibay, hmacdope, orbeckst, cbouy,
          lilyminium, daveminh, jbarnoud, yuxuanzhuang, VOD555, ianmkenney,
          calcraven，xiki-tempula, mieczyslaw, manuel.nuno.melo, PicoCentauri,
-         hanatok, rmeli, aditya-kamath, tirkarthi, Henry Kobin
+         hanatok, rmeli, aditya-kamath, tirkarthi, HenryKobin
 
   * 2.0.0
 

From 50de0b6a6ca9b60249fcbdeb2b20ac65c418c18b Mon Sep 17 00:00:00 2001
From: Henry Kobin <henry.kobin@gmail.com>
Date: Sat, 13 Mar 2021 17:25:26 -0800
Subject: [PATCH 07/21] Update package/MDAnalysis/coordinates/PDB.py

Co-authored-by: Lily Wang <31115101+lilyminium@users.noreply.github.com>
---
 package/MDAnalysis/coordinates/PDB.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package/MDAnalysis/coordinates/PDB.py b/package/MDAnalysis/coordinates/PDB.py
index 8b37dc9ec26..785c8f4bc4a 100644
--- a/package/MDAnalysis/coordinates/PDB.py
+++ b/package/MDAnalysis/coordinates/PDB.py
@@ -1052,7 +1052,7 @@ def _write_timestep(self, ts, multiframe=False):
            instead of guessing by default. If the elements are missing,
            empty records are written out (Issue #2423).
            Atoms are now checked for a chainID instead of being overwritten
-           by the last letter of the SegmentID (Issue #3144).
+           by the last letter of the `segid` (Issue #3144).
 
         """
         atoms = self.obj.atoms

From 2cb56e86ca9de2b8151de19a2e50686dae08d8f6 Mon Sep 17 00:00:00 2001
From: Henry Kobin <henry.kobin@gmail.com>
Date: Sat, 13 Mar 2021 19:00:54 -0800
Subject: [PATCH 08/21] corrections to PDBParser

---
 package/AUTHORS                                  |  3 ++-
 package/CHANGELOG                                |  6 ++++--
 package/MDAnalysis/coordinates/PDB.py            | 16 ++++++----------
 .../MDAnalysisTests/coordinates/test_pdb.py      | 11 +++++++----
 4 files changed, 19 insertions(+), 17 deletions(-)

diff --git a/package/AUTHORS b/package/AUTHORS
index 93dcb47b8a8..c0b38d62cf7 100644
--- a/package/AUTHORS
+++ b/package/AUTHORS
@@ -154,7 +154,8 @@ Chronological list of authors
   - Karthikeyan Singaravelan
 2021
   - Aditya Kamath
-  
+  - Henry Kobin
+
 External code
 -------------
 
diff --git a/package/CHANGELOG b/package/CHANGELOG
index 4609b31347d..b1b889ceaa6 100644
--- a/package/CHANGELOG
+++ b/package/CHANGELOG
@@ -16,12 +16,14 @@ The rules for this file:
 ??/??/?? tylerjereddy, richardjgowers, IAlibay, hmacdope, orbeckst, cbouy,
          lilyminium, daveminh, jbarnoud, yuxuanzhuang, VOD555, ianmkenney,
          calcraven，xiki-tempula, mieczyslaw, manuel.nuno.melo, PicoCentauri,
-         hanatok, rmeli, aditya-kamath, tirkarthi, Henry Kobin
+         hanatok, rmeli, aditya-kamath, tirkarthi, HenryKobin
 
   * 2.0.0
 
 Fixes
-  * Fixed chainID being overwritten by segid during file write. (Issue #3144)
+  * PDBParser will check for the presence of the chainID attribute of an
+    atom group and use these values instead of just using the end of segid.
+    If no chainID attribute is present, then segid will be used. (Issue #3144)
   * ValueError raised when empty atomgroup is given to DensityAnalysis
     without a user defined grid. UserWarning displayed when user defined
     grid is provided. (Issue #3055)
diff --git a/package/MDAnalysis/coordinates/PDB.py b/package/MDAnalysis/coordinates/PDB.py
index 8b37dc9ec26..8675b2f7fef 100644
--- a/package/MDAnalysis/coordinates/PDB.py
+++ b/package/MDAnalysis/coordinates/PDB.py
@@ -1051,8 +1051,9 @@ def _write_timestep(self, ts, multiframe=False):
            Writing now only uses the contents of the elements attribute
            instead of guessing by default. If the elements are missing,
            empty records are written out (Issue #2423).
-           Atoms are now checked for a chainID instead of being overwritten
-           by the last letter of the SegmentID (Issue #3144).
+           chainID now comes from the AtomGroup if the attribute is present.
+           If no chainID is present then the chainID will default to the l
+           last letter of segid (Issue #3144).
 
         """
         atoms = self.obj.atoms
@@ -1082,18 +1083,15 @@ def get_attr(attrname, default):
         altlocs = get_attr('altLocs', ' ')
         resnames = get_attr('resnames', 'UNK')
         icodes = get_attr('icodes', ' ')
-        chainids = get_attr('chainIDs', ' ')
         segids = get_attr('segids', ' ')
+        chainids = getattr(atoms,'chainIDs', segids)
         resids = get_attr('resids', 1)
         occupancies = get_attr('occupancies', 1.0)
         tempfactors = get_attr('tempfactors', 0.0)
         atomnames = get_attr('names', 'X')
         elements = get_attr('elements', ' ')
         record_types = get_attr('record_types', 'ATOM')
-        chainids = (
-            [id[:4] if (chainids[index] != '') else segids[index][-1:]
-                for index, id in enumerate(chainids)]
-            )  # check for a chainID, if no chainID default to segid
+
 
         # If reindex == False, we use the atom ids for the serial. We do not
         # want to use a fallback here.
@@ -1120,9 +1118,7 @@ def get_attr(attrname, default):
             vals['occupancy'] = occupancies[i]
             vals['tempFactor'] = tempfactors[i]
             vals['segID'] = segids[i][:4]
-            vals['chainID'] = (
-                chainids[i][:4] if (chainids[i][:4] != ' ') else segids[i][-1:]
-                )  # check to see if there is a valid chainID else segID
+            vals['chainID'] = chainids[i][-1:]
             vals['element'] = elements[i][:2].upper()
 
             # record_type attribute, if exists, can be ATOM or HETATM
diff --git a/testsuite/MDAnalysisTests/coordinates/test_pdb.py b/testsuite/MDAnalysisTests/coordinates/test_pdb.py
index 4b3bfa504b2..665ad09d09f 100644
--- a/testsuite/MDAnalysisTests/coordinates/test_pdb.py
+++ b/testsuite/MDAnalysisTests/coordinates/test_pdb.py
@@ -27,7 +27,7 @@
 import numpy as np
 import pytest
 from MDAnalysisTests import make_Universe
-from MDAnalysisTests.coordinates.base import _SingleFrameReader
+from MDAnalysisTests.coordinates.base impaort _SingleFrameReader
 from MDAnalysisTests.coordinates.reference import (RefAdKSmall,
                                                    RefAdK)
 from MDAnalysisTests.datafiles import (PDB, PDB_small, PDB_multiframe,
@@ -456,7 +456,7 @@ def get_MODEL_lines(filename):
             assert int(line[10:14]) == model % 10000
 
     def test_segid_chainid(self, universe2, outfile):
-        """check if chainID comes from last character of segid (issue #2224)"""
+        """Check if chainID comes from last character of segid (issue #2224)"""
         ref_id = 'E'
         u = universe2
         u.atoms.write(outfile)
@@ -464,13 +464,16 @@ def test_segid_chainid(self, universe2, outfile):
         assert u_pdb.segments.chainIDs[0][0] == ref_id
 
     def test_segid_chainid_overwrite(self, universe3, outfile):
-        """check if chainID gets round-tripped after write (issue #3144)"""
+        """
+        Check that chainID attribute is not overwritten by SegmentID
+        if the chainID attribute is present (issue #3144)
+        """
         ref_id = 'x'
         u = universe3
         u.atoms.chainIDs = ref_id
         u.atoms.write(outfile)
         u_pdb = mda.Universe(outfile)
-        assert u_pdb.atoms.chainIDs[0] == ref_id
+        assert_equal(u_pdb.atoms.chainIDs[0], ref_id)
 
     def test_stringio_outofrange(self, universe3):
         """

From d2814a15c37297ac43e46cbca28d8cbd9be5a23b Mon Sep 17 00:00:00 2001
From: Henry Kobin <henry.kobin@gmail.com>
Date: Sat, 13 Mar 2021 19:05:02 -0800
Subject: [PATCH 09/21] whitespace correction

---
 package/MDAnalysis/coordinates/PDB.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package/MDAnalysis/coordinates/PDB.py b/package/MDAnalysis/coordinates/PDB.py
index 53248de64c1..7a2cc612be7 100644
--- a/package/MDAnalysis/coordinates/PDB.py
+++ b/package/MDAnalysis/coordinates/PDB.py
@@ -1083,7 +1083,7 @@ def get_attr(attrname, default):
         resnames = get_attr('resnames', 'UNK')
         icodes = get_attr('icodes', ' ')
         segids = get_attr('segids', ' ')
-        chainids = getattr(atoms,'chainIDs', segids)
+        chainids = getattr(atoms, 'chainIDs', segids)
         resids = get_attr('resids', 1)
         occupancies = get_attr('occupancies', 1.0)
         tempfactors = get_attr('tempfactors', 0.0)

From 4282daeb54e204902573338647cb241e0a5c775b Mon Sep 17 00:00:00 2001
From: Henry Kobin <henry.kobin@gmail.com>
Date: Sat, 13 Mar 2021 19:25:17 -0800
Subject: [PATCH 10/21] fixed typo

---
 testsuite/MDAnalysisTests/coordinates/test_pdb.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/testsuite/MDAnalysisTests/coordinates/test_pdb.py b/testsuite/MDAnalysisTests/coordinates/test_pdb.py
index 665ad09d09f..1ef3cd8a1ea 100644
--- a/testsuite/MDAnalysisTests/coordinates/test_pdb.py
+++ b/testsuite/MDAnalysisTests/coordinates/test_pdb.py
@@ -27,7 +27,7 @@
 import numpy as np
 import pytest
 from MDAnalysisTests import make_Universe
-from MDAnalysisTests.coordinates.base impaort _SingleFrameReader
+from MDAnalysisTests.coordinates.base import _SingleFrameReader
 from MDAnalysisTests.coordinates.reference import (RefAdKSmall,
                                                    RefAdK)
 from MDAnalysisTests.datafiles import (PDB, PDB_small, PDB_multiframe,

From 004b0111f86c967bd2b007d88b6c10d0d9a58ce1 Mon Sep 17 00:00:00 2001
From: Henry Kobin <henry.kobin@gmail.com>
Date: Sun, 14 Mar 2021 13:35:15 -0700
Subject: [PATCH 11/21] version where chainid defaults to end of segid if
 invalid

---
 package/MDAnalysis/coordinates/PDB.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/package/MDAnalysis/coordinates/PDB.py b/package/MDAnalysis/coordinates/PDB.py
index 7a2cc612be7..8624fa9b44d 100644
--- a/package/MDAnalysis/coordinates/PDB.py
+++ b/package/MDAnalysis/coordinates/PDB.py
@@ -1083,7 +1083,7 @@ def get_attr(attrname, default):
         resnames = get_attr('resnames', 'UNK')
         icodes = get_attr('icodes', ' ')
         segids = get_attr('segids', ' ')
-        chainids = getattr(atoms, 'chainIDs', segids)
+        chainids = get_attr('chainIDs', ' ')
         resids = get_attr('resids', 1)
         occupancies = get_attr('occupancies', 1.0)
         tempfactors = get_attr('tempfactors', 0.0)
@@ -1091,6 +1091,9 @@ def get_attr(attrname, default):
         elements = get_attr('elements', ' ')
         record_types = get_attr('record_types', 'ATOM')
 
+        # If non alphanumeric occurs in chainids, resort back to using segids .
+        if not any([id.isalnum() for id in chainids]):
+            chainids = segids
 
         # If reindex == False, we use the atom ids for the serial. We do not
         # want to use a fallback here.

From b52d88fbe8bd0a744d6eb7c3344dd18624df2b14 Mon Sep 17 00:00:00 2001
From: Henry Kobin <henry.kobin@gmail.com>
Date: Tue, 16 Mar 2021 12:19:30 -0700
Subject: [PATCH 12/21] added function to validate chainIDs on an individual
 basis and updated tests to reflect changes

---
 package/MDAnalysis/coordinates/PDB.py         | 43 ++++++++++++++++---
 .../MDAnalysisTests/coordinates/test_pdb.py   | 35 ++++++++-------
 2 files changed, 58 insertions(+), 20 deletions(-)

diff --git a/package/MDAnalysis/coordinates/PDB.py b/package/MDAnalysis/coordinates/PDB.py
index 8624fa9b44d..e435e60f725 100644
--- a/package/MDAnalysis/coordinates/PDB.py
+++ b/package/MDAnalysis/coordinates/PDB.py
@@ -1083,7 +1083,7 @@ def get_attr(attrname, default):
         resnames = get_attr('resnames', 'UNK')
         icodes = get_attr('icodes', ' ')
         segids = get_attr('segids', ' ')
-        chainids = get_attr('chainIDs', ' ')
+        chainids = get_attr('chainIDs', 'X')
         resids = get_attr('resids', 1)
         occupancies = get_attr('occupancies', 1.0)
         tempfactors = get_attr('tempfactors', 0.0)
@@ -1091,9 +1091,42 @@ def get_attr(attrname, default):
         elements = get_attr('elements', ' ')
         record_types = get_attr('record_types', 'ATOM')
 
-        # If non alphanumeric occurs in chainids, resort back to using segids .
-        if not any([id.isalnum() for id in chainids]):
-            chainids = segids
+        def validate_chainids(chainids, default):
+            """Validate each atom's chainID
+
+            chainids - np array of chainIDs
+            default - default value in case chainID is considered invalid
+            """
+            invalid_length_ids = False
+            invalid_char_ids = False
+            missing_ids = False
+
+            for (i, id) in enumerate(chainids):
+                if id is None:
+                    missing_ids = True
+                    chainids[i] = default
+                elif len(id) > 1:
+                    invalid_length_ids = True
+                    chainids[i] = default
+                elif not id.isalnum():
+                    invalid_char_ids = True
+                    chainids[i] = default
+
+            if invalid_length_ids:
+                warnings.warn("Found chainIDs with invalid length."
+                              " Corresponding atoms will use value of '{}'"
+                              "".format(default))
+            if invalid_char_ids:
+                warnings.warn("Found chainIDs using unnaccepted character."
+                              " Corresponding atoms will use value of '{}'"
+                              "".format(default))
+            if missing_ids:
+                warnings.warn("Found missing chainIDs."
+                              " Corresponding atoms will use value of '{}'"
+                              "".format(default))
+            return chainids
+
+        chainids = validate_chainids(chainids, "X")
 
         # If reindex == False, we use the atom ids for the serial. We do not
         # want to use a fallback here.
@@ -1120,7 +1153,7 @@ def get_attr(attrname, default):
             vals['occupancy'] = occupancies[i]
             vals['tempFactor'] = tempfactors[i]
             vals['segID'] = segids[i][:4]
-            vals['chainID'] = chainids[i][-1:]
+            vals['chainID'] = chainids[i]
             vals['element'] = elements[i][:2].upper()
 
             # record_type attribute, if exists, can be ATOM or HETATM
diff --git a/testsuite/MDAnalysisTests/coordinates/test_pdb.py b/testsuite/MDAnalysisTests/coordinates/test_pdb.py
index 1ef3cd8a1ea..5032e1c7e30 100644
--- a/testsuite/MDAnalysisTests/coordinates/test_pdb.py
+++ b/testsuite/MDAnalysisTests/coordinates/test_pdb.py
@@ -455,25 +455,30 @@ def get_MODEL_lines(filename):
             # test number (only last 4 digits)
             assert int(line[10:14]) == model % 10000
 
-    def test_segid_chainid(self, universe2, outfile):
-        """Check if chainID comes from last character of segid (issue #2224)"""
-        ref_id = 'E'
-        u = universe2
-        u.atoms.write(outfile)
-        u_pdb = mda.Universe(outfile)
-        assert u_pdb.segments.chainIDs[0][0] == ref_id
-
-    def test_segid_chainid_overwrite(self, universe3, outfile):
+    def test_chainid_validated(self, universe3, outfile):
         """
-        Check that chainID attribute is not overwritten by SegmentID
-        if the chainID attribute is present (issue #3144)
+        Check that an atom's chainID is set to 'X' if the chainID
+        does not confirm to standards (issue #2224)
         """
-        ref_id = 'x'
+        default_id = 'X'
+        bad_id = '@'
+        u = universe3
+        u.atoms.chainIDs = bad_id
+        u.atoms.write(outfile)
+        u_pdb = mda.Universe(outfile)
+        assert_equal(u_pdb.segments.chainIDs[0][0], default_id)
+        bad_id = 'AA'
+        u = universe3
+        u.atoms.chainIDs = bad_id
+        u.atoms.write(outfile)
+        u_pdb = mda.Universe(outfile)
+        assert_equal(u_pdb.segments.chainIDs[0][0], default_id)
+        bad_id = ' '
         u = universe3
-        u.atoms.chainIDs = ref_id
+        u.atoms.chainIDs = bad_id
         u.atoms.write(outfile)
         u_pdb = mda.Universe(outfile)
-        assert_equal(u_pdb.atoms.chainIDs[0], ref_id)
+        assert_equal(u_pdb.segments.chainIDs[0][0], default_id)
 
     def test_stringio_outofrange(self, universe3):
         """
@@ -1084,7 +1089,7 @@ def test_atomname_alignment(self, writtenstuff):
             assert_equal(written[:16], reference)
 
     def test_atomtype_alignment(self, writtenstuff):
-        result_line = ("ATOM      1  H5T GUA A   1       7.974   6.430   9.561"
+        result_line = ("ATOM      1  H5T GUA X   1       7.974   6.430   9.561"
                        "  1.00  0.00      RNAA  \n")
         assert_equal(writtenstuff[9], result_line)
 

From f144c55c25b3d5c47cce42d6dd09ac220518e7ea Mon Sep 17 00:00:00 2001
From: Henry Kobin <henry.kobin@gmail.com>
Date: Tue, 16 Mar 2021 12:25:10 -0700
Subject: [PATCH 13/21] updated CHANGELOG

---
 package/CHANGELOG | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/package/CHANGELOG b/package/CHANGELOG
index b1b889ceaa6..34201e8a6ee 100644
--- a/package/CHANGELOG
+++ b/package/CHANGELOG
@@ -23,7 +23,9 @@ The rules for this file:
 Fixes
   * PDBParser will check for the presence of the chainID attribute of an
     atom group and use these values instead of just using the end of segid.
-    If no chainID attribute is present, then segid will be used. (Issue #3144)
+    If no chainID attribute is present, then a default value will be
+    provided. If the chainID for an atom is invalid (longer than one character,
+    not alpha-numeric, blank) it will be replaced with a default. (Issue #3144)
   * ValueError raised when empty atomgroup is given to DensityAnalysis
     without a user defined grid. UserWarning displayed when user defined
     grid is provided. (Issue #3055)

From 4506f3398666342460d4acbf54e558a3725218ef Mon Sep 17 00:00:00 2001
From: Henry Kobin <henry.kobin@gmail.com>
Date: Tue, 30 Mar 2021 12:22:36 -0700
Subject: [PATCH 14/21] changed using the term id to chainid to avoid python
 default conflicts

---
 package/MDAnalysis/coordinates/PDB.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/package/MDAnalysis/coordinates/PDB.py b/package/MDAnalysis/coordinates/PDB.py
index e435e60f725..5c7b14aeefd 100644
--- a/package/MDAnalysis/coordinates/PDB.py
+++ b/package/MDAnalysis/coordinates/PDB.py
@@ -1051,8 +1051,8 @@ def _write_timestep(self, ts, multiframe=False):
            Writing now only uses the contents of the elements attribute
            instead of guessing by default. If the elements are missing,
            empty records are written out (Issue #2423).
-           Atoms are now checked for a chainID instead of being overwritten
-           by the last letter of the `segid` (Issue #3144).
+           Atoms are now checked for a valid chainID instead of being
+           overwritten by the last letter of the `segid` (Issue #3144).
 
         """
         atoms = self.obj.atoms
@@ -1083,7 +1083,7 @@ def get_attr(attrname, default):
         resnames = get_attr('resnames', 'UNK')
         icodes = get_attr('icodes', ' ')
         segids = get_attr('segids', ' ')
-        chainids = get_attr('chainIDs', 'X')
+        chainids = get_attr('chainIDs', ' ')
         resids = get_attr('resids', 1)
         occupancies = get_attr('occupancies', 1.0)
         tempfactors = get_attr('tempfactors', 0.0)
@@ -1101,14 +1101,14 @@ def validate_chainids(chainids, default):
             invalid_char_ids = False
             missing_ids = False
 
-            for (i, id) in enumerate(chainids):
-                if id is None:
+            for (i, chainid) in enumerate(chainids):
+                if chainid is None:
                     missing_ids = True
                     chainids[i] = default
-                elif len(id) > 1:
+                elif len(chainid) > 1:
                     invalid_length_ids = True
                     chainids[i] = default
-                elif not id.isalnum():
+                elif not chainid.isalnum():
                     invalid_char_ids = True
                     chainids[i] = default
 

From cbe3624783508f64913f0cd7125dd3cdb6e1eacb Mon Sep 17 00:00:00 2001
From: Henry Kobin <henry.kobin@gmail.com>
Date: Tue, 30 Mar 2021 12:23:03 -0700
Subject: [PATCH 15/21] if no segids present then default to blank value

---
 package/MDAnalysis/topology/PDBParser.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/package/MDAnalysis/topology/PDBParser.py b/package/MDAnalysis/topology/PDBParser.py
index 468d04dc985..f551a31d308 100644
--- a/package/MDAnalysis/topology/PDBParser.py
+++ b/package/MDAnalysis/topology/PDBParser.py
@@ -288,10 +288,6 @@ def _parseatoms(self):
             warnings.warn("Serial numbers went over 100,000.  "
                           "Higher serials have been guessed")
 
-        # If segids not present, try to use chainids
-        if not any(segids):
-            segids = chainids
-
         n_atoms = len(serials)
 
         attrs = []
@@ -360,7 +356,7 @@ def _parseatoms(self):
             attrs.append(Segids(segids))
         else:
             n_segments = 1
-            attrs.append(Segids(np.array(['SYSTEM'], dtype=object)))
+            attrs.append(Segids(np.array([' '], dtype=object)))
             segidx = None
 
         top = Topology(n_atoms, n_residues, n_segments,

From d505a14609eaca1530fdc4b14713dac25f4f22e2 Mon Sep 17 00:00:00 2001
From: Henry Kobin <henry.kobin@gmail.com>
Date: Tue, 30 Mar 2021 12:24:05 -0700
Subject: [PATCH 16/21] added to changelog on PDBParser.py

---
 package/MDAnalysis/topology/PDBParser.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/package/MDAnalysis/topology/PDBParser.py b/package/MDAnalysis/topology/PDBParser.py
index f551a31d308..c8275f563e8 100644
--- a/package/MDAnalysis/topology/PDBParser.py
+++ b/package/MDAnalysis/topology/PDBParser.py
@@ -184,6 +184,7 @@ class PDBParser(TopologyReaderBase):
        Bonds attribute is not added if no bonds are present in PDB file.
        If elements are invalid or partially missing, empty elements records
        are now assigned (Issue #2422).
+       If no segids present default to blank value.(Issue #3144)
     """
     format = ['PDB', 'ENT']
 

From 14a3836ccb4f745759360190d40280862c71a476 Mon Sep 17 00:00:00 2001
From: Henry Kobin <henry.kobin@gmail.com>
Date: Tue, 6 Apr 2021 18:44:56 -0700
Subject: [PATCH 17/21] segids default to blank instead of chainIDs or 'SYSTEM'

---
 package/MDAnalysis/topology/PDBParser.py          | 3 ++-
 testsuite/MDAnalysisTests/coordinates/test_pdb.py | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/package/MDAnalysis/topology/PDBParser.py b/package/MDAnalysis/topology/PDBParser.py
index c8275f563e8..7aeb17a8433 100644
--- a/package/MDAnalysis/topology/PDBParser.py
+++ b/package/MDAnalysis/topology/PDBParser.py
@@ -184,7 +184,8 @@ class PDBParser(TopologyReaderBase):
        Bonds attribute is not added if no bonds are present in PDB file.
        If elements are invalid or partially missing, empty elements records
        are now assigned (Issue #2422).
-       If no segids present default to blank value.(Issue #3144)
+       If segids are not present they will default to blank instead
+       of SYSTEM.(Issue #3144).
     """
     format = ['PDB', 'ENT']
 
diff --git a/testsuite/MDAnalysisTests/coordinates/test_pdb.py b/testsuite/MDAnalysisTests/coordinates/test_pdb.py
index 5032e1c7e30..d50fc1254c1 100644
--- a/testsuite/MDAnalysisTests/coordinates/test_pdb.py
+++ b/testsuite/MDAnalysisTests/coordinates/test_pdb.py
@@ -300,7 +300,7 @@ def test_writer_no_icodes(self, u_no_names, outfile):
     def test_writer_no_segids(self, u_no_names, outfile):
         u_no_names.atoms.write(outfile)
         u = mda.Universe(outfile)
-        expected = np.array(['SYSTEM'] * u_no_names.atoms.n_atoms)
+        expected = np.array([' '] * u_no_names.atoms.n_atoms)
         assert_equal([atom.segid for atom in u.atoms], expected)
 
     def test_writer_no_occupancies(self, u_no_names, outfile):

From 0d0bd971eaa9440159ea7da5aca0fcb9e18aec0d Mon Sep 17 00:00:00 2001
From: Henry Kobin <henry.kobin@gmail.com>
Date: Tue, 6 Apr 2021 20:28:37 -0700
Subject: [PATCH 18/21] reverted changes to pdbparser

---
 package/MDAnalysis/topology/PDBParser.py          | 8 +++++---
 testsuite/MDAnalysisTests/coordinates/test_pdb.py | 2 +-
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/package/MDAnalysis/topology/PDBParser.py b/package/MDAnalysis/topology/PDBParser.py
index 7aeb17a8433..468d04dc985 100644
--- a/package/MDAnalysis/topology/PDBParser.py
+++ b/package/MDAnalysis/topology/PDBParser.py
@@ -184,8 +184,6 @@ class PDBParser(TopologyReaderBase):
        Bonds attribute is not added if no bonds are present in PDB file.
        If elements are invalid or partially missing, empty elements records
        are now assigned (Issue #2422).
-       If segids are not present they will default to blank instead
-       of SYSTEM.(Issue #3144).
     """
     format = ['PDB', 'ENT']
 
@@ -290,6 +288,10 @@ def _parseatoms(self):
             warnings.warn("Serial numbers went over 100,000.  "
                           "Higher serials have been guessed")
 
+        # If segids not present, try to use chainids
+        if not any(segids):
+            segids = chainids
+
         n_atoms = len(serials)
 
         attrs = []
@@ -358,7 +360,7 @@ def _parseatoms(self):
             attrs.append(Segids(segids))
         else:
             n_segments = 1
-            attrs.append(Segids(np.array([' '], dtype=object)))
+            attrs.append(Segids(np.array(['SYSTEM'], dtype=object)))
             segidx = None
 
         top = Topology(n_atoms, n_residues, n_segments,
diff --git a/testsuite/MDAnalysisTests/coordinates/test_pdb.py b/testsuite/MDAnalysisTests/coordinates/test_pdb.py
index d50fc1254c1..35a2c158be4 100644
--- a/testsuite/MDAnalysisTests/coordinates/test_pdb.py
+++ b/testsuite/MDAnalysisTests/coordinates/test_pdb.py
@@ -300,7 +300,7 @@ def test_writer_no_icodes(self, u_no_names, outfile):
     def test_writer_no_segids(self, u_no_names, outfile):
         u_no_names.atoms.write(outfile)
         u = mda.Universe(outfile)
-        expected = np.array([' '] * u_no_names.atoms.n_atoms)
+        expected = np.array(['X'] * u_no_names.atoms.n_atoms)
         assert_equal([atom.segid for atom in u.atoms], expected)
 
     def test_writer_no_occupancies(self, u_no_names, outfile):

From 0782a230aa1a98033beea86d50ae33f6b7444774 Mon Sep 17 00:00:00 2001
From: Henry Kobin <henry.kobin@gmail.com>
Date: Wed, 7 Apr 2021 17:24:03 -0700
Subject: [PATCH 19/21] parametrized test for checking invalid chainid on write
 of pdb file and corrected check for empty string in pdb.py

---
 package/MDAnalysis/coordinates/PDB.py         |  2 +-
 .../MDAnalysisTests/coordinates/test_pdb.py   | 19 ++++---------------
 2 files changed, 5 insertions(+), 16 deletions(-)

diff --git a/package/MDAnalysis/coordinates/PDB.py b/package/MDAnalysis/coordinates/PDB.py
index 5c7b14aeefd..91d584fb344 100644
--- a/package/MDAnalysis/coordinates/PDB.py
+++ b/package/MDAnalysis/coordinates/PDB.py
@@ -1102,7 +1102,7 @@ def validate_chainids(chainids, default):
             missing_ids = False
 
             for (i, chainid) in enumerate(chainids):
-                if chainid is None:
+                if chainid == "":
                     missing_ids = True
                     chainids[i] = default
                 elif len(chainid) > 1:
diff --git a/testsuite/MDAnalysisTests/coordinates/test_pdb.py b/testsuite/MDAnalysisTests/coordinates/test_pdb.py
index 35a2c158be4..41d52aa0aa1 100644
--- a/testsuite/MDAnalysisTests/coordinates/test_pdb.py
+++ b/testsuite/MDAnalysisTests/coordinates/test_pdb.py
@@ -455,27 +455,16 @@ def get_MODEL_lines(filename):
             # test number (only last 4 digits)
             assert int(line[10:14]) == model % 10000
 
-    def test_chainid_validated(self, universe3, outfile):
+    @pytest.mark.parametrize("bad_chainid",
+                             ['@','','AA'])
+    def test_chainid_validated(self, universe3, outfile, bad_chainid):
         """
         Check that an atom's chainID is set to 'X' if the chainID
         does not confirm to standards (issue #2224)
         """
         default_id = 'X'
-        bad_id = '@'
         u = universe3
-        u.atoms.chainIDs = bad_id
-        u.atoms.write(outfile)
-        u_pdb = mda.Universe(outfile)
-        assert_equal(u_pdb.segments.chainIDs[0][0], default_id)
-        bad_id = 'AA'
-        u = universe3
-        u.atoms.chainIDs = bad_id
-        u.atoms.write(outfile)
-        u_pdb = mda.Universe(outfile)
-        assert_equal(u_pdb.segments.chainIDs[0][0], default_id)
-        bad_id = ' '
-        u = universe3
-        u.atoms.chainIDs = bad_id
+        u.atoms.chainIDs = bad_chainid
         u.atoms.write(outfile)
         u_pdb = mda.Universe(outfile)
         assert_equal(u_pdb.segments.chainIDs[0][0], default_id)

From 9659693ff3e497850586003d8b1655aa5959c678 Mon Sep 17 00:00:00 2001
From: Henry Kobin <henry.kobin@gmail.com>
Date: Wed, 7 Apr 2021 17:26:51 -0700
Subject: [PATCH 20/21] pep8

---
 testsuite/MDAnalysisTests/coordinates/test_pdb.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/testsuite/MDAnalysisTests/coordinates/test_pdb.py b/testsuite/MDAnalysisTests/coordinates/test_pdb.py
index 41d52aa0aa1..92b970094d4 100644
--- a/testsuite/MDAnalysisTests/coordinates/test_pdb.py
+++ b/testsuite/MDAnalysisTests/coordinates/test_pdb.py
@@ -456,7 +456,7 @@ def get_MODEL_lines(filename):
             assert int(line[10:14]) == model % 10000
 
     @pytest.mark.parametrize("bad_chainid",
-                             ['@','','AA'])
+                             ['@', '', 'AA'])
     def test_chainid_validated(self, universe3, outfile, bad_chainid):
         """
         Check that an atom's chainID is set to 'X' if the chainID

From 9f0998c21ee39a063113b4a3a3c57ec9d724d347 Mon Sep 17 00:00:00 2001
From: Henry Kobin <henry.kobin@gmail.com>
Date: Wed, 7 Apr 2021 20:18:43 -0700
Subject: [PATCH 21/21] Update package/MDAnalysis/coordinates/PDB.py

Co-authored-by: Lily Wang <31115101+lilyminium@users.noreply.github.com>
---
 package/MDAnalysis/coordinates/PDB.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package/MDAnalysis/coordinates/PDB.py b/package/MDAnalysis/coordinates/PDB.py
index 91d584fb344..badf47e8e9d 100644
--- a/package/MDAnalysis/coordinates/PDB.py
+++ b/package/MDAnalysis/coordinates/PDB.py
@@ -1083,7 +1083,7 @@ def get_attr(attrname, default):
         resnames = get_attr('resnames', 'UNK')
         icodes = get_attr('icodes', ' ')
         segids = get_attr('segids', ' ')
-        chainids = get_attr('chainIDs', ' ')
+        chainids = get_attr('chainIDs', '')
         resids = get_attr('resids', 1)
         occupancies = get_attr('occupancies', 1.0)
         tempfactors = get_attr('tempfactors', 0.0)