Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
4046c55
stopped overwrite of chainIDs by segids but allow segids to be chainI…
HenryKobin Mar 14, 2021
cbc5b6e
added test for chainID overwrite by segids
HenryKobin Mar 14, 2021
a1447ba
updated changelog
HenryKobin Mar 14, 2021
41d14ae
pep8 corrections
HenryKobin Mar 14, 2021
d1c94fd
another pep8 fix
HenryKobin Mar 14, 2021
48bf64d
Update package/CHANGELOG
HenryKobin Mar 14, 2021
50de0b6
Update package/MDAnalysis/coordinates/PDB.py
HenryKobin Mar 14, 2021
2cb56e8
corrections to PDBParser
HenryKobin Mar 14, 2021
082acf0
further refined description on PDB file
HenryKobin Mar 14, 2021
d2814a1
whitespace correction
HenryKobin Mar 14, 2021
4282dae
fixed typo
HenryKobin Mar 14, 2021
004b011
version where chainid defaults to end of segid if invalid
HenryKobin Mar 14, 2021
b52d88f
added function to validate chainIDs on an individual basis and update…
HenryKobin Mar 16, 2021
f144c55
updated CHANGELOG
HenryKobin Mar 16, 2021
4506f33
changed using the term id to chainid to avoid python default conflicts
HenryKobin Mar 30, 2021
cbe3624
if no segids present then default to blank value
HenryKobin Mar 30, 2021
d505a14
added to changelog on PDBParser.py
HenryKobin Mar 30, 2021
14a3836
segids default to blank instead of chainIDs or 'SYSTEM'
HenryKobin Apr 7, 2021
99cebe2
Merge branch 'develop' into pdb-chain-fix
HenryKobin Apr 7, 2021
0d0bd97
reverted changes to pdbparser
HenryKobin Apr 7, 2021
0782a23
parametrized test for checking invalid chainid on write of pdb file a…
HenryKobin Apr 8, 2021
1950728
Merge branch 'develop' into pdb-chain-fix
HenryKobin Apr 8, 2021
9659693
pep8
HenryKobin Apr 8, 2021
d7a6a6b
Merge branch 'pdb-chain-fix' of https://github.com/henrykobin/mdanaly…
HenryKobin Apr 8, 2021
9f0998c
Update package/MDAnalysis/coordinates/PDB.py
HenryKobin Apr 8, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion package/AUTHORS
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,8 @@ Chronological list of authors
- Hannah Pollak
- Estefania Barreto-Ojeda
- Paarth Thadani

- Henry Kobin

External code
-------------

Expand Down
8 changes: 6 additions & 2 deletions package/CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ The rules for this file:
lilyminium, daveminh, jbarnoud, yuxuanzhuang, VOD555, ianmkenney,
calcraven,xiki-tempula, mieczyslaw, manuel.nuno.melo, PicoCentauri,
hanatok, rmeli, aditya-kamath, tirkarthi, LeonardoBarneschi, hejamu,
biogen98, orioncohen, z3y50n, hp115, ojeda-e, thadanipaarth
biogen98, orioncohen, z3y50n, hp115, ojeda-e, thadanipaarth, HenryKobin

* 2.0.0

Expand All @@ -30,6 +30,11 @@ Fixes
zone/layer is empty, consistent with 'around' (Issue #2915)
* A Universe created from an ROMol with no atoms returns now a Universe
with 0 atoms (Issue #3142)
* PDBWriter will check for the presence of the chainID attribute of an
atom group and use these values instead of just using the end of segid.
If no chainID attribute is present, then the default value 'X' will be
used. If the chainID for an atom is invalid (longer than one character,
not alphanumeric, or blank) it will be replaced with that default. (Issue #3144)
* ValueError raised when empty atomgroup is given to DensityAnalysis
without a user defined grid. UserWarning displayed when user defined
grid is provided. (Issue #3055)
Expand Down Expand Up @@ -149,7 +154,6 @@ Enhancements
* Added an RDKit converter that works for any input with all hydrogens
explicit in the topology (Issue #2468, PR #2775)


Changes
* Fixed inaccurate docstring inside the RMSD class (Issue #2796, PR #3134)
* TPRParser now loads TPR files with `tpr_resid_from_one=True` by default,
Expand Down
42 changes: 41 additions & 1 deletion package/MDAnalysis/coordinates/PDB.py
Original file line number Diff line number Diff line change
Expand Up @@ -1051,6 +1051,8 @@ def _write_timestep(self, ts, multiframe=False):
Writing now only uses the contents of the elements attribute
instead of guessing by default. If the elements are missing,
empty records are written out (Issue #2423).
Atoms are now checked for a valid chainID instead of being
overwritten by the last letter of the `segid` (Issue #3144).

"""
atoms = self.obj.atoms
Expand Down Expand Up @@ -1081,13 +1083,51 @@ def get_attr(attrname, default):
resnames = get_attr('resnames', 'UNK')
icodes = get_attr('icodes', ' ')
segids = get_attr('segids', ' ')
chainids = get_attr('chainIDs', '')
resids = get_attr('resids', 1)
occupancies = get_attr('occupancies', 1.0)
tempfactors = get_attr('tempfactors', 0.0)
atomnames = get_attr('names', 'X')
elements = get_attr('elements', ' ')
record_types = get_attr('record_types', 'ATOM')

def validate_chainids(chainids, default):
    """Replace every invalid chainID with `default`, warning once per cause.

    A chainID is kept only if it is exactly one character long and that
    character is alphanumeric (as decided by :meth:`str.isalnum`). Empty
    strings, strings longer than one character and single
    non-alphanumeric characters (including a blank) are replaced.

    Parameters
    ----------
    chainids : mutable sequence of str
        chainID of each atom. Invalid entries are overwritten **in place**.
    default : str
        Value written in place of each invalid chainID.

    Returns
    -------
    chainids
        The same object that was passed in, after replacement.

    Warns
    -----
    UserWarning
        At most one warning per kind of problem (invalid length,
        unaccepted character, missing), regardless of how many atoms
        are affected.
    """
    # Kinds of problems encountered; a set keeps the warnings to one per
    # kind rather than one per atom.
    problems = set()

    for i, chainid in enumerate(chainids):
        if chainid == "":
            problem = "missing"
        elif len(chainid) > 1:
            problem = "length"
        elif not chainid.isalnum():
            problem = "char"
        else:
            continue
        problems.add(problem)
        chainids[i] = default

    fallback_note = " Corresponding atoms will use value of '{}'".format(
        default)
    # Fixed reporting order: length, character, missing.
    headlines = (
        ("length", "Found chainIDs with invalid length."),
        ("char", "Found chainIDs using unaccepted character."),
        ("missing", "Found missing chainIDs."),
    )
    for problem, headline in headlines:
        if problem in problems:
            warnings.warn(headline + fallback_note)

    return chainids

chainids = validate_chainids(chainids, "X")

# If reindex == False, we use the atom ids for the serial. We do not
# want to use a fallback here.
if not self._reindex:
Expand All @@ -1107,13 +1147,13 @@ def get_attr(attrname, default):
vals['name'] = self._deduce_PDB_atom_name(atomnames[i], resnames[i])
vals['altLoc'] = altlocs[i][:1]
vals['resName'] = resnames[i][:4]
vals['chainID'] = segids[i][-1:]
vals['resSeq'] = util.ltruncate_int(resids[i], 4)
vals['iCode'] = icodes[i][:1]
vals['pos'] = pos[i] # don't take off atom so conversion works
vals['occupancy'] = occupancies[i]
vals['tempFactor'] = tempfactors[i]
vals['segID'] = segids[i][:4]
vals['chainID'] = chainids[i]
vals['element'] = elements[i][:2].upper()

# record_type attribute, if exists, can be ATOM or HETATM
Expand Down
20 changes: 13 additions & 7 deletions testsuite/MDAnalysisTests/coordinates/test_pdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -300,7 +300,7 @@ def test_writer_no_icodes(self, u_no_names, outfile):
def test_writer_no_segids(self, u_no_names, outfile):
    """Atoms written without segids are read back with segid 'X'."""
    u_no_names.atoms.write(outfile)
    u = mda.Universe(outfile)
    # 'X' is the fallback chainID used by the writer; presumably the
    # parser reuses the chainID as segid when the segid column is blank
    # -- confirm against the PDB parser.
    expected = np.array(['X'] * u_no_names.atoms.n_atoms)
    assert_equal([atom.segid for atom in u.atoms], expected)

def test_writer_no_occupancies(self, u_no_names, outfile):
Expand Down Expand Up @@ -455,13 +455,19 @@ def get_MODEL_lines(filename):
# test number (only last 4 digits)
assert int(line[10:14]) == model % 10000

@pytest.mark.parametrize("bad_chainid",
                         ['@', '', 'AA'])
def test_chainid_validated(self, universe3, outfile, bad_chainid):
    """
    Check that an atom's chainID is set to 'X' if the chainID
    does not conform to standards (Issue #3144)
    """
    default_id = 'X'
    u = universe3
    # Give every atom the same invalid chainID before writing.
    u.atoms.chainIDs = bad_chainid
    u.atoms.write(outfile)
    u_pdb = mda.Universe(outfile)
    assert_equal(u_pdb.segments.chainIDs[0][0], default_id)

def test_stringio_outofrange(self, universe3):
"""
Expand Down Expand Up @@ -1072,7 +1078,7 @@ def test_atomname_alignment(self, writtenstuff):
assert_equal(written[:16], reference)

def test_atomtype_alignment(self, writtenstuff):
    """Compare entry 9 of the written output with the reference record."""
    # NOTE(review): the column spacing inside this literal looks collapsed
    # in this view; confirm the fixed-width PDB spacing against the
    # repository before relying on it.
    result_line = ("ATOM 1 H5T GUA X 1 7.974 6.430 9.561"
                   " 1.00 0.00 RNAA \n")
    assert_equal(writtenstuff[9], result_line)

Expand Down