diff --git a/package/CHANGELOG b/package/CHANGELOG index e5e506d82a1..6440e9cef5a 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -14,7 +14,7 @@ The rules for this file: ------------------------------------------------------------------------------ ??/??/?? tylerjereddy, richardjgowers, IAlibay, hmacdope, orbeckst, cbouy, - lilyminium, daveminh, jbarnoud, yuxuanzhuang, VOD555 + lilyminium, daveminh, jbarnoud, yuxuanzhuang, VOD555, ianmkenney * 2.0.0 @@ -39,6 +39,8 @@ Fixes * In hydrogenbonds.hbond_analysis.HydrogenbondAnalysis an AttributeError was thrown when finding D-H pairs via the topology if `hydrogens` was an empty AtomGroup (Issue #2848) + * Fixed the DMSParser, allowing the creation of multiple segids sharing + residues with identical resids (Issue #1387, PR #2872) Enhancements * Refactored analysis.helanal into analysis.helix_analysis @@ -83,7 +85,6 @@ Changes Deprecations - 06/09/20 richardjgowers, kain88-de, lilyminium, p-j-smith, bdice, joaomcteixeira, PicoCentauri, davidercruz, jbarnoud, RMeli, IAlibay, mtiberti, CCook96, Yuan-Yu, xiki-tempula, HTian1997, Iv-Hristov, hmacdope, AnshulAngaria, diff --git a/package/MDAnalysis/topology/DMSParser.py b/package/MDAnalysis/topology/DMSParser.py index 42515792e0f..1240c9b7574 100644 --- a/package/MDAnalysis/topology/DMSParser.py +++ b/package/MDAnalysis/topology/DMSParser.py @@ -45,7 +45,7 @@ import os from . import guessers -from .base import TopologyReaderBase, squash_by +from .base import TopologyReaderBase, change_squash from ..core.topology import Topology from ..core.topologyattrs import ( Atomids, @@ -176,21 +176,43 @@ def dict_factory(cursor, row): topattrs.append(Atomtypes(atomtypes, guessed=True)) # Residues - atom_residx, res_resids, (res_resnames, res_segids) = squash_by( - attrs['resid'], attrs['resname'], attrs['segid']) + atom_residx, (res_resids, + res_resnums, + res_resnames, + res_segids) = change_squash( + (attrs['resid'], attrs['resname'], attrs['segid']), + (attrs['resid'], + attrs['resid'].copy(), + attrs['resname'], + attrs['segid']), + ) + + n_residues = len(res_resids) topattrs.append(Resids(res_resids)) - topattrs.append(Resnums(res_resids.copy())) + topattrs.append(Resnums(res_resnums)) topattrs.append(Resnames(res_resnames)) - # Segments - res_segidx, seg_segids = squash_by( - res_segids)[:2] - topattrs.append(Segids(seg_segids)) + if any(res_segids) and not any(val is None for val in res_segids): + res_segidx, (res_segids,) = change_squash((res_segids,), + (res_segids,)) + + uniq_seg = np.unique(res_segids) + idx2seg = {idx: res_segids[idx] for idx in res_segidx} + res_segids = uniq_seg + nidx = {segid: nidx for nidx, segid in enumerate(uniq_seg)} + + res_segidx = np.array([nidx[idx2seg[idx]] for idx in res_segidx]) + + n_segments = len(res_segids) + topattrs.append(Segids(res_segids)) + else: + n_segments = 1 + topattrs.append(Segids(np.array(['SYSTEM'], dtype=object))) + res_segidx = None - # Bonds topattrs.append(Bonds(attrs['bond'])) - top = Topology(len(attrs['id']), len(res_resids), len(seg_segids), + top = Topology(len(attrs['id']), n_residues, n_segments, attrs=topattrs, atom_resindex=atom_residx, residue_segindex=res_segidx) diff --git a/testsuite/MDAnalysisTests/data/adk_closed_domains.dms b/testsuite/MDAnalysisTests/data/adk_closed_domains.dms new file mode 100644 index 00000000000..29203fdd8b2 Binary files /dev/null and b/testsuite/MDAnalysisTests/data/adk_closed_domains.dms differ diff --git a/testsuite/MDAnalysisTests/data/adk_closed_no_segid.dms b/testsuite/MDAnalysisTests/data/adk_closed_no_segid.dms new file mode 100644 index 00000000000..3d09fbc3f63 Binary files /dev/null and b/testsuite/MDAnalysisTests/data/adk_closed_no_segid.dms differ diff --git a/testsuite/MDAnalysisTests/datafiles.py b/testsuite/MDAnalysisTests/datafiles.py index 90a2e1b8e24..19376ea647c 100644 --- a/testsuite/MDAnalysisTests/datafiles.py +++ b/testsuite/MDAnalysisTests/datafiles.py @@ -115,6 +115,8 @@ "PDB_HOLE", # gramicidin A "MULTIPDB_HOLE", # gramicidin A, normal mode 7 from ElNemo "DMS", + "DMS_DOMAINS", # ADK closed with multiple segids + "DMS_NO_SEGID", # ADK closed with no segids or chains "CONECT", # HIV Reverse Transcriptase with inhibitor "TRZ", "TRZ_psf", "TRIC", @@ -419,6 +421,8 @@ MULTIPDB_HOLE = resource_filename(__name__, 'data/1grm_elNemo_mode7.pdb.bz2') DMS = resource_filename(__name__, 'data/adk_closed.dms') +DMS_DOMAINS = resource_filename(__name__, 'data/adk_closed_domains.dms') +DMS_NO_SEGID = resource_filename(__name__, 'data/adk_closed_no_segid.dms') CONECT = resource_filename(__name__, 'data/1hvr.pdb') diff --git a/testsuite/MDAnalysisTests/topology/test_dms.py b/testsuite/MDAnalysisTests/topology/test_dms.py index 3bc78bedf40..2740c392d26 100644 --- a/testsuite/MDAnalysisTests/topology/test_dms.py +++ b/testsuite/MDAnalysisTests/topology/test_dms.py @@ -23,19 +23,19 @@ import MDAnalysis as mda from MDAnalysisTests.topology.base import ParserBase -from MDAnalysisTests.datafiles import DMS +from MDAnalysisTests.datafiles import DMS_DOMAINS, DMS_NO_SEGID class TestDMSParser(ParserBase): parser = mda.topology.DMSParser.DMSParser - ref_filename = DMS + ref_filename = DMS_DOMAINS expected_attrs = ['ids', 'names', 'bonds', 'charges', 'masses', 'resids', 'resnames', 'segids', 'chainIDs', 'atomnums'] guessed_attrs = ['types'] expected_n_atoms = 3341 expected_n_residues = 214 - expected_n_segments = 1 + expected_n_segments = 3 def test_number_of_bonds(self, top): assert len(top.bonds.values) == 3365 @@ -50,8 +50,34 @@ def test_atomsels(self, filename): s1 = u.select_atoms("resid 33") assert len(s1) == 12 - s2 = u.select_atoms("segid 4AKE") + s2 = u.select_atoms("segid NMP") + assert len(s2) == 437 + + s3 = u.select_atoms("segid LID") + assert len(s3) == 598 + + s4 = u.select_atoms("segid CORE") + assert len(s4) == 2306 + + s5 = u.select_atoms("resname ALA") + assert len(s5) == 190 + + +class TestDMSParserNoSegid(TestDMSParser): + ref_filename = DMS_NO_SEGID + expected_n_segments = 1 + + def test_atomsels(self, filename): + u = mda.Universe(filename) + + s0 = u.select_atoms("name CA") + assert len(s0) == 214 + + s1 = u.select_atoms("resid 33") + assert len(s1) == 12 + + s2 = u.select_atoms("segid SYSTEM") assert len(s2) == 3341 - s3 = u.select_atoms("resname ALA") - assert len(s3) == 190 + s5 = u.select_atoms("resname ALA") + assert len(s5) == 190