MDAnalysis · orbeckst · Jul 29, 2020 · Jul 26, 2020 · Jul 26, 2020 · Jul 26, 2020
diff --git a/package/CHANGELOG b/package/CHANGELOG
@@ -14,7 +14,7 @@ The rules for this file:
 
 ------------------------------------------------------------------------------
 ??/??/?? tylerjereddy, richardjgowers, IAlibay, hmacdope, orbeckst, cbouy,
-         lilyminium, daveminh, jbarnoud, yuxuanzhuang, VOD555
+         lilyminium, daveminh, jbarnoud, yuxuanzhuang, VOD555, ianmkenney
 
   * 2.0.0
 
@@ -39,6 +39,8 @@ Fixes
   * In hydrogenbonds.hbond_analysis.HydrogenbondAnalysis an AttributeError 
     was thrown when finding D-H pairs via the topology if `hydrogens` was an 
     empty AtomGroup (Issue #2848)
+  * Fixed the DMSParser, allowing the creation of multiple segids sharing
+    residues with identical resids (Issue #1387, PR #2872)    
 
 Enhancements
   * Refactored analysis.helanal into analysis.helix_analysis
@@ -83,7 +85,6 @@ Changes
 
 Deprecations
 
-
 06/09/20 richardjgowers, kain88-de, lilyminium, p-j-smith, bdice, joaomcteixeira,
          PicoCentauri, davidercruz, jbarnoud, RMeli, IAlibay, mtiberti, CCook96,
          Yuan-Yu, xiki-tempula, HTian1997, Iv-Hristov, hmacdope, AnshulAngaria,

diff --git a/package/MDAnalysis/topology/DMSParser.py b/package/MDAnalysis/topology/DMSParser.py
@@ -45,7 +45,7 @@
 import os
 
 from . import guessers
-from .base import TopologyReaderBase, squash_by
+from .base import TopologyReaderBase, change_squash
 from ..core.topology import Topology
 from ..core.topologyattrs import (
     Atomids,
@@ -176,21 +176,43 @@ def dict_factory(cursor, row):
         topattrs.append(Atomtypes(atomtypes, guessed=True))
 
         # Residues
-        atom_residx, res_resids, (res_resnames, res_segids) = squash_by(
-            attrs['resid'], attrs['resname'], attrs['segid'])
+        atom_residx, (res_resids,
+                      res_resnums,
+                      res_resnames,
+                      res_segids) = change_squash(
+            (attrs['resid'], attrs['resname'], attrs['segid']),
+            (attrs['resid'],
+             attrs['resid'].copy(),
+             attrs['resname'],
+             attrs['segid']),
+            )
+
+        n_residues = len(res_resids)
         topattrs.append(Resids(res_resids))
-        topattrs.append(Resnums(res_resids.copy()))
+        topattrs.append(Resnums(res_resnums))
         topattrs.append(Resnames(res_resnames))
 
-        # Segments
-        res_segidx, seg_segids = squash_by(
-            res_segids)[:2]
-        topattrs.append(Segids(seg_segids))
+        if any(res_segids) and not any(val is None for val in res_segids):
+            res_segidx, (res_segids,) = change_squash((res_segids,),
+                                                      (res_segids,))
+
+            uniq_seg = np.unique(res_segids)
+            idx2seg = {idx: res_segids[idx] for idx in res_segidx}
+            res_segids = uniq_seg
+            nidx = {segid: nidx for nidx, segid in enumerate(uniq_seg)}
+
+            res_segidx = np.array([nidx[idx2seg[idx]] for idx in res_segidx])
+
+            n_segments = len(res_segids)
+            topattrs.append(Segids(res_segids))
+        else:
+            n_segments = 1
+            topattrs.append(Segids(np.array(['SYSTEM'], dtype=object)))
+            res_segidx = None
 
-        # Bonds
         topattrs.append(Bonds(attrs['bond']))
 
-        top = Topology(len(attrs['id']), len(res_resids), len(seg_segids),
+        top = Topology(len(attrs['id']), n_residues, n_segments,
                        attrs=topattrs,
                        atom_resindex=atom_residx,
                        residue_segindex=res_segidx)

diff --git a/testsuite/MDAnalysisTests/data/adk_closed_domains.dms b/testsuite/MDAnalysisTests/data/adk_closed_domains.dms
diff --git a/testsuite/MDAnalysisTests/data/adk_closed_no_segid.dms b/testsuite/MDAnalysisTests/data/adk_closed_no_segid.dms
diff --git a/testsuite/MDAnalysisTests/datafiles.py b/testsuite/MDAnalysisTests/datafiles.py
@@ -115,6 +115,8 @@
     "PDB_HOLE",  # gramicidin A
     "MULTIPDB_HOLE", # gramicidin A, normal mode 7 from ElNemo
     "DMS",
+    "DMS_DOMAINS",  # ADK closed with multiple segids
+    "DMS_NO_SEGID",  # ADK closed with no segids or chains
     "CONECT",  # HIV Reverse Transcriptase with inhibitor
     "TRZ", "TRZ_psf",
     "TRIC",
@@ -419,6 +421,8 @@
 MULTIPDB_HOLE = resource_filename(__name__, 'data/1grm_elNemo_mode7.pdb.bz2')
 
 DMS = resource_filename(__name__, 'data/adk_closed.dms')
+DMS_DOMAINS = resource_filename(__name__, 'data/adk_closed_domains.dms')
+DMS_NO_SEGID = resource_filename(__name__, 'data/adk_closed_no_segid.dms')
 
 CONECT = resource_filename(__name__, 'data/1hvr.pdb')
 

diff --git a/testsuite/MDAnalysisTests/topology/test_dms.py b/testsuite/MDAnalysisTests/topology/test_dms.py
@@ -23,19 +23,19 @@
 import MDAnalysis as mda
 
 from MDAnalysisTests.topology.base import ParserBase
-from MDAnalysisTests.datafiles import DMS
+from MDAnalysisTests.datafiles import DMS_DOMAINS, DMS_NO_SEGID
 
 
 class TestDMSParser(ParserBase):
     parser = mda.topology.DMSParser.DMSParser
-    ref_filename = DMS
+    ref_filename = DMS_DOMAINS
     expected_attrs = ['ids', 'names', 'bonds', 'charges',
                       'masses', 'resids', 'resnames', 'segids',
                       'chainIDs', 'atomnums']
     guessed_attrs = ['types']
     expected_n_atoms = 3341
     expected_n_residues = 214
-    expected_n_segments = 1
+    expected_n_segments = 3
 
     def test_number_of_bonds(self, top):
         assert len(top.bonds.values) == 3365
@@ -50,8 +50,34 @@ def test_atomsels(self, filename):
         s1 = u.select_atoms("resid 33")
         assert len(s1) == 12
 
-        s2 = u.select_atoms("segid 4AKE")
+        s2 = u.select_atoms("segid NMP")
+        assert len(s2) == 437
+
+        s3 = u.select_atoms("segid LID")
+        assert len(s3) == 598
+
+        s4 = u.select_atoms("segid CORE")
+        assert len(s4) == 2306
+
+        s5 = u.select_atoms("resname ALA")
+        assert len(s5) == 190
+
+
+class TestDMSParserNoSegid(TestDMSParser):
+    ref_filename = DMS_NO_SEGID
+    expected_n_segments = 1
+
+    def test_atomsels(self, filename):
+        u = mda.Universe(filename)
+
+        s0 = u.select_atoms("name CA")
+        assert len(s0) == 214
+
+        s1 = u.select_atoms("resid 33")
+        assert len(s1) == 12
+
+        s2 = u.select_atoms("segid SYSTEM")
         assert len(s2) == 3341
 
-        s3 = u.select_atoms("resname ALA")
-        assert len(s3) == 190
+        s5 = u.select_atoms("resname ALA")
+        assert len(s5) == 190