MDAnalysis · lilyminium · Sep 30, 2020 · Sep 29, 2020 · Sep 29, 2020 · Sep 29, 2020
diff --git a/package/CHANGELOG b/package/CHANGELOG
@@ -20,6 +20,8 @@ The rules for this file:
   * 2.0.0
 
 Fixes
+  * ParmedParser no longer guesses elements if they are not recognised, instead
+    empty strings are assigned (Issue #2933)
   * Instead of using ATOM for both ATOM and HETATM, HETATM record type
     keyword is properly written out by PDB writer (Issue #1753, PR #2880).
   * Change referenced PDB standard version to 3.3 (Issue #2906).

diff --git a/package/MDAnalysis/topology/ParmEdParser.py b/package/MDAnalysis/topology/ParmEdParser.py
@@ -1,5 +1,5 @@
 # -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
-# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 
+# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
 #
 # MDAnalysis --- https://www.mdanalysis.org
 # Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors
@@ -25,14 +25,15 @@
 ParmEd topology parser
 ======================
 
-Converts a `ParmEd <https://parmed.github.io/ParmEd/html>`_ :class:`parmed.structure.Structure` into a :class:`MDAnalysis.core.Topology`.
+Converts a `ParmEd <https://parmed.github.io/ParmEd/html>`_
+:class:`parmed.structure.Structure` into a :class:`MDAnalysis.core.Topology`.
 
 
 Example
 -------
 
-If you want to use an MDAnalysis-written ParmEd structure for simulation 
-in ParmEd, you need to first read your files with ParmEd to include the 
+If you want to use an MDAnalysis-written ParmEd structure for simulation
+in ParmEd, you need to first read your files with ParmEd to include the
 necessary topology parameters. ::
 
     >>> import parmed as pmd
@@ -42,7 +43,7 @@
     >>> prm
     <AmberParm 3026 atoms; 1003 residues; 3025 bonds; PBC (orthogonal); parametrized>
 
-We can then convert this to an MDAnalysis structure, select only the 
+We can then convert this to an MDAnalysis structure, select only the
 protein atoms, and then convert it back to ParmEd. ::
 
     >>> u = mda.Universe(prm)
@@ -53,7 +54,8 @@
     >>> prm_prot
     <Structure 23 atoms; 2 residues; 22 bonds; PBC (orthogonal); parametrized>
 
-From here you can create an OpenMM simulation system and minimize the energy. ::
+From here you can create an OpenMM simulation system and minimize the
+energy. ::
 
     >>> import simtk.openmm as mm
     >>> import simtk.openmm.app as app
@@ -81,12 +83,8 @@
 
 """
 import logging
-import functools
-from math import ceil
 import numpy as np
 
-from ..lib.util import openany
-from .guessers import guess_atom_element
 from .base import TopologyReaderBase, change_squash
 from .tables import Z2SYMB
 from ..core.topologyattrs import (
@@ -122,12 +120,14 @@
 
 logger = logging.getLogger("MDAnalysis.topology.ParmEdParser")
 
+
 def squash_identical(values):
     if len(values) == 1:
         return values[0]
     else:
         return tuple(values)
 
+
 class ParmEdParser(TopologyReaderBase):
     """
     For ParmEd structures
@@ -153,6 +153,12 @@ def parse(self, **kwargs):
         Returns
         -------
         MDAnalysis *Topology* object
+
+
+        .. versionchanged:: 2.0.0
+           Elements are no longer guessed, if the elements present in the
+           parmed object are not recoginsed (usually given an atomic mass of 0)
+           then they will be assigned an empty string.
         """
         structure = self.filename
 
@@ -182,8 +188,6 @@ def parse(self, **kwargs):
         rmin14s = []
         epsilon14s = []
 
-
-
         for atom in structure.atoms:
             names.append(atom.name)
             masses.append(atom.mass)
@@ -209,7 +213,7 @@ def parse(self, **kwargs):
             epsilons.append(atom.epsilon)
             rmin14s.append(atom.rmin_14)
             epsilon14s.append(atom.epsilon_14)
-        
+
         attrs = []
 
         n_atoms = len(names)
@@ -220,7 +224,7 @@ def parse(self, **kwargs):
             try:
                 elements.append(Z2SYMB[z])
             except KeyError:
-                elements.append(guess_atom_element(name))
+                elements.append('')
 
         # Make Atom TopologyAttrs
         for vals, Attr, dtype in (
@@ -232,7 +236,7 @@ def parse(self, **kwargs):
                 (serials, Atomids, np.int32),
                 (chainids, ChainIDs, object),
 
-                (altLocs, AltLocs, object),                
+                (altLocs, AltLocs, object),
                 (bfactors, Tempfactors, np.float32),
                 (occupancies, Occupancies, np.float32),
 
@@ -253,7 +257,8 @@ def parse(self, **kwargs):
         segids = np.array(segids, dtype=object)
 
         residx, (resids, resnames, chainids, segids) = change_squash(
-            (resids, resnames, chainids, segids), (resids, resnames, chainids, segids))
+            (resids, resnames, chainids, segids),
+            (resids, resnames, chainids, segids))
 
         n_residues = len(resids)
         attrs.append(Resids(resids))
@@ -284,7 +289,6 @@ def parse(self, **kwargs):
         cmap_values = {}
         cmap_types = []
 
-
         for bond in structure.bonds:
             idx = (bond.atom1.idx, bond.atom2.idx)
             if idx not in bond_values:
@@ -299,11 +303,11 @@ def parse(self, **kwargs):
             bond_values, bond_types, bond_orders = [], [], []
         else:
             bond_types, bond_orders = zip(*values)
-            
+
             bond_types = list(map(squash_identical, bond_types))
             bond_orders = list(map(squash_identical, bond_orders))
 
-        attrs.append(Bonds(bond_values, types=bond_types, guessed=False, 
+        attrs.append(Bonds(bond_values, types=bond_types, guessed=False,
                            order=bond_orders))
 
         for pmdlist, na, values, types in (
@@ -343,4 +347,3 @@ def parse(self, **kwargs):
                        residue_segindex=segidx)
 
         return top
-
diff --git a/testsuite/MDAnalysisTests/topology/test_parmed.py b/testsuite/MDAnalysisTests/topology/test_parmed.py
@@ -21,8 +21,10 @@
 # J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
 #
 import pytest
-import MDAnalysis as mda
+import numpy as np
+from numpy.testing import assert_equal
 
+import MDAnalysis as mda
 from MDAnalysisTests.topology.base import ParserBase
 from MDAnalysisTests.datafiles import (
     PSF_NAMD_GBIS,
@@ -31,6 +33,7 @@
 
 pmd = pytest.importorskip('parmed')
 
+
 class BaseTestParmedParser(ParserBase):
     parser = mda.topology.ParmEdParser.ParmEdParser
     expected_attrs = ['ids', 'names', 'types', 'masses',
@@ -40,7 +43,7 @@ class BaseTestParmedParser(ParserBase):
                       'epsilon14s', 'elements', 'chainIDs',
                       'resids', 'resnames', 'resnums',
                       'segids',
-                      'bonds', 'ureybradleys', 'angles', 
+                      'bonds', 'ureybradleys', 'angles',
                       'dihedrals', 'impropers', 'cmaps']
 
     expected_n_atoms = 0
@@ -69,64 +72,71 @@ def test_bonds_total_counts(self, top, filename):
         unique = set([(a.atom1.idx, a.atom2.idx)
                       for a in filename.bonds])
         assert len(top.bonds.values) == len(unique)
-    
+
     def test_angles_total_counts(self, top, filename):
-        unique = set([(a.atom1.idx, a.atom2.idx, a.atom3.idx) 
+        unique = set([(a.atom1.idx, a.atom2.idx, a.atom3.idx)
                       for a in filename.angles])
         assert len(top.angles.values) == len(unique)
 
     def test_dihedrals_total_counts(self, top, filename):
         unique = set([(a.atom1.idx, a.atom2.idx, a.atom3.idx, a.atom4.idx)
-             for a in filename.dihedrals])
+                      for a in filename.dihedrals])
         assert len(top.dihedrals.values) == len(unique)
-    
+
     def test_impropers_total_counts(self, top, filename):
         unique = set([(a.atom1.idx, a.atom2.idx, a.atom3.idx, a.atom4.idx)
                       for a in filename.impropers])
         assert len(top.impropers.values) == len(unique)
 
     def test_cmaps_total_counts(self, top, filename):
-        unique = set([(a.atom1.idx, a.atom2.idx, a.atom3.idx, 
+        unique = set([(a.atom1.idx, a.atom2.idx, a.atom3.idx,
                        a.atom4.idx, a.atom5.idx)
                       for a in filename.cmaps])
         assert len(top.cmaps.values) == len(unique)
-    
+
     def test_ureybradleys_total_counts(self, top, filename):
         unique = set([(a.atom1.idx, a.atom2.idx)
                       for a in filename.urey_bradleys])
         assert len(top.ureybradleys.values) == len(unique)
 
+    def test_elements(self, top):
+        for erange, evals in zip(self.elems_ranges, self.expected_elems):
+            assert_equal(top.elements.values[erange[0]:erange[1]], evals,
+                         "unexpected element match")
 
 
 class TestParmedParserPSF(BaseTestParmedParser):
     """
     PSF with CMAPs
     """
-    
+
     ref_filename = PSF_NAMD_GBIS
-    
+
     expected_n_atoms = 3341
     expected_n_residues = 214
     expected_n_bonds = 3365
     expected_n_angles = 6123
     expected_n_dihedrals = 8921
     expected_n_impropers = 541
     expected_n_cmaps = 212
+    elems_ranges = ((0, 3342),)
+    # No atomic numbers set by parmed == no elements
+    expected_elems = (np.array(['' for i in range(3341)], dtype=object),)
 
     def test_bonds_atom_counts(self, universe):
         assert len(universe.atoms[[0]].bonds) == 4
         assert len(universe.atoms[[42]].bonds) == 1
 
     @pytest.mark.parametrize('value', (
-        (0, 1), 
-        (0, 2), 
-        (0, 3), 
+        (0, 1),
+        (0, 2),
+        (0, 3),
         (0, 4),
         ))
     def test_bonds_identity(self, top, value):
         vals = top.bonds.values
         assert value in vals or value[::-1] in vals
-    
+
     def test_bond_types(self, universe):
         b1 = universe.bonds[0]
 
@@ -139,8 +149,8 @@ def test_angles_atom_counts(self, universe):
         assert len(universe.atoms[[42]].angles), 2
 
     @pytest.mark.parametrize('value', (
-        (1, 0, 2), 
-        (1, 0, 3), 
+        (1, 0, 2),
+        (1, 0, 3),
         (1, 0, 4),
         ))
     def test_angles_identity(self, top, value):
@@ -151,15 +161,14 @@ def test_dihedrals_atom_counts(self, universe):
         assert len(universe.atoms[[0]].dihedrals) == 14
 
     @pytest.mark.parametrize('value', (
-        (0, 4, 6, 7), 
-        (0, 4, 6, 8), 
-        (0, 4, 6, 9), 
+        (0, 4, 6, 7),
+        (0, 4, 6, 8),
+        (0, 4, 6, 9),
         (0, 4, 17, 18),
         ))
     def test_dihedrals_identity(self, top, value):
         vals = top.dihedrals.values
         assert value in vals or value[::-1] in vals
-
 
     @pytest.mark.parametrize('value', (
         (17, 19, 21, 41, 43),
@@ -168,16 +177,22 @@ def test_dihedrals_identity(self, top, value):
     def test_cmaps_identity(self, top, value):
         vals = top.cmaps.values
         assert value in vals or value[::-1] in vals
-
+
+
 class TestParmedParserPRM(BaseTestParmedParser):
     """
     PRM
     """
-    
+
     ref_filename = PRM
-    
+
     expected_n_atoms = 252
     expected_n_residues = 14
+    elems_ranges = ((0, 8), (30, 38))
+    expected_elems = (np.array(['N', 'H', 'H', 'H', 'C', 'H', 'C', 'H'],
+                               dtype=object),
+                      np.array(['H', 'C', 'H', 'H', 'C', 'C', 'H', 'C'],
+                               dtype=object))
 
     def test_bonds_atom_counts(self, universe):
         assert len(universe.atoms[[0]].bonds) == 4
@@ -192,7 +207,7 @@ def test_bonds_atom_counts(self, universe):
     def test_bonds_identity(self, top, value):
         vals = top.bonds.values
         assert value in vals or value[::-1] in vals
-    
+
     def test_bond_types(self, universe):
         b1 = universe.bonds[0]
 
@@ -225,9 +240,9 @@ def test_dihedrals_atom_counts(self, universe):
     def test_dihedrals_identity(self, top, value):
         vals = top.dihedrals.values
         assert value in vals or value[::-1] in vals
-    
+
     def test_dihedral_types(self, universe):
-        ag = universe.atoms[[10,12,14,16]]
+        ag = universe.atoms[[10, 12, 14, 16]]
         dih = universe.dihedrals.atomgroup_intersection(ag,
                                                         strict=True)[0]
         assert len(dih.type) == 4