From 6cdb64b36e1f986ad9fe551753729520025996cb Mon Sep 17 00:00:00 2001 From: hanatok Date: Tue, 17 Nov 2020 08:38:53 -0600 Subject: [PATCH 1/4] Improve the performance of ParmEd converter. (Fix #3028) The original code used list.index() to look up the atom indices, which is nearly O(N^2) when iterating over all atoms. This commit converts the list to a dictionary mapping the atom objects to indices, and hence improves the overall performance. --- package/MDAnalysis/coordinates/ParmEd.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/package/MDAnalysis/coordinates/ParmEd.py b/package/MDAnalysis/coordinates/ParmEd.py index 33ec782c48f..5a7c64b19d1 100644 --- a/package/MDAnalysis/coordinates/ParmEd.py +++ b/package/MDAnalysis/coordinates/ParmEd.py @@ -132,7 +132,7 @@ def _read_first_frame(self): } def get_indices_from_subset(i, atomgroup=None, universe=None): - return atomgroup.index(universe.atoms[i]) + return atomgroup[universe.atoms[i]] class ParmEdConverter(base.ConverterBase): """Convert MDAnalysis AtomGroup or Universe to ParmEd :class:`~parmed.structure.Structure`. @@ -262,8 +262,9 @@ def convert(self, obj): struct.box = None if hasattr(ag_or_ts, 'universe'): + atomgroup = {atom: index for index, atom in enumerate(list(ag_or_ts))} get_atom_indices = functools.partial(get_indices_from_subset, - atomgroup=list(ag_or_ts), + atomgroup=atomgroup, universe=ag_or_ts.universe) else: get_atom_indices = lambda x: x From ec12676d76689857aebe0b5be7ec5e68475c6041 Mon Sep 17 00:00:00 2001 From: hanatok Date: Tue, 17 Nov 2020 08:57:56 -0600 Subject: [PATCH 2/4] Fix a PEP 8 warning in the previous commit. 
--- package/MDAnalysis/coordinates/ParmEd.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/package/MDAnalysis/coordinates/ParmEd.py b/package/MDAnalysis/coordinates/ParmEd.py index 5a7c64b19d1..8b28bb47ca3 100644 --- a/package/MDAnalysis/coordinates/ParmEd.py +++ b/package/MDAnalysis/coordinates/ParmEd.py @@ -262,7 +262,8 @@ def convert(self, obj): struct.box = None if hasattr(ag_or_ts, 'universe'): - atomgroup = {atom: index for index, atom in enumerate(list(ag_or_ts))} + atomgroup = {atom: index for index, + atom in enumerate(list(ag_or_ts))} get_atom_indices = functools.partial(get_indices_from_subset, atomgroup=atomgroup, universe=ag_or_ts.universe) From 7d36d8b33f0baed0c82785f7640b4fa0ac2bfb03 Mon Sep 17 00:00:00 2001 From: hanatok Date: Tue, 17 Nov 2020 20:39:25 -0600 Subject: [PATCH 3/4] Update AUTHORS and CHANGELOG. --- package/AUTHORS | 1 + package/CHANGELOG | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/package/AUTHORS b/package/AUTHORS index 41bfd346411..52b6003b9cf 100644 --- a/package/AUTHORS +++ b/package/AUTHORS @@ -150,6 +150,7 @@ Chronological list of authors - Edis Jakupovic - Nicholas Craven - Mieczyslaw Torchala + - Haochuan Chen External code ------------- diff --git a/package/CHANGELOG b/package/CHANGELOG index c12b68350ce..5c370f43954 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -15,7 +15,8 @@ The rules for this file: ------------------------------------------------------------------------------ ??/??/?? 
tylerjereddy, richardjgowers, IAlibay, hmacdope, orbeckst, cbouy, lilyminium, daveminh, jbarnoud, yuxuanzhuang, VOD555, ianmkenney, - calcraven,xiki-tempula, mieczyslaw, manuel.nuno.melo, PicoCentauri + calcraven,xiki-tempula, mieczyslaw, manuel.nuno.melo, PicoCentauri, + hanatok * 2.0.0 @@ -107,6 +108,7 @@ Enhancements 'protein' selection (#2751 PR #2755) * Added an RDKit converter that works for any input with all hydrogens explicit in the topology (Issue #2468, PR #2775) + * Improved performance of the ParmEd converter (Issue #3028, PR #3029) Changes * Continuous integration uses mamba rather than conda to install the From 72c94e044bd8704847130dac1c8a44ca3b1ee639 Mon Sep 17 00:00:00 2001 From: hanatok Date: Wed, 18 Nov 2020 19:10:32 -0600 Subject: [PATCH 4/4] Update CHANGELOG. --- package/CHANGELOG | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package/CHANGELOG b/package/CHANGELOG index 5c370f43954..bcc74b0a142 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -74,6 +74,7 @@ Fixes would create a test artifact (Issue #2979, PR #2981) Enhancements + * Improved performance of the ParmEd converter (Issue #3028, PR #3029) * Improved analysis class docstrings, and added missing classes to the `__all__` list (PR #2998) * The PDB writer gives more control over how to write the atom ids @@ -108,7 +109,6 @@ Enhancements 'protein' selection (#2751 PR #2755) * Added an RDKit converter that works for any input with all hydrogens explicit in the topology (Issue #2468, PR #2775) - * Improved performance of the ParmEd converter (Issue #3028, PR #3029) Changes * Continuous integration uses mamba rather than conda to install the