From 0a0cf37f341c7b639de5a8ed6f4637f42e3ad086 Mon Sep 17 00:00:00 2001 From: Manuel Nuno Melo Date: Sat, 24 Oct 2020 01:15:56 +0100 Subject: [PATCH 1/2] Implements cache validation at the Universe level Objects keep their own caches, but those can now be set to be validated against a centralized registry under object.universe. This simplifies the centralized invalidation of object caches. Applied to fragment caching and added asv benchmark. Already large speedup in fragment accession (Fixes #2376). Tests include cache invalidation when bonds are added/removed. Added asv's benchmarks/results subdir to .gitignore --- .gitignore | 2 + benchmarks/benchmarks/ag_methods.py | 9 ++++ package/CHANGELOG | 5 +- package/MDAnalysis/core/topologyattrs.py | 2 + package/MDAnalysis/core/universe.py | 8 ++- package/MDAnalysis/lib/util.py | 50 +++++++++++++++++-- .../MDAnalysisTests/core/test_fragments.py | 18 +++++++ testsuite/MDAnalysisTests/lib/test_util.py | 39 +++++++++++++++ 8 files changed, 126 insertions(+), 7 deletions(-) diff --git a/.gitignore b/.gitignore index 7038144cc4b..70f1106a6a2 100644 --- a/.gitignore +++ b/.gitignore @@ -39,6 +39,8 @@ authors.py *.DS_Store # ignore files from tests .hypothesis/ +# ignore results from asv +benchmarks/results # duecredit .duecredit.p diff --git a/benchmarks/benchmarks/ag_methods.py b/benchmarks/benchmarks/ag_methods.py index ce9b22f1b47..57e969ba92a 100644 --- a/benchmarks/benchmarks/ag_methods.py +++ b/benchmarks/benchmarks/ag_methods.py @@ -403,3 +403,12 @@ def setup(self, universe_type): def time_find_fragments(self, universe_type): frags = self.u.atoms.fragments + +class FragmentCaching(FragmentFinding): + """Test how quickly we find cached fragments""" + def setup(self, universe_type): + super(FragmentCaching, self).setup(universe_type) + frags = self.u.atoms.fragments # Priming the cache + + def time_find_cached_fragments(self, universe_type): + frags = self.u.atoms.fragments diff --git a/package/CHANGELOG b/package/CHANGELOG index 21b4283e401..6b412144524 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -81,8 +81,11 @@ Fixes * Fix syntax warning over comparison of literals using is (Issue #3066) Enhancements + * Caches can now undergo central validation at the Universe level, opening + the door to more useful caching. Already applied to fragment caching + (Issue #2376, PR #3135) * Code for operations on compounds refactored, centralized and optimized for - performance (Issue #3000) + performance (Issue #3000, PR #3005) * Added automatic selection class generation for TopologyAttrs, FloatRangeSelection, and BoolSelection (Issues #2925, #2875; PR #2927) * Added 'to' operator, negatives, scientific notation, and arbitrary diff --git a/package/MDAnalysis/core/topologyattrs.py b/package/MDAnalysis/core/topologyattrs.py index 2282b706f01..71ee04a850f 100644 --- a/package/MDAnalysis/core/topologyattrs.py +++ b/package/MDAnalysis/core/topologyattrs.py @@ -2404,6 +2404,7 @@ def fragindex(self): """ return self.universe._fragdict[self.ix].ix + @cached('fragindices', universe_validation=True) def fragindices(self): r"""The :class:`fragment indices` @@ -2437,6 +2438,7 @@ def fragment(self): """ return self.universe._fragdict[self.ix].fragment + @cached('fragments', universe_validation=True) def fragments(self): """Read-only :class:`tuple` of :class:`fragments`. diff --git a/package/MDAnalysis/core/universe.py b/package/MDAnalysis/core/universe.py index e4596ee6aba..43fdf890896 100644 --- a/package/MDAnalysis/core/universe.py +++ b/package/MDAnalysis/core/universe.py @@ -308,7 +308,7 @@ def __init__(self, topology=None, *coordinates, all_coordinates=False, in_memory_step=1, **kwargs): self._trajectory = None # managed attribute holding Reader - self._cache = {} + self._cache = {'_valid': {}} self.atoms = None self.residues = None self.segments = None @@ -1002,7 +1002,10 @@ def add_bonds(self, values, types=None, guessed=False, order=None): """ self._add_topology_objects('bonds', values, types=types, guessed=guessed, order=order) + # Invalidate bond-related caches self._cache.pop('fragments', None) + self._cache['_valid'].pop('fragments', None) + self._cache['_valid'].pop('fragindices', None) def add_angles(self, values, types=None, guessed=False): """Add new Angles to this Universe. @@ -1139,7 +1142,10 @@ def delete_bonds(self, values): .. versionadded:: 1.0.0 """ self._delete_topology_objects('bonds', values) + # Invalidate bond-related caches self._cache.pop('fragments', None) + self._cache['_valid'].pop('fragments', None) + self._cache['_valid'].pop('fragindices', None) def delete_angles(self, values): """Delete Angles from this Universe. diff --git a/package/MDAnalysis/lib/util.py b/package/MDAnalysis/lib/util.py index 9916c45bd8d..8805b5c04ec 100644 --- a/package/MDAnalysis/lib/util.py +++ b/package/MDAnalysis/lib/util.py @@ -202,6 +202,7 @@ import functools from functools import wraps import textwrap +import weakref import mmtf import numpy as np @@ -1496,10 +1497,18 @@ def conv_float(s): return s -def cached(key): +# A dummy, empty, cheaply-hashable object class to use with weakref caching. +# (class object doesn't allow weakrefs to its instances, but user-defined +# classes do) +class _CacheKey: + pass + + +def cached(key, universe_validation=False): """Cache a property within a class. - Requires the Class to have a cache dict called ``_cache``. + Requires the Class to have a cache dict :attr:`_cache` and, with + `universe_validation`, a :attr:`universe` with a cache dict :attr:`_cache`. Example ------- @@ -1513,23 +1522,54 @@ class A(object): @property @cached('keyname') def size(self): - # This code gets ran only if the lookup of keyname fails - # After this code has been ran once, the result is stored in + # This code gets run only if the lookup of keyname fails + # After this code has been run once, the result is stored in # _cache with the key: 'keyname' - size = 10.0 + return 10.0 + + @property + @cached('keyname', universe_validation=True) + def othersize(self): + # This code gets run only if the lookup + # id(self) is not in the validation set under + # self.universe._cache['_valid']['keyname'] + # After this code has been run once, id(self) is added to that + # set. The validation set can be centrally invalidated at the + # universe level (say, if a topology change invalidates specific + # caches). + return 20.0 .. versionadded:: 0.9.0 + .. versionchanged::2.0.0 + Added the `universe_validation` keyword. """ def cached_lookup(func): @wraps(func) def wrapper(self, *args, **kwargs): try: + if universe_validation: # Universe-level cache validation + u_cache = self.universe._cache.setdefault('_valid', dict()) + # A WeakSet is used so that keys from out-of-scope/deleted + # objects don't clutter it. + valid_caches = u_cache.setdefault(key, weakref.WeakSet()) + try: + if self._cache_key not in valid_caches: + raise KeyError + except AttributeError: # No _cache_key yet + # Must create a reference key for the validation set. + # self could be used itself as a weakref but set() + # requires hashing it, which can be slow for AGs. Using + # id(self) fails because ints can't be weak-referenced. + self._cache_key = _CacheKey() + raise KeyError return self._cache[key] except KeyError: self._cache[key] = ret = func(self, *args, **kwargs) + if universe_validation: + valid_caches.add(self._cache_key) return ret return wrapper diff --git a/testsuite/MDAnalysisTests/core/test_fragments.py b/testsuite/MDAnalysisTests/core/test_fragments.py index f71c48866e5..d7649dc0096 100644 --- a/testsuite/MDAnalysisTests/core/test_fragments.py +++ b/testsuite/MDAnalysisTests/core/test_fragments.py @@ -212,6 +212,24 @@ def test_atom_fragment_nobonds_NDE(self): with pytest.raises(NoDataError): getattr(u.atoms[10], 'fragindex') + def test_atomgroup_fragment_cache_invalidation_bond_making(self): + u = case1() + fgs = u.atoms.fragments + assert fgs is u.atoms._cache['fragments'] + assert u.atoms._cache_key in u._cache['_valid']['fragments'] + u.add_bonds((fgs[0][-1] + fgs[1][0],)) # should trigger invalidation + assert 'fragments' not in u._cache['_valid'] + assert len(fgs) > len(u.atoms.fragments) # recomputed + + def test_atomgroup_fragment_cache_invalidation_bond_breaking(self): + u = case1() + fgs = u.atoms.fragments + assert fgs is u.atoms._cache['fragments'] + assert u.atoms._cache_key in u._cache['_valid']['fragments'] + u.delete_bonds((u.atoms.bonds[3],)) # should trigger invalidation + assert 'fragments' not in u._cache['_valid'] + assert len(fgs) < len(u.atoms.fragments) # recomputed + def test_tpr_fragments(): ag = mda.Universe(TPR, XTC).atoms diff --git a/testsuite/MDAnalysisTests/lib/test_util.py b/testsuite/MDAnalysisTests/lib/test_util.py index 6e00870217d..a93a1cdea02 100644 --- a/testsuite/MDAnalysisTests/lib/test_util.py +++ b/testsuite/MDAnalysisTests/lib/test_util.py @@ -700,6 +700,11 @@ def __init__(self): self.ref3 = 3.0 self.ref4 = 4.0 self.ref5 = 5.0 + self.ref6 = 6.0 + # For universe-validated caches + # One-line lambda-like class + self.universe = type('Universe', (), dict())() + self.universe._cache = {'_valid': {}} @cached('val1') def val1(self): @@ -742,6 +747,12 @@ def val5(self, n, s=None): def _init_val_5(self, n, s=None): return n * s + # Property decorator and universally-validated cache + @property + @cached('val6', universe_validation=True) + def val6(self): + return self.ref5 + 1.0 + # These are designed to mimic the AG and Universe cache methods def _clear_caches(self, *args): if len(args) == 0: @@ -836,6 +847,34 @@ def test_val5_kwargs(self, obj): assert obj.val5(5, s='!!!') == 5 * 'abc' + # property decorator, with universe validation + def test_val6_universe_validation(self, obj): + obj._clear_caches() + assert not hasattr(obj, '_cache_key') + assert 'val6' not in obj._cache + assert 'val6' not in obj.universe._cache['_valid'] + + ret = obj.val6 # Trigger caching + assert obj.val6 == obj.ref6 + assert ret is obj.val6 + assert 'val6' in obj._cache + assert 'val6' in obj.universe._cache['_valid'] + assert obj._cache_key in obj.universe._cache['_valid']['val6'] + assert obj._cache['val6'] is ret + + # Invalidate cache at universe level + obj.universe._cache['_valid']['val6'].clear() + ret2 = obj.val6 + assert ret2 is obj.val6 + assert ret2 is not ret + + # Clear obj cache and access again + obj._clear_caches() + ret3 = obj.val6 + assert ret3 is obj.val6 + assert ret3 is not ret2 + assert ret3 is not ret + class TestConvFloat(object): @pytest.mark.parametrize('s, output', [ From 950cc1a8eab3a662f781aac84bf348a8aafa39f7 Mon Sep 17 00:00:00 2001 From: Manuel Nuno Melo Date: Tue, 2 Mar 2021 11:50:01 +0000 Subject: [PATCH 2/2] PEP8 compliance --- benchmarks/benchmarks/ag_methods.py | 1 + 1 file changed, 1 insertion(+) diff --git a/benchmarks/benchmarks/ag_methods.py b/benchmarks/benchmarks/ag_methods.py index 57e969ba92a..52616ba4511 100644 --- a/benchmarks/benchmarks/ag_methods.py +++ b/benchmarks/benchmarks/ag_methods.py @@ -404,6 +404,7 @@ def setup(self, universe_type): def time_find_fragments(self, universe_type): frags = self.u.atoms.fragments + class FragmentCaching(FragmentFinding): """Test how quickly we find cached fragments""" def setup(self, universe_type):