From 1a309eb05f6692012cdbb8cf149be0ce470c2e35 Mon Sep 17 00:00:00 2001 From: Oliver Beckstein Date: Thu, 15 Jun 2017 17:18:13 -0700 Subject: [PATCH 1/4] make scipy and matplotlib full dependencies (#1159) - require scipy and matplotlib in setup.py (fixes #1361) - scipy and matplotlib are imported at top in analysis - updated all modules - removed any code that guards against scipy or matplotlib import - conforms to style guide https://github.com/MDAnalysis/mdanalysis/wiki/Style-Guide#module-imports-in-mdanalysisanalysis - fixes #1159 - removed conditional skipping of tests when scipy or matplotlib are missing --- .travis.yml | 7 +- package/CHANGELOG | 1 + package/MDAnalysis/analysis/distances.py | 23 +---- .../MDAnalysis/analysis/encore/similarity.py | 36 ++----- .../analysis/hbonds/hbond_autocorrel.py | 25 ++--- package/MDAnalysis/analysis/hole.py | 16 ++- package/MDAnalysis/analysis/legacy/x3dna.py | 5 +- package/MDAnalysis/analysis/pca.py | 7 +- package/MDAnalysis/analysis/polymer.py | 7 +- package/MDAnalysis/analysis/psa.py | 99 ++++++++++++++----- package/setup.py | 9 +- .../MDAnalysisTests/analysis/test_density.py | 7 -- .../analysis/test_distances.py | 67 ++----------- .../MDAnalysisTests/analysis/test_encore.py | 21 +--- .../MDAnalysisTests/analysis/test_hole.py | 9 +- .../analysis/test_hydrogenbondautocorrel.py | 3 - .../MDAnalysisTests/analysis/test_leaflet.py | 2 - .../MDAnalysisTests/analysis/test_pca.py | 2 - .../analysis/test_persistencelength.py | 14 +-- .../MDAnalysisTests/analysis/test_psa.py | 15 +-- 20 files changed, 140 insertions(+), 235 deletions(-) diff --git a/.travis.yml b/.travis.yml index 77330999d37..dfe47fe6fb9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -25,10 +25,9 @@ env: - SETUP_CMD="" - COVERALLS=false - BUILD_CMD="pip install -v package/ && pip install testsuite/" - - CONDA_DEPENDENCIES="mmtf-python nose=1.3.7 mock six biopython networkx cython joblib nose-timer" - - CONDA_ALL_DEPENDENCIES="mmtf-python nose=1.3.7 mock six 
biopython networkx cython joblib nose-timer matplotlib netcdf4 scikit-learn scipy seaborn coveralls clustalw=2.1" - # Install griddataformats from PIP so that scipy is only installed in the full build (#1147) - - PIP_DEPENDENCIES='griddataformats' + - CONDA_DEPENDENCIES="mmtf-python nose=1.3.7 mock six biopython networkx cython joblib nose-timer matplotlib scipy griddataformats" + - CONDA_ALL_DEPENDENCIES="mmtf-python nose=1.3.7 mock six biopython networkx cython joblib nose-timer matplotlib netcdf4 scikit-learn scipy griddataformats seaborn coveralls clustalw=2.1" + - PIP_DEPENDENCIES="" - CONDA_CHANNELS='biobuilds conda-forge' - CONDA_CHANNEL_PRIORITY=True - NUMPY_VERSION=stable diff --git a/package/CHANGELOG b/package/CHANGELOG index 66827ab51e0..0bd718dc3c5 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -27,6 +27,7 @@ Fixes * Groups are hashable on python 3 (Issue #1397) Changes + * scipy and matplotlib are now required dependencies (Issue #1159) 06/03/17 utkbansal, kain88-de, xiki-tempula, kaplajon, wouterboomsma, diff --git a/package/MDAnalysis/analysis/distances.py b/package/MDAnalysis/analysis/distances.py index c6473ccd3ae..fc3dc4432a6 100644 --- a/package/MDAnalysis/analysis/distances.py +++ b/package/MDAnalysis/analysis/distances.py @@ -42,6 +42,7 @@ 'contact_matrix', 'dist', 'between'] import numpy as np +import scipy.sparse from MDAnalysis.lib.distances import distance_array, self_distance_array from MDAnalysis.lib.c_distances import contact_matrix_no_pbc, contact_matrix_pbc @@ -51,15 +52,6 @@ import logging logger = logging.getLogger("MDAnalysis.analysis.distances") -try: - from scipy import sparse -except ImportError: - sparse = None - msg = "scipy.sparse could not be imported: some functionality will " \ - "not be available in contact_matrix()" - warnings.warn(msg, category=ImportWarning) - logger.warn(msg) - del msg def contact_matrix(coord, cutoff=15.0, returntype="numpy", box=None): '''Calculates a matrix of contacts. 
@@ -93,12 +85,6 @@ def contact_matrix(coord, cutoff=15.0, returntype="numpy", box=None): The contact matrix is returned in a format determined by the `returntype` keyword. - - Note - ---- - :mod:`scipy.sparse` is require for using *sparse* matrices; if it cannot - be imported then an `ImportError` is raised. - See Also -------- :mod:`MDAnalysis.analysis.contacts` for native contact analysis @@ -112,14 +98,9 @@ def contact_matrix(coord, cutoff=15.0, returntype="numpy", box=None): adj = (distance_array(coord, coord, box=box) < cutoff) return adj elif returntype == "sparse": - if sparse is None: - # hack: if we are running with minimal dependencies then scipy was - # not imported and we have to bail here (see scipy import at top) - raise ImportError("For sparse matrix functionality you need to " - "import scipy.") # Initialize square List of Lists matrix of dimensions equal to number # of coordinates passed - sparse_contacts = sparse.lil_matrix((len(coord), len(coord)), dtype='bool') + sparse_contacts = scipy.sparse.lil_matrix((len(coord), len(coord)), dtype='bool') if box is not None: # with PBC contact_matrix_pbc(coord, sparse_contacts, box, cutoff) diff --git a/package/MDAnalysis/analysis/encore/similarity.py b/package/MDAnalysis/analysis/encore/similarity.py index d0befb76597..53ec497f5ce 100644 --- a/package/MDAnalysis/analysis/encore/similarity.py +++ b/package/MDAnalysis/analysis/encore/similarity.py @@ -172,21 +172,13 @@ from __future__ import print_function, division, absolute_import from six.moves import range, zip -import MDAnalysis as mda -import numpy as np import warnings import logging -try: - from scipy.stats import gaussian_kde -except ImportError: - gaussian_kde = None - msg = "scipy.stats.gaussian_kde could not be imported. " \ - "Dimensionality reduction ensemble comparisons will not " \ - "be available." 
- warnings.warn(msg, - category=ImportWarning) - logging.warn(msg) - del msg + +import numpy as np +import scipy.stats + +import MDAnalysis as mda from ...coordinates.memory import MemoryReader from .confdistmatrix import get_distance_matrix @@ -460,18 +452,11 @@ def gen_kde_pdfs(embedded_space, ensemble_assignment, nensembles, embedded_ensembles = [] resamples = [] - if gaussian_kde is None: - # hack: if we are running with minimal dependencies then scipy was - # not imported and we have to bail here (see scipy import at top) - raise ImportError("For Kernel Density Estimation functionality you" - "need to import scipy") - for i in range(1, nensembles + 1): this_embedded = embedded_space.transpose()[ np.where(np.array(ensemble_assignment) == i)].transpose() embedded_ensembles.append(this_embedded) - kdes.append(gaussian_kde( - this_embedded)) + kdes.append(scipy.stats.gaussian_kde(this_embedded)) # # Set number of samples # if not nsamples: @@ -623,12 +608,6 @@ def cumulative_gen_kde_pdfs(embedded_space, ensemble_assignment, nensembles, """ - if gaussian_kde is None: - # hack: if we are running with minimal dependencies then scipy was - # not imported and we have to bail here (see scipy import at top) - raise ImportError("For Kernel Density Estimation functionality you" - "need to import scipy") - kdes = [] embedded_ensembles = [] resamples = [] @@ -639,8 +618,7 @@ def cumulative_gen_kde_pdfs(embedded_space, ensemble_assignment, nensembles, np.logical_and(ensemble_assignment >= ens_id_min, ensemble_assignment <= i))].transpose() embedded_ensembles.append(this_embedded) - kdes.append( - gaussian_kde(this_embedded)) + kdes.append(scipy.stats.gaussian_kde(this_embedded)) # Resample according to probability distributions for this_kde in kdes: diff --git a/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py b/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py index 00434fa0c12..70c15b8795d 100644 --- a/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py +++ 
b/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py @@ -155,6 +155,8 @@ from __future__ import division, absolute_import from six.moves import zip import numpy as np +import scipy.optimize + import warnings from MDAnalysis.lib.log import ProgressMeter @@ -162,7 +164,7 @@ class HydrogenBondAutoCorrel(object): - """Perform a time autocorrelation of the hydrogen bonds in the system. + """Perform a time autocorrelation of the hydrogen bonds in the system. Parameters ---------- @@ -421,8 +423,9 @@ def solve(self, p_guess=None): Initial guess for the leastsq fit, must match the shape of the expected coefficients - Continuous defition results are fitted to a double exponential, - intermittent definition are fit to a triple exponential. + Continuous defition results are fitted to a double exponential with + :func:`scipy.optimize.leastsq`, intermittent definition are fit to a + triple exponential. The results of this fitting procedure are saved into the *fit*, *tau* and *estimate* keywords in the solution dict. 
@@ -434,14 +437,14 @@ def solve(self, p_guess=None): - *estimate* contains the estimate provided by the fit of the time autocorrelation function - In addition, the output of the leastsq function is saved into the - solution dict + In addition, the output of the :func:`~scipy.optimize.leastsq` function + is saved into the solution dict - *infodict* - *mesg* - *ier* + """ - from scipy.optimize import leastsq if self.solution['results'] is None: raise ValueError( @@ -498,9 +501,8 @@ def triple(x, A1, A2, tau1, tau2, tau3): if p_guess is None: p_guess = (0.5, 10 * self.sample_time, self.sample_time) - p, cov, infodict, mesg, ier = leastsq(err, p_guess, - args=(time, results), - full_output=True) + p, cov, infodict, mesg, ier = scipy.optimize.leastsq( + err, p_guess, args=(time, results), full_output=True) self.solution['fit'] = p A1, tau1, tau2 = p A2 = 1 - A1 @@ -512,9 +514,8 @@ def triple(x, A1, A2, tau1, tau2, tau3): p_guess = (0.33, 0.33, 10 * self.sample_time, self.sample_time, 0.1 * self.sample_time) - p, cov, infodict, mesg, ier = leastsq(err, p_guess, - args=(time, results), - full_output=True) + p, cov, infodict, mesg, ier = scipy.optimize.leastsq( + err, p_guess, args=(time, results), full_output=True) self.solution['fit'] = p A1, A2, tau1, tau2, tau3 = p A3 = 1 - A1 - A2 diff --git a/package/MDAnalysis/analysis/hole.py b/package/MDAnalysis/analysis/hole.py index e129c3f07fb..c2fd3cc4ead 100644 --- a/package/MDAnalysis/analysis/hole.py +++ b/package/MDAnalysis/analysis/hole.py @@ -245,7 +245,6 @@ from six.moves import zip, cPickle import six -import numpy as np import glob import os import errno @@ -258,6 +257,10 @@ import logging from itertools import cycle +import numpy as np +import matplotlib +import matplotlib.pyplot as plt + from MDAnalysis import Universe from MDAnalysis.exceptions import ApplicationError from MDAnalysis.lib.util import which, realpath, asiterable @@ -370,8 +373,6 @@ def save(self, filename="hole.pickle"): cPickle.dump(self.profiles, 
open(filename, "wb"), cPickle.HIGHEST_PROTOCOL) def _process_plot_kwargs(self, kwargs): - import matplotlib.colors - kw = {} frames = kwargs.pop('frames', None) if frames is None: @@ -448,9 +449,6 @@ def plot(self, **kwargs): Returns ``ax``. """ - - import matplotlib.pyplot as plt - kw, kwargs = self._process_plot_kwargs(kwargs) ax = kwargs.pop('ax', None) @@ -517,8 +515,7 @@ def plot3D(self, **kwargs): Returns ``ax``. """ - - import matplotlib.pyplot as plt + # installed with matplotlib; imported here to enable 3D axes from mpl_toolkits.mplot3d import Axes3D kw, kwargs = self._process_plot_kwargs(kwargs) @@ -540,8 +537,7 @@ def plot3D(self, **kwargs): rxncoord = profile.rxncoord else: # does not seem to work with masked arrays but with nan hack! - # http://stackoverflow.com/questions/4913306/python-matplotlib-mplot3d-how-do-i-set-a-maximum-value - # -for-the-z-axis + # http://stackoverflow.com/questions/4913306/python-matplotlib-mplot3d-how-do-i-set-a-maximum-value-for-the-z-axis #radius = np.ma.masked_greater(profile.radius, rmax) #rxncoord = np.ma.array(profile.rxncoord, mask=radius.mask) rxncoord = profile.rxncoord diff --git a/package/MDAnalysis/analysis/legacy/x3dna.py b/package/MDAnalysis/analysis/legacy/x3dna.py index 76acb3d0560..633ad9def1b 100644 --- a/package/MDAnalysis/analysis/legacy/x3dna.py +++ b/package/MDAnalysis/analysis/legacy/x3dna.py @@ -132,13 +132,15 @@ import errno import shutil import warnings -import numpy as np import os.path import subprocess import tempfile import textwrap from collections import OrderedDict +import numpy as np +import matplotlib.pyplot as plt + from MDAnalysis import ApplicationError from MDAnalysis.lib.util import which, realpath, asiterable @@ -413,7 +415,6 @@ def plot(self, **kwargs): Provide `ax` to have all plots plotted in the same axes. 
""" - import matplotlib.pyplot as plt na_avg, na_std = self.mean_std() for k in range(len(na_avg[0])): diff --git a/package/MDAnalysis/analysis/pca.py b/package/MDAnalysis/analysis/pca.py index ca5e00ef5d3..7e828a166bd 100644 --- a/package/MDAnalysis/analysis/pca.py +++ b/package/MDAnalysis/analysis/pca.py @@ -106,6 +106,7 @@ import warnings import numpy as np +import scipy.integrate from MDAnalysis import Universe from MDAnalysis.analysis.align import _fit_to @@ -357,9 +358,9 @@ def cosine_content(pca_space, i): .. [BerkHess1] Berk Hess. Convergence of sampling in protein simulations. Phys. Rev. E 65, 031910 (2002). """ - from scipy.integrate import simps + t = np.arange(len(pca_space)) T = len(pca_space) cos = np.cos(np.pi * t * (i + 1) / T) - return ((2.0 / T) * (simps(cos*pca_space[:, i])) ** 2 / - simps(pca_space[:, i] ** 2)) + return ((2.0 / T) * (scipy.integrate.simps(cos*pca_space[:, i])) ** 2 / + scipy.integrate.simps(pca_space[:, i] ** 2)) diff --git a/package/MDAnalysis/analysis/polymer.py b/package/MDAnalysis/analysis/polymer.py index 315306ec52e..355a063eaee 100644 --- a/package/MDAnalysis/analysis/polymer.py +++ b/package/MDAnalysis/analysis/polymer.py @@ -36,6 +36,8 @@ from six.moves import range import numpy as np +import scipy.optimize + import logging from .. 
import NoDataError @@ -165,13 +167,10 @@ def fit_exponential_decay(x, y): ----- This function assumes that data starts at 1.0 and decays to 0.0 - Requires scipy """ - from scipy.optimize import curve_fit - def expfunc(x, a): return np.exp(-x/a) - a = curve_fit(expfunc, x, y)[0][0] + a = scipy.optimize.curve_fit(expfunc, x, y)[0][0] return a diff --git a/package/MDAnalysis/analysis/psa.py b/package/MDAnalysis/analysis/psa.py index 3e7430e166d..4b91ca9261b 100644 --- a/package/MDAnalysis/analysis/psa.py +++ b/package/MDAnalysis/analysis/psa.py @@ -216,7 +216,11 @@ from six.moves import range, cPickle import numpy as np -import warnings,numbers +from scipy import spatial, cluster +import matplotlib + +import warnings +import numbers import MDAnalysis import MDAnalysis.analysis.align @@ -396,13 +400,14 @@ def hausdorff(P, Q): Notes ----- - - The Hausdorff distance is calculated in a brute force manner from the - distance matrix without further optimizations, essentially following - [Huttenlocher1993]_. - - :func:`scipy.spatial.distance.directed_hausdorff` is an optimized - implementation of the early break algorithm of [Taha2015]_; note that - one still has to calculate the *symmetric* Hausdorff distance as - `max(directed_hausdorff(P, Q)[0], directed_hausdorff(Q, P)[0])`. + The Hausdorff distance is calculated in a brute force manner from the + distance matrix without further optimizations, essentially following + [Huttenlocher1993]_. + + :func:`scipy.spatial.distance.directed_hausdorff` is an optimized + implementation of the early break algorithm of [Taha2015]_; note that one + still has to calculate the *symmetric* Hausdorff distance as + `max(directed_hausdorff(P, Q)[0], directed_hausdorff(Q, P)[0])`. References ---------- @@ -415,6 +420,10 @@ def hausdorff(P, Q): calculating the exact Hausdorff distance. IEEE Transactions On Pattern Analysis And Machine Intelligence, 37:2153-63, 2015. 
+ SeeAlso + ------- + scipy.spatial.distance.directed_hausdorff + """ N, axis = get_coord_axes(P) d = get_msd_matrix(P, Q, axis=axis) @@ -1650,7 +1659,7 @@ def plot(self, filename=None, linkage='ward', count_sort=False, If `filename` is supplied then the figure is also written to file (the suffix determines the file type, e.g. pdf, png, eps, ...). All other - keyword arguments are passed on to :func:`matplotlib.pyplot.imshow`. + keyword arguments are passed on to :func:`matplotlib.pyplot.matshow`. Parameters @@ -1669,6 +1678,15 @@ def plot(self, filename=None, linkage='ward', count_sort=False, set the font size for colorbar labels; font size for path labels on dendrogram default to 3 points smaller [``12``] + Returns + ------- + Z + `Z` from :meth:`cluster` + dgram + `dgram` from :meth:`cluster` + dist_matrix_clus + clustered distance matrix (reordered) + """ from matplotlib.pyplot import figure, colorbar, cm, savefig, clf @@ -1770,6 +1788,23 @@ def plot_annotated_heatmap(self, filename=None, linkage='ward', \ annot_size : float font size of annotation labels on heat map [``6.5``] + Returns + ------- + Z + `Z` from :meth:`cluster` + dgram + `dgram` from :meth:`cluster` + dist_matrix_clus + clustered distance matrix (reordered) + + + Note + ---- + This function requires the seaborn_ package, which can be installed + with `pip install seaborn` or `conda install seaborn`. + + .. _seaborn: https://seaborn.pydata.org/ + """ from matplotlib.pyplot import figure, colorbar, cm, savefig, clf @@ -1870,6 +1905,17 @@ def plot_nearest_neighbors(self, filename=None, idx=0, \ set the font size for colorbar labels; font size for path labels on dendrogram default to 3 points smaller [``12``] + Returns + ------- + ax : axes + + Note + ---- + This function requires the seaborn_ package, which can be installed + with `pip install seaborn` or `conda install seaborn`. + + .. 
_seaborn: https://seaborn.pydata.org/ + """ from matplotlib.pyplot import figure, savefig, tight_layout, clf, show try: @@ -1927,7 +1973,8 @@ def plot_nearest_neighbors(self, filename=None, idx=0, \ head = self.targetdir + self.datadirs['plots'] outfile = os.path.join(head, filename) savefig(outfile, dpi=300, bbox_inches='tight') - show() + + return ax def cluster(self, distArray, method='ward', count_sort=False, \ @@ -1955,22 +2002,28 @@ def cluster(self, distArray, method='ward', count_sort=False, \ Returns ------- - list + Z + output from :func:`scipy.cluster.hierarchy.linkage`; list of indices representing the row-wise order of the objects after clustering + dgram + output from :func:`scipy.cluster.hierarchy.dendrogram` """ - import matplotlib - from scipy.cluster.hierarchy import linkage, dendrogram - + # perhaps there is a better way to manipulate the plot... or perhaps it + # is not even necessary? In any case, the try/finally makes sure that + # we are not permanently changing the user's global state + orig_linewidth = matplotlib.rcParams['lines.linewidth'] matplotlib.rcParams['lines.linewidth'] = 0.5 - - Z = linkage(distArray, method=method) - dgram = dendrogram(Z, no_labels=no_labels, orientation='left', \ - count_sort=count_sort, distance_sort=distance_sort, \ - no_plot=no_plot, color_threshold=color_threshold) + try: + Z = cluster.hierarchy.linkage(distArray, method=method) + dgram = cluster.hierarchy.dendrogram( + Z, no_labels=no_labels, orientation='left', + count_sort=count_sort, distance_sort=distance_sort, + no_plot=no_plot, color_threshold=color_threshold) + finally: + matplotlib.rcParams['lines.linewidth'] = orig_linewidth return Z, dgram - def _get_plot_obj_locs(self): """Find and return coordinates for dendrogram, heat map, and colorbar. 
@@ -2005,7 +2058,8 @@ def get_num_atoms(self): Returns ------- - the number of atoms + int + the number of atoms Note ---- @@ -2077,8 +2131,7 @@ def get_pairwise_distances(self, vectorform=False): err_str = "No distance data; do 'PSAnalysis.run(store=True)' first." raise ValueError(err_str) if vectorform: - from scipy.spatial.distance import squareform - return squareform(self.D) + return spatial.distance.squareform(self.D) else: return self.D diff --git a/package/setup.py b/package/setup.py index 1688ebc2ee3..4a0144e641c 100755 --- a/package/setup.py +++ b/package/setup.py @@ -494,11 +494,12 @@ def dynamic_author_list(): classifiers=CLASSIFIERS, cmdclass=cmdclass, requires=['numpy (>=1.10.4)', 'biopython', 'mmtf (>=1.0.0)', - 'networkx (>=1.0)', 'GridDataFormats (>=0.3.2)', 'joblib'], + 'networkx (>=1.0)', 'GridDataFormats (>=0.3.2)', 'joblib', + 'scipy', 'matplotlib (>=1.5.1)'], # all standard requirements are available through PyPi and # typically can be installed without difficulties through setuptools setup_requires=[ - 'numpy>=1.9.3', + 'numpy>=1.10.4', ], install_requires=[ 'numpy>=1.10.4', @@ -508,6 +509,8 @@ def dynamic_author_list(): 'six>=1.4.0', 'mmtf-python>=1.0.0', 'joblib', + 'scipy', + 'matplotlib>=1.5.1', ], # extras can be difficult to install through setuptools and/or # you might prefer to use the version available through your @@ -516,8 +519,6 @@ def dynamic_author_list(): 'AMBER': ['netCDF4>=1.0'], # for AMBER netcdf, also needs HDF5 # and netcdf-4 'analysis': [ - 'matplotlib>=1.5.1', - 'scipy', 'seaborn', # for annotated heat map and nearest neighbor # plotting in PSA 'sklearn', # For clustering and dimensionality reduction diff --git a/testsuite/MDAnalysisTests/analysis/test_density.py b/testsuite/MDAnalysisTests/analysis/test_density.py index 94fc1c38dad..6c1b1ca07bb 100644 --- a/testsuite/MDAnalysisTests/analysis/test_density.py +++ b/testsuite/MDAnalysisTests/analysis/test_density.py @@ -30,9 +30,6 @@ assert_raises) import MDAnalysis as 
mda -# imported inside a skipif-protected method so that it can -# be tested in the absence of scipy -## import MDAnalysis.analysis.density from MDAnalysisTests.datafiles import TPR, XTC, GRO from MDAnalysisTests import module_not_found, tempdir @@ -45,8 +42,6 @@ class TestDensity(TestCase): counts = 100 Lmax = 10. - @dec.skipif(module_not_found('scipy'), - "Test skipped because scipy is not available.") def setUp(self): import MDAnalysis.analysis.density @@ -123,8 +118,6 @@ class Test_density_from_Universe(TestCase): cutoffs = {'notwithin': 4.0, } precision = 5 - @dec.skipif(module_not_found('scipy'), - "Test skipped because scipy is not available.") def setUp(self): self.outfile = 'density.dx' self.universe = mda.Universe(self.topology, self.trajectory) diff --git a/testsuite/MDAnalysisTests/analysis/test_distances.py b/testsuite/MDAnalysisTests/analysis/test_distances.py index 506c812084c..d2b422f6aa5 100644 --- a/testsuite/MDAnalysisTests/analysis/test_distances.py +++ b/testsuite/MDAnalysisTests/analysis/test_distances.py @@ -21,22 +21,25 @@ # from __future__ import print_function, absolute_import +import scipy +import scipy.spatial + import MDAnalysis from MDAnalysisTests import module_not_found from MDAnalysisTests.datafiles import GRO from MDAnalysisTests.util import block_import +import MDAnalysis.analysis.distances + from numpy.testing import TestCase, assert_equal, dec import numpy as np + import warnings -from mock import Mock, patch import sys class TestContactMatrix(TestCase): - @dec.skipif(module_not_found('scipy'), - "Test skipped because scipy is not available.") def setUp(self): import MDAnalysis.analysis.distances self.coord = np.array([[1, 1, 1], @@ -87,17 +90,7 @@ def test_box_sparse(self): assert_equal(contacts.toarray(), self.res_pbc) class TestDist(TestCase): - '''Tests for MDAnalysis.analysis.distances.dist(). 
- Imports do not happen at the top level of the module - because of the scipy dependency.''' - - @dec.skipif(module_not_found('scipy'), - "Test skipped because scipy is not available.") - def setUp(self): - import MDAnalysis.analysis.distances - import scipy - import scipy.spatial self.u = MDAnalysis.Universe(GRO) self.ag = self.u.atoms[:20] self.u2 = MDAnalysis.Universe(GRO) @@ -142,17 +135,7 @@ def test_mismatch_exception(self): MDAnalysis.analysis.distances.dist(self.ag[:19], self.ag2) class TestBetween(TestCase): - '''Tests for MDAnalysis.analysis.distances.between(). - Imports do not happen at the top level of the module - because of the scipy dependency.''' - - @dec.skipif(module_not_found('scipy'), - "Test skipped because scipy is not available.") - def setUp(self): - import MDAnalysis.analysis.distances - import scipy - import scipy.spatial self.u = MDAnalysis.Universe(GRO) self.ag = self.u.atoms[:10] self.ag2 = self.u.atoms[12:33] @@ -190,41 +173,3 @@ def test_between_simple_case_indices_only(self): self.ag2, self.distance).indices) assert_equal(actual, self.expected) - -class TestImportWarnings(TestCase): - # see unit testing for warnings: - # http://stackoverflow.com/a/3892301 - - def setUp(self): - sys.modules.pop('MDAnalysis.analysis.distances', None) - - @block_import('scipy') - def test_warning_raised_no_scipy_module_level(self): - # an appropriate warning rather than an exception should be - # raised if scipy is absent when importing - # MDAnalysis.analysis.distances - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter("always") - import MDAnalysis.analysis.distances - assert issubclass(w[-1].category, ImportWarning) - - def test_silent_success_scipy_present_module_level(self): - # if scipy is present no module level ImportWarning should be - # raised when importing MDAnalysis.analysis.distances - mock = Mock() # mock presence of scipy - with patch.dict('sys.modules', {'scipy':mock}): - with warnings.catch_warnings(record=True) 
as w: - warnings.simplefilter("always") - import MDAnalysis.analysis.distances - assert w == [] - - @block_import('scipy') - def test_import_error_contact_matrix_no_scipy(self): - # contact_matrix should raise an ImportError if returntype is - # "sparse" and scipy is not available - with self.assertRaises(ImportError): - np.random.seed(321) - points = np.random.random_sample((10, 3)) - import MDAnalysis.analysis.distances - MDAnalysis.analysis.distances.contact_matrix(points, - returntype="sparse") diff --git a/testsuite/MDAnalysisTests/analysis/test_encore.py b/testsuite/MDAnalysisTests/analysis/test_encore.py index 447375c26c7..a23ad59a094 100644 --- a/testsuite/MDAnalysisTests/analysis/test_encore.py +++ b/testsuite/MDAnalysisTests/analysis/test_encore.py @@ -128,7 +128,7 @@ def test_triangular_matrix(): multiplied_triangular_matrix_2 = triangular_matrix_2 * scalar assert_equal(multiplied_triangular_matrix_2[0,1], expected_value * scalar, - err_msg="Error in TriangularMatrix: multiplication by scalar gave\ + err_msg="Error in TriangularMatrix: multiplication by scalar gave\ inconsistent results") triangular_matrix_2 *= scalar @@ -299,8 +299,6 @@ def test_ces(self): assert_almost_equal(result_value, expected_value, decimal=2, err_msg="Unexpected value for Cluster Ensemble Similarity: {0:f}. Expected {1:f}.".format(result_value, expected_value)) - @dec.skipif(module_not_found('scipy'), - "Test skipped because scipy is not available.") def test_dres_to_self(self): results, details = encore.dres([self.ens1, self.ens1]) result_value = results[0,1] @@ -308,8 +306,6 @@ def test_dres_to_self(self): assert_almost_equal(result_value, expected_value, decimal=2, err_msg="Dim. 
Reduction Ensemble Similarity to itself not zero: {0:f}".format(result_value)) - @dec.skipif(module_not_found('scipy'), - "Test skipped because scipy is not available.") def test_dres(self): results, details = encore.dres([self.ens1, self.ens2], selection="name CA and resnum 1-10") result_value = results[0,1] @@ -317,8 +313,6 @@ def test_dres(self): self.assertLess(result_value, upper_bound, msg="Unexpected value for Dim. reduction Ensemble Similarity: {0:f}. Expected {1:f}.".format(result_value, upper_bound)) - @dec.skipif(module_not_found('scipy'), - "Test skipped because scipy is not available.") def test_dres_without_superimposition(self): distance_matrix = encore.get_distance_matrix( encore.merge_universes([self.ens1, self.ens2]), @@ -338,8 +332,6 @@ def test_ces_convergence(self): assert_almost_equal(ev, results[i], decimal=2, err_msg="Unexpected value for Clustering Ensemble similarity in convergence estimation") - @dec.skipif(module_not_found('scipy'), - "Test skipped because scipy is not available.") def test_dres_convergence(self): expected_values = [ 0.3, 0.] 
results = encore.dres_convergence(self.ens1, 10) @@ -399,8 +391,6 @@ def test_ces_error_estimation_ensemble_bootstrap(self): err_msg="Unexpected standard daviation for bootstrapped samples in Clustering Ensemble similarity") @dec.slow - @dec.skipif(module_not_found('scipy'), - "Test skipped because scipy is not available.") def test_dres_error_estimation(self): average_upper_bound = 0.3 stdev_upper_bound = 0.2 @@ -847,18 +837,9 @@ def _check_sklearn_import_warns(self, package): warnings.simplefilter('always') assert_warns(ImportWarning, importlib.import_module, package) - @block_import('scipy') - def _check_scipy_import_warns(self, package): - warnings.simplefilter('always') - assert_warns(ImportWarning, importlib.import_module, package) - def test_import_warnings(self): for pkg in ( 'MDAnalysis.analysis.encore.dimensionality_reduction.DimensionalityReductionMethod', 'MDAnalysis.analysis.encore.clustering.ClusteringMethod', ): yield self._check_sklearn_import_warns, pkg - for pkg in ( - 'MDAnalysis.analysis.encore.similarity', - ): - yield self._check_scipy_import_warns, pkg diff --git a/testsuite/MDAnalysisTests/analysis/test_hole.py b/testsuite/MDAnalysisTests/analysis/test_hole.py index 1b4e4ed42b7..32ea4b2d9b3 100644 --- a/testsuite/MDAnalysisTests/analysis/test_hole.py +++ b/testsuite/MDAnalysisTests/analysis/test_hole.py @@ -32,6 +32,9 @@ assert_array_equal, assert_array_almost_equal, assert_) import numpy as np +import matplotlib +import mpl_toolkits.mplot3d + import nose from nose.plugins.attrib import attr @@ -150,27 +153,21 @@ def test_min_radius(self): @attr('slow') @dec.skipif(executable_not_found("hole"), msg="Test skipped because HOLE not found") - @dec.skipif(module_not_found("matplotlib")) def test_plot(self): - import matplotlib.axes ax = self.H.plot(label=True) assert_(isinstance(ax, matplotlib.axes.Axes), msg="H.plot() did not produce an Axes instance") @attr('slow') @dec.skipif(executable_not_found("hole"), msg="Test skipped because HOLE not 
found") - @dec.skipif(module_not_found("matplotlib")) def test_plot3D(self): - import mpl_toolkits.mplot3d ax = self.H.plot3D() assert_(isinstance(ax, mpl_toolkits.mplot3d.Axes3D), msg="H.plot3D() did not produce an Axes3D instance") @attr('slow') @dec.skipif(executable_not_found("hole"), msg="Test skipped because HOLE not found") - @dec.skipif(module_not_found("matplotlib")) def test_plot3D_rmax(self): - import mpl_toolkits.mplot3d ax = self.H.plot3D(rmax=2.5) assert_(isinstance(ax, mpl_toolkits.mplot3d.Axes3D), msg="H.plot3D(rmax=float) did not produce an Axes3D instance") diff --git a/testsuite/MDAnalysisTests/analysis/test_hydrogenbondautocorrel.py b/testsuite/MDAnalysisTests/analysis/test_hydrogenbondautocorrel.py index 6a65d3bf448..6ae709d489d 100644 --- a/testsuite/MDAnalysisTests/analysis/test_hydrogenbondautocorrel.py +++ b/testsuite/MDAnalysisTests/analysis/test_hydrogenbondautocorrel.py @@ -142,7 +142,6 @@ def test_intermittent_excl(self): # For `solve` the test trajectories aren't long enough # So spoof the results and check that solver finds solution - @dec.skipif(module_not_found('scipy')) def test_solve_continuous(self): hbond = HBAC(self.u, hydrogens=self.H, @@ -168,7 +167,6 @@ def actual_function_cont(t): np.array([0.75, 0.5, 0.1]), ) - @dec.skipif(module_not_found('scipy')) def test_solve_intermittent(self): hbond = HBAC(self.u, hydrogens=self.H, @@ -248,7 +246,6 @@ def test_bond_type_VE(self): sample_time=0.06, ) - @dec.skipif(module_not_found('scipy')) def test_solve_before_run_VE(self): hbond = HBAC(self.u, hydrogens=self.H, diff --git a/testsuite/MDAnalysisTests/analysis/test_leaflet.py b/testsuite/MDAnalysisTests/analysis/test_leaflet.py index 151e8bfc55b..00f217e2565 100644 --- a/testsuite/MDAnalysisTests/analysis/test_leaflet.py +++ b/testsuite/MDAnalysisTests/analysis/test_leaflet.py @@ -29,8 +29,6 @@ from MDAnalysisTests.datafiles import Martini_membrane_gro class TestLeafletFinder(TestCase): - @dec.skipif(module_not_found('scipy'), - 
"Test skipped because scipy is not available.") def setUp(self): self.universe = MDAnalysis.Universe(Martini_membrane_gro, Martini_membrane_gro) self.lipid_heads = self.universe.select_atoms("name PO4") diff --git a/testsuite/MDAnalysisTests/analysis/test_pca.py b/testsuite/MDAnalysisTests/analysis/test_pca.py index 0c65aa88c00..02f24c64f3c 100644 --- a/testsuite/MDAnalysisTests/analysis/test_pca.py +++ b/testsuite/MDAnalysisTests/analysis/test_pca.py @@ -87,8 +87,6 @@ def test_transform_universe(): pca_test.transform(u2) @staticmethod - @dec.skipif(module_not_found('scipy'), - "Test skipped because scipy is not available.") def test_cosine_content(): rand = MDAnalysis.Universe(RANDOM_WALK_TOPO, RANDOM_WALK) pca_random = pca.PCA(rand).run() diff --git a/testsuite/MDAnalysisTests/analysis/test_persistencelength.py b/testsuite/MDAnalysisTests/analysis/test_persistencelength.py index 0ad4f8f24e0..c4e28a50031 100644 --- a/testsuite/MDAnalysisTests/analysis/test_persistencelength.py +++ b/testsuite/MDAnalysisTests/analysis/test_persistencelength.py @@ -24,7 +24,10 @@ import MDAnalysis from MDAnalysis.analysis import polymer from MDAnalysis.exceptions import NoDataError + import numpy as np +import matplotlib + from numpy.testing import ( assert_, assert_almost_equal, @@ -61,8 +64,6 @@ def test_run(self): assert_(len(p.results) == 280) assert_almost_equal(p.lb, 1.485, 3) - @dec.skipif(module_not_found('scipy'), - "Test skipped because scipy is not available.") def test_fit(self): p = self._make_p() p.run() @@ -71,14 +72,9 @@ def test_fit(self): assert_almost_equal(p.lp, 6.504, 3) assert_(len(p.fit) == len(p.results)) - @dec.skipif(module_not_found('matplotlib'), - "Test skipped because matplotlib is not available.") - @dec.skipif(module_not_found('scipy'), - "Test skipped because scipy is not available.") def test_plot_ax_return(self): '''Ensure that a matplotlib axis object is returned when plot() is called.''' - import matplotlib p = self._make_p() p.run() 
p.perform_fit() @@ -104,14 +100,10 @@ def tearDown(self): del self.a_ref del self.y - @dec.skipif(module_not_found('scipy'), - "Test skipped because scipy is not available.") def test_fit_simple(self): a = polymer.fit_exponential_decay(self.x, self.y) assert_(a == self.a_ref) - @dec.skipif(module_not_found('scipy'), - "Test skipped because scipy is not available.") def test_fit_noisy(self): noise = np.sin(self.x) * 0.01 y2 = noise + self.y diff --git a/testsuite/MDAnalysisTests/analysis/test_psa.py b/testsuite/MDAnalysisTests/analysis/test_psa.py index ddc0b8f4e71..9f89b501abf 100644 --- a/testsuite/MDAnalysisTests/analysis/test_psa.py +++ b/testsuite/MDAnalysisTests/analysis/test_psa.py @@ -28,6 +28,8 @@ assert_array_almost_equal, assert_, assert_almost_equal, assert_equal) import numpy as np +import scipy +import scipy.spatial from MDAnalysisTests.datafiles import PSF, DCD, DCD2 from MDAnalysisTests import parser_not_found, tempdir, module_not_found @@ -36,10 +38,6 @@ class TestPSAnalysis(TestCase): @dec.skipif(parser_not_found('DCD'), 'DCD parser not available. 
Are you using python 3?') - @dec.skipif(module_not_found('matplotlib'), - "Test skipped because matplotlib is not available.") - @dec.skipif(module_not_found('scipy'), - "Test skipped because scipy is not available.") def setUp(self): self.tmpdir = tempdir.TempDir() self.iu1 = np.triu_indices(3, k=1) @@ -187,9 +185,6 @@ class _BaseHausdorffDistance(TestCase): for various Hausdorff distance calculation properties.''' - @dec.skipif(module_not_found('scipy'), - 'scipy not available') - def setUp(self): self.random_angles = np.random.random((100,)) * np.pi * 2 self.random_columns = np.column_stack((self.random_angles, @@ -247,10 +242,9 @@ def setUp(self): class TestWeightedAvgHausdorffSymmetric(_BaseHausdorffDistance): '''Tests for weighted average and symmetric (undirected) Hausdorff distance between point sets in 3D.''' + def setUp(self): super(TestWeightedAvgHausdorffSymmetric, self).setUp() - import scipy - import scipy.spatial self.h = PSA.hausdorff_wavg self.distance_matrix = scipy.spatial.distance.cdist(self.path_1, self.path_2) @@ -270,10 +264,9 @@ def test_asymmetric_weight(self): class TestAvgHausdorffSymmetric(_BaseHausdorffDistance): '''Tests for unweighted average and symmetric (undirected) Hausdorff distance between point sets in 3D.''' + def setUp(self): super(TestAvgHausdorffSymmetric, self).setUp() - import scipy - import scipy.spatial self.h = PSA.hausdorff_avg self.distance_matrix = scipy.spatial.distance.cdist(self.path_1, self.path_2) From f8d5c5ec4ac60fa3649a8da19a76c35a2a50a398 Mon Sep 17 00:00:00 2001 From: Max Linke Date: Sat, 17 Jun 2017 13:38:24 +0200 Subject: [PATCH 2/4] minor clean ups - travis.yml - docs, code style in analysis and analysis tests --- .travis.yml | 1 - .../MDAnalysis/analysis/hbonds/hbond_autocorrel.py | 1 + package/MDAnalysis/analysis/psa.py | 11 ++++++----- testsuite/MDAnalysisTests/analysis/test_encore.py | 12 ++++++------ 4 files changed, 13 insertions(+), 12 deletions(-) diff --git a/.travis.yml b/.travis.yml index 
dfe47fe6fb9..018720eb1ae 100644 --- a/.travis.yml +++ b/.travis.yml @@ -27,7 +27,6 @@ env: - BUILD_CMD="pip install -v package/ && pip install testsuite/" - CONDA_DEPENDENCIES="mmtf-python nose=1.3.7 mock six biopython networkx cython joblib nose-timer matplotlib scipy griddataformats" - CONDA_ALL_DEPENDENCIES="mmtf-python nose=1.3.7 mock six biopython networkx cython joblib nose-timer matplotlib netcdf4 scikit-learn scipy griddataformats seaborn coveralls clustalw=2.1" - - PIP_DEPENDENCIES="" - CONDA_CHANNELS='biobuilds conda-forge' - CONDA_CHANNEL_PRIORITY=True - NUMPY_VERSION=stable diff --git a/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py b/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py index 70c15b8795d..6a5bd82f9ab 100644 --- a/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py +++ b/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py @@ -423,6 +423,7 @@ def solve(self, p_guess=None): Initial guess for the leastsq fit, must match the shape of the expected coefficients + Continuous defition results are fitted to a double exponential with :func:`scipy.optimize.leastsq`, intermittent definition are fit to a triple exponential. diff --git a/package/MDAnalysis/analysis/psa.py b/package/MDAnalysis/analysis/psa.py index 4b91ca9261b..62d302211dd 100644 --- a/package/MDAnalysis/analysis/psa.py +++ b/package/MDAnalysis/analysis/psa.py @@ -409,26 +409,27 @@ def hausdorff(P, Q): still has to calculate the *symmetric* Hausdorff distance as `max(directed_hausdorff(P, Q)[0], directed_hausdorff(Q, P)[0])`. + References ---------- .. [Huttenlocher1993] D. P. Huttenlocher, G. A. Klanderman, and W. J. Rucklidge. Comparing images using the Hausdorff distance. IEEE Transactions on Pattern Analysis and Machine Intelligence, 15(9):850–863, 1993. - .. [Taha2015] A. A. Taha and A. Hanbury. An efficient algorithm for calculating the exact Hausdorff distance. IEEE Transactions On Pattern Analysis And Machine Intelligence, 37:2153-63, 2015. 
- SeeAlso - ------- + + See Also + -------- scipy.spatial.distance.directed_hausdorff """ N, axis = get_coord_axes(P) d = get_msd_matrix(P, Q, axis=axis) - return ( max( np.amax(np.amin(d, axis=0)), \ - np.amax(np.amin(d, axis=1)) ) / N )**0.5 + return (max(np.amax(np.amin(d, axis=0)), + np.amax(np.amin(d, axis=1))) / N)**0.5 def hausdorff_wavg(P, Q): diff --git a/testsuite/MDAnalysisTests/analysis/test_encore.py b/testsuite/MDAnalysisTests/analysis/test_encore.py index a23ad59a094..aff0ff6f11d 100644 --- a/testsuite/MDAnalysisTests/analysis/test_encore.py +++ b/testsuite/MDAnalysisTests/analysis/test_encore.py @@ -118,18 +118,18 @@ def test_triangular_matrix(): incremented_triangular_matrix = triangular_matrix + scalar assert_equal(incremented_triangular_matrix[0,1], expected_value + scalar, - err_msg="Error in TriangularMatrix: addition of scalar gave\ -inconsistent results") + err_msg="Error in TriangularMatrix: addition of scalar gave" + "inconsistent results") triangular_matrix += scalar assert_equal(triangular_matrix[0,1], expected_value + scalar, - err_msg="Error in TriangularMatrix: addition of scalar gave\ -inconsistent results") + err_msg="Error in TriangularMatrix: addition of scalar gave" + "inconsistent results") multiplied_triangular_matrix_2 = triangular_matrix_2 * scalar assert_equal(multiplied_triangular_matrix_2[0,1], expected_value * scalar, - err_msg="Error in TriangularMatrix: multiplication by scalar gave\ -inconsistent results") + err_msg="Error in TriangularMatrix: multiplication by scalar gave" + "inconsistent results") triangular_matrix_2 *= scalar assert_equal(triangular_matrix_2[0,1], expected_value * scalar, From 8dfe0103d02ab48856c61b76e4ff8e9480c1462b Mon Sep 17 00:00:00 2001 From: Manuel Nuno Melo Date: Mon, 19 Jun 2017 01:44:14 +0200 Subject: [PATCH 3/4] Initial adaptation of peak.util.imports --- package/MDAnalysis/analysis/density.py | 26 +- package/MDAnalysis/analysis/distances.py | 4 +- .../encore/clustering/ClusteringMethod.py 
| 529 +++++++++--------- .../DimensionalityReductionMethod.py | 84 ++- .../MDAnalysis/analysis/encore/similarity.py | 7 +- .../analysis/hbonds/hbond_autocorrel.py | 4 + package/MDAnalysis/analysis/hole.py | 7 +- package/MDAnalysis/analysis/legacy/x3dna.py | 5 +- package/MDAnalysis/analysis/pca.py | 6 +- package/MDAnalysis/analysis/polymer.py | 12 +- package/MDAnalysis/analysis/psa.py | 138 +++-- package/MDAnalysis/lib/lazy.py | 271 +++++++++ .../MDAnalysis/visualization/streamlines.py | 16 +- .../visualization/streamlines_3D.py | 7 +- .../analysis/test_distances.py | 1 - .../MDAnalysisTests/analysis/test_encore.py | 24 +- 16 files changed, 678 insertions(+), 463 deletions(-) create mode 100644 package/MDAnalysis/lib/lazy.py diff --git a/package/MDAnalysis/analysis/density.py b/package/MDAnalysis/analysis/density.py index 26d45af3fd5..b77975f9820 100644 --- a/package/MDAnalysis/analysis/density.py +++ b/package/MDAnalysis/analysis/density.py @@ -118,31 +118,7 @@ import os.path import errno import warnings - -try: - from gridData import Grid -except ImportError: - raise ImportError( - """ImportError: The GridDataFormats package can not be found! - - The 'gridData' module from GridDataFormats could not be - imported. Please install it first. You can try installing - directly from the internet: - - pip install GridDataFormats - - or - - conda config --add channels conda-forge - conda install griddataformats - - Alternatively, download the package from - - http://pypi.python.org/pypi/GridDataFormats/ - - and install in the usual manner. 
- """ - ) +from gridData import Grid import MDAnalysis from MDAnalysis.core import groups diff --git a/package/MDAnalysis/analysis/distances.py b/package/MDAnalysis/analysis/distances.py index fc3dc4432a6..a240e7406ad 100644 --- a/package/MDAnalysis/analysis/distances.py +++ b/package/MDAnalysis/analysis/distances.py @@ -42,7 +42,6 @@ 'contact_matrix', 'dist', 'between'] import numpy as np -import scipy.sparse from MDAnalysis.lib.distances import distance_array, self_distance_array from MDAnalysis.lib.c_distances import contact_matrix_no_pbc, contact_matrix_pbc @@ -52,6 +51,9 @@ import logging logger = logging.getLogger("MDAnalysis.analysis.distances") +# Optional and/or lazily imported modules +from MDAnalysis.lib import lazy +scipy = lazy.import_module('scipy.sparse', level='base') def contact_matrix(coord, cutoff=15.0, returntype="numpy", box=None): '''Calculates a matrix of contacts. diff --git a/package/MDAnalysis/analysis/encore/clustering/ClusteringMethod.py b/package/MDAnalysis/analysis/encore/clustering/ClusteringMethod.py index a293b755e3e..b2ed24c9d3b 100644 --- a/package/MDAnalysis/analysis/encore/clustering/ClusteringMethod.py +++ b/package/MDAnalysis/analysis/encore/clustering/ClusteringMethod.py @@ -41,16 +41,10 @@ # Import native affinity propagation implementation from . 
import affinityprop -# Attempt to import scikit-learn clustering algorithms -try: - import sklearn.cluster -except ImportError: - sklearn = None - msg = "sklearn.cluster could not be imported: some functionality will " \ - "not be available in encore.fit_clusters()" - warnings.warn(msg, category=ImportWarning) - logging.warn(msg) - del msg +# Optional and/or lazily loaded modules +from MDAnalysis.lib import lazy +# scikit-learn clustering algorithms +sklearn = lazy.import_module('sklearn.cluster', level='base') def encode_centroid_info(clusters, cluster_centers_indices): @@ -158,270 +152,269 @@ def __call__(self, distance_matrix): details = {} return clusters, details -if sklearn: - class AffinityPropagation(ClusteringMethod): +class AffinityPropagation(ClusteringMethod): + """ + Interface to the Affinity propagation clustering procedure implemented + in sklearn. + """ + + def __init__(self, + damping=0.9, preference=-1.0, + max_iter=500, convergence_iter=50, + **kwargs): + """ + Parameters + ---------- + + damping : float, optional + Damping factor (default is 0.9). Parameter for the Affinity + Propagation for clustering. + + preference : float, optional + Preference parameter used in the Affinity Propagation algorithm + for clustering (default -1.0). A high preference value results + in many clusters, a low preference will result in fewer numbers + of clusters. + + max_iter : int, optional + Maximum number of iterations for affinity propagation (default + is 500). + + convergence_iter : int, optional + Minimum number of unchanging iterations to achieve convergence + (default is 50). Parameter in the Affinity Propagation for + clustering. 
+ + """ + self.ap = \ + sklearn.cluster.AffinityPropagation( + damping=damping, + preference=preference, + max_iter=max_iter, + convergence_iter=convergence_iter, + affinity="precomputed", + **kwargs) + + def __call__(self, distance_matrix): + """ + Parameters + ---------- + + distance_matrix : encore.utils.TriangularMatrix + conformational distance matrix + + Returns + ------- + numpy.array + list of cluster indices + + """ + logging.info("Starting Affinity Propagation: {0}".format + (self.ap.get_params())) + + # Convert from distance matrix to similarity matrix + similarity_matrix = distance_matrix.as_array() * -1 + clusters = self.ap.fit_predict(similarity_matrix) + clusters = encode_centroid_info(clusters, + self.ap.cluster_centers_indices_) + details = {} + return clusters, details + + +class DBSCAN(ClusteringMethod): + """ + Interface to the DBSCAN clustering procedure implemented in sklearn. + """ + def __init__(self, + eps=0.5, + min_samples=5, + algorithm="auto", + leaf_size=30, + **kwargs): """ - Interface to the Affinity propagation clustering procedure implemented - in sklearn. + Parameters + ---------- + + eps : float, optional (default = 0.5) + The maximum distance between two samples for them to be + considered as in the same neighborhood. + + min_samples : int, optional (default = 5) + The number of samples (or total weight) in a neighborhood for + a point to be considered as a core point. This includes the + point itself. + + algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, optional + The algorithm to be used by the NearestNeighbors module + to compute pointwise distances and find nearest neighbors. + See NearestNeighbors module documentation for details. + + leaf_size : int, optional (default = 30) + Leaf size passed to BallTree or cKDTree. This can affect the + speed of the construction and query, as well as the memory + required to store the tree. The optimal value depends + on the nature of the problem. 
+ + sample_weight : array, shape (n_samples,), optional + Weight of each sample, such that a sample with a weight of at + least ``min_samples`` is by itself a core sample; a sample with + negative weight may inhibit its eps-neighbor from being core. + Note that weights are absolute, and default to 1. + + """ + + self.dbscan = sklearn.cluster.DBSCAN(eps=eps, + min_samples = min_samples, + algorithm=algorithm, + leaf_size = leaf_size, + metric="precomputed", + **kwargs) + + def __call__(self, distance_matrix): """ + Parameters + ---------- + + distance_matrix : encore.utils.TriangularMatrix + conformational distance matrix + + + Returns + ------- + numpy.array + list of cluster indices - def __init__(self, - damping=0.9, preference=-1.0, - max_iter=500, convergence_iter=50, - **kwargs): - """ - Parameters - ---------- - - damping : float, optional - Damping factor (default is 0.9). Parameter for the Affinity - Propagation for clustering. - - preference : float, optional - Preference parameter used in the Affinity Propagation algorithm - for clustering (default -1.0). A high preference value results - in many clusters, a low preference will result in fewer numbers - of clusters. - - max_iter : int, optional - Maximum number of iterations for affinity propagation (default - is 500). - - convergence_iter : int, optional - Minimum number of unchanging iterations to achieve convergence - (default is 50). Parameter in the Affinity Propagation for - clustering. 
- - """ - self.ap = \ - sklearn.cluster.AffinityPropagation( - damping=damping, - preference=preference, - max_iter=max_iter, - convergence_iter=convergence_iter, - affinity="precomputed", - **kwargs) - - def __call__(self, distance_matrix): - """ - Parameters - ---------- - - distance_matrix : encore.utils.TriangularMatrix - conformational distance matrix - - Returns - ------- - numpy.array - list of cluster indices - - """ - logging.info("Starting Affinity Propagation: {0}".format - (self.ap.get_params())) - - # Convert from distance matrix to similarity matrix - similarity_matrix = distance_matrix.as_array() * -1 - clusters = self.ap.fit_predict(similarity_matrix) - clusters = encode_centroid_info(clusters, - self.ap.cluster_centers_indices_) - details = {} - return clusters, details - - - class DBSCAN(ClusteringMethod): """ - Interface to the DBSCAN clustering procedure implemented in sklearn. + logging.info("Starting DBSCAN: {0}".format( + self.dbscan.get_params())) + clusters = self.dbscan.fit_predict(distance_matrix.as_array()) + if np.min(clusters == -1): + clusters += 1 + # No centroid information is provided by DBSCAN, so we just + # pick random members + cluster_representatives = np.unique(clusters, return_index=True)[1] + clusters = encode_centroid_info(clusters, + cluster_representatives) + details = {} + return clusters, details + +class KMeans(ClusteringMethod): + + # Whether the method accepts a distance matrix + accepts_distance_matrix = False + + """ + Interface to the KMeans clustering procedure implemented in sklearn. 
+ """ + def __init__(self, + n_clusters, + max_iter = 300, + n_init = 10, + init = 'k-means++', + algorithm="auto", + tol = 1e-4, + verbose=False, + random_state=None, + copy_x=True, + n_jobs=1, + **kwargs): """ - def __init__(self, - eps=0.5, - min_samples=5, - algorithm="auto", - leaf_size=30, - **kwargs): - """ - Parameters - ---------- - - eps : float, optional (default = 0.5) - The maximum distance between two samples for them to be - considered as in the same neighborhood. - - min_samples : int, optional (default = 5) - The number of samples (or total weight) in a neighborhood for - a point to be considered as a core point. This includes the - point itself. - - algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, optional - The algorithm to be used by the NearestNeighbors module - to compute pointwise distances and find nearest neighbors. - See NearestNeighbors module documentation for details. - - leaf_size : int, optional (default = 30) - Leaf size passed to BallTree or cKDTree. This can affect the - speed of the construction and query, as well as the memory - required to store the tree. The optimal value depends - on the nature of the problem. - - sample_weight : array, shape (n_samples,), optional - Weight of each sample, such that a sample with a weight of at - least ``min_samples`` is by itself a core sample; a sample with - negative weight may inhibit its eps-neighbor from being core. - Note that weights are absolute, and default to 1. 
- - """ - - self.dbscan = sklearn.cluster.DBSCAN(eps=eps, - min_samples = min_samples, - algorithm=algorithm, - leaf_size = leaf_size, - metric="precomputed", - **kwargs) - - def __call__(self, distance_matrix): - """ - Parameters - ---------- - - distance_matrix : encore.utils.TriangularMatrix - conformational distance matrix - - - Returns - ------- - numpy.array - list of cluster indices - - """ - logging.info("Starting DBSCAN: {0}".format( - self.dbscan.get_params())) - clusters = self.dbscan.fit_predict(distance_matrix.as_array()) - if np.min(clusters == -1): - clusters += 1 - # No centroid information is provided by DBSCAN, so we just - # pick random members - cluster_representatives = np.unique(clusters, return_index=True)[1] - clusters = encode_centroid_info(clusters, - cluster_representatives) - details = {} - return clusters, details - - class KMeans(ClusteringMethod): - - # Whether the method accepts a distance matrix - accepts_distance_matrix = False + Parameters + ---------- + n_clusters : int + The number of clusters to form as well as the number of + centroids to generate. + + max_iter : int, optional (default 300) + Maximum number of iterations of the k-means algorithm to run. + + n_init : int, optional (default 10) + Number of time the k-means algorithm will be run with different + centroid seeds. The final results will be the best output of + n_init consecutive runs in terms of inertia. + + init : {'k-means++', 'random', or ndarray, or a callable}, optional + Method for initialization, default to 'k-means++': + 'k-means++' : selects initial cluster centers for k-mean + clustering in a smart way to speed up convergence. See section + Notes in k_init for more details. + 'random': generate k centroids from a Gaussian with mean and + variance estimated from the data. + If an ndarray is passed, it should be of shape + (n_clusters, n_features) and gives the initial centers. 
+ If a callable is passed, it should take arguments X, k and + and a ranndom state and return an initialization. + + precompute_distances : {'auto', True, False} + Precompute distances (faster but takes more memory). + 'auto' : do not precompute distances if + n_samples * n_clusters > 12 million. This corresponds to about + 100MB overhead per job using double precision. + True : always precompute distances + False : never precompute distances + + tol : float, optional (default 1e-4) + The relative increment in the results before declaring + convergence. + + verbose : boolean, optional (default False) + Verbosity mode. + + random_state : integer or numpy.RandomState, optional + The generator used to initialize the centers. If an integer is + given, it fixes the seed. Defaults to the global numpy random + number generator. + + copy_x : boolean, optional + When pre-computing distances it is more numerically accurate to + center the data first. If copy_x is True, then the original + data is not modified. If False, the original data is modified, + and put back before the function returns, but small numerical + differences may be introduced by subtracting and then adding + the data mean. + + n_jobs : int + The number of jobs to use for the computation. This works by + computing each of the n_init runs in parallel. If -1 all CPUs + are used. If 1 is given, no parallel computing code is used at + all, which is useful for debugging. For n_jobs below -1, + (n_cpus + 1 + n_jobs) are used. Thus for n_jobs = -2, all CPUs + but one are used. """ - Interface to the KMeans clustering procedure implemented in sklearn. 
+ self.kmeans = sklearn.cluster.KMeans(n_clusters = n_clusters, + max_iter = max_iter, + n_init = n_init, + init = init, + precompute_distances='auto', + tol = tol, + verbose=verbose, + random_state=random_state, + copy_x=copy_x, + n_jobs=n_jobs, + **kwargs) + + def __call__(self, coordinates): """ - def __init__(self, - n_clusters, - max_iter = 300, - n_init = 10, - init = 'k-means++', - algorithm="auto", - tol = 1e-4, - verbose=False, - random_state=None, - copy_x=True, - n_jobs=1, - **kwargs): - """ - Parameters - ---------- - n_clusters : int - The number of clusters to form as well as the number of - centroids to generate. - - max_iter : int, optional (default 300) - Maximum number of iterations of the k-means algorithm to run. - - n_init : int, optional (default 10) - Number of time the k-means algorithm will be run with different - centroid seeds. The final results will be the best output of - n_init consecutive runs in terms of inertia. - - init : {'k-means++', 'random', or ndarray, or a callable}, optional - Method for initialization, default to 'k-means++': - 'k-means++' : selects initial cluster centers for k-mean - clustering in a smart way to speed up convergence. See section - Notes in k_init for more details. - 'random': generate k centroids from a Gaussian with mean and - variance estimated from the data. - If an ndarray is passed, it should be of shape - (n_clusters, n_features) and gives the initial centers. - If a callable is passed, it should take arguments X, k and - and a ranndom state and return an initialization. - - precompute_distances : {'auto', True, False} - Precompute distances (faster but takes more memory). - 'auto' : do not precompute distances if - n_samples * n_clusters > 12 million. This corresponds to about - 100MB overhead per job using double precision. 
- True : always precompute distances - False : never precompute distances - - tol : float, optional (default 1e-4) - The relative increment in the results before declaring - convergence. - - verbose : boolean, optional (default False) - Verbosity mode. - - random_state : integer or numpy.RandomState, optional - The generator used to initialize the centers. If an integer is - given, it fixes the seed. Defaults to the global numpy random - number generator. - - copy_x : boolean, optional - When pre-computing distances it is more numerically accurate to - center the data first. If copy_x is True, then the original - data is not modified. If False, the original data is modified, - and put back before the function returns, but small numerical - differences may be introduced by subtracting and then adding - the data mean. - - n_jobs : int - The number of jobs to use for the computation. This works by - computing each of the n_init runs in parallel. If -1 all CPUs - are used. If 1 is given, no parallel computing code is used at - all, which is useful for debugging. For n_jobs below -1, - (n_cpus + 1 + n_jobs) are used. Thus for n_jobs = -2, all CPUs - but one are used. 
- - """ - self.kmeans = sklearn.cluster.KMeans(n_clusters = n_clusters, - max_iter = max_iter, - n_init = n_init, - init = init, - precompute_distances='auto', - tol = tol, - verbose=verbose, - random_state=random_state, - copy_x=copy_x, - n_jobs=n_jobs, - **kwargs) - - def __call__(self, coordinates): - """ - Parameters - ---------- - - coordinates : np.array - trajectory atom coordinates - - - Returns - ------- - numpy.array - list of cluster indices - """ - logging.info("Starting Kmeans: {0}".format( - (self.kmeans.get_params()))) - clusters = self.kmeans.fit_predict(coordinates) - distances = self.kmeans.transform(coordinates) - cluster_center_indices = np.argmin(distances, axis=0) - clusters = encode_centroid_info(clusters, - cluster_center_indices) - details = {} - return clusters, details + Parameters + ---------- + + coordinates : np.array + trajectory atom coordinates + + + Returns + ------- + numpy.array + list of cluster indices + """ + logging.info("Starting Kmeans: {0}".format( + (self.kmeans.get_params()))) + clusters = self.kmeans.fit_predict(coordinates) + distances = self.kmeans.transform(coordinates) + cluster_center_indices = np.argmin(distances, axis=0) + clusters = encode_centroid_info(clusters, + cluster_center_indices) + details = {} + return clusters, details diff --git a/package/MDAnalysis/analysis/encore/dimensionality_reduction/DimensionalityReductionMethod.py b/package/MDAnalysis/analysis/encore/dimensionality_reduction/DimensionalityReductionMethod.py index dfb6226d2e8..f4f015e7f97 100644 --- a/package/MDAnalysis/analysis/encore/dimensionality_reduction/DimensionalityReductionMethod.py +++ b/package/MDAnalysis/analysis/encore/dimensionality_reduction/DimensionalityReductionMethod.py @@ -40,15 +40,10 @@ # Import native affinity propagation implementation from . 
import stochasticproxembed -# Attempt to import scikit-learn clustering algorithms -try: - import sklearn.decomposition -except ImportError: - sklearn = None - import warnings - warnings.warn("sklearn.decomposition could not be imported: some " - "functionality will not be available in " - "encore.dimensionality_reduction()", category=ImportWarning) +# Optional and/or lazily loaded modules +from MDAnalysis.lib import lazy +# scikit-learn clustering algorithms +sklearn = lazy.import_module('sklearn.decomposition', level='base') class DimensionalityReductionMethod (object): @@ -150,45 +145,42 @@ def __call__(self, distance_matrix): return coordinates, {"final_stress": final_stress} +class PrincipalComponentAnalysis(DimensionalityReductionMethod): + """ + Interface to the PCA dimensionality reduction method implemented in + sklearn. + """ -if sklearn: + # Whether the method accepts a distance matrix + accepts_distance_matrix = False - class PrincipalComponentAnalysis(DimensionalityReductionMethod): + def __init__(self, + dimension = 2, + **kwargs): + """ + Parameters + ---------- + + dimension : int + Number of dimensions to which the conformational space will be + reduced to (default is 3). """ - Interface to the PCA dimensionality reduction method implemented in - sklearn. + self.pca = sklearn.decomposition.PCA(n_components=dimension, + **kwargs) + + def __call__(self, coordinates): """ + Parameters + ---------- + + coordinates : np.array + trajectory atom coordinates + - # Whether the method accepts a distance matrix - accepts_distance_matrix = False - - def __init__(self, - dimension = 2, - **kwargs): - """ - Parameters - ---------- - - dimension : int - Number of dimensions to which the conformational space will be - reduced to (default is 3). 
- """ - self.pca = sklearn.decomposition.PCA(n_components=dimension, - **kwargs) - - def __call__(self, coordinates): - """ - Parameters - ---------- - - coordinates : np.array - trajectory atom coordinates - - - Returns - ------- - numpy.array - coordinates in reduced space - """ - coordinates = self.pca.fit_transform(coordinates) - return coordinates.T, {} + Returns + ------- + numpy.array + coordinates in reduced space + """ + coordinates = self.pca.fit_transform(coordinates) + return coordinates.T, {} diff --git a/package/MDAnalysis/analysis/encore/similarity.py b/package/MDAnalysis/analysis/encore/similarity.py index 53ec497f5ce..dce5f3e63f5 100644 --- a/package/MDAnalysis/analysis/encore/similarity.py +++ b/package/MDAnalysis/analysis/encore/similarity.py @@ -19,7 +19,7 @@ # MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. # J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 # -"""================================================================================= +r""" Ensemble Similarity Calculations --- :mod:`MDAnalysis.analysis.encore.similarity` ================================================================================= @@ -176,7 +176,6 @@ import logging import numpy as np -import scipy.stats import MDAnalysis as mda @@ -195,6 +194,10 @@ from .utils import merge_universes from .utils import trm_indices_diag, trm_indices_nodiag +# Optional and/or lazily imported modules +from MDAnalysis.lib import lazy +scipy = lazy.import_module('scipy.stats', level='base') + # Low boundary value for log() argument - ensure no nans EPSILON = 1E-15 diff --git a/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py b/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py index 6a5bd82f9ab..e21a457fdac 100644 --- a/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py +++ b/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py @@ -162,6 +162,10 @@ from MDAnalysis.lib.log import ProgressMeter from MDAnalysis.lib.distances import 
distance_array, calc_angles, calc_bonds +# Optional and/or lazily loaded modules +from MDAnalysis.lib import lazy +leastsq = lazy.import_function('scipy.optimize.leastsq') + class HydrogenBondAutoCorrel(object): """Perform a time autocorrelation of the hydrogen bonds in the system. diff --git a/package/MDAnalysis/analysis/hole.py b/package/MDAnalysis/analysis/hole.py index c2fd3cc4ead..c6210e5a858 100644 --- a/package/MDAnalysis/analysis/hole.py +++ b/package/MDAnalysis/analysis/hole.py @@ -258,14 +258,17 @@ from itertools import cycle import numpy as np -import matplotlib -import matplotlib.pyplot as plt from MDAnalysis import Universe from MDAnalysis.exceptions import ApplicationError from MDAnalysis.lib.util import which, realpath, asiterable from MDAnalysis.lib.util import FORTRANReader +# Optional and/or lazily loaded modules +from MDAnalysis.lib import lazy +# This makes 'cm' available as an attr of 'matplotlib' +matplotlib = lazy.import_module('matplotlib.cm', level='base') +plt = lazy.import_module('matplotlib.pyplot') logger = logging.getLogger("MDAnalysis.analysis.hole") diff --git a/package/MDAnalysis/analysis/legacy/x3dna.py b/package/MDAnalysis/analysis/legacy/x3dna.py index 633ad9def1b..79062dfe276 100644 --- a/package/MDAnalysis/analysis/legacy/x3dna.py +++ b/package/MDAnalysis/analysis/legacy/x3dna.py @@ -139,11 +139,14 @@ from collections import OrderedDict import numpy as np -import matplotlib.pyplot as plt from MDAnalysis import ApplicationError from MDAnalysis.lib.util import which, realpath, asiterable +# Optional and/or lazily loaded modules +from MDAnalysis.lib import lazy +plt = lazy.import_module('matplotlib.pyplot') + import logging logger = logging.getLogger("MDAnalysis.analysis.x3dna") diff --git a/package/MDAnalysis/analysis/pca.py b/package/MDAnalysis/analysis/pca.py index 7e828a166bd..eb748c26ae3 100644 --- a/package/MDAnalysis/analysis/pca.py +++ b/package/MDAnalysis/analysis/pca.py @@ -106,14 +106,16 @@ import warnings import numpy 
as np -import scipy.integrate from MDAnalysis import Universe from MDAnalysis.analysis.align import _fit_to from MDAnalysis.lib.log import ProgressMeter -from .base import AnalysisBase +from MDAnalysis.analysis.base import AnalysisBase +# Optional and/or lazily imported modules +from MDAnalysis.lib import lazy +scipy = lazy.import_function('scipy.integrate', level='base') class PCA(AnalysisBase): """Principal component analysis on an MD trajectory. diff --git a/package/MDAnalysis/analysis/polymer.py b/package/MDAnalysis/analysis/polymer.py index 355a063eaee..ec0fe4ffcb9 100644 --- a/package/MDAnalysis/analysis/polymer.py +++ b/package/MDAnalysis/analysis/polymer.py @@ -40,9 +40,14 @@ import logging -from .. import NoDataError -from ..lib.distances import calc_bonds -from .base import AnalysisBase +from MDAnalysis import NoDataError +from MDAnalysis.lib.distances import calc_bonds +from MDAnalysis.analysis.base import AnalysisBase + +# Optional and/or lazily loaded modules +from MDAnalysis.lib import lazy +curve_fit = lazy.import_function('scipy.optimize.curve_fit') +plt = lazy.import_module('matplotlib.pyplot') logger = logging.getLogger(__name__) @@ -138,7 +143,6 @@ def perform_fit(self): def plot(self, ax=None): """Oooh fancy""" - import matplotlib.pyplot as plt if ax is None: ax = plt.gca() ax.plot(self.x, self.results, 'ro', label='Result') diff --git a/package/MDAnalysis/analysis/psa.py b/package/MDAnalysis/analysis/psa.py index 62d302211dd..a8c7cd03a9d 100644 --- a/package/MDAnalysis/analysis/psa.py +++ b/package/MDAnalysis/analysis/psa.py @@ -216,8 +216,6 @@ from six.moves import range, cPickle import numpy as np -from scipy import spatial, cluster -import matplotlib import warnings import numbers @@ -226,6 +224,18 @@ import MDAnalysis.analysis.align from MDAnalysis import NoDataError +# Optional and/or lazily loaded modules +#from scipy import spatial, cluster +#import matplotlib +from MDAnalysis.lib import lazy +spatial = 
lazy.import_module("scipy.spatial") +cluster = lazy.import_module("scipy.cluster") + +matplotlib = lazy.import_module('matplotlib') +plt = lazy.import_module('matplotlib.pyplot') + +sns = lazy.import_module('seaborn.apionly') + import os import logging @@ -1689,7 +1699,6 @@ def plot(self, filename=None, linkage='ward', count_sort=False, clustered distance matrix (reordered) """ - from matplotlib.pyplot import figure, colorbar, cm, savefig, clf if self.D is None: err_str = "No distance data; do 'PSAnalysis.run(store=True)' first." @@ -1699,14 +1708,14 @@ def plot(self, filename=None, linkage='ward', count_sort=False, dgram_loc, hmap_loc, cbar_loc = self._get_plot_obj_locs() aspect_ratio = 1.25 - clf() - fig = figure(figsize=(figsize*aspect_ratio, figsize)) + plt.clf() + fig = plt.figure(figsize=(figsize*aspect_ratio, figsize)) ax_hmap = fig.add_axes(hmap_loc) ax_dgram = fig.add_axes(dgram_loc) - Z, dgram = self.cluster(dist_matrix, \ - method=linkage, \ - count_sort=count_sort, \ + Z, dgram = self.cluster(dist_matrix, + method=linkage, + count_sort=count_sort, distance_sort=distance_sort) rowidx = colidx = dgram['leaves'] # get row-wise ordering from clustering ax_dgram.invert_yaxis() # Place origin at up left (from low left) @@ -1714,26 +1723,44 @@ def plot(self, filename=None, linkage='ward', count_sort=False, minDist, maxDist = 0, np.max(dist_matrix) dist_matrix_clus = dist_matrix[rowidx,:] dist_matrix_clus = dist_matrix_clus[:,colidx] - im = ax_hmap.matshow(dist_matrix_clus, aspect='auto', origin='lower', \ - cmap=cm.YlGn, vmin=minDist, vmax=maxDist) + im = ax_hmap.matshow(dist_matrix_clus, + aspect='auto', + origin='lower', + cmap=plt.cm.YlGn, + vmin=minDist, + vmax=maxDist) ax_hmap.invert_yaxis() # Place origin at upper left (from lower left) ax_hmap.locator_params(nbins=npaths) ax_hmap.set_xticks(np.arange(npaths), minor=True) ax_hmap.set_yticks(np.arange(npaths), minor=True) - ax_hmap.tick_params(axis='x', which='both', labelleft='off', \ - labelright='off', 
labeltop='on', labelsize=0) - ax_hmap.tick_params(axis='y', which='both', labelleft='on', \ - labelright='off', labeltop='off', labelsize=0) + ax_hmap.tick_params(axis='x', + which='both', + labelleft='off', + labelright='off', + labeltop='on', + labelsize=0) + ax_hmap.tick_params(axis='y', + which='both', + labelleft='on', + labelright='off', + labeltop='off', + labelsize=0) rowlabels = [self.labels[i] for i in rowidx] collabels = [self.labels[i] for i in colidx] - ax_hmap.set_xticklabels(collabels, rotation='vertical', \ - size=(labelsize-4), multialignment='center', minor=True) - ax_hmap.set_yticklabels(rowlabels, rotation='horizontal', \ - size=(labelsize-4), multialignment='left', ha='right', \ - minor=True) + ax_hmap.set_xticklabels(collabels, + rotation='vertical', + size=(labelsize-4), + multialignment='center', + minor=True) + ax_hmap.set_yticklabels(rowlabels, + rotation='horizontal', + size=(labelsize-4), + multialignment='left', + ha='right', + minor=True) ax_color = fig.add_axes(cbar_loc) - colorbar(im, cax=ax_color, ticks=np.linspace(minDist, maxDist, 10), \ + plt.colorbar(im, cax=ax_color, ticks=np.linspace(minDist, maxDist, 10), \ format="%0.1f") ax_color.tick_params(labelsize=labelsize) @@ -1756,7 +1783,7 @@ def plot(self, filename=None, linkage='ward', count_sort=False, if filename is not None: head = self.targetdir + self.datadirs['plots'] outfile = os.path.join(head, filename) - savefig(outfile, dpi=300, bbox_inches='tight') + plt.savefig(outfile, dpi=300, bbox_inches='tight') return Z, dgram, dist_matrix_clus @@ -1807,28 +1834,6 @@ def plot_annotated_heatmap(self, filename=None, linkage='ward', \ .. _seaborn: https://seaborn.pydata.org/ """ - from matplotlib.pyplot import figure, colorbar, cm, savefig, clf - - try: - import seaborn.apionly as sns - except ImportError: - raise ImportError( - """ERROR --- The seaborn package cannot be found! - - The seaborn API could not be imported. Please install it first. 
- You can try installing with pip directly from the - internet: - - pip install seaborn - - Alternatively, download the package from - - http://pypi.python.org/pypi/seaborn/ - - and install in the usual manner. - """ - ) - if self.D is None: err_str = "No distance data; do 'PSAnalysis.run(store=True)' first." raise ValueError(err_str) @@ -1843,14 +1848,18 @@ def plot_annotated_heatmap(self, filename=None, linkage='ward', \ dist_matrix_clus = dist_matrix[rowidx,:] dist_matrix_clus = dist_matrix_clus[:,colidx] - clf() + plt.clf() aspect_ratio = 1.25 - fig = figure(figsize=(figsize*aspect_ratio, figsize)) + fig = plt.figure(figsize=(figsize*aspect_ratio, figsize)) ax_hmap = fig.add_subplot(111) - ax_hmap = sns.heatmap(dist_matrix_clus, \ - linewidths=0.25, cmap=cm.YlGn, annot=True, fmt='3.1f', \ - square=True, xticklabels=rowidx, yticklabels=colidx, \ - annot_kws={"size": 7}, ax=ax_hmap) + ax_hmap = sns.heatmap(dist_matrix_clus, + linewidths=0.25, cmap=plt.cm.YlGn, + annot=True, fmt='3.1f', + square=True, + xticklabels=rowidx, + yticklabels=colidx, + annot_kws={"size": 7}, + ax=ax_hmap) # Remove major ticks from both heat map axes for tic in ax_hmap.xaxis.get_major_ticks(): @@ -1868,7 +1877,7 @@ def plot_annotated_heatmap(self, filename=None, linkage='ward', \ if filename is not None: head = self.targetdir + self.datadirs['plots'] outfile = os.path.join(head, filename) - savefig(outfile, dpi=600, bbox_inches='tight') + plt.savefig(outfile, dpi=600, bbox_inches='tight') return Z, dgram, dist_matrix_clus @@ -1918,27 +1927,6 @@ def plot_nearest_neighbors(self, filename=None, idx=0, \ .. _seaborn: https://seaborn.pydata.org/ """ - from matplotlib.pyplot import figure, savefig, tight_layout, clf, show - try: - import seaborn.apionly as sns - except ImportError: - raise ImportError( - """ERROR --- The seaborn package cannot be found! - - The seaborn API could not be imported. Please install it first. 
- You can try installing with pip directly from the - internet: - - pip install seaborn - - Alternatively, download the package from - - http://pypi.python.org/pypi/seaborn/ - - and install in the usual manner. - """ - ) - colors = sns.xkcd_palette(["cherry", "windows blue"]) if self._NN is None: @@ -1949,8 +1937,8 @@ def plot_nearest_neighbors(self, filename=None, idx=0, \ sns.set_style('whitegrid') if not multiplot: - clf() - fig = figure(figsize=(figsize*aspect_ratio, figsize)) + plt.clf() + fig = plt.figure(figsize=(figsize*aspect_ratio, figsize)) ax = fig.add_subplot(111) nn_dist_P, nn_dist_Q = self._NN[idx]['distances'] @@ -1968,12 +1956,12 @@ def plot_nearest_neighbors(self, filename=None, idx=0, \ ax.tick_params(axis='both', which='major', labelsize=12, pad=4) sns.despine(bottom=True, left=True, ax=ax) - tight_layout() + plt.tight_layout() if filename is not None: head = self.targetdir + self.datadirs['plots'] outfile = os.path.join(head, filename) - savefig(outfile, dpi=300, bbox_inches='tight') + plt.savefig(outfile, dpi=300, bbox_inches='tight') return ax diff --git a/package/MDAnalysis/lib/lazy.py b/package/MDAnalysis/lib/lazy.py new file mode 100644 index 00000000000..eaaff6f9b5e --- /dev/null +++ b/package/MDAnalysis/lib/lazy.py @@ -0,0 +1,271 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 +# +# MDAnalysis --- http://www.mdanalysis.org +# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, +# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. +# MDAnalysis: A Python package for the rapid analysis of molecular dynamics +# simulations. In S. Benthall and S. 
Rostrup editors, Proceedings of the 15th +# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 +# +# This module was based on code from the importing module from the PEAK +# package (see http://peak.telecommunity.com/DevCenter/FrontPage). The PEAK +# package is released under the following license, reproduced here: +# +# Copyright (C) 1996-2004 by Phillip J. Eby and Tyler C. Sarna. +# All rights reserved. This software may be used under the same terms +# as Zope or Python. THERE ARE ABSOLUTELY NO WARRANTIES OF ANY KIND. +# Code quality varies between modules, from "beta" to "experimental +# pre-alpha". :) +# +# The following list summarizes the modifications to the importing code: +# - a replacement of lazyModule (import_module, which defers most work to +# _import_module) is implemented that uses an alternative LazyModule class; +# - a different LazyModule class is created per instance, so that reverting +# the __getattribute__ behavior can be done safely; +# - a function to lazily import module functions was added. + + +""" +Lazy module loading --- :mod:`MDAnalysis.lib.lazy` +==================================================== + +Functions and classes for lazy module loading that also delay import errors. +Heavily borrowed from the `importing`_ module, which is not very +subclass-friendly. + +.. versionadded:: 0.16.2 +.. _`importing`: http://peak.telecommunity.com/DevCenter/Importing + +Files and directories +--------------------- + +.. autofunction:: import_module +.. autofunction:: import_function + +""" + +__all__ = ['import_module', 'import_function'] + +from types import ModuleType +import sys +import imp + +_MSG = ("{0} attempted to use a functionality that requires module {1}, but " + "it couldn't be loaded. 
Please install {2} and retry.") + +_MSG_FN = ("{0} attempted to use a functionality that requires function {1} " + "of module {2}, but it couldn't be found in that module. Please " + "install a version of {2} that has {1} and retry.") + +class LazyModule(ModuleType): + # peak.util.imports sets __slots__ to (), but it seems pointless because + # the base ModuleType doesn't itself set __slots__. + #__mda_lazy_armed__ = True + + def __init__(self, modname): + super(ModuleType, self).__setattr__('__name__', modname) + + def __getattribute__(self, attr): + #if (attr != '__mda_lazy_armed__' and + # self.__mda_lazy_armed__): + print("getting attr {} from module '{}'".format(attr, + super(ModuleType, self).__getattribute__('__name__'))) + _load_module(self) + return ModuleType.__getattribute__(self, attr) + + def __setattr__(self, attr, value): + #if attr != '__mda_lazy_armed__' and self.__mda_lazy_armed__: + print("setting attr {}".format(attr)) + _load_module(self) + return ModuleType.__setattr__(self, attr, value) + +def _load_module(module): + modclass = type(module) + # We only take care of our own LazyModule instances + if not issubclass(modclass, LazyModule): + return + imp.acquire_lock() + try: + modclass.__getattribute__ = ModuleType.__getattribute__ + modclass.__setattr__ = ModuleType.__setattr__ + try: + # Alreay-loaded _LazyModule classes lose their + # _mda_lazy_caller_name attr. No need to redo + # those cases. + caller_name = modclass._mda_lazy_caller_name + except AttributeError: + return + del modclass._mda_lazy_caller_name + # don't reload if already loaded! + #if module.__dict__.keys() == ['__name__']: + #if (set(ModuleType.__getattribute__(module, '__dict__').keys()) == + # set(('__name__', '_mda_lazy_caller_name'))): + print("loading module '{}'".format(module)) + #module.__mda_lazy_armed__ = False + # First, ensure the parent is loaded + # (using recursion; negligible chance we'll ever hit a stack limit + # in this case). 
+ parent, _, modname = module.__name__.rpartition('.') + if parent: + _load_module(sys.modules[parent]) + setattr(sys.modules[parent], modname, module) + # Get Python to do the real import! + try: + reload(module) + except: + #module.__mda_lazy_armed__ = True + del modclass.__getattribute__ + del modclass.__setattr__ + modclass._mda_lazy_caller_name = caller_name + raise + #del module.__mda_lazy_armed__ + print("done loading module '{}'".format(module)) + except ImportError as err: + print("Got an ImportError: '{}'".format(err)) + modname = ModuleType.__getattribute__(module, '__name__') + base_modname = modname.split(".")[0] + raise ImportError(_MSG.format(caller_name, modname, base_modname)) + finally: + imp.release_lock() + +def _caller_name(depth=2): + # the presence of sys._getframe might be implementation-dependent. + # It isn't that serious if we can't get the caller's name. + try: + return sys._getframe(depth).f_globals['__name__'] + except AttributeError: + return 'MDAnalysis' + +def import_module(modname, level='leaf'): + """Function allowing lazy importing of a module into the namespace + + Parameters + ---------- + modname : str + The module to import. + level : str, optional + Which submodule reference to return. Either a reference to the 'leaf' + module (the default) or to the 'base' module. For 'base':: + + MDAnalysis = import_module("MDAnalysis.analysis.distances", + level='base') + # 'MDAnalysis' becomes defined in the current namespace, with + # (sub)attributes 'MDAnalysis.analysis' and + # 'MDAnalysis.analysis.distances'. + # Equivalent to: + import MDAnalysis.analysis.distances + + For 'leaf':: + + distances = import_module("MDAnalysis.analysis.distances", + level='leaf') + # Only 'distances' becomes set in the current namespace. + # Equivalent to: + from MDAnalysis.analysis import distances + + Returns + ------- + module + The module specified by *modname*, or its base, depending on *level*. + The module isn't immediately imported. 
Instead, a + :class:`MDAnalysis.lib.lazy.LazyModule` instance is returned. Upon + access to any of its attributes, the module is finally loaded. + + .. versionadded:: 0.16.2 + + """ + mod = _import_module(modname, _caller_name()) + if level == 'base': + return sys.modules[modname.split('.')[0]] + elif level == 'leaf': + return mod + else: + raise ValueError("Parameter 'level' must be one of ('base', 'leaf')") + +def _import_module(modname, caller_name): + imp.acquire_lock() + try: + fullmodname = modname + fullsubmodname = None + # ensure parent module/package is in sys.modules + # and parent.modname=module, as soon as the parent is imported + while modname: + try: + mod = sys.modules[modname] + # We reached a (base) module that's already loaded. Let's stop + # the cycle. + modname = '' + except KeyError: + class _LazyModule(LazyModule): + _mda_lazy_caller_name = caller_name + mod = sys.modules[modname] = _LazyModule(modname) + if fullsubmodname: + ModuleType.__setattr__(mod, submodname, + sys.modules[fullsubmodname]) + fullsubmodname = modname + modname, _, submodname = modname.rpartition('.') + return sys.modules[fullmodname] + finally: + imp.release_lock() + +def import_function(modname, *funcnames): + """Function allowing lazy importing of a function into the namespace + + Parameters + ---------- + modname : str + The base module from where to import the function(s) in *funcnames*, + or a full 'module_name.function_name' string. + funcnames : str (optional) + The function name(s) to import from the module specified by *modname*. + If left empty *modname* is assumed to also include the function name + to import. + + Returns + ------- + function or list of functions + If *funcnames* is passed, a list of imported functions -- one for each + element in *funcnames* -- is returned. + If only *modnames* is passed it is assumed to be a full + 'module_name.function_name' string, in which case the imported function + is returned directly, and not in a list. 
+ The module specified by *modname* is always imported lazily, via + :func:`MDAnalysis.lib.lazy.import_module`. + + See Also + -------- + :func:`MDAnalysis.lib.lazy.import_module` + + .. versionadded:: 0.16.2 + + """ + if not funcnames: + # We allow passing a single string as 'modname.funcname', + # in which case the function is returned directly and not as a list. + modname, funcname = modname.rsplit(".", 1) + return _import_function(modname, funcname, _caller_name()) + else: + return [_import_function(modname, fn, _caller_name()) for fn in funcnames] + +def _import_function(modname, funcname, caller_name): + module = _import_module(modname, caller_name) + + def retfun(*args, **kwargs): + try: + return getattr(module, funcname)(*args, **kwargs) + except AttributeError: + raise AttributeError(_MSG_FN.format(caller_name, funcname, modname)) + return retfun + diff --git a/package/MDAnalysis/visualization/streamlines.py b/package/MDAnalysis/visualization/streamlines.py index c8a81b6e299..281c751728b 100644 --- a/package/MDAnalysis/visualization/streamlines.py +++ b/package/MDAnalysis/visualization/streamlines.py @@ -30,7 +30,7 @@ The :func:`generate_streamlines` function can generate a 2D flow field from a MD trajectory, for instance, lipid molecules in a flat membrane. It can make -use of multiple cores to perform the analyis in parallel (using +use of multiple cores to perform the analysis in parallel (using :mod:`multiprocessing`). See Also @@ -47,19 +47,11 @@ import multiprocessing import numpy as np -import scipy - -try: - import matplotlib - import matplotlib.path -except ImportError: - raise ImportError( - '2d streamplot module requires: matplotlib.path for its path.Path.contains_points method. 
The installation ' - 'instructions for the matplotlib module can be found here: ' - 'http://matplotlib.org/faq/installing_faq.html?highlight=install') - import MDAnalysis +# Optional and/or lazily loaded modules +from MDAnalysis.lib import lazy +matplotlib = lazy.import_module('matplotlib.path', level='base') def produce_grid(tuple_of_limits, grid_spacing): diff --git a/package/MDAnalysis/visualization/streamlines_3D.py b/package/MDAnalysis/visualization/streamlines_3D.py index c735b15dfdd..e713cb93fdd 100644 --- a/package/MDAnalysis/visualization/streamlines_3D.py +++ b/package/MDAnalysis/visualization/streamlines_3D.py @@ -50,11 +50,14 @@ import numpy as np import numpy.testing -import scipy -import scipy.spatial.distance import MDAnalysis +# Optional and/or lazily loaded modules +from MDAnalysis.lib import lazy +scipy = lazy.import_module('scipy.spatial.distance', level='base') + + def determine_container_limits(topology_file_path, trajectory_file_path, buffer_value): """Calculate the extent of the atom coordinates + buffer. 
diff --git a/testsuite/MDAnalysisTests/analysis/test_distances.py b/testsuite/MDAnalysisTests/analysis/test_distances.py index d2b422f6aa5..7b176076f90 100644 --- a/testsuite/MDAnalysisTests/analysis/test_distances.py +++ b/testsuite/MDAnalysisTests/analysis/test_distances.py @@ -27,7 +27,6 @@ import MDAnalysis from MDAnalysisTests import module_not_found from MDAnalysisTests.datafiles import GRO -from MDAnalysisTests.util import block_import import MDAnalysis.analysis.distances diff --git a/testsuite/MDAnalysisTests/analysis/test_encore.py b/testsuite/MDAnalysisTests/analysis/test_encore.py index aff0ff6f11d..a6a67883f69 100644 --- a/testsuite/MDAnalysisTests/analysis/test_encore.py +++ b/testsuite/MDAnalysisTests/analysis/test_encore.py @@ -30,11 +30,10 @@ import sys import warnings -from numpy.testing import (TestCase, dec, assert_equal, assert_almost_equal, - assert_warns) +from numpy.testing import (TestCase, dec, assert_equal, assert_almost_equal) from MDAnalysisTests.datafiles import DCD, DCD2, PSF, TPR, XTC -from MDAnalysisTests import parser_not_found, module_not_found, block_import +from MDAnalysisTests import parser_not_found, module_not_found import MDAnalysis.analysis.rms as rms import MDAnalysis.analysis.align as align @@ -824,22 +823,3 @@ def test_get_distance_matrix(self): # Issue #1324 u = mda.Universe(TPR,XTC) dm = confdistmatrix.get_distance_matrix(u) - -class TestEncoreImportWarnings(object): - def setUp(self): - # clear cache of encore module - for mod in list(sys.modules): # list as we're changing as we iterate - if 'encore' in mod: - sys.modules.pop(mod, None) - - @block_import('sklearn') - def _check_sklearn_import_warns(self, package): - warnings.simplefilter('always') - assert_warns(ImportWarning, importlib.import_module, package) - - def test_import_warnings(self): - for pkg in ( - 'MDAnalysis.analysis.encore.dimensionality_reduction.DimensionalityReductionMethod', - 'MDAnalysis.analysis.encore.clustering.ClusteringMethod', - ): - yield 
self._check_sklearn_import_warns, pkg From 9c41266da049c0a67eb034cf8deddd2203182fcc Mon Sep 17 00:00:00 2001 From: Manuel Nuno Melo Date: Wed, 21 Jun 2017 11:49:09 +0200 Subject: [PATCH 4/4] Added python 3 support and unit tests This now addresses issues #577 (facultative imports), #1361 (cleaner optional dependencies) and #1159 (optional dependencies in the analysis module) --- package/CHANGELOG | 4 +- package/MDAnalysis/analysis/pca.py | 2 +- package/MDAnalysis/lib/lazy.py | 173 +++++++++++++-------- testsuite/MDAnalysisTests/lib/test_lazy.py | 94 +++++++++++ 4 files changed, 205 insertions(+), 68 deletions(-) create mode 100644 testsuite/MDAnalysisTests/lib/test_lazy.py diff --git a/package/CHANGELOG b/package/CHANGELOG index 0bd718dc3c5..3fe2c48a6fe 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -15,11 +15,13 @@ The rules for this file: ------------------------------------------------------------------------------ -mm/dd/17 richardjgowers, rathann, jbarnoud +mm/dd/17 richardjgowers, rathann, jbarnoud, manuel.nuno.melo * 0.16.2 Enhancements + * Added support for lazy loading of modules, with delayed, on-access errors + for missing optional dependencies (addresses Issues #577, #1361 and #1159) Fixes * fixed GROWriter truncating long resids from the wrong end (Issue #1395) diff --git a/package/MDAnalysis/analysis/pca.py b/package/MDAnalysis/analysis/pca.py index eb748c26ae3..20ba9781e7f 100644 --- a/package/MDAnalysis/analysis/pca.py +++ b/package/MDAnalysis/analysis/pca.py @@ -115,7 +115,7 @@ # Optional and/or lazily imported modules from MDAnalysis.lib import lazy -scipy = lazy.import_function('scipy.integrate', level='base') +scipy = lazy.import_module('scipy.integrate', level='base') class PCA(AnalysisBase): """Principal component analysis on an MD trajectory. 
diff --git a/package/MDAnalysis/lib/lazy.py b/package/MDAnalysis/lib/lazy.py
index eaaff6f9b5e..5cdf2c811f2 100644
--- a/package/MDAnalysis/lib/lazy.py
+++ b/package/MDAnalysis/lib/lazy.py
@@ -42,8 +42,7 @@
 ====================================================
 
 Functions and classes for lazy module loading that also delay import errors.
-Heavily borrowed from the `importing`_ module, which is not very
-subclass-friendly.
+Heavily borrowed from the `importing`_ module.
 
 .. versionadded:: 0.16.2
 .. _`importing`: http://peak.telecommunity.com/DevCenter/Importing
@@ -60,7 +59,15 @@
 
 from types import ModuleType
 import sys
-import imp
+try:
+    # imp is deprecated since python 3.4 but there's no clear alternative to
+    # the lock mechanism, other than to import directly from _imp.
+    from imp import acquire_lock, release_lock
+except ImportError:
+    from _imp import acquire_lock, release_lock
+
+import six
+from six.moves import reload_module
 
 _MSG = ("{0} attempted to use a functionality that requires module {1}, but "
         "it couldn't be loaded. Please install {2} and retry.")
@@ -69,78 +76,41 @@
         "of module {2}, but it couldn't be found in that module. Please "
         "install a version of {2} that has {1} and retry.")
 
+
 class LazyModule(ModuleType):
+    """Class for lazily-loaded modules that triggers proper loading on access
+
+    Instantiation should be made from a subclass of
+    :class:`MDAnalysis.lib.lazy.LazyModule`, with one subclass per instantiated
+    module. Regular attribute set/access can then be recovered by setting the
+    subclass's :meth:`__getattribute__` and :meth:`__setattr__` to those
+    of :class:`types.ModuleType`.
+    """
     # peak.util.imports sets __slots__ to (), but it seems pointless because
     # the base ModuleType doesn't itself set __slots__.
-    #__mda_lazy_armed__ = True
-
     def __init__(self, modname):
         super(ModuleType, self).__setattr__('__name__', modname)
 
     def __getattribute__(self, attr):
-        #if (attr != '__mda_lazy_armed__' and
-        #        self.__mda_lazy_armed__):
-        print("getting attr {} from module '{}'".format(attr,
-            super(ModuleType, self).__getattribute__('__name__')))
+        # IPython tries to be too clever and constantly inspects, asking for
+        # modules' attrs, which causes premature module loading and unaesthetic
+        # internal errors if the lazily-loaded module doesn't exist. Returning
+        # Nones seems to satisfy those needs:
+        caller_base = _caller_name().partition('.')[0]
+        if run_from_ipython() and caller_base in ('inspect', 'IPython'):
+            return None
         _load_module(self)
         return ModuleType.__getattribute__(self, attr)
 
     def __setattr__(self, attr, value):
-        #if attr != '__mda_lazy_armed__' and self.__mda_lazy_armed__:
-        print("setting attr {}".format(attr))
         _load_module(self)
         return ModuleType.__setattr__(self, attr, value)
 
-def _load_module(module):
-    modclass = type(module)
-    # We only take care of our own LazyModule instances
-    if not issubclass(modclass, LazyModule):
-        return
-    imp.acquire_lock()
-    try:
-        modclass.__getattribute__ = ModuleType.__getattribute__
-        modclass.__setattr__ = ModuleType.__setattr__
-        try:
-            # Alreay-loaded _LazyModule classes lose their
-            # _mda_lazy_caller_name attr. No need to redo
-            # those cases.
-            caller_name = modclass._mda_lazy_caller_name
-        except AttributeError:
-            return
-        del modclass._mda_lazy_caller_name
-        # don't reload if already loaded!
-        #if module.__dict__.keys() == ['__name__']:
-        #if (set(ModuleType.__getattribute__(module, '__dict__').keys()) ==
-        #        set(('__name__', '_mda_lazy_caller_name'))):
-        print("loading module '{}'".format(module))
-        #module.__mda_lazy_armed__ = False
-        # First, ensure the parent is loaded
-        # (using recursion; negligible chance we'll ever hit a stack limit
-        # in this case).
- parent, _, modname = module.__name__.rpartition('.') - if parent: - _load_module(sys.modules[parent]) - setattr(sys.modules[parent], modname, module) - # Get Python to do the real import! - try: - reload(module) - except: - #module.__mda_lazy_armed__ = True - del modclass.__getattribute__ - del modclass.__setattr__ - modclass._mda_lazy_caller_name = caller_name - raise - #del module.__mda_lazy_armed__ - print("done loading module '{}'".format(module)) - except ImportError as err: - print("Got an ImportError: '{}'".format(err)) - modname = ModuleType.__getattribute__(module, '__name__') - base_modname = modname.split(".")[0] - raise ImportError(_MSG.format(caller_name, modname, base_modname)) - finally: - imp.release_lock() def _caller_name(depth=2): + """Returns the name of the calling namespace + + """ # the presence of sys._getframe might be implementation-dependent. # It isn't that serious if we can't get the caller's name. try: @@ -148,6 +118,16 @@ def _caller_name(depth=2): except AttributeError: return 'MDAnalysis' + +def run_from_ipython(): + # Taken from https://stackoverflow.com/questions/5376837 + try: + __IPYTHON__ + return True + except NameError: + return False + + def import_module(modname, level='leaf'): """Function allowing lazy importing of a module into the namespace @@ -194,8 +174,9 @@ def import_module(modname, level='leaf'): else: raise ValueError("Parameter 'level' must be one of ('base', 'leaf')") + def _import_module(modname, caller_name): - imp.acquire_lock() + acquire_lock() try: fullmodname = modname fullsubmodname = None @@ -218,10 +199,11 @@ class _LazyModule(LazyModule): modname, _, submodname = modname.rpartition('.') return sys.modules[fullmodname] finally: - imp.release_lock() + release_lock() + def import_function(modname, *funcnames): - """Function allowing lazy importing of a function into the namespace + """Performs lazy importing of one or more functions into the namespace Parameters ---------- @@ -236,9 +218,9 @@ def 
import_function(modname, *funcnames): Returns ------- function or list of functions - If *funcnames* is passed, a list of imported functions -- one for each - element in *funcnames* -- is returned. - If only *modnames* is passed it is assumed to be a full + If *funcnames* is passed, returns a list of imported functions, one for + each element in *funcnames*. + If only *modname* is passed it is assumed to be a full 'module_name.function_name' string, in which case the imported function is returned directly, and not in a list. The module specified by *modname* is always imported lazily, via @@ -254,11 +236,12 @@ def import_function(modname, *funcnames): if not funcnames: # We allow passing a single string as 'modname.funcname', # in which case the function is returned directly and not as a list. - modname, funcname = modname.rsplit(".", 1) + modname, _, funcname = modname.rpartition(".") return _import_function(modname, funcname, _caller_name()) else: return [_import_function(modname, fn, _caller_name()) for fn in funcnames] + def _import_function(modname, funcname, caller_name): module = _import_module(modname, caller_name) @@ -269,3 +252,61 @@ def retfun(*args, **kwargs): raise AttributeError(_MSG_FN.format(caller_name, funcname, modname)) return retfun + +def _load_module(module): + """Ensures that a module, and its parents, are properly loaded + + """ + modclass = type(module) + # We only take care of our own LazyModule instances + if not issubclass(modclass, LazyModule): + return + acquire_lock() + try: + try: + # Already-loaded _LazyModule classes lose their + # _mda_lazy_caller_name attr. No need to redo + # those cases.
+ caller_name = modclass._mda_lazy_caller_name + except AttributeError: + return + modclass.__getattribute__ = ModuleType.__getattribute__ + modclass.__setattr__ = ModuleType.__setattr__ + del modclass._mda_lazy_caller_name + + # First, ensure the parent is loaded + # (using recursion; negligible chance we'll ever hit a stack limit + # in this case). + parent, _, modname = module.__name__.rpartition('.') + try: + if parent: + _load_module(sys.modules[parent]) + setattr(sys.modules[parent], modname, module) + # Get Python to do the real import! + reload_module(module) + except: + # We reset our state + del modclass.__getattribute__ + del modclass.__setattr__ + modclass._mda_lazy_caller_name = caller_name + raise + except (AttributeError, ImportError) as err: + # Under Python 3 reloading our dummy LazyModule instances causes an + # AttributeError if the module can't be found. Would be preferable if + # we could always rely on an ImportError. As it is we vet the + # AttributeError as thoroughly as possible. + if (six.PY3 and isinstance(err, AttributeError) and + err.args[0] != "'NoneType' object has no attribute 'name'"): + # Not the AttributeError we were looking for. + raise + modname = ModuleType.__getattribute__(module, '__name__') + base_modname = modname.split(".")[0] + # Way to silence context tracebacks in Python 3 but with a syntax + # compatible with Python 2. This would normally be: + # raise ImportError(...)
from None + exc = ImportError(_MSG.format(caller_name, modname, base_modname)) + exc.__suppress_context__ = True + raise exc + finally: + release_lock() + diff --git a/testsuite/MDAnalysisTests/lib/test_lazy.py b/testsuite/MDAnalysisTests/lib/test_lazy.py new file mode 100644 index 00000000000..8b382c46cf8 --- /dev/null +++ b/testsuite/MDAnalysisTests/lib/test_lazy.py @@ -0,0 +1,94 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 fileencoding=utf-8 +# +# MDAnalysis --- http://www.mdanalysis.org +# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, +# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. +# MDAnalysis: A Python package for the rapid analysis of molecular dynamics +# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th +# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 +# +import sys +from numpy.testing import assert_, assert_raises + +import MDAnalysis as mda +from MDAnalysis.lib import lazy +from MDAnalysisTests import block_import + + +def _check_all_present(modnames): + for modname in modnames: + assert_(modname in sys.modules) + +class TestLazyExisting(object): + modnames = ('MDAnalysis', 'MDAnalysis.analysis', + 'MDAnalysis.analysis.distances') + + # We attempt to run module functions (without arguments, which triggers + # TypeError exceptions) to see whether we can reach them. 
+ def test_load_base(self): + MDAnalysis = lazy.import_module("MDAnalysis.analysis.distances", + level='base') + _check_all_present(self.modnames) + assert_raises(TypeError, MDAnalysis.analysis.distances.dist) + + def test_load_leaf(self): + distances = lazy.import_module("MDAnalysis.analysis.distances") + _check_all_present(self.modnames) + assert_raises(TypeError, distances.dist) + + def test_load_function(self): + dist = lazy.import_function("MDAnalysis.analysis.distances.dist") + _check_all_present(self.modnames) + assert_raises(TypeError, dist) + + def test_load_functions(self): + dist, dist_nonexistent = lazy.import_function("MDAnalysis.analysis.distances", + "dist", "dist_nonexistent") + _check_all_present(self.modnames) + assert_raises(TypeError, dist) + assert_raises(AttributeError, dist_nonexistent) + + +class TestLazyMissing(object): + modnames = ('scipy', 'scipy.stats') + + # In this case failure occurs on accession, so we must test for that, + # rather than function behavior. + @block_import('scipy') + def test_load_base(self): + scipy = lazy.import_module("scipy.stats", level='base') + _check_all_present(self.modnames) + assert_raises(ImportError, getattr, scipy, 'stats') + + @block_import('scipy') + def test_load_leaf(self): + stats = lazy.import_module("scipy.stats") + _check_all_present(self.modnames) + assert_raises(ImportError, getattr, stats, 'anderson') + + @block_import('scipy') + def test_load_function(self): + func1 = lazy.import_function("scipy.stats.anderson") + _check_all_present(self.modnames) + assert_raises(ImportError, func1) + + @block_import('scipy') + def test_load_functions(self): + func1, func2 = lazy.import_function("scipy.stats", + "anderson", "whatever_") + _check_all_present(self.modnames) + assert_raises(ImportError, func1) + assert_raises(ImportError, func2) +