diff --git a/package/CHANGELOG b/package/CHANGELOG index c816bbd859a..06d8a129a09 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -187,6 +187,9 @@ Enhancements to transfer data from HDF5 file into timestep. (PR #3293) Changes + * `analysis.pca.PCA` class now stores `p_components`, `variance`, + `cumulated_variance` and `mean_atoms` using the + `analysis.base.Results` class (Issues #3275 #3285) * Timestep now stores information in 'C' memory layout instead of the previous 'F' default (PR #1738) * `analysis.rdf.InterRDF` and `analysis.rdf.InterRDF_s` now store @@ -281,6 +284,11 @@ Changes * Added OpenMM coordinate and topology converters (Issue #2863, PR #2917) Deprecations + * The attributes `p_components`, `variance`, `cumulated_variance` and + `mean_atoms` in `analysis.pca.PCA` are now deprecated in favour of + `results.p_components`, `results.variance`, `results.cumulated_variance` + and `results.mean_atoms`. They will be removed in 3.0.0 + (Issues #3275 #3285) * The `bins`, `edges`, `count`, `rdf` attributes for `analysis.rdf.InterRDF` and `analysis.rdf.InterRDF_s`, and `cdf` attributes for `analysis.rdf.InterRDF_s` are now deprecated in favour of `results.bins`, diff --git a/package/MDAnalysis/analysis/pca.py b/package/MDAnalysis/analysis/pca.py index c0af950bccf..c12d79e347c 100644 --- a/package/MDAnalysis/analysis/pca.py +++ b/package/MDAnalysis/analysis/pca.py @@ -43,12 +43,13 @@ components are the eigenvectors of this matrix. For each eigenvector, its eigenvalue is the variance that the eigenvector -explains. Stored in :attr:`PCA.cumulated_variance`, a ratio for each number of -eigenvectors up to index :math:`i` is provided to quickly find out how many -principal components are needed to explain the amount of variance reflected by -those :math:`i` eigenvectors. For most data, :attr:`PCA.cumulated_variance` +explains. Stored in :attr:`PCA.results.cumulated_variance`, a ratio for each +number of eigenvectors up to index :math:`i` is provided to quickly find out +how many principal components are needed to explain the amount of variance +reflected by those :math:`i` eigenvectors. For most data, +:attr:`PCA.results.cumulated_variance` will be approximately equal to one for some :math:`n` that is significantly -smaller than the total number of components, these are the components of +smaller than the total number of components. These are the components of interest given by Principal Component Analysis. From here, we can project a trajectory onto these principal components and @@ -67,30 +68,34 @@ :data:`~MDAnalysis.tests.datafiles.DCD`). This tutorial shows how to use the PCA class. -First load all modules and test data +First load all modules and test data:: + + import MDAnalysis as mda + import MDAnalysis.analysis.pca as pca + from MDAnalysis.tests.datafiles import PSF, DCD - >>> import MDAnalysis as mda - >>> import MDAnalysis.analysis.pca as pca - >>> from MDAnalysis.tests.datafiles import PSF, DCD Given a universe containing trajectory data we can perform Principal Component Analyis by using the class :class:`PCA` and retrieving the principal -components. +components.:: + + u = mda.Universe(PSF, DCD) + PSF_pca = pca.PCA(u, select='backbone') + PSF_pca.run() - >>> u = mda.Universe(PSF, DCD) - >>> PSF_pca = pca.PCA(u, select='backbone') - >>> PSF_pca.run() Inspect the components to determine the principal components you would like to retain. The choice is arbitrary, but I will stop when 95 percent of the variance is explained by the components. This cumulated variance by the components is conveniently stored in the one-dimensional array attribute -``cumulated_variance``. The value at the ith index of `cumulated_variance` -is the sum of the variances from 0 to i. +:attr:`PCA.results.cumulated_variance`. The value at the ith index of +:attr:`PCA.results.cumulated_variance` is the sum of the variances from 0 to +i.:: + + n_pcs = np.where(PSF_pca.results.cumulated_variance > 0.95)[0][0] + atomgroup = u.select_atoms('backbone') + pca_space = PSF_pca.transform(atomgroup, n_components=n_pcs) - >>> n_pcs = np.where(PSF_pca.cumulated_variance > 0.95)[0][0] - >>> atomgroup = u.select_atoms('backbone') - >>> pca_space = PSF_pca.transform(atomgroup, n_components=n_pcs) From here, inspection of the ``pca_space`` and conclusions to be drawn from the data are left to the user. @@ -104,6 +109,10 @@ .. autofunction:: cosine_content +.. autofunction:: rmsip + +.. autofunction:: cumulative_overlap + """ import warnings @@ -124,32 +133,82 @@ class PCA(AnalysisBase): After initializing and calling method with a universe or an atom group, principal components ordering the atom coordinate data by decreasing - variance will be available for analysis. As an example: + variance will be available for analysis. As an example::: + + pca = PCA(universe, select='backbone').run() + pca_space = pca.transform(universe.select_atoms('backbone'), 3) - >>> pca = PCA(universe, select='backbone').run() - >>> pca_space = pca.transform(universe.select_atoms('backbone'), 3) generates the principal components of the backbone of the atomgroup and then transforms those atomgroup coordinates by the direction of those variances. Please refer to the :ref:`PCA-tutorial` for more detailed instructions. + Parameters + ---------- + universe : Universe + Universe + select : string, optional + A valid selection statement for choosing a subset of atoms from + the atomgroup. + align : boolean, optional + If True, the trajectory will be aligned to a reference + structure. + mean : array_like, optional + Optional reference positions to be be used as the mean of the + covariance matrix. + n_components : int, optional + The number of principal components to be saved, default saves + all principal components + verbose : bool (optional) + Show detailed progress of the calculation if set to ``True``. + Attributes ---------- - p_components: array, (n_atoms * 3, n_components) - The principal components of the feature space, + results.p_components: array, (n_atoms * 3, n_components) + Principal components of the feature space, representing the directions of maximum variance in the data. The column vector p_components[:, i] is the eigenvector corresponding to the variance[i]. - variance : array (n_components, ) - The raw variance explained by each eigenvector of the covariance + + .. versionadded:: 2.0.0 + + p_components: array, (n_atoms * 3, n_components) + Alias to the :attr:`results.p_components`. + + .. deprecated:: 2.0.0 + Will be removed in MDAnalysis 3.0.0. Please use + :attr:`results.p_components` instead. + + results.variance : array (n_components, ) + Raw variance explained by each eigenvector of the covariance matrix. - cumulated_variance : array, (n_components, ) + + .. versionadded:: 2.0.0 + + variance : array (n_components, ) + Alias to the :attr:`results.variance`. + + .. deprecated:: 2.0.0 + Will be removed in MDAnalysis 3.0.0. Please use + :attr:`results.variance` instead. + + results.cumulated_variance : array, (n_components, ) Percentage of variance explained by the selected components and the sum of the components preceding it. If a subset of components is not chosen then all components are stored and the cumulated variance will converge to 1. + .. versionadded:: 2.0.0 + + cumulated_variance : array, (n_components, ) + Alias to the :attr:`results.cumulated_variance`. + + .. deprecated:: 2.0.0 + Will be removed in MDAnalysis 3.0.0. Please use + :attr:`results.cumulated_variance` instead. + + Methods ------- transform(atomgroup, n_components=None) @@ -161,11 +220,10 @@ class PCA(AnalysisBase): ----- Computation can be sped up by supplying precalculated mean positions. - .. versionchanged:: 2.0.0 - ``mean_atoms`` removed, as this did not reliably contain the mean - positions. - ``mean`` input now accepts coordinate arrays instead of atomgroup. + .. versionchanged:: 0.19.0 + The start frame is used when performing selections and calculating + mean positions. Previously the 0th frame was always used. .. versionchanged:: 1.0.0 ``n_components`` now limits the correct axis of ``p_components``. ``cumulated_variance`` now accurately represents the contribution of @@ -174,34 +232,16 @@ class PCA(AnalysisBase): ``p_components``, ``cumulated_variance`` will not sum to 1. ``align=True`` now correctly aligns the trajectory and computes the correct means and covariance matrix. - - .. versionchanged:: 0.19.0 - The start frame is used when performing selections and calculating - mean positions. Previously the 0th frame was always used. + .. versionchanged:: 2.0.0 + ``mean_atoms`` removed, as this did not reliably contain the mean + positions. + ``mean`` input now accepts coordinate arrays instead of atomgroup. + :attr:`p_components`, :attr:`variance` and :attr:`cumulated_variance` + are now stored in a :class:`MDAnalysis.analysis.base.Results` instance. """ def __init__(self, universe, select='all', align=False, mean=None, n_components=None, **kwargs): - """ - Parameters - ---------- - universe : Universe - Universe - select : string, optional - A valid selection statement for choosing a subset of atoms from - the atomgroup. - align : boolean, optional - If True, the trajectory will be aligned to a reference - structure. - mean : array_like, optional - Optional reference positions to be be used as the mean of the - covariance matrix. - n_components : int, optional - The number of principal components to be saved, default saves - all principal components - verbose : bool (optional) - Show detailed progress of the calculation if set to ``True``. - """ super(PCA, self).__init__(universe.trajectory, **kwargs) self._u = universe @@ -281,6 +321,30 @@ def _conclude(self): self._calculated = True self.n_components = self._n_components + @property + def p_components(self): + wmsg = ("The `p_components` attribute was deprecated in " + "MDAnalysis 2.0.0 and will be removed in MDAnalysis 3.0.0. " + "Please use `results.p_components` instead.") + warnings.warn(wmsg, DeprecationWarning) + return self.results.p_components + + @property + def variance(self): + wmsg = ("The `variance` attribute was deprecated in " + "MDAnalysis 2.0.0 and will be removed in MDAnalysis 3.0.0. " + "Please use `results.variance` instead.") + warnings.warn(wmsg, DeprecationWarning) + return self.results.variance + + @property + def cumulated_variance(self): + wmsg = ("The `cumulated_variance` attribute was deprecated in " + "MDAnalysis 2.0.0 and will be removed in MDAnalysis 3.0.0. " + "Please use `results.cumulated_variance` instead.") + warnings.warn(wmsg, DeprecationWarning) + return self.results.cumulated_variance + @property def n_components(self): return self._n_components @@ -290,10 +354,10 @@ def n_components(self, n): if self._calculated: if n is None: n = len(self._variance) - self.variance = self._variance[:n] - self.cumulated_variance = (np.cumsum(self._variance) / + self.results.variance = self._variance[:n] + self.results.cumulated_variance = (np.cumsum(self._variance) / np.sum(self._variance))[:n] - self.p_components = self._p_components[:, :n] + self.results.p_components = self._p_components[:, :n] self._n_components = n def transform(self, atomgroup, n_components=None, start=None, stop=None, @@ -348,7 +412,7 @@ def transform(self, atomgroup, n_components=None, start=None, stop=None, n_frames = len(range(start, stop, step)) dim = (n_components if n_components is not None else - self.p_components.shape[1]) + self.results.p_components.shape[1]) dot = np.zeros((n_frames, dim)) @@ -390,12 +454,13 @@ def rmsip(self, other, n_components=None): See also -------- - rmsip + :func:`~MDAnalysis.analysis.pca.rmsip` + .. versionadded:: 1.0.0 """ try: - a = self.p_components + a = self.results.p_components except AttributeError: raise ValueError('Call run() on the PCA before using rmsip') @@ -442,14 +507,14 @@ def cumulative_overlap(self, other, i=0, n_components=None): See also -------- - cumulative_overlap + :func:`~MDAnalysis.analysis.pca.cumulative_overlap` - .. versionadded:: 1.0.0 + .. versionadded:: 1.0.0 """ try: - a = self.p_components + a = self.results.p_components except AttributeError: raise ValueError( 'Call run() on the PCA before using cumulative_overlap') @@ -533,6 +598,7 @@ def rmsip(a, b, n_components=None): 0 indicates that they are mutually orthogonal, whereas 1 indicates that they are identical. + .. versionadded:: 1.0.0 """ n_components = util.asiterable(n_components) @@ -586,6 +652,7 @@ def cumulative_overlap(a, b, i=0, n_components=None): 0 indicates that they are mutually orthogonal, whereas 1 indicates that they are identical. + .. versionadded:: 1.0.0 """ diff --git a/testsuite/MDAnalysisTests/analysis/test_pca.py b/testsuite/MDAnalysisTests/analysis/test_pca.py index 2cfa1c00e61..931a04673f8 100644 --- a/testsuite/MDAnalysisTests/analysis/test_pca.py +++ b/testsuite/MDAnalysisTests/analysis/test_pca.py @@ -71,23 +71,25 @@ def test_cov(pca, u): def test_cum_var(pca): - assert_almost_equal(pca.cumulated_variance[-1], 1) - l = pca.cumulated_variance - l = np.sort(l) - assert_almost_equal(pca.cumulated_variance, l, 5) + assert_almost_equal(pca.results.cumulated_variance[-1], 1) + cum_var = pca.results.cumulated_variance + cum_var = np.sort(cum_var) + assert_almost_equal(pca.results.cumulated_variance, cum_var, 5) def test_pcs(pca): - assert_equal(pca.p_components.shape, (pca._n_atoms * 3, pca._n_atoms * 3)) + assert_equal(pca.results.p_components.shape, (pca._n_atoms * 3, + pca._n_atoms * 3)) def test_pcs_n_components(u): pca = PCA(u, select=SELECTION).run() assert_equal(pca.n_components, pca._n_atoms*3) - assert_equal(pca.p_components.shape, (pca._n_atoms * 3, pca._n_atoms * 3)) + assert_equal(pca.results.p_components.shape, (pca._n_atoms * 3, + pca._n_atoms * 3)) pca.n_components = 10 assert_equal(pca.n_components, 10) - assert_equal(pca.p_components.shape, (pca._n_atoms * 3, 10)) + assert_equal(pca.results.p_components.shape, (pca._n_atoms * 3, 10)) def test_different_steps(pca, u): @@ -191,8 +193,8 @@ def test_pca_rmsip_self(pca): def test_rmsip_ortho(pca): - value = rmsip(pca.p_components[:, :10].T, - pca.p_components[:, 10:20].T) + value = rmsip(pca.results.p_components[:, :10].T, + pca.results.p_components[:, 10:20].T) assert_almost_equal(value, 0.0) @@ -216,7 +218,7 @@ def test_pca_cumulative_overlap_self(pca): def test_cumulative_overlap_ortho(pca): - pcs = pca.p_components + pcs = pca.results.p_components value = cumulative_overlap(pcs[:, 11].T, pcs.T, n_components=10) assert_almost_equal(value, 0.0) @@ -251,3 +253,12 @@ def test_compare_wrong_class(u, pca, method): with pytest.raises(ValueError) as exc: func(3) assert 'must be another PCA class' in str(exc.value) + + +@pytest.mark.parametrize("attr", ("p_components", "variance", + "cumulated_variance")) +def test_pca_attr_warning(u, attr): + pca = PCA(u, select=SELECTION).run(stop=2) + wmsg = f"The `{attr}` attribute was deprecated in MDAnalysis 2.0.0" + with pytest.warns(DeprecationWarning, match=wmsg): + getattr(pca, attr) is pca.results[attr]