From 32323933b10ccfdf38d614d03f2438f432d3ba6e Mon Sep 17 00:00:00 2001 From: Philip Loche Date: Fri, 7 May 2021 12:14:32 +0200 Subject: [PATCH 01/17] Use results for pca --- package/MDAnalysis/analysis/pca.py | 160 +++++++++++++----- .../MDAnalysisTests/analysis/test_pca.py | 20 +-- 2 files changed, 124 insertions(+), 56 deletions(-) diff --git a/package/MDAnalysis/analysis/pca.py b/package/MDAnalysis/analysis/pca.py index c4e8b7b9afe..88e69cdfd72 100644 --- a/package/MDAnalysis/analysis/pca.py +++ b/package/MDAnalysis/analysis/pca.py @@ -69,17 +69,21 @@ First load all modules and test data - >>> import MDAnalysis as mda - >>> import MDAnalysis.analysis.pca as pca - >>> from MDAnalysis.tests.datafiles import PSF, DCD +.. code-block:: python + import MDAnalysis as mda + import MDAnalysis.analysis.pca as pca + rom MDAnalysis.tests.datafiles import PSF, DCD + Given a universe containing trajectory data we can perform Principal Component Analyis by using the class :class:`PCA` and retrieving the principal components. - >>> u = mda.Universe(PSF, DCD) - >>> PSF_pca = pca.PCA(u, select='backbone') - >>> PSF_pca.run() +.. code-block:: python + u = mda.Universe(PSF, DCD) + PSF_pca = pca.PCA(u, select='backbone') + PSF_pca.run() + Inspect the components to determine the principal components you would like to retain. The choice is arbitrary, but I will stop when 95 percent of the @@ -88,9 +92,11 @@ ``cumulated_variance``. The value at the ith index of `cumulated_variance` is the sum of the variances from 0 to i. - >>> n_pcs = np.where(PSF_pca.cumulated_variance > 0.95)[0][0] - >>> atomgroup = u.select_atoms('backbone') - >>> pca_space = PSF_pca.transform(atomgroup, n_components=n_pcs) +.. code-block:: python + n_pcs = np.where(PSF_pca.cumulated_variance > 0.95)[0][0] + atomgroup = u.select_atoms('backbone') + pca_space = PSF_pca.transform(atomgroup, n_components=n_pcs) + From here, inspection of the ``pca_space`` and conclusions to be drawn from the data are left to the user. @@ -123,32 +129,83 @@ class PCA(AnalysisBase): principal components ordering the atom coordinate data by decreasing variance will be available for analysis. As an example: - >>> pca = PCA(universe, select='backbone').run() - >>> pca_space = pca.transform(universe.select_atoms('backbone'), 3) + .. code-block:: python + pca = PCA(universe, select='backbone').run() + pca_space = pca.transform(universe.select_atoms('backbone'), 3) + generates the principal components of the backbone of the atomgroup and then transforms those atomgroup coordinates by the direction of those variances. Please refer to the :ref:`PCA-tutorial` for more detailed instructions. + Parameters + ---------- + universe : Universe + Universe + select : string, optional + A valid selection statement for choosing a subset of atoms from + the atomgroup. + align : boolean, optional + If True, the trajectory will be aligned to a reference + structure. + mean : MDAnalysis atomgroup, optional + An optional reference structure to be used as the mean of the + covariance matrix. + n_components : int, optional + The number of principal components to be saved, default saves + all principal components + verbose : bool (optional) + Show detailed progress of the calculation if set to ``True``. + Attributes ---------- - p_components: array, (n_atoms * 3, n_components) - The principal components of the feature space, + results.p_components: array, (n_atoms * 3, n_components) + Principal components of the feature space, representing the directions of maximum variance in the data. The column vector p_components[:, i] is the eigenvector corresponding to the variance[i]. - variance : array (n_components, ) - The raw variance explained by each eigenvector of the covariance + + p_components: array, (n_atoms * 3, n_components) + Alias to the :attr:`results.p_components`. + + .. deprecated:: 2.0.0 + Will be removed in MDAnalysis 3.0.0. Please use + :attr:`results.density` instead. + + results.variance : array (n_components, ) + Raw variance explained by each eigenvector of the covariance matrix. - cumulated_variance : array, (n_components, ) + + variance : array (n_components, ) + Alias to the :attr:`results.variance`. + + .. deprecated:: 2.0.0 + Will be removed in MDAnalysis 3.0.0. Please use + :attr:`results.density` instead. + + results.cumulated_variance : array, (n_components, ) Percentage of variance explained by the selected components and the sum of the components preceding it. If a subset of components is not chosen then all components are stored and the cumulated variance will converge to 1. + + cumulated_variance : array, (n_components, ) + Alias to the :attr:`results.cumulated_variance`. + + .. deprecated:: 2.0.0 + Will be removed in MDAnalysis 3.0.0. Please use + :attr:`results.density` instead. + + results.mean_atoms: MDAnalyis atomgroup + Atoms used for the creation of the covariance matrix. + mean_atoms: MDAnalyis atomgroup - After running :meth:`PCA.run`, the mean position of all the atoms - used for the creation of the covariance matrix will exist here. + Alias to the :attr:`results.mean_atoms`. + + .. deprecated:: 2.0.0 + Will be removed in MDAnalysis 3.0.0. Please use + :attr:`results.density` instead. Methods ------- @@ -177,26 +234,6 @@ class PCA(AnalysisBase): def __init__(self, universe, select='all', align=False, mean=None, n_components=None, **kwargs): - """ - Parameters - ---------- - universe : Universe - Universe - select : string, optional - A valid selection statement for choosing a subset of atoms from - the atomgroup. - align : boolean, optional - If True, the trajectory will be aligned to a reference - structure. - mean : MDAnalysis atomgroup, optional - An optional reference structure to be used as the mean of the - covariance matrix. - n_components : int, optional - The number of principal components to be saved, default saves - all principal components - verbose : bool (optional) - Show detailed progress of the calculation if set to ``True``. - """ super(PCA, self).__init__(universe.trajectory, **kwargs) self._u = universe @@ -246,8 +283,7 @@ def _prepare(self): self.mean += self._atoms.positions.ravel() self.mean /= self.n_frames - self.mean_atoms = self._atoms - self.mean_atoms.positions = self._atoms.positions + self.results.mean_atoms = self._atoms def _single_frame(self): if self.align: @@ -273,6 +309,38 @@ def _conclude(self): self._calculated = True self.n_components = self._n_components + @property + def p_components(self): + wmsg = ("The `p_components` attribute was deprecated in " + "MDAnalysis 2.0.0 and will be removed in MDAnalysis 3.0.0. " + "Please use `results.p_components` instead.") + warnings.warn(wmsg, DeprecationWarning) + return self.results.p_components + + @property + def variance(self): + wmsg = ("The `variance` attribute was deprecated in " + "MDAnalysis 2.0.0 and will be removed in MDAnalysis 3.0.0. " + "Please use `results.variance` instead.") + warnings.warn(wmsg, DeprecationWarning) + return self.results.variance + + @property + def cumulated_variance(self): + wmsg = ("The `cumulated_variance` attribute was deprecated in " + "MDAnalysis 2.0.0 and will be removed in MDAnalysis 3.0.0. " + "Please use `results.cumulated_variance` instead.") + warnings.warn(wmsg, DeprecationWarning) + return self.results.cumulated_variance + + @property + def mean_atoms(self): + wmsg = ("The `mean_atoms` attribute was deprecated in " + "MDAnalysis 2.0.0 and will be removed in MDAnalysis 3.0.0. " + "Please use `results.mean_atoms` instead.") + warnings.warn(wmsg, DeprecationWarning) + return self.results.mean_atoms + @property def n_components(self): return self._n_components @@ -282,10 +350,10 @@ def n_components(self, n): if self._calculated: if n is None: n = len(self._variance) - self.variance = self._variance[:n] - self.cumulated_variance = (np.cumsum(self._variance) / + self.results.variance = self._variance[:n] + self.results.cumulated_variance = (np.cumsum(self._variance) / np.sum(self._variance))[:n] - self.p_components = self._p_components[:, :n] + self.results.p_components = self._p_components[:, :n] self._n_components = n def transform(self, atomgroup, n_components=None, start=None, stop=None, @@ -339,7 +407,7 @@ def transform(self, atomgroup, n_components=None, start=None, stop=None, n_frames = len(range(start, stop, step)) dim = (n_components if n_components is not None else - self.p_components.shape[1]) + self.results.p_components.shape[1]) dot = np.zeros((n_frames, dim)) @@ -386,7 +454,7 @@ def rmsip(self, other, n_components=None): .. versionadded:: 1.0.0 """ try: - a = self.p_components + a = self.results.p_components except AttributeError: raise ValueError('Call run() on the PCA before using rmsip') @@ -440,7 +508,7 @@ def cumulative_overlap(self, other, i=0, n_components=None): """ try: - a = self.p_components + a = self.results.p_components except AttributeError: raise ValueError( 'Call run() on the PCA before using cumulative_overlap') diff --git a/testsuite/MDAnalysisTests/analysis/test_pca.py b/testsuite/MDAnalysisTests/analysis/test_pca.py index 18c47d649f9..0c7cdaa4ef1 100644 --- a/testsuite/MDAnalysisTests/analysis/test_pca.py +++ b/testsuite/MDAnalysisTests/analysis/test_pca.py @@ -69,23 +69,23 @@ def test_cov(pca, u): def test_cum_var(pca): - assert_almost_equal(pca.cumulated_variance[-1], 1) - l = pca.cumulated_variance + assert_almost_equal(pca.results.cumulated_variance[-1], 1) + l = pca.results.cumulated_variance l = np.sort(l) - assert_almost_equal(pca.cumulated_variance, l, 5) + assert_almost_equal(pca.results.cumulated_variance, l, 5) def test_pcs(pca): - assert_equal(pca.p_components.shape, (pca._n_atoms * 3, pca._n_atoms * 3)) + assert_equal(pca.results.p_components.shape, (pca._n_atoms * 3, pca._n_atoms * 3)) def test_pcs_n_components(u): pca = PCA(u, select=SELECTION).run() assert_equal(pca.n_components, pca._n_atoms*3) - assert_equal(pca.p_components.shape, (pca._n_atoms * 3, pca._n_atoms * 3)) + assert_equal(pca.results.p_components.shape, (pca._n_atoms * 3, pca._n_atoms * 3)) pca.n_components = 10 assert_equal(pca.n_components, 10) - assert_equal(pca.p_components.shape, (pca._n_atoms * 3, 10)) + assert_equal(pca.results.p_components.shape, (pca._n_atoms * 3, 10)) def test_different_steps(pca, u): @@ -182,8 +182,8 @@ def test_pca_rmsip_self(pca): def test_rmsip_ortho(pca): - value = rmsip(pca.p_components[:, :10].T, - pca.p_components[:, 10:20].T) + value = rmsip(pca.results.p_components[:, :10].T, + pca.results.p_components[:, 10:20].T) assert_almost_equal(value, 0.0) @@ -207,7 +207,7 @@ def test_pca_cumulative_overlap_self(pca): def test_cumulative_overlap_ortho(pca): - pcs = pca.p_components + pcs = pca.results.p_components value = cumulative_overlap(pcs[:, 11].T, pcs.T, n_components=10) assert_almost_equal(value, 0.0) @@ -241,4 +241,4 @@ def test_compare_wrong_class(u, pca, method): func = getattr(pca, method) with pytest.raises(ValueError) as exc: func(3) - assert 'must be another PCA class' in str(exc.value) + assert 'must be another PCA class' in str(exc.value) \ No newline at end of file From 2afaa09acbb22590234c1618fc25e55e25ba3f18 Mon Sep 17 00:00:00 2001 From: Philip Loche Date: Fri, 7 May 2021 12:37:44 +0200 Subject: [PATCH 02/17] Added tests --- .../MDAnalysisTests/analysis/test_pca.py | 22 ++++++++++++++----- 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/testsuite/MDAnalysisTests/analysis/test_pca.py b/testsuite/MDAnalysisTests/analysis/test_pca.py index 0c7cdaa4ef1..4a1f3cfb3c5 100644 --- a/testsuite/MDAnalysisTests/analysis/test_pca.py +++ b/testsuite/MDAnalysisTests/analysis/test_pca.py @@ -70,19 +70,21 @@ def test_cov(pca, u): def test_cum_var(pca): assert_almost_equal(pca.results.cumulated_variance[-1], 1) - l = pca.results.cumulated_variance - l = np.sort(l) - assert_almost_equal(pca.results.cumulated_variance, l, 5) + cum_var = pca.results.cumulated_variance + cum_var = np.sort(l) + assert_almost_equal(pca.results.cumulated_variance, cum_var, 5) def test_pcs(pca): - assert_equal(pca.results.p_components.shape, (pca._n_atoms * 3, pca._n_atoms * 3)) + assert_equal(pca.results.p_components.shape, (pca._n_atoms * 3, + pca._n_atoms * 3)) def test_pcs_n_components(u): pca = PCA(u, select=SELECTION).run() assert_equal(pca.n_components, pca._n_atoms*3) - assert_equal(pca.results.p_components.shape, (pca._n_atoms * 3, pca._n_atoms * 3)) + assert_equal(pca.results.p_components.shape, (pca._n_atoms * 3, + pca._n_atoms * 3)) pca.n_components = 10 assert_equal(pca.n_components, 10) assert_equal(pca.results.p_components.shape, (pca._n_atoms * 3, 10)) @@ -241,4 +243,12 @@ def test_compare_wrong_class(u, pca, method): func = getattr(pca, method) with pytest.raises(ValueError) as exc: func(3) - assert 'must be another PCA class' in str(exc.value) \ No newline at end of file + assert 'must be another PCA class' in str(exc.value) + +@pytest.mark.parametrize("attr", ("p_components", "variance", + "cumulated_variance", "mean_atoms")) +def test_pca_attr_warning(u, attr): + pca = PCA(u, select=SELECTION).run(stop=2) + wmsg = f"The `{attr}` attribute was deprecated in MDAnalysis 2.0.0" + with pytest.warns(DeprecationWarning, match=wmsg): + assert_equal(getattr(pca, attr), pca.results[attr]) From ce4a17ec0af1679f97333b0ece4ed17da59162f1 Mon Sep 17 00:00:00 2001 From: Philip Loche Date: Fri, 7 May 2021 12:48:34 +0200 Subject: [PATCH 03/17] Updated CHANGELOG --- package/CHANGELOG | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/package/CHANGELOG b/package/CHANGELOG index 35569ec940e..eeec43e3e6b 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -174,6 +174,9 @@ Enhancements checking if it can be used in parallel analysis. (Issue #2996, PR #2950) Changes + * `analysis.pca.PCA` class now stores `p_components`, `variance`, + `cumulated_variance` and `mean_atoms` using the + `analysis.base.Results` class (Issues #3275 #3285) * `analysis.rms.RMSD` and `analysis.rms.RMSF` classes now store `rmsd` and `rmsf` data using the `analysis.base.Results` class (Issues #3274 #3261) * `analysis.dihedrals` classes now store angle data using the @@ -235,6 +238,11 @@ Changes * Added OpenMM coordinate and topology converters (Issue #2863, PR #2917) Deprecations + * The attributes `p_components`, `variance`, `cumulated_variance` and + `mean_atoms` in `analysis.pca.PCA` are now deprecated in favour of + `results.p_components`, `results.variance`, `results.cumulated_variance` + and `results.mean_atoms`. They will be removed in 3.0.0 + (Issues #3275 #3285) * The `analysis.rms.RMSD.rmsd` and `analysis.rms.RMSF.rmsf` attributes are now deprecated in favour of `analysis.rms.RMSD.results.rmsd` and `analysis.rms.RMSF.results.rmsf` respectively. They will be removed in From c2f16d266a3df5263950f872fb80a0be1cd43d85 Mon Sep 17 00:00:00 2001 From: Philip Loche Date: Fri, 7 May 2021 14:09:51 +0200 Subject: [PATCH 04/17] Docs updated --- package/MDAnalysis/analysis/pca.py | 31 ++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/package/MDAnalysis/analysis/pca.py b/package/MDAnalysis/analysis/pca.py index 88e69cdfd72..de962855975 100644 --- a/package/MDAnalysis/analysis/pca.py +++ b/package/MDAnalysis/analysis/pca.py @@ -70,6 +70,7 @@ First load all modules and test data .. code-block:: python + import MDAnalysis as mda import MDAnalysis.analysis.pca as pca rom MDAnalysis.tests.datafiles import PSF, DCD @@ -80,6 +81,7 @@ components. .. code-block:: python + u = mda.Universe(PSF, DCD) PSF_pca = pca.PCA(u, select='backbone') PSF_pca.run() @@ -93,6 +95,7 @@ is the sum of the variances from 0 to i. .. code-block:: python + n_pcs = np.where(PSF_pca.cumulated_variance > 0.95)[0][0] atomgroup = u.select_atoms('backbone') pca_space = PSF_pca.transform(atomgroup, n_components=n_pcs) @@ -130,6 +133,7 @@ class PCA(AnalysisBase): variance will be available for analysis. As an example: .. code-block:: python + pca = PCA(universe, select='backbone').run() pca_space = pca.transform(universe.select_atoms('backbone'), 3) @@ -166,23 +170,27 @@ class PCA(AnalysisBase): The column vector p_components[:, i] is the eigenvector corresponding to the variance[i]. + .. versionadded:: 2.0.0 + p_components: array, (n_atoms * 3, n_components) Alias to the :attr:`results.p_components`. .. deprecated:: 2.0.0 Will be removed in MDAnalysis 3.0.0. Please use - :attr:`results.density` instead. + :attr:`results.p_components` instead. results.variance : array (n_components, ) Raw variance explained by each eigenvector of the covariance matrix. + .. versionadded:: 2.0.0 + variance : array (n_components, ) Alias to the :attr:`results.variance`. .. deprecated:: 2.0.0 Will be removed in MDAnalysis 3.0.0. Please use - :attr:`results.density` instead. + :attr:`results.variance` instead. results.cumulated_variance : array, (n_components, ) Percentage of variance explained by the selected components and the sum @@ -190,22 +198,26 @@ class PCA(AnalysisBase): then all components are stored and the cumulated variance will converge to 1. + .. versionadded:: 2.0.0 + cumulated_variance : array, (n_components, ) Alias to the :attr:`results.cumulated_variance`. .. deprecated:: 2.0.0 Will be removed in MDAnalysis 3.0.0. Please use - :attr:`results.density` instead. + :attr:`results.cumulated_variance` instead. results.mean_atoms: MDAnalyis atomgroup Atoms used for the creation of the covariance matrix. + .. versionadded:: 2.0.0 + mean_atoms: MDAnalyis atomgroup Alias to the :attr:`results.mean_atoms`. .. deprecated:: 2.0.0 Will be removed in MDAnalysis 3.0.0. Please use - :attr:`results.density` instead. + :attr:`results.mean_atoms` instead. Methods ------- @@ -218,6 +230,9 @@ class PCA(AnalysisBase): ----- Computation can be sped up by supplying a precalculated mean structure. + .. versionchanged:: 0.19.0 + The start frame is used when performing selections and calculating + mean positions. Previously the 0th frame was always used. .. versionchanged:: 1.0.0 ``n_components`` now limits the correct axis of ``p_components``. ``cumulated_variance`` now accurately represents the contribution of @@ -226,10 +241,10 @@ class PCA(AnalysisBase): ``p_components``, ``cumulated_variance`` will not sum to 1. ``align=True`` now correctly aligns the trajectory and computes the correct means and covariance matrix. - - .. versionchanged:: 0.19.0 - The start frame is used when performing selections and calculating - mean positions. Previously the 0th frame was always used. + .. versionchanged:: 2.0.0 + :attr:`p_components`, :attr:`variance`, :attr:`cumulated_variance` + and :attr:`mean_atoms` are now stored in a + :class:`MDAnalysis.analysis.base.Results` instance. """ def __init__(self, universe, select='all', align=False, mean=None, From 813af65e6943bbda0790e230b7cf1126093a8b2f Mon Sep 17 00:00:00 2001 From: Philip Loche Date: Fri, 7 May 2021 14:44:05 +0200 Subject: [PATCH 05/17] PEP8 --- testsuite/MDAnalysisTests/analysis/test_pca.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/testsuite/MDAnalysisTests/analysis/test_pca.py b/testsuite/MDAnalysisTests/analysis/test_pca.py index 4a1f3cfb3c5..93b9bd25a98 100644 --- a/testsuite/MDAnalysisTests/analysis/test_pca.py +++ b/testsuite/MDAnalysisTests/analysis/test_pca.py @@ -76,14 +76,14 @@ def test_cum_var(pca): def test_pcs(pca): - assert_equal(pca.results.p_components.shape, (pca._n_atoms * 3, + assert_equal(pca.results.p_components.shape, (pca._n_atoms * 3, pca._n_atoms * 3)) def test_pcs_n_components(u): pca = PCA(u, select=SELECTION).run() assert_equal(pca.n_components, pca._n_atoms*3) - assert_equal(pca.results.p_components.shape, (pca._n_atoms * 3, + assert_equal(pca.results.p_components.shape, (pca._n_atoms * 3, pca._n_atoms * 3)) pca.n_components = 10 assert_equal(pca.n_components, 10) @@ -245,7 +245,8 @@ def test_compare_wrong_class(u, pca, method): func(3) assert 'must be another PCA class' in str(exc.value) -@pytest.mark.parametrize("attr", ("p_components", "variance", + +@pytest.mark.parametrize("attr", ("p_components", "variance", "cumulated_variance", "mean_atoms")) def test_pca_attr_warning(u, attr): pca = PCA(u, select=SELECTION).run(stop=2) From c915fa6ff874931911e3c049286a188bb41f37e7 Mon Sep 17 00:00:00 2001 From: Philip Loche Date: Fri, 7 May 2021 14:46:01 +0200 Subject: [PATCH 06/17] Fixed forgotten rename --- testsuite/MDAnalysisTests/analysis/test_pca.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testsuite/MDAnalysisTests/analysis/test_pca.py b/testsuite/MDAnalysisTests/analysis/test_pca.py index 93b9bd25a98..7dadce48af2 100644 --- a/testsuite/MDAnalysisTests/analysis/test_pca.py +++ b/testsuite/MDAnalysisTests/analysis/test_pca.py @@ -71,7 +71,7 @@ def test_cov(pca, u): def test_cum_var(pca): assert_almost_equal(pca.results.cumulated_variance[-1], 1) cum_var = pca.results.cumulated_variance - cum_var = np.sort(l) + cum_var = np.sort(cum_var) assert_almost_equal(pca.results.cumulated_variance, cum_var, 5) From 2f4c85809d874b0b02d001d589706c98eab42ba8 Mon Sep 17 00:00:00 2001 From: Philip Loche Date: Sat, 8 May 2021 00:23:52 +0200 Subject: [PATCH 07/17] check object instance --- testsuite/MDAnalysisTests/analysis/test_pca.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testsuite/MDAnalysisTests/analysis/test_pca.py b/testsuite/MDAnalysisTests/analysis/test_pca.py index 7dadce48af2..00b46d07718 100644 --- a/testsuite/MDAnalysisTests/analysis/test_pca.py +++ b/testsuite/MDAnalysisTests/analysis/test_pca.py @@ -252,4 +252,4 @@ def test_pca_attr_warning(u, attr): pca = PCA(u, select=SELECTION).run(stop=2) wmsg = f"The `{attr}` attribute was deprecated in MDAnalysis 2.0.0" with pytest.warns(DeprecationWarning, match=wmsg): - assert_equal(getattr(pca, attr), pca.results[attr]) + getattr(pca, attr) is pca.results[attr] From 45c238efa56df8f0ea91e614b16073610253dd9a Mon Sep 17 00:00:00 2001 From: Philip Loche Date: Sat, 8 May 2021 01:05:12 +0200 Subject: [PATCH 08/17] Update package/MDAnalysis/analysis/pca.py Co-authored-by: Irfan Alibay --- package/MDAnalysis/analysis/pca.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package/MDAnalysis/analysis/pca.py b/package/MDAnalysis/analysis/pca.py index de962855975..190a641031e 100644 --- a/package/MDAnalysis/analysis/pca.py +++ b/package/MDAnalysis/analysis/pca.py @@ -91,7 +91,7 @@ to retain. The choice is arbitrary, but I will stop when 95 percent of the variance is explained by the components. This cumulated variance by the components is conveniently stored in the one-dimensional array attribute -``cumulated_variance``. The value at the ith index of `cumulated_variance` +``results.cumulated_variance``. The value at the ith index of `results.cumulated_variance` is the sum of the variances from 0 to i. .. code-block:: python From 91941165dd9f175f34ba502a245d56c38404cedb Mon Sep 17 00:00:00 2001 From: Philip Loche Date: Sat, 8 May 2021 01:07:52 +0200 Subject: [PATCH 09/17] PEP8 --- package/MDAnalysis/analysis/pca.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/package/MDAnalysis/analysis/pca.py b/package/MDAnalysis/analysis/pca.py index 190a641031e..31705cb2781 100644 --- a/package/MDAnalysis/analysis/pca.py +++ b/package/MDAnalysis/analysis/pca.py @@ -91,8 +91,8 @@ to retain. The choice is arbitrary, but I will stop when 95 percent of the variance is explained by the components. This cumulated variance by the components is conveniently stored in the one-dimensional array attribute -``results.cumulated_variance``. The value at the ith index of `results.cumulated_variance` -is the sum of the variances from 0 to i. +``results.cumulated_variance``. The value at the ith index of +``results.cumulated_variance`` is the sum of the variances from 0 to i. .. code-block:: python From 8320edd7e7c3483d228351624b30045f11015b45 Mon Sep 17 00:00:00 2001 From: Philip Loche Date: Mon, 10 May 2021 11:08:32 +0200 Subject: [PATCH 10/17] Apply suggestions from fiona Co-authored-by: Fiona Naughton --- package/MDAnalysis/analysis/pca.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/package/MDAnalysis/analysis/pca.py b/package/MDAnalysis/analysis/pca.py index 31705cb2781..93caa5c4152 100644 --- a/package/MDAnalysis/analysis/pca.py +++ b/package/MDAnalysis/analysis/pca.py @@ -73,7 +73,7 @@ import MDAnalysis as mda import MDAnalysis.analysis.pca as pca - rom MDAnalysis.tests.datafiles import PSF, DCD + from MDAnalysis.tests.datafiles import PSF, DCD Given a universe containing trajectory data we can perform Principal Component @@ -96,7 +96,7 @@ .. code-block:: python - n_pcs = np.where(PSF_pca.cumulated_variance > 0.95)[0][0] + n_pcs = np.where(PSF_pca.results.cumulated_variance > 0.95)[0][0] atomgroup = u.select_atoms('backbone') pca_space = PSF_pca.transform(atomgroup, n_components=n_pcs) From d84380125926d8618a60da8af623c5bc032bc176 Mon Sep 17 00:00:00 2001 From: Oliver Beckstein Date: Wed, 26 May 2021 15:48:01 -0700 Subject: [PATCH 11/17] removed deprecation test for mean_atoms --- testsuite/MDAnalysisTests/analysis/test_pca.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testsuite/MDAnalysisTests/analysis/test_pca.py b/testsuite/MDAnalysisTests/analysis/test_pca.py index 44fd2ef4f30..931a04673f8 100644 --- a/testsuite/MDAnalysisTests/analysis/test_pca.py +++ b/testsuite/MDAnalysisTests/analysis/test_pca.py @@ -256,7 +256,7 @@ def test_compare_wrong_class(u, pca, method): @pytest.mark.parametrize("attr", ("p_components", "variance", - "cumulated_variance", "mean_atoms")) + "cumulated_variance")) def test_pca_attr_warning(u, attr): pca = PCA(u, select=SELECTION).run(stop=2) wmsg = f"The `{attr}` attribute was deprecated in MDAnalysis 2.0.0" From 3dfdb1553cf34bc1abe9c4036e6f70741594868e Mon Sep 17 00:00:00 2001 From: Philip Loche Date: Thu, 27 May 2021 11:26:05 +0200 Subject: [PATCH 12/17] Smooth and tidy docs --- package/MDAnalysis/analysis/pca.py | 33 +++++++++++++----------------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/package/MDAnalysis/analysis/pca.py b/package/MDAnalysis/analysis/pca.py index 860bb88a3a0..02c85c29cfe 100644 --- a/package/MDAnalysis/analysis/pca.py +++ b/package/MDAnalysis/analysis/pca.py @@ -46,7 +46,8 @@ explains. Stored in :attr:`PCA.cumulated_variance`, a ratio for each number of eigenvectors up to index :math:`i` is provided to quickly find out how many principal components are needed to explain the amount of variance reflected by -those :math:`i` eigenvectors. For most data, :attr:`PCA.cumulated_variance` +those :math:`i` eigenvectors. For most data, +:attr:`PCA.results.cumulated_variance` will be approximately equal to one for some :math:`n` that is significantly smaller than the total number of components, these are the components of interest given by Principal Component Analysis. @@ -67,9 +68,7 @@ :data:`~MDAnalysis.tests.datafiles.DCD`). This tutorial shows how to use the PCA class. -First load all modules and test data - -.. code-block:: python +First load all modules and test data:: import MDAnalysis as mda import MDAnalysis.analysis.pca as pca @@ -78,9 +77,7 @@ Given a universe containing trajectory data we can perform Principal Component Analyis by using the class :class:`PCA` and retrieving the principal -components. - -.. code-block:: python +components.:: u = mda.Universe(PSF, DCD) PSF_pca = pca.PCA(u, select='backbone') @@ -91,10 +88,9 @@ to retain. The choice is arbitrary, but I will stop when 95 percent of the variance is explained by the components. This cumulated variance by the components is conveniently stored in the one-dimensional array attribute -``results.cumulated_variance``. The value at the ith index of -``results.cumulated_variance`` is the sum of the variances from 0 to i. - -.. code-block:: python +:attr:`PCA.results.cumulated_variance`. The value at the ith index of +:attr:`PCA.results.cumulated_variance` is the sum of the variances from 0 to +i.:: n_pcs = np.where(PSF_pca.results.cumulated_variance > 0.95)[0][0] atomgroup = u.select_atoms('backbone') @@ -133,9 +129,7 @@ class PCA(AnalysisBase): After initializing and calling method with a universe or an atom group, principal components ordering the atom coordinate data by decreasing - variance will be available for analysis. As an example: - - .. code-block:: python + variance will be available for analysis. As an example::: pca = PCA(universe, select='backbone').run() pca_space = pca.transform(universe.select_atoms('backbone'), 3) @@ -158,7 +152,7 @@ class PCA(AnalysisBase): structure. mean : array_like, optional Optional reference positions to be be used as the mean of the - covariance matrix. + covariance matrix. n_components : int, optional The number of principal components to be saved, default saves all principal components @@ -237,7 +231,7 @@ class PCA(AnalysisBase): .. versionchanged:: 2.0.0 ``mean_atoms`` removed, as this did not reliably contain the mean positions. - ``mean`` input now accepts coordinate arrays instead of atomgroup. + ``mean`` input now accepts coordinate arrays instead of atomgroup. :attr:`p_components`, :attr:`variance` and :attr:`cumulated_variance` are now stored in a :class:`MDAnalysis.analysis.base.Results` instance. """ @@ -456,7 +450,8 @@ def rmsip(self, other, n_components=None): See also -------- - rmsip + :func:`rmsip` + .. versionadded:: 1.0.0 """ @@ -508,10 +503,10 @@ def cumulative_overlap(self, other, i=0, n_components=None): See also -------- - cumulative_overlap + :func:`cumulative_overlap` - .. versionadded:: 1.0.0 + .. versionadded:: 1.0.0 """ try: From 72a2bb0aba0a3b943e7c766bc0d114f4d2d269d8 Mon Sep 17 00:00:00 2001 From: Oliver Beckstein Date: Thu, 27 May 2021 09:17:50 -0700 Subject: [PATCH 13/17] Apply suggestions from code review --- package/MDAnalysis/analysis/pca.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/package/MDAnalysis/analysis/pca.py b/package/MDAnalysis/analysis/pca.py index 02c85c29cfe..fe61f12ff50 100644 --- a/package/MDAnalysis/analysis/pca.py +++ b/package/MDAnalysis/analysis/pca.py @@ -450,7 +450,7 @@ def rmsip(self, other, n_components=None): See also -------- - :func:`rmsip` + :func:`~MDAnalysis.analysis.pca.rmsip` .. versionadded:: 1.0.0 @@ -503,7 +503,7 @@ def cumulative_overlap(self, other, i=0, n_components=None): See also -------- - :func:`cumulative_overlap` + :func:`~MDAnalysis.analysis.pca.cumulative_overlap` .. versionadded:: 1.0.0 From 6fadd78e20bd9d3ec8d885d587c396acc60b6c93 Mon Sep 17 00:00:00 2001 From: Oliver Beckstein Date: Thu, 27 May 2021 09:52:39 -0700 Subject: [PATCH 14/17] doc fix thanks @IAlibay --- package/MDAnalysis/analysis/pca.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/package/MDAnalysis/analysis/pca.py b/package/MDAnalysis/analysis/pca.py index fe61f12ff50..f5450f407f5 100644 --- a/package/MDAnalysis/analysis/pca.py +++ b/package/MDAnalysis/analysis/pca.py @@ -43,13 +43,13 @@ components are the eigenvectors of this matrix. For each eigenvector, its eigenvalue is the variance that the eigenvector -explains. Stored in :attr:`PCA.cumulated_variance`, a ratio for each number of -eigenvectors up to index :math:`i` is provided to quickly find out how many -principal components are needed to explain the amount of variance reflected by -those :math:`i` eigenvectors. For most data, +explains. Stored in :attr:`PCA.results.cumulated_variance`, a ratio for each +number of eigenvectors up to index :math:`i` is provided to quickly find out +how many principal components are needed to explain the amount of variance +reflected by those :math:`i` eigenvectors. For most data, :attr:`PCA.results.cumulated_variance` will be approximately equal to one for some :math:`n` that is significantly -smaller than the total number of components, these are the components of +smaller than the total number of components. These are the components of interest given by Principal Component Analysis. From here, we can project a trajectory onto these principal components and From 9b7f6a6ac70a9cf6c592e48d20875f47214165b7 Mon Sep 17 00:00:00 2001 From: Oliver Beckstein Date: Thu, 27 May 2021 09:56:13 -0700 Subject: [PATCH 15/17] pep8 trailing whitespace fixes --- package/MDAnalysis/analysis/pca.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/package/MDAnalysis/analysis/pca.py b/package/MDAnalysis/analysis/pca.py index f5450f407f5..f1a3e442944 100644 --- a/package/MDAnalysis/analysis/pca.py +++ b/package/MDAnalysis/analysis/pca.py @@ -43,10 +43,10 @@ components are the eigenvectors of this matrix. For each eigenvector, its eigenvalue is the variance that the eigenvector -explains. Stored in :attr:`PCA.results.cumulated_variance`, a ratio for each -number of eigenvectors up to index :math:`i` is provided to quickly find out -how many principal components are needed to explain the amount of variance -reflected by those :math:`i` eigenvectors. For most data, +explains. Stored in :attr:`PCA.results.cumulated_variance`, a ratio for each +number of eigenvectors up to index :math:`i` is provided to quickly find out +how many principal components are needed to explain the amount of variance +reflected by those :math:`i` eigenvectors. For most data, :attr:`PCA.results.cumulated_variance` will be approximately equal to one for some :math:`n` that is significantly smaller than the total number of components. These are the components of @@ -89,7 +89,7 @@ variance is explained by the components. This cumulated variance by the components is conveniently stored in the one-dimensional array attribute :attr:`PCA.results.cumulated_variance`. The value at the ith index of -:attr:`PCA.results.cumulated_variance` is the sum of the variances from 0 to +:attr:`PCA.results.cumulated_variance` is the sum of the variances from 0 to i.:: n_pcs = np.where(PSF_pca.results.cumulated_variance > 0.95)[0][0] From ef1c00dc7c4824352e5aeaebdd65421061fa8bd4 Mon Sep 17 00:00:00 2001 From: Oliver Beckstein Date: Thu, 27 May 2021 11:05:46 -0700 Subject: [PATCH 16/17] cross-referenced rmsip <-> cumulative_overlap --- package/MDAnalysis/analysis/pca.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/package/MDAnalysis/analysis/pca.py b/package/MDAnalysis/analysis/pca.py index f1a3e442944..bebc9038e3b 100644 --- a/package/MDAnalysis/analysis/pca.py +++ b/package/MDAnalysis/analysis/pca.py @@ -450,7 +450,7 @@ def rmsip(self, other, n_components=None): See also -------- - :func:`~MDAnalysis.analysis.pca.rmsip` + :meth:`cumulative_overlap` .. versionadded:: 1.0.0 @@ -503,7 +503,7 @@ def cumulative_overlap(self, other, i=0, n_components=None): See also -------- - :func:`~MDAnalysis.analysis.pca.cumulative_overlap` + :meth:`rmsip` .. versionadded:: 1.0.0 From a2f52877adb84f1ccb77075883bfd3ef927237e2 Mon Sep 17 00:00:00 2001 From: Oliver Beckstein Date: Thu, 27 May 2021 12:00:04 -0700 Subject: [PATCH 17/17] fixed docs, for good --- package/MDAnalysis/analysis/pca.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/package/MDAnalysis/analysis/pca.py b/package/MDAnalysis/analysis/pca.py index bebc9038e3b..c12d79e347c 100644 --- a/package/MDAnalysis/analysis/pca.py +++ b/package/MDAnalysis/analysis/pca.py @@ -109,6 +109,10 @@ .. autofunction:: cosine_content +.. autofunction:: rmsip + +.. autofunction:: cumulative_overlap + """ import warnings @@ -450,7 +454,7 @@ def rmsip(self, other, n_components=None): See also -------- - :meth:`cumulative_overlap` + :func:`~MDAnalysis.analysis.pca.rmsip` .. versionadded:: 1.0.0 @@ -503,7 +507,7 @@ def cumulative_overlap(self, other, i=0, n_components=None): See also -------- - :meth:`rmsip` + :func:`~MDAnalysis.analysis.pca.cumulative_overlap` .. versionadded:: 1.0.0 @@ -594,6 +598,7 @@ def rmsip(a, b, n_components=None): 0 indicates that they are mutually orthogonal, whereas 1 indicates that they are identical. + .. versionadded:: 1.0.0 """ n_components = util.asiterable(n_components) @@ -647,6 +652,7 @@ def cumulative_overlap(a, b, i=0, n_components=None): 0 indicates that they are mutually orthogonal, whereas 1 indicates that they are identical. + .. versionadded:: 1.0.0 """