diff --git a/doc/changes/latest.inc b/doc/changes/latest.inc
index 13de6d4ccd3..2b2a3d45067 100644
--- a/doc/changes/latest.inc
+++ b/doc/changes/latest.inc
@@ -45,6 +45,7 @@ Enhancements
- Add ``==`` and ``!=`` comparison between `mne.Projection` objects (:gh:`11147` by `Mathieu Scheltienne`_)
- Parse automatically temperature channel with :func:`mne.io.read_raw_edf` (:gh:`11150` by `Eric Larson`_ and `Alex Gramfort`_)
- Add ``encoding`` parameter to :func:`mne.io.read_raw_edf` and :func:`mne.io.read_raw_bdf` to support custom (non-UTF8) annotation channel encodings (:gh:`11154` by `Clemens Brunner`_)
+- :class:`mne.preprocessing.ICA` gained a new method, :meth:`~mne.preprocessing.ICA.get_explained_variance_ratio`, that allows the retrieval of the proportion of variance explained by ICA components (:gh:`11141` by `Richard Höchenberger`_)
Bugs
~~~~
@@ -58,6 +59,7 @@ Bugs
- Fix bug in :func:`mne.viz.plot_filter` when plotting filters created using ``output='ba'`` mode with ``compensation`` turned on. (:gh:`11040` by `Marian Dovgialo`_)
- Fix bug in :func:`mne.io.read_raw_bti` where EEG, EMG, and H/VEOG channels were not detected properly, and many non-ECG channels were called ECG. The logic has been improved, and any channels of unknown type are now labeled as ``misc`` (:gh:`11102` by `Eric Larson`_)
- Fix bug in :func:`mne.viz.plot_topomap` when providing ``sphere="eeglab"`` (:gh:`11081` by `Mathieu Scheltienne`_)
+- The string and HTML representation of :class:`mne.preprocessing.ICA` reported incorrect values for the explained variance. This information has been removed from the representations, and should instead be retrieved via the new :meth:`~mne.preprocessing.ICA.get_explained_variance_ratio` method (:gh:`11141` by `Richard Höchenberger`_)
API changes
~~~~~~~~~~~
diff --git a/mne/html_templates/repr/ica.html.jinja b/mne/html_templates/repr/ica.html.jinja
index 0fb0e053b4e..080c3cd5e95 100644
--- a/mne/html_templates/repr/ica.html.jinja
+++ b/mne/html_templates/repr/ica.html.jinja
@@ -12,10 +12,6 @@
ICA components |
{{ n_components }} |
-
- | Explained variance |
- {{ (explained_variance * 100) | round(1) }} % |
-
| Available PCA components |
{{ n_pca_components }} |
diff --git a/mne/preprocessing/ica.py b/mne/preprocessing/ica.py
index 16e7ab63b44..b9c42b503c4 100644
--- a/mne/preprocessing/ica.py
+++ b/mne/preprocessing/ica.py
@@ -8,11 +8,13 @@
from inspect import isfunction
from collections import namedtuple
+from collections.abc import Sequence
from copy import deepcopy
from numbers import Integral
from time import time
from dataclasses import dataclass
from typing import Optional, List
+import warnings
import math
import json
@@ -452,7 +454,6 @@ class _InfosForRepr:
fit_n_samples: Optional[int]
fit_n_components: Optional[int]
fit_n_pca_components: Optional[int]
- fit_explained_variance: Optional[float]
ch_types: List[str]
excludes: List[str]
@@ -470,11 +471,6 @@ class _InfosForRepr:
fit_n_pca_components = getattr(self, 'pca_components_', None)
if fit_n_pca_components is not None:
fit_n_pca_components = len(self.pca_components_)
- fit_explained_variance = getattr(self, 'pca_explained_variance_', None)
- if fit_explained_variance is not None:
- abs_vars = self.pca_explained_variance_
- rel_vars = abs_vars / abs_vars.sum()
- fit_explained_variance = rel_vars[:fit_n_components].sum()
if self.info is not None:
ch_types = [c for c in _DATA_CH_TYPES_SPLIT if c in self]
@@ -493,7 +489,6 @@ class _InfosForRepr:
fit_n_samples=fit_n_samples,
fit_n_components=fit_n_components,
fit_n_pca_components=fit_n_pca_components,
- fit_explained_variance=fit_explained_variance,
ch_types=ch_types,
excludes=excludes
)
@@ -511,8 +506,6 @@ def __repr__(self):
f' (fit in {infos.fit_n_iter} iterations on '
f'{infos.fit_n_samples} samples), '
f'{infos.fit_n_components} ICA components '
- f'explaining {round(infos.fit_explained_variance * 100, 1)} % '
- f'of variance '
f'({infos.fit_n_pca_components} PCA components available), '
f'channel types: {", ".join(infos.ch_types)}, '
f'{len(infos.excludes) or "no"} sources marked for exclusion'
@@ -531,7 +524,6 @@ def _repr_html_(self):
n_samples=infos.fit_n_samples,
n_components=infos.fit_n_components,
n_pca_components=infos.fit_n_pca_components,
- explained_variance=infos.fit_explained_variance,
ch_types=infos.ch_types,
excludes=infos.excludes
)
@@ -962,6 +954,141 @@ def get_components(self):
return np.dot(self.mixing_matrix_[:, :self.n_components_].T,
self.pca_components_[:self.n_components_]).T
+ def get_explained_variance_ratio(
+ self, inst, *, components=None, ch_type=None
+ ):
+ """Get the proportion of data variance explained by ICA components.
+
+ Parameters
+ ----------
+ inst : mne.io.BaseRaw | mne.BaseEpochs | mne.Evoked
+ The uncleaned data.
+ components : array-like of int | int | None
+ The component(s) for which to do the calculation. If more than one
+ component is specified, explained variance will be calculated
+ jointly across all supplied components. If ``None`` (default), uses
+ all available components.
+ ch_type : 'mag' | 'grad' | 'planar1' | 'planar2' | 'eeg' | array-like of str | None
+ The channel type(s) to include in the calculation. If ``None``, all
+ available channel types will be used.
+
+ Returns
+ -------
+ dict (str, float)
+ The fraction of variance in ``inst`` that can be explained by the
+ ICA components, calculated separately for each channel type.
+ Dictionary keys are the channel types, and corresponding explained
+ variance ratios are the values.
+
+ Notes
+ -----
+ A value similar to EEGLAB's ``pvaf`` (percent variance accounted for)
+ will be calculated for the specified component(s).
+
+ Since ICA components cannot be assumed to be aligned orthogonally, the
+ sum of the proportion of variance explained by all components may not
+ be equal to 1. In certain situations, the proportion of variance
+ explained by a component may even be negative.
+
+ .. versionadded:: 1.2
+ """ # noqa: E501
+ if self.current_fit == 'unfitted':
+ raise ValueError('ICA must be fitted first.')
+
+ _validate_type(
+ item=inst, types=(BaseRaw, BaseEpochs, Evoked),
+ item_name='inst'
+ )
+ _validate_type(
+ item=components, types=(None, 'int-like', Sequence, np.ndarray),
+ item_name='components', type_name='int, array-like of int, or None'
+ )
+ if isinstance(components, (Sequence, np.ndarray)):
+ for item in components:
+ _validate_type(
+ item=item, types='int-like',
+ item_name='Elements of "components"'
+ )
+
+ _validate_type(
+ item=ch_type, types=(Sequence, np.ndarray, str, None),
+ item_name='ch_type', type_name='str, array-like of str, or None'
+ )
+ if isinstance(ch_type, str):
+ ch_types = [ch_type]
+ elif ch_type is None:
+ ch_types = inst.get_channel_types(unique=True, only_data_chs=True)
+ else:
+ assert isinstance(ch_type, (Sequence, np.ndarray))
+ ch_types = ch_type
+
+ assert len(ch_types) >= 1
+ allowed_ch_types = ('mag', 'grad', 'planar1', 'planar2', 'eeg')
+ for ch_type in ch_types:
+ if ch_type not in allowed_ch_types:
+ raise ValueError(
+ f'You requested operation on the channel type '
+ f'"{ch_type}", but only the following channel types are '
+ f'supported: {", ".join(allowed_ch_types)}'
+ )
+ del ch_type
+
+ # Input data validation ends here
+ if components is None:
+ components = range(self.n_components_)
+
+ explained_var_ratios = [
+ self._get_explained_variance_ratio_one_ch_type(
+ inst=inst, components=components, ch_type=ch_type
+ ) for ch_type in ch_types
+ ]
+ result = dict(zip(ch_types, explained_var_ratios))
+ return result
+
+ def _get_explained_variance_ratio_one_ch_type(
+ self, *, inst, components, ch_type
+ ):
+ # The algorithm implemented below should be equivalent to
+ # https://sccn.ucsd.edu/pipermail/eeglablist/2014/009134.html
+ #
+ # Reconstruct ("back-project") the data using only the specified ICA
+ # components. Don't make use of potential "spare" PCA components in
+ # this process – we're only interested in the contribution of the ICA
+ # components!
+ kwargs = dict(
+ inst=inst.copy(),
+ include=[components],
+ exclude=[],
+ n_pca_components=0,
+ verbose=False,
+ )
+ if (
+ isinstance(inst, (BaseEpochs, Evoked)) and
+ inst.baseline is not None
+ ):
+ # Don't warn if data was baseline-corrected.
+ with warnings.catch_warnings():
+ warnings.filterwarnings(
+ action='ignore',
+ message='The data.*was baseline-corrected',
+ category=RuntimeWarning
+ )
+ inst_recon = self.apply(**kwargs)
+ else:
+ inst_recon = self.apply(**kwargs)
+
+ data_recon = inst_recon.get_data(picks=ch_type)
+ data_orig = inst.get_data(picks=ch_type)
+ data_diff = data_orig - data_recon
+
+ # To estimate the data variance, we first compute the variance across
+ # channels at each time point, and then we average these variances.
+ mean_var_diff = data_diff.var(axis=0).mean()
+ mean_var_orig = data_orig.var(axis=0).mean()
+
+ var_explained_ratio = 1 - mean_var_diff / mean_var_orig
+ return var_explained_ratio
+
def get_sources(self, inst, add_channels=None, start=None, stop=None):
"""Estimate sources given the unmixing matrix.
@@ -2247,6 +2374,8 @@ def _find_sources(sources, target, score_func):
def _ica_explained_variance(ica, inst, normalize=False):
"""Check variance accounted for by each component in supplied data.
+ This function is only used for sorting the components.
+
Parameters
----------
ica : ICA
diff --git a/mne/preprocessing/tests/test_ica.py b/mne/preprocessing/tests/test_ica.py
index 75c61c14236..ad203756211 100644
--- a/mne/preprocessing/tests/test_ica.py
+++ b/mne/preprocessing/tests/test_ica.py
@@ -490,8 +490,6 @@ def test_ica_core(method, n_components, noise_cov, n_pca_components,
assert 'raw data decomposition' in repr_
assert f'{ica.n_components_} ICA components' in repr_
assert 'Available PCA components' in repr_html_
- assert 'Explained variance' in repr_html_
-
assert ('mag' in ica) # should now work without error
# test re-fit
@@ -949,6 +947,95 @@ def f(x, y):
ica.fit(raw_, picks=picks, reject_by_annotation=True)
+@requires_sklearn
+def test_get_explained_variance_ratio(tmp_path, short_raw_epochs):
+ """Test ICA.get_explained_variance_ratio()."""
+ raw, epochs, _ = short_raw_epochs
+ ica = ICA(max_iter=1)
+
+ # Unfitted ICA should raise an exception
+ with pytest.raises(ValueError, match='ICA must be fitted first'):
+ ica.get_explained_variance_ratio(epochs)
+
+ with pytest.warns(RuntimeWarning, match='were baseline-corrected'):
+ ica.fit(epochs)
+
+ # components = int, ch_type = None
+ explained_var_comp_0 = ica.get_explained_variance_ratio(
+ epochs, components=0
+ )
+ # components = int, ch_type = str
+ explained_var_comp_0_eeg = ica.get_explained_variance_ratio(
+ epochs, components=0, ch_type='eeg'
+ )
+ # components = int, ch_type = list of str
+ explained_var_comp_0_eeg_mag = ica.get_explained_variance_ratio(
+ epochs, components=0, ch_type=['eeg', 'mag']
+ )
+ # components = list of int, single element, ch_type = None
+ explained_var_comp_1 = ica.get_explained_variance_ratio(
+ epochs, components=[1]
+ )
+ # components = list of int, multiple elements, ch_type = None
+ explained_var_comps_01 = ica.get_explained_variance_ratio(
+ epochs, components=[0, 1]
+ )
+ # components = None, i.e., all components, ch_type = None
+ explained_var_comps_all = ica.get_explained_variance_ratio(
+ epochs, components=None
+ )
+
+ assert 'grad' in explained_var_comp_0
+ assert 'mag' in explained_var_comp_0
+ assert 'eeg' in explained_var_comp_0
+
+ assert len(explained_var_comp_0_eeg) == 1
+ assert 'eeg' in explained_var_comp_0_eeg
+
+ assert 'mag' in explained_var_comp_0_eeg_mag
+ assert 'eeg' in explained_var_comp_0_eeg_mag
+ assert 'grad' not in explained_var_comp_0_eeg_mag
+
+ assert round(explained_var_comp_0['grad'], 4) == 0.1784
+ assert round(explained_var_comp_0['mag'], 4) == 0.0259
+ assert round(explained_var_comp_0['eeg'], 4) == 0.0229
+
+ assert np.isclose(
+ explained_var_comp_0['eeg'],
+ explained_var_comp_0_eeg['eeg']
+ )
+ assert np.isclose(
+ explained_var_comp_0['mag'],
+ explained_var_comp_0_eeg_mag['mag']
+ )
+ assert np.isclose(
+ explained_var_comp_0['eeg'],
+ explained_var_comp_0_eeg_mag['eeg']
+ )
+
+ assert round(explained_var_comp_1['eeg'], 4) == 0.0231
+ assert round(explained_var_comps_01['eeg'], 4) == 0.0459
+ assert (
+ explained_var_comps_all['grad'] ==
+ explained_var_comps_all['mag'] ==
+ explained_var_comps_all['eeg'] ==
+ 1
+ )
+
+ # Test Raw
+ ica.get_explained_variance_ratio(raw)
+ # Test Evoked
+ evoked = epochs.average()
+ ica.get_explained_variance_ratio(evoked)
+ # Test Evoked without baseline correction
+ evoked.baseline = None
+ ica.get_explained_variance_ratio(evoked)
+
+ # Test invalid ch_type
+ with pytest.raises(ValueError, match='only the following channel types'):
+ ica.get_explained_variance_ratio(raw, ch_type='foobar')
+
+
@requires_sklearn
@pytest.mark.slowtest
@pytest.mark.parametrize('method, cov', [
diff --git a/tutorials/preprocessing/40_artifact_correction_ica.py b/tutorials/preprocessing/40_artifact_correction_ica.py
index ae04d9cf4fe..8a95b0ab471 100644
--- a/tutorials/preprocessing/40_artifact_correction_ica.py
+++ b/tutorials/preprocessing/40_artifact_correction_ica.py
@@ -213,8 +213,8 @@
filt_raw = raw.copy().filter(l_freq=1., h_freq=None)
# %%
-# Fitting and plotting the ICA solution
-# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# Fitting ICA
+# ~~~~~~~~~~~
#
# .. admonition:: Ignoring the time domain
# :class: sidebar hint
@@ -262,8 +262,42 @@
# speed-up) and ``reject`` (for providing a rejection dictionary for maximum
# acceptable peak-to-peak amplitudes for each channel type, just like we used
# when creating epoched data in the :ref:`tut-overview` tutorial).
-#
+
+# %%
+# Looking at the ICA solution
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Now we can examine the ICs to see what they captured.
+#
+# Using :meth:`~mne.preprocessing.ICA.get_explained_variance_ratio`, we can
+# retrieve the fraction of variance in the original data that is explained by
+# our ICA components in the form of a dictionary:
+
+explained_var_ratio = ica.get_explained_variance_ratio(filt_raw)
+for channel_type, ratio in explained_var_ratio.items():
+ print(
+ f'Fraction of {channel_type} variance explained by all components: '
+ f'{ratio}'
+ )
+
+# %%
+# The values were calculated for all ICA components jointly, but separately for
+# each channel type (here: magnetometers and EEG).
+#
+# We can also explicitly request for which component(s) and channel type(s) to
+# perform the computation:
+explained_var_ratio = ica.get_explained_variance_ratio(
+ filt_raw,
+ components=[0],
+ ch_type='eeg'
+)
+# This time, print as percentage.
+ratio_percent = round(100 * explained_var_ratio['eeg'])
+print(
+ f'Fraction of variance in EEG signal explained by first component: '
+ f'{ratio_percent}%'
+)
+
+# %%
# `~mne.preprocessing.ICA.plot_sources` will show the time series of the
# ICs. Note that in our call to `~mne.preprocessing.ICA.plot_sources` we
# can use the original, unfiltered `~mne.io.Raw` object. A helpful tip is that