diff --git a/doc/changes/latest.inc b/doc/changes/latest.inc index 13de6d4ccd3..2b2a3d45067 100644 --- a/doc/changes/latest.inc +++ b/doc/changes/latest.inc @@ -45,6 +45,7 @@ Enhancements - Add ``==`` and ``!=`` comparison between `mne.Projection` objects (:gh:`11147` by `Mathieu Scheltienne`_) - Parse automatically temperature channel with :func:`mne.io.read_raw_edf` (:gh:`11150` by `Eric Larson`_ and `Alex Gramfort`_) - Add ``encoding`` parameter to :func:`mne.io.read_raw_edf` and :func:`mne.io.read_raw_bdf` to support custom (non-UTF8) annotation channel encodings (:gh:`11154` by `Clemens Brunner`_) +- :class:`mne.preprocessing.ICA` gained a new method, :meth:`~mne.preprocessing.ICA.get_explained_variance_ratio`, that allows the retrieval of the proportion of variance explained by ICA components (:gh:`11141` by `Richard Höchenberger`_) Bugs ~~~~ @@ -58,6 +59,7 @@ Bugs - Fix bug in :func:`mne.viz.plot_filter` when plotting filters created using ``output='ba'`` mode with ``compensation`` turned on. (:gh:`11040` by `Marian Dovgialo`_) - Fix bug in :func:`mne.io.read_raw_bti` where EEG, EMG, and H/VEOG channels were not detected properly, and many non-ECG channels were called ECG. The logic has been improved, and any channels of unknown type are now labeled as ``misc`` (:gh:`11102` by `Eric Larson`_) - Fix bug in :func:`mne.viz.plot_topomap` when providing ``sphere="eeglab"`` (:gh:`11081` by `Mathieu Scheltienne`_) +- The string and HTML representation of :class:`mne.preprocessing.ICA` reported incorrect values for the explained variance. This information has been removed from the representations, and should instead be retrieved via the new :meth:`~mne.preprocessing.ICA.get_explained_variance_ratio` method (:gh:`11141` by `Richard Höchenberger`_) API changes ~~~~~~~~~~~ diff --git a/mne/html_templates/repr/ica.html.jinja b/mne/html_templates/repr/ica.html.jinja index 0fb0e053b4e..080c3cd5e95 100644 --- a/mne/html_templates/repr/ica.html.jinja +++ b/mne/html_templates/repr/ica.html.jinja @@ -12,10 +12,6 @@ ICA components {{ n_components }} - - Explained variance - {{ (explained_variance * 100) | round(1) }} % - Available PCA components {{ n_pca_components }} diff --git a/mne/preprocessing/ica.py b/mne/preprocessing/ica.py index 16e7ab63b44..b9c42b503c4 100644 --- a/mne/preprocessing/ica.py +++ b/mne/preprocessing/ica.py @@ -8,11 +8,13 @@ from inspect import isfunction from collections import namedtuple +from collections.abc import Sequence from copy import deepcopy from numbers import Integral from time import time from dataclasses import dataclass from typing import Optional, List +import warnings import math import json @@ -452,7 +454,6 @@ class _InfosForRepr: fit_n_samples: Optional[int] fit_n_components: Optional[int] fit_n_pca_components: Optional[int] - fit_explained_variance: Optional[float] ch_types: List[str] excludes: List[str] @@ -470,11 +471,6 @@ class _InfosForRepr: fit_n_pca_components = getattr(self, 'pca_components_', None) if fit_n_pca_components is not None: fit_n_pca_components = len(self.pca_components_) - fit_explained_variance = getattr(self, 'pca_explained_variance_', None) - if fit_explained_variance is not None: - abs_vars = self.pca_explained_variance_ - rel_vars = abs_vars / abs_vars.sum() - fit_explained_variance = rel_vars[:fit_n_components].sum() if self.info is not None: ch_types = [c for c in _DATA_CH_TYPES_SPLIT if c in self] @@ -493,7 +489,6 @@ class _InfosForRepr: fit_n_samples=fit_n_samples, fit_n_components=fit_n_components, fit_n_pca_components=fit_n_pca_components, - fit_explained_variance=fit_explained_variance, ch_types=ch_types, excludes=excludes ) @@ -511,8 +506,6 @@ def __repr__(self): f' (fit in {infos.fit_n_iter} iterations on ' f'{infos.fit_n_samples} samples), ' f'{infos.fit_n_components} ICA components ' - f'explaining {round(infos.fit_explained_variance * 100, 1)} % ' - f'of variance ' f'({infos.fit_n_pca_components} PCA components available), ' f'channel types: {", ".join(infos.ch_types)}, ' f'{len(infos.excludes) or "no"} sources marked for exclusion' @@ -531,7 +524,6 @@ def _repr_html_(self): n_samples=infos.fit_n_samples, n_components=infos.fit_n_components, n_pca_components=infos.fit_n_pca_components, - explained_variance=infos.fit_explained_variance, ch_types=infos.ch_types, excludes=infos.excludes ) @@ -962,6 +954,141 @@ def get_components(self): return np.dot(self.mixing_matrix_[:, :self.n_components_].T, self.pca_components_[:self.n_components_]).T + def get_explained_variance_ratio( + self, inst, *, components=None, ch_type=None + ): + """Get the proportion of data variance explained by ICA components. + + Parameters + ---------- + inst : mne.io.BaseRaw | mne.BaseEpochs | mne.Evoked + The uncleaned data. + components : array-like of int | int | None + The component(s) for which to do the calculation. If more than one + component is specified, explained variance will be calculated + jointly across all supplied components. If ``None`` (default), uses + all available components. + ch_type : 'mag' | 'grad' | 'planar1' | 'planar2' | 'eeg' | array-like of str | None + The channel type(s) to include in the calculation. If ``None``, all + available channel types will be used. + + Returns + ------- + dict (str, float) + The fraction of variance in ``inst`` that can be explained by the + ICA components, calculated separately for each channel type. + Dictionary keys are the channel types, and corresponding explained + variance ratios are the values. + + Notes + ----- + A value similar to EEGLAB's ``pvaf`` (percent variance accounted for) + will be calculated for the specified component(s). + + Since ICA components cannot be assumed to be aligned orthogonally, the + sum of the proportion of variance explained by all components may not + be equal to 1. In certain situations, the proportion of variance + explained by a component may even be negative. + + .. versionadded:: 1.2 + """ # noqa: E501 + if self.current_fit == 'unfitted': + raise ValueError('ICA must be fitted first.') + + _validate_type( + item=inst, types=(BaseRaw, BaseEpochs, Evoked), + item_name='inst' + ) + _validate_type( + item=components, types=(None, 'int-like', Sequence, np.ndarray), + item_name='components', type_name='int, array-like of int, or None' + ) + if isinstance(components, (Sequence, np.ndarray)): + for item in components: + _validate_type( + item=item, types='int-like', + item_name='Elements of "components"' + ) + + _validate_type( + item=ch_type, types=(Sequence, np.ndarray, str, None), + item_name='ch_type', type_name='str, array-like of str, or None' + ) + if isinstance(ch_type, str): + ch_types = [ch_type] + elif ch_type is None: + ch_types = inst.get_channel_types(unique=True, only_data_chs=True) + else: + assert isinstance(ch_type, (Sequence, np.ndarray)) + ch_types = ch_type + + assert len(ch_types) >= 1 + allowed_ch_types = ('mag', 'grad', 'planar1', 'planar2', 'eeg') + for ch_type in ch_types: + if ch_type not in allowed_ch_types: + raise ValueError( + f'You requested operation on the channel type ' + f'"{ch_type}", but only the following channel types are ' + f'supported: {", ".join(allowed_ch_types)}' + ) + del ch_type + + # Input data validation ends here + if components is None: + components = range(self.n_components_) + + explained_var_ratios = [ + self._get_explained_variance_ratio_one_ch_type( + inst=inst, components=components, ch_type=ch_type + ) for ch_type in ch_types + ] + result = dict(zip(ch_types, explained_var_ratios)) + return result + + def _get_explained_variance_ratio_one_ch_type( + self, *, inst, components, ch_type + ): + # The algorithm implemented below should be equivalent to + # https://sccn.ucsd.edu/pipermail/eeglablist/2014/009134.html + # + # Reconstruct ("back-project") the data using only the specified ICA + # components. Don't make use of potential "spare" PCA components in + # this process – we're only interested in the contribution of the ICA + # components! + kwargs = dict( + inst=inst.copy(), + include=[components], + exclude=[], + n_pca_components=0, + verbose=False, + ) + if ( + isinstance(inst, (BaseEpochs, Evoked)) and + inst.baseline is not None + ): + # Don't warn if data was baseline-corrected. + with warnings.catch_warnings(): + warnings.filterwarnings( + action='ignore', + message='The data.*was baseline-corrected', + category=RuntimeWarning + ) + inst_recon = self.apply(**kwargs) + else: + inst_recon = self.apply(**kwargs) + + data_recon = inst_recon.get_data(picks=ch_type) + data_orig = inst.get_data(picks=ch_type) + data_diff = data_orig - data_recon + + # To estimate the data variance, we first compute the variance across + # channels at each time point, and then we average these variances. + mean_var_diff = data_diff.var(axis=0).mean() + mean_var_orig = data_orig.var(axis=0).mean() + + var_explained_ratio = 1 - mean_var_diff / mean_var_orig + return var_explained_ratio + def get_sources(self, inst, add_channels=None, start=None, stop=None): """Estimate sources given the unmixing matrix. @@ -2247,6 +2374,8 @@ def _find_sources(sources, target, score_func): def _ica_explained_variance(ica, inst, normalize=False): """Check variance accounted for by each component in supplied data. + This function is only used for sorting the components. + Parameters ---------- ica : ICA diff --git a/mne/preprocessing/tests/test_ica.py b/mne/preprocessing/tests/test_ica.py index 75c61c14236..ad203756211 100644 --- a/mne/preprocessing/tests/test_ica.py +++ b/mne/preprocessing/tests/test_ica.py @@ -490,8 +490,6 @@ def test_ica_core(method, n_components, noise_cov, n_pca_components, assert 'raw data decomposition' in repr_ assert f'{ica.n_components_} ICA components' in repr_ assert 'Available PCA components' in repr_html_ - assert 'Explained variance' in repr_html_ - assert ('mag' in ica) # should now work without error # test re-fit @@ -949,6 +947,95 @@ def f(x, y): ica.fit(raw_, picks=picks, reject_by_annotation=True) +@requires_sklearn +def test_get_explained_variance_ratio(tmp_path, short_raw_epochs): + """Test ICA.get_explained_variance_ratio().""" + raw, epochs, _ = short_raw_epochs + ica = ICA(max_iter=1) + + # Unfitted ICA should raise an exception + with pytest.raises(ValueError, match='ICA must be fitted first'): + ica.get_explained_variance_ratio(epochs) + + with pytest.warns(RuntimeWarning, match='were baseline-corrected'): + ica.fit(epochs) + + # components = int, ch_type = None + explained_var_comp_0 = ica.get_explained_variance_ratio( + epochs, components=0 + ) + # components = int, ch_type = str + explained_var_comp_0_eeg = ica.get_explained_variance_ratio( + epochs, components=0, ch_type='eeg' + ) + # components = int, ch_type = list of str + explained_var_comp_0_eeg_mag = ica.get_explained_variance_ratio( + epochs, components=0, ch_type=['eeg', 'mag'] + ) + # components = list of int, single element, ch_type = None + explained_var_comp_1 = ica.get_explained_variance_ratio( + epochs, components=[1] + ) + # components = list of int, multiple elements, ch_type = None + explained_var_comps_01 = ica.get_explained_variance_ratio( + epochs, components=[0, 1] + ) + # components = None, i.e., all components, ch_type = None + explained_var_comps_all = ica.get_explained_variance_ratio( + epochs, components=None + ) + + assert 'grad' in explained_var_comp_0 + assert 'mag' in explained_var_comp_0 + assert 'eeg' in explained_var_comp_0 + + assert len(explained_var_comp_0_eeg) == 1 + assert 'eeg' in explained_var_comp_0_eeg + + assert 'mag' in explained_var_comp_0_eeg_mag + assert 'eeg' in explained_var_comp_0_eeg_mag + assert 'grad' not in explained_var_comp_0_eeg_mag + + assert round(explained_var_comp_0['grad'], 4) == 0.1784 + assert round(explained_var_comp_0['mag'], 4) == 0.0259 + assert round(explained_var_comp_0['eeg'], 4) == 0.0229 + + assert np.isclose( + explained_var_comp_0['eeg'], + explained_var_comp_0_eeg['eeg'] + ) + assert np.isclose( + explained_var_comp_0['mag'], + explained_var_comp_0_eeg_mag['mag'] + ) + assert np.isclose( + explained_var_comp_0['eeg'], + explained_var_comp_0_eeg_mag['eeg'] + ) + + assert round(explained_var_comp_1['eeg'], 4) == 0.0231 + assert round(explained_var_comps_01['eeg'], 4) == 0.0459 + assert ( + explained_var_comps_all['grad'] == + explained_var_comps_all['mag'] == + explained_var_comps_all['eeg'] == + 1 + ) + + # Test Raw + ica.get_explained_variance_ratio(raw) + # Test Evoked + evoked = epochs.average() + ica.get_explained_variance_ratio(evoked) + # Test Evoked without baseline correction + evoked.baseline = None + ica.get_explained_variance_ratio(evoked) + + # Test invalid ch_type + with pytest.raises(ValueError, match='only the following channel types'): + ica.get_explained_variance_ratio(raw, ch_type='foobar') + + @requires_sklearn @pytest.mark.slowtest @pytest.mark.parametrize('method, cov', [ diff --git a/tutorials/preprocessing/40_artifact_correction_ica.py b/tutorials/preprocessing/40_artifact_correction_ica.py index ae04d9cf4fe..8a95b0ab471 100644 --- a/tutorials/preprocessing/40_artifact_correction_ica.py +++ b/tutorials/preprocessing/40_artifact_correction_ica.py @@ -213,8 +213,8 @@ filt_raw = raw.copy().filter(l_freq=1., h_freq=None) # %% -# Fitting and plotting the ICA solution -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# Fitting ICA +# ~~~~~~~~~~~ # # .. admonition:: Ignoring the time domain # :class: sidebar hint @@ -262,8 +262,42 @@ # speed-up) and ``reject`` (for providing a rejection dictionary for maximum # acceptable peak-to-peak amplitudes for each channel type, just like we used # when creating epoched data in the :ref:`tut-overview` tutorial). -# + +# %% +# Looking at the ICA solution +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Now we can examine the ICs to see what they captured. +# +# Using :meth:`~mne.preprocessing.ICA.get_explained_variance_ratio`, we can +# retrieve the fraction of variance in the original data that is explained by +# our ICA components in the form of a dictionary: + +explained_var_ratio = ica.get_explained_variance_ratio(filt_raw) +for channel_type, ratio in explained_var_ratio.items(): + print( + f'Fraction of {channel_type} variance explained by all components: ' + f'{ratio}' + ) + +# %% +# The values were calculated for all ICA components jointly, but separately for +# each channel type (here: magnetometers and EEG). +# +# We can also explicitly request for which component(s) and channel type(s) to +# perform the computation: +explained_var_ratio = ica.get_explained_variance_ratio( + filt_raw, + components=[0], + ch_type='eeg' +) +# This time, print as percentage. +ratio_percent = round(100 * explained_var_ratio['eeg']) +print( + f'Fraction of variance in EEG signal explained by first component: ' + f'{ratio_percent}%' +) + +# %% # `~mne.preprocessing.ICA.plot_sources` will show the time series of the # ICs. Note that in our call to `~mne.preprocessing.ICA.plot_sources` we # can use the original, unfiltered `~mne.io.Raw` object. A helpful tip is that