diff --git a/README.md b/README.md index d47052b2..4c29f2f7 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ [![codecov](https://codecov.io/gh/nbara/python-meegkit/branch/master/graph/badge.svg)](https://codecov.io/gh/nbara/python-meegkit) [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/nbara/python-meegkit/master) [![DOI](https://zenodo.org/badge/117451752.svg)](https://zenodo.org/badge/latestdoi/117451752) -[![twitter](https://img.shields.io/twitter/follow/lebababa?label=Twitter&style=flat&logo=Twitter)](https://twitter.com/intent/follow?screen_name=lebababa) +[![twitter](https://img.shields.io/twitter/follow/lebababa?style=flat&logo=Twitter)](https://twitter.com/intent/follow?screen_name=lebababa) # MEEGkit @@ -54,7 +54,7 @@ Other available options are `[docs]` (which installs dependencies required to bu ## References -### 1. CCA, STAR, SNS, DSS, ZapLine, and robust detrending +### 1. CCA, STAR, SNS, DSS, ZapLine, and Robust Detrending This is mostly a translation of Matlab code from the [NoiseTools toolbox](http://audition.ens.fr/adc/NoiseTools/) by Alain de Cheveigné. It builds on an initial python implementation by [Pedro Alcocer](https://github.com/pealco). @@ -83,10 +83,9 @@ If you use this code, you should cite the relevant methods from the original art Journal of Neuroscience Methods, 168(1), 195–202. https://doi.org/10.1016/j.jneumeth.2007.09.012 [10] de Cheveigné, A., & Simon, J. Z. (2007). Denoising based on time-shift PCA. Journal of Neuroscience Methods, 165(2), 297–305. https://doi.org/10.1016/j.jneumeth.2007.06.003 - ``` -### 2. Artifact subspace reconstruction (ASR) +### 2. Artifact Subspace Reconstruction (ASR) The base code is inspired from the original [EEGLAB inplementation](https://github.com/sccn/clean_rawdata) [1], while the riemannian variant [2] was adapted from the [rASR toolbox](https://github.com/s4rify/rASRMatlab) by Sarah Blum. @@ -101,7 +100,7 @@ If you use this code, you should cite the relevant methods from the original art 13, 141. ``` -### 3. Rhythmic entrainment source separation (RESS) +### 3. Rhythmic Entrainment Source Separation (RESS) The code is based on [Matlab code from Mike X. Cohen](https://mikexcohen.com/data/) [1] @@ -130,3 +129,15 @@ If you use this, you should cite the following articles: "High-speed spelling with a noninvasive brain-computer interface", Proc. Int. Natl. Acad. Sci. U. S. A, 112(44): E6058-6067, 2015. ``` + +### 5. Local Outlier Factor (LOF) + +If you use this, you should cite the following article: + +```sql +[1] Breunig M, Kriegel HP, Ng RT, Sander J. 2000. LOF: identifying density-based + local outliers. SIGMOD Rec. 29, 2, 93-104. https://doi.org/10.1145/335191.335388 +[2] Kumaravel VP, Buiatti M, Parise E, Farella E. 2022. Adaptable and Robust + EEG Bad Channel Detection Using Local Outlier Factor (LOF). Sensors (Basel). + 2022 Sep 27;22(19):7314. https://doi.org/10.3390/s22197314. +``` diff --git a/doc/index.rst b/doc/index.rst index 2a588657..23f60816 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -37,6 +37,7 @@ Here is a list of the methods and techniques available in ``meegkit``: ~meegkit.cca ~meegkit.dss ~meegkit.detrend + ~meegkit.lof ~meegkit.ress ~meegkit.sns ~meegkit.star diff --git a/doc/modules/meegkit.lof.rst b/doc/modules/meegkit.lof.rst new file mode 100644 index 00000000..ff0ea65d --- /dev/null +++ b/doc/modules/meegkit.lof.rst @@ -0,0 +1,23 @@ +meegkit.lof +=========== + +.. automodule:: meegkit.lof + + .. rubric:: Functions + + .. autosummary:: + + LOF + + + + + + + + + + + + + diff --git a/meegkit/__init__.py b/meegkit/__init__.py index 9205cfb4..0aec3dee 100644 --- a/meegkit/__init__.py +++ b/meegkit/__init__.py @@ -1,7 +1,7 @@ """M/EEG denoising utilities in python.""" __version__ = '0.1.3' -from . import asr, cca, detrend, dss, sns, star, ress, trca, tspca, utils +from . import asr, cca, detrend, dss, lof, sns, star, ress, trca, tspca, utils -__all__ = ['asr', 'cca', 'detrend', 'dss', 'ress', 'sns', 'star', 'trca', +__all__ = ['asr', 'cca', 'detrend', 'dss', 'lof', 'ress', 'sns', 'star', 'trca', 'tspca', 'utils'] diff --git a/meegkit/lof.py b/meegkit/lof.py new file mode 100644 index 00000000..9f9db51c --- /dev/null +++ b/meegkit/lof.py @@ -0,0 +1,85 @@ +"""Local Outlier Factor (LOF).""" +# Authors: Velu Prabhakar Kumaravel +# License: BSD-3-Clause + +import logging +from sklearn.neighbors import LocalOutlierFactor + + +class LOF(): + """Local Outlier Factor. + + Local Outlier Factor (LOF) is an automatic, density-based outlier detection + algorithm based on [1]_ and [2]_. + + Parameters + ---------- + n_neighbours : int + Number of neighbours defining the local neighbourhood. + metric: str in {'euclidean', 'nan_euclidean', 'cosine', + 'cityblock', 'manhattan'} + Metric to use for distance computation. Default is “euclidean” + threshold : float + Threshold to define outliers. Theoretical threshold ranges anywhere + between 1.0 and any integer. Default: 1.5 + + Notes + ----- + It is recommended to perform a CV (e.g., 10-fold) on training set to + calibrate this parameter for the given M/EEG dataset. + + See [2]_ for details. + + References + ---------- + .. [1] Breunig M, Kriegel HP, Ng RT, Sander J. + 2000. LOF: identifying density-based local outliers. + SIGMOD Rec. 29, 2, 93-104. https://doi.org/10.1145/335191.335388 + .. [2] Kumaravel VP, Buiatti M, Parise E, Farella E. + 2022. Adaptable and Robust EEG Bad Channel Detection Using + Local Outlier Factor (LOF). Sensors (Basel). 2022 Sep 27;22(19):7314. + doi: 10.3390/s22197314. PMID: 36236413; PMCID: PMC9571252. + + """ + + def __init__(self, n_neighbors=20, metric='euclidean', + threshold=1.5, **kwargs): + + self.n_neighbors = n_neighbors + self.metric = metric + self.threshold = threshold + + def predict(self, X): + """Detect bad channels using Local Outlier Factor algorithm. + + Parameters + ---------- + X : array, shape=(n_channels, n_samples) + The data X should have been high-pass filtered. + + Returns + ------- + bad_channel_indices : Detected bad channel indices. + + """ + if X.ndim == 3: # in case the input data is epoched + logging.warning('Expected input data with shape ' + '(n_channels, n_samples)') + return [] + + if self.n_neighbors >= X.shape[0]: + logging.warning('Number of neighbours cannot be greater than the ' + 'number of channels') + return [] + + if self.threshold < 1.0: + logging.warning('Invalid threshold. Try a positive integer >= 1.0') + return [] + + clf = LocalOutlierFactor(self.n_neighbors) + logging.debug('[LOF] Predicting bad channels') + clf.fit_predict(X) + lof_scores = clf.negative_outlier_factor_ + bad_channel_indices = -lof_scores >= self.threshold + + return bad_channel_indices diff --git a/tests/data/lofdata.mat b/tests/data/lofdata.mat new file mode 100644 index 00000000..ec9fd7eb Binary files /dev/null and b/tests/data/lofdata.mat differ diff --git a/tests/test_asr.py b/tests/test_asr.py index 6320689b..326e99cb 100644 --- a/tests/test_asr.py +++ b/tests/test_asr.py @@ -9,8 +9,6 @@ from meegkit.utils.matrix import sliding_window from scipy import signal -np.random.seed(9) - # Data files THIS_FOLDER = os.path.dirname(os.path.abspath(__file__)) # file = os.path.join(THIS_FOLDER, 'data', 'eeg_raw.fif') @@ -175,6 +173,7 @@ def test_asr_functions(show=False, method='riemann'): @pytest.mark.parametrize(argnames='reref', argvalues=(False, True)) def test_asr_class(method, reref, show=False): """Test ASR class (simulate online use).""" + np.random.default_rng(9) raw = np.load(os.path.join(THIS_FOLDER, 'data', 'eeg_raw.npy')) sfreq = 250 # Train on a clean portion of data diff --git a/tests/test_lof.py b/tests/test_lof.py new file mode 100644 index 00000000..b37d2382 --- /dev/null +++ b/tests/test_lof.py @@ -0,0 +1,37 @@ +"""LOF test.""" +import os + +import numpy as np +import pytest +import scipy.io as sio + +from meegkit.lof import LOF + +np.random.seed(9) + +# Data files +THIS_FOLDER = os.path.dirname(os.path.abspath(__file__)) # data folder of MEEGKIT + + +@pytest.mark.parametrize(argnames='n_neighbors', argvalues=(8, 20, 40, 2048)) +def test_lof(n_neighbors, show=False): + mat = sio.loadmat(os.path.join(THIS_FOLDER, 'data', 'lofdata.mat')) + X = mat['X'] + lof = LOF(n_neighbors) + bad_channel_indices = lof.predict(X) + print(bad_channel_indices) + +@pytest.mark.parametrize(argnames='metric', + argvalues=('euclidean', 'nan_euclidean', + 'cosine', 'cityblock', 'manhattan')) +def test_lof2(metric, show=False): + mat = sio.loadmat(os.path.join(THIS_FOLDER, 'data', 'lofdata.mat')) + X = mat['X'] + lof = LOF(20, metric) + bad_channel_indices = lof.predict(X) + print(bad_channel_indices) + +if __name__ == "__main__": + pytest.main([__file__]) + #test_lof(20, True) + #test_lof(metric='euclidean') diff --git a/tests/test_tspca.py b/tests/test_tspca.py index 67d69efd..382dad65 100644 --- a/tests/test_tspca.py +++ b/tests/test_tspca.py @@ -51,7 +51,8 @@ def test_tspca_sns_dss(): # TODO y_tspca_sns_dss = fold( np.dot(unfold(y_tspca_sns), todss), y_tspca_sns.shape[0]) - return y_tspca, y_tspca_sns, y_tspca_sns_dss + # TODO do something with it + assert y_tspca_sns_dss.shape == noisy_data.shape def test_tsr(show=True): @@ -107,6 +108,7 @@ def test_tsr(show=True): plt.show() if __name__ == '__main__': - # import pytest - # pytest.main([__file__]) - test_tsr() + import pytest + pytest.main([__file__]) + # test_tspca_sns_dss() + # test_tsr()