diff --git a/doc/changes/devel.rst b/doc/changes/devel.rst
index fdf307bbbd1..565a9f9fbf0 100644
--- a/doc/changes/devel.rst
+++ b/doc/changes/devel.rst
@@ -49,6 +49,7 @@ Bugs
 - Remove incorrect type hints in :func:`mne.io.read_raw_neuralynx` (:gh:`12236` by `Richard Höchenberger`_)
 - Fix bug where parent directory existence was not checked properly in :meth:`mne.io.Raw.save` (:gh:`12282` by `Eric Larson`_)
 - ``defusedxml`` is now an optional (rather than required) dependency and needed when reading EGI-MFF data, NEDF data, and BrainVision montages (:gh:`12264` by `Eric Larson`_)
+- Correctly handle temporal gaps in Neuralynx .ncs files via :func:`mne.io.read_raw_neuralynx` (:gh:`12279` by `Kristijan Armeni`_ and `Eric Larson`_)
 
 API changes
 ~~~~~~~~~~~
diff --git a/environment.yml b/environment.yml
index 8978dfc64e8..96c89fe472b 100644
--- a/environment.yml
+++ b/environment.yml
@@ -61,3 +61,4 @@ dependencies:
   - mamba
   - lazy_loader
   - defusedxml
+  - python-neo
diff --git a/mne/datasets/config.py b/mne/datasets/config.py
index b548f5273f2..b7780778f24 100644
--- a/mne/datasets/config.py
+++ b/mne/datasets/config.py
@@ -88,7 +88,7 @@
 # respective repos, and make a new release of the dataset on GitHub. Then
 # update the checksum in the MNE_DATASETS dict below, and change version
 # here: ↓↓↓↓↓ ↓↓↓
-RELEASES = dict(testing="0.150", misc="0.27")
+RELEASES = dict(testing="0.151", misc="0.27")
 TESTING_VERSIONED = f'mne-testing-data-{RELEASES["testing"]}'
 MISC_VERSIONED = f'mne-misc-data-{RELEASES["misc"]}'
 
@@ -112,7 +112,7 @@
 # Testing and misc are at the top as they're updated most often
 MNE_DATASETS["testing"] = dict(
     archive_name=f"{TESTING_VERSIONED}.tar.gz",
-    hash="md5:0b7452daef4d19132505b5639d695628",
+    hash="md5:5832b4d44f0423d22305fa61cb75bc25",
     url=(
         "https://codeload.github.com/mne-tools/mne-testing-data/"
         f'tar.gz/{RELEASES["testing"]}'
diff --git a/mne/io/neuralynx/neuralynx.py b/mne/io/neuralynx/neuralynx.py
index 4b6dea1a339..1c007ba5787 100644
--- a/mne/io/neuralynx/neuralynx.py
+++ b/mne/io/neuralynx/neuralynx.py
@@ -7,10 +7,51 @@
 from ..._fiff.meas_info import create_info
 from ..._fiff.utils import _mult_cal_one
+from ...annotations import Annotations
 from ...utils import _check_fname, _soft_import, fill_doc, logger, verbose
 from ..base import BaseRaw
 
 
+class AnalogSignalGap(object):
+    """Dummy object to represent gaps in Neuralynx data.
+
+    Creates an AnalogSignalProxy-like object.
+    Propagates the `signal`, `units`, and `sampling_rate` attributes
+    to the `AnalogSignal` returned by `load()`.
+
+    Parameters
+    ----------
+    signal : array-like
+        Array of shape (n_channels, n_samples) containing the data.
+    units : str
+        Units of the data (e.g., 'uV').
+    sampling_rate : quantity
+        Sampling rate of the data (e.g., 4000 * pq.Hz).
+
+    Returns
+    -------
+    sig : instance of AnalogSignal
+        An AnalogSignal object representing a gap in Neuralynx data.
+ """ + + def __init__(self, signal, units, sampling_rate): + self.signal = signal + self.units = units + self.sampling_rate = sampling_rate + + def load(self, channel_indexes): + """Return AnalogSignal object.""" + _soft_import("neo", "Reading NeuralynxIO files", strict=True) + from neo import AnalogSignal + + sig = AnalogSignal( + signal=self.signal[:, channel_indexes], + units=self.units, + sampling_rate=self.sampling_rate, + ) + return sig + + @fill_doc def read_raw_neuralynx( fname, *, preload=False, exclude_fname_patterns=None, verbose=None @@ -59,11 +100,11 @@ def __init__( exclude_fname_patterns=None, verbose=None, ): + fname = _check_fname(fname, "read", True, "fname", need_dir=True) + _soft_import("neo", "Reading NeuralynxIO files", strict=True) from neo.io import NeuralynxIO - fname = _check_fname(fname, "read", True, "fname", need_dir=True) - logger.info(f"Checking files in {fname}") # construct a list of filenames to ignore @@ -81,12 +122,18 @@ def __init__( try: nlx_reader = NeuralynxIO(dirname=fname, exclude_filename=exclude_fnames) except ValueError as e: - raise ValueError( - "It seems some .ncs channels might have different number of samples. " - + "This is likely due to different sampling rates. " - + "Try excluding them with `exclude_fname_patterns` input arg." - + f"\nOriginal neo.NeuralynxIO.parse_header() ValueError:\n{e}" - ) + # give a more informative error message and what the user can do about it + if "Incompatible section structures across streams" in str(e): + raise ValueError( + "It seems .ncs channels have different numbers of samples. " + + "This is likely due to different sampling rates. " + + "Try reading in only channels with uniform sampling rate " + + "by excluding other channels with `exclude_fname_patterns` " + + "input argument." + + f"\nOriginal neo.NeuralynxRawIO ValueError:\n{e}" + ) from None + else: + raise info = create_info( ch_types="seeg", @@ -98,32 +145,122 @@ def __init__( # the sample sizes of all segments n_segments = nlx_reader.header["nb_segment"][0] block_id = 0 # assumes there's only one block of recording - n_total_samples = sum( - nlx_reader.get_signal_size(block_id, segment) - for segment in range(n_segments) + + # get segment start/stop times + start_times = np.array( + [nlx_reader.segment_t_start(block_id, i) for i in range(n_segments)] + ) + stop_times = np.array( + [nlx_reader.segment_t_stop(block_id, i) for i in range(n_segments)] ) - # construct an array of shape (n_total_samples,) indicating - # segment membership for each sample - sample2segment = np.concatenate( + # find discontinuous boundaries (of length n-1) + next_start_times = start_times[1::] + previous_stop_times = stop_times[:-1] + seg_diffs = next_start_times - previous_stop_times + + # mark as discontinuous any two segments that have + # start/stop delta larger than sampling period (1/sampling_rate) + logger.info("Checking for temporal discontinuities in Neo data segments.") + delta = 1.5 / info["sfreq"] + gaps = seg_diffs > delta + + seg_gap_dict = {} + + logger.info( + f"N = {gaps.sum()} discontinuous Neo segments detected " + + f"with delta > {delta} sec. " + + "Annotating gaps as BAD_ACQ_SKIP." + if gaps.any() + else "No discontinuities detected." 
+        )
+
+        gap_starts = stop_times[:-1][gaps]  # gap starts at segment offset
+        gap_stops = start_times[1::][gaps]  # gap stops at segment onset
+
+        # (n_gaps,) array of ints giving number of samples per inferred gap
+        gap_n_samps = np.array(
+            [
+                int(round(stop * info["sfreq"])) - int(round(start * info["sfreq"]))
+                for start, stop in zip(gap_starts, gap_stops)
+            ]
+        ).astype(int)  # force an int array (if no gaps, empty array is a float)
+
+        # get sort indices for all segments (valid and gap) in ascending order
+        all_starts_ids = np.argsort(np.concatenate([start_times, gap_starts]))
+
+        # variable indicating whether each segment is a gap or not
+        gap_indicator = np.concatenate(
             [
-                np.full(shape=(nlx_reader.get_signal_size(block_id, i),), fill_value=i)
-                for i in range(n_segments)
+                np.full(len(start_times), fill_value=0),
+                np.full(len(gap_starts), fill_value=1),
             ]
         )
+        gap_indicator = gap_indicator[all_starts_ids].astype(bool)
+
+        # store this in a dict to be passed to _raw_extras
+        seg_gap_dict = {
+            "gap_n_samps": gap_n_samps,
+            "isgap": gap_indicator,  # False (data segment) or True (gap segment)
+        }
+
+        valid_segment_sizes = [
+            nlx_reader.get_signal_size(block_id, i) for i in range(n_segments)
+        ]
+
+        sizes_sorted = np.concatenate([valid_segment_sizes, gap_n_samps])[
+            all_starts_ids
+        ]
+
+        # now construct an (n_samples,) indicator variable
+        sample2segment = np.concatenate(
+            [np.full(shape=(n,), fill_value=i) for i, n in enumerate(sizes_sorted)]
+        )
+
+        # construct Annotations()
+        gap_seg_ids = np.unique(sample2segment)[gap_indicator]
+        gap_start_ids = np.array(
+            [np.where(sample2segment == seg_id)[0][0] for seg_id in gap_seg_ids]
+        )
+
+        # recreate time axis for gap annotations
+        mne_times = np.arange(0, len(sample2segment)) / info["sfreq"]
+
+        assert len(gap_start_ids) == len(gap_n_samps)
+        annotations = Annotations(
+            onset=[mne_times[onset_id] for onset_id in gap_start_ids],
+            duration=[
+                mne_times[onset_id + (n - 1)] - mne_times[onset_id]
+                for onset_id, n in zip(gap_start_ids, gap_n_samps)
+            ],
+            description=["BAD_ACQ_SKIP"] * len(gap_start_ids),
+        )
 
         super(RawNeuralynx, self).__init__(
             info=info,
-            last_samps=[n_total_samples - 1],
+            last_samps=[sizes_sorted.sum() - 1],
             filenames=[fname],
             preload=preload,
-            raw_extras=[dict(smp2seg=sample2segment, exclude_fnames=exclude_fnames)],
+            raw_extras=[
+                dict(
+                    smp2seg=sample2segment,
+                    exclude_fnames=exclude_fnames,
+                    segment_sizes=sizes_sorted,
+                    seg_gap_dict=seg_gap_dict,
+                )
+            ],
         )
 
+        self.set_annotations(annotations)
+
     def _read_segment_file(self, data, idx, fi, start, stop, cals, mult):
         """Read a chunk of raw data."""
+        from neo import Segment
         from neo.io import NeuralynxIO
 
+        # quantities is a dependency of neo so we are guaranteed it exists
+        from quantities import Hz
+
         nlx_reader = NeuralynxIO(
             dirname=self._filenames[fi],
             exclude_filename=self._raw_extras[0]["exclude_fnames"],
@@ -136,13 +273,7 @@ def _read_segment_file(self, data, idx, fi, start, stop, cals, mult):
             [len(segment.analogsignals) for segment in neo_block[0].segments]
         ) == len(neo_block[0].segments)
 
-        # collect sizes of each segment
-        segment_sizes = np.array(
-            [
-                nlx_reader.get_signal_size(0, segment_id)
-                for segment_id in range(len(neo_block[0].segments))
-            ]
-        )
+        segment_sizes = self._raw_extras[fi]["segment_sizes"]
 
         # construct a (n_segments, 2) array of the first and last
         # sample index for each segment relative to the start of the recording
@@ -188,15 +319,44 @@ def _read_segment_file(self, data, idx, fi, start, stop, cals, mult):
             -1, 0
         ]  # express stop sample relative to segment onset
 
+        # array containing Segments
+        segments_arr = np.array(neo_block[0].segments, dtype=object)
+
+        # if gaps were detected, correctly insert gap Segments in between valid Segments
+        gap_samples = self._raw_extras[fi]["seg_gap_dict"]["gap_n_samps"]
+        gap_segments = [Segment(f"gap-{i}") for i in range(len(gap_samples))]
+
+        # create AnalogSignal objects representing gap data filled with 0's
+        sfreq = nlx_reader.get_signal_sampling_rate()
+        n_chans = (
+            np.arange(idx.start, idx.stop, idx.step).size
+            if type(idx) is slice
+            else len(idx)  # idx can be a slice or an np.array so check both
+        )
+
+        for seg, n in zip(gap_segments, gap_samples):
+            asig = AnalogSignalGap(
+                signal=np.zeros((n, n_chans)), units="uV", sampling_rate=sfreq * Hz
+            )
+            seg.analogsignals.append(asig)
+
+        n_total_segments = len(neo_block[0].segments + gap_segments)
+        segments_arr = np.zeros((n_total_segments,), dtype=object)
+
+        # insert inferred gap segments at the right place in between valid segments
+        isgap = self._raw_extras[0]["seg_gap_dict"]["isgap"]
+        segments_arr[~isgap] = neo_block[0].segments
+        segments_arr[isgap] = gap_segments
+
         # now load data from selected segments/channels via
-        # neo.Segment.AnalogSignal.load()
+        # neo.Segment.AnalogSignal.load() or AnalogSignalGap.load()
         all_data = np.concatenate(
             [
                 signal.load(channel_indexes=idx).magnitude[
                     samples[0] : samples[-1] + 1, :
                 ]
                 for seg, samples in zip(
-                    neo_block[0].segments[first_seg : last_seg + 1], sel_samples_local
+                    segments_arr[first_seg : last_seg + 1], sel_samples_local
                 )
                 for signal in seg.analogsignals
             ]
diff --git a/mne/io/neuralynx/tests/test_neuralynx.py b/mne/io/neuralynx/tests/test_neuralynx.py
index 21cb73927a8..1532845ab7a 100644
--- a/mne/io/neuralynx/tests/test_neuralynx.py
+++ b/mne/io/neuralynx/tests/test_neuralynx.py
@@ -15,6 +15,8 @@
 
 testing_path = data_path(download=False) / "neuralynx"
 
+pytest.importorskip("neo")
+
 
 def _nlxheader_to_dict(matdict: Dict) -> Dict:
     """Convert the read-in "Header" field into a dict.
@@ -65,14 +67,42 @@ def _read_nlx_mat_chan(matfile: str) -> np.ndarray:
     return x
 
 
-mne_testing_ncs = [
-    "LAHC1.ncs",
-    "LAHC2.ncs",
-    "LAHC3.ncs",
-    "LAHCu1.ncs",  # the 'u' files are going to be filtered out
-    "xAIR1.ncs",
-    "xEKG1.ncs",
-]
+def _read_nlx_mat_chan_keep_gaps(matfile: str) -> np.ndarray:
+    """Read a single channel from a Neuralynx .mat file and keep invalid samples."""
+    mat = loadmat(matfile)
+
+    hdr_dict = _nlxheader_to_dict(mat)
+
+    # Nlx2MatCSC.m reads the data in N equal-sized (512-item) chunks
+    # this array (1, n_chunks) stores the number of valid samples
+    # per chunk (the last chunk is usually shorter)
+    n_valid_samples = mat["NumberOfValidSamples"].ravel()
+
+    # read in the artificial zeros so that
+    # we can compare with the mne padded arrays
+    ncs_records_with_gaps = [9, 15, 20]
+    for i in ncs_records_with_gaps:
+        n_valid_samples[i] = 512
+
+    # concatenate chunks, respecting the number of valid samples
+    x = np.concatenate(
+        [mat["Samples"][0:n, i] for i, n in enumerate(n_valid_samples)]
+    )  # in ADBits
+
+    # this value is the same for all channels and
+    # converts data from ADBits to Volts
+    conversionf = literal_eval(hdr_dict["ADBitVolts"])
+    x = x * conversionf
+
+    # if header says input was inverted at acquisition
+    # (possibly for spike detection or so?), flip it back
+    # NeuralynxIO does this under the hood in NeuralynxIO.parse_header()
+    # see this discussion: https://github.com/NeuralEnsemble/python-neo/issues/819
+    if hdr_dict["InputInverted"] == "True":
+        x *= -1
+
+    return x
+
 
 expected_chan_names = ["LAHC1", "LAHC2", "LAHC3", "xAIR1", "xEKG1"]
 
@@ -80,15 +110,20 @@ def _read_nlx_mat_chan(matfile: str) -> np.ndarray:
 @requires_testing_data
 def test_neuralynx():
     """Test basic reading."""
-    pytest.importorskip("neo")
-    from neo.io import NeuralynxIO
-
-    excluded_ncs_files = ["LAHCu1.ncs", "LAHCu2.ncs", "LAHCu3.ncs"]
+    excluded_ncs_files = [
+        "LAHCu1.ncs",
+        "LAHC1_3_gaps.ncs",
+        "LAHC2_3_gaps.ncs",
+    ]
 
     # ==== MNE-Python ==== #
+    fname_patterns = ["*u*.ncs", "*3_gaps.ncs"]
     raw = read_raw_neuralynx(
-        fname=testing_path, preload=True, exclude_fname_patterns=["*u*.ncs"]
+        fname=testing_path,
+        preload=True,
+        exclude_fname_patterns=fname_patterns,
     )
 
     # test that channel selection worked
@@ -136,5 +171,52 @@ def test_neuralynx():
     )  # data
 
     _test_raw_reader(
-        read_raw_neuralynx, fname=testing_path, exclude_fname_patterns=["*u*.ncs"]
+        read_raw_neuralynx,
+        fname=testing_path,
+        exclude_fname_patterns=fname_patterns,
+    )
+
+
+@requires_testing_data
+def test_neuralynx_gaps():
+    """Test gap detection."""
+    # ignore files with no gaps
+    ignored_ncs_files = [
+        "LAHC1.ncs",
+        "LAHC2.ncs",
+        "LAHC3.ncs",
+        "xAIR1.ncs",
+        "xEKG1.ncs",
+        "LAHCu1.ncs",
+    ]
+    raw = read_raw_neuralynx(
+        fname=testing_path,
+        preload=True,
+        exclude_fname_patterns=ignored_ncs_files,
+    )
+    mne_y, _ = raw.get_data(return_times=True)  # in V
+
+    # there should be 2 channels with 3 gaps (of 130 samples in total)
+    n_expected_gaps = 3
+    n_expected_missing_samples = 130
+    assert len(raw.annotations) == n_expected_gaps, "Wrong number of gaps detected"
+    assert (
+        (mne_y[0, :] == 0).sum() == n_expected_missing_samples
+    ), "Number of true and inferred missing samples differ"
+
+    # read in .mat files containing original gaps
+    matchans = ["LAHC1_3_gaps.mat", "LAHC2_3_gaps.mat"]
+
+    # (n_chan, n_samples) array, in V
+    mat_y = np.stack(
+        [
+            _read_nlx_mat_chan_keep_gaps(os.path.join(testing_path, ch))
+            for ch in matchans
+        ]
+    )
+
+    # compare originally modified .ncs arrays with MNE-padded arrays
+    # and test that we back-inserted 0's at the right places
+    assert_allclose(
+        mne_y, mat_y, rtol=1e-6, err_msg="MNE and Nlx2MatCSC.m not all close"
     )
diff --git a/mne/utils/config.py b/mne/utils/config.py
index 77b94508114..62b4d053012 100644
--- a/mne/utils/config.py
+++ b/mne/utils/config.py
@@ -684,6 +684,7 @@ def sys_info(
         "mne-connectivity",
         "mne-icalabel",
         "mne-bids-pipeline",
+        "neo",
         "",
     )
     if dependencies == "developer":
diff --git a/pyproject.toml b/pyproject.toml
index 39c6876e43d..092e4dd102a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -104,6 +104,7 @@ full = [
     "pybv",
     "snirf",
     "defusedxml",
+    "neo",
 ]
 
 # Dependencies for running the test infrastructure
@@ -135,6 +136,7 @@ test_extra = [
     "imageio>=2.6.1",
     "imageio-ffmpeg>=0.4.1",
     "snirf",
+    "neo",
 ]
 
 # Dependencies for building the docuemntation
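
For reviewers, a minimal usage sketch of the behavior this patch adds (not part of the diff; the recording directory path and the exclusion pattern below are placeholders):

    import mne

    # Read a directory of .ncs files; temporal gaps between Neo segments are
    # zero-filled and annotated as BAD_ACQ_SKIP by the new code path above.
    raw = mne.io.read_raw_neuralynx(
        fname="/path/to/neuralynx_dir",  # placeholder: directory of .ncs files
        preload=True,
        exclude_fname_patterns=["*u*.ncs"],  # optional, mirrors the tests above
    )

    # Inspect the inferred gaps; downstream routines such as epoching skip
    # BAD_ACQ_SKIP spans by default via reject_by_annotation.
    print(raw.annotations)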