diff --git a/doc/manual/io.rst b/doc/manual/io.rst index 247f585babe..c809b04c772 100644 --- a/doc/manual/io.rst +++ b/doc/manual/io.rst @@ -23,6 +23,7 @@ EEG Brainvision .vhdr :func:`mne.io.read_r EEG Neuroscan CNT .cnt :func:`mne.io.read_raw_cnt` EEG European data format .edf :func:`mne.io.read_raw_edf` EEG Biosemi data format .bdf :func:`mne.io.read_raw_edf` +EEG General data format .gdf :func:`mne.io.read_raw_edf` EEG EGI simple binary .egi :func:`mne.io.read_raw_egi` EEG EEGLAB .set :func:`mne.io.read_raw_eeglab` and :func:`mne.read_epochs_eeglab` Electrode locations elc, txt, csd, sfp, htps Misc :func:`mne.channels.read_montage` @@ -262,26 +263,37 @@ European data format (.edf) EDF and EDF+ files can be read in using :func:`mne.io.read_raw_edf`. -http://www.edfplus.info/specs/edf.html +`EDF (European Data Format) <http://www.edfplus.info/specs/edf.html>`_ and +`EDF+ <http://www.edfplus.info/specs/edfplus.html>`_ are 16-bit formats. -EDF (European Data Format) and EDF+ are 16-bit formats -http://www.edfplus.info/specs/edfplus.html - -The EDF+ files may contain an annotation channel which can -be used to store trigger information. The Time-stamped Annotation -Lists (TALs) on the annotation data can be converted to a trigger -channel (STI 014) using an annotation map file which associates -an annotation label with a number on the trigger channel. +The EDF+ files may contain an annotation channel which can be used to store +trigger information. The Time-stamped Annotation Lists (TALs) on the +annotation data can be converted to a trigger channel (STI 014) using an +annotation map file which associates an annotation label with a number on +the trigger channel. Biosemi data format (.bdf) ========================== -The BDF format (http://www.biosemi.com/faq/file_format.htm) is a 24-bit variant -of the EDF format used by the EEG systems manufactured by a company called -BioSemi. It can also be read in using :func:`mne.io.read_raw_edf`. 
+The `BDF format <http://www.biosemi.com/faq/file_format.htm>`_ is a 24-bit +variant of the EDF format used by the EEG systems manufactured by a company +called BioSemi. It can also be read in using :func:`mne.io.read_raw_edf`. .. warning:: The data samples in a BDF file are represented in a 3-byte (24-bit) format. Since 3-byte raw data buffers are not presently supported in the fif format these data will be changed to 4-byte integers in the conversion. +General data format (.gdf) +========================== + +GDF files can be read in using :func:`mne.io.read_raw_edf`. + +`GDF (General Data Format) <https://arxiv.org/abs/cs/0608052>`_ is a flexible +format for biomedical signals that overcomes some of the limitations of the +EDF format. The original specification (GDF v1) includes a binary header, +and uses an event table. An updated specification (GDF v2) was released in +2011 and adds fields for additional subject-specific information (gender, +age, etc.) and allows storing several physical units and other properties. +Both specifications are supported in MNE. + Neuroscan CNT data format (.cnt) ================================ diff --git a/mne/datasets/utils.py b/mne/datasets/utils.py index ac83e95683d..fc20727cc1c 100644 --- a/mne/datasets/utils.py +++ b/mne/datasets/utils.py @@ -228,7 +228,7 @@ def _data_path(path=None, force_update=False, update_path=True, download=True, path = _get_path(path, key, name) # To update the testing or misc dataset, push commits, then make a new # release on GitHub. Then update the "releases" variable: - releases = dict(testing='0.32', misc='0.3') + releases = dict(testing='0.33', misc='0.3') # And also update the "hashes['testing']" variable below. 
# To update any other dataset, update the data archive itself (upload @@ -285,7 +285,7 @@ def _data_path(path=None, force_update=False, update_path=True, download=True, sample='1d5da3a809fded1ef5734444ab5bf857', somato='f3e3a8441477bb5bacae1d0c6e0964fb', spm='f61041e3f3f2ba0def8a2ca71592cc41', - testing='bcda2bb49dfa8a9400aaeeb3c4b0a072', + testing='37e965395f04ed357605e796fde104f3', multimodal='26ec847ae9ab80f58f204d09e2c08367', visual_92_categories='46c7e590f4a48596441ce001595d5e58', mtrf='273a390ebbc48da2c3184b01a82e4636', diff --git a/mne/io/edf/edf.py b/mne/io/edf/edf.py index 75452f05976..4aca477151b 100644 --- a/mne/io/edf/edf.py +++ b/mne/io/edf/edf.py @@ -2,6 +2,7 @@ # Authors: Teon Brooks # Martin Billinger +# Nicolas Barascud # # License: BSD (3-clause) @@ -64,6 +65,19 @@ class RawEDF(BaseRaw): If not None, override default verbose level (see :func:`mne.verbose` and :ref:`Logging documentation ` for more). + Notes + ----- + Biosemi devices trigger codes are encoded in 16-bit format, whereas system + codes (CMS in/out-of range, battery low, etc.) are coded in bits 16-23 of + the status channel (see http://www.biosemi.com/faq/trigger_signals.htm). + To retrieve correct event values (bits 1-16), one could do: + + >>> events = mne.find_events(...) # doctest:+SKIP + >>> events[:, 2] >>= 8 # doctest:+SKIP + + It is also possible to retrieve system codes, but no particular effort has + been made to decode these in MNE. + See Also -------- mne.io.Raw : Documentation of attribute and methods. @@ -75,9 +89,9 @@ def __init__(self, input_fname, montage, eog=None, misc=None, preload=False, verbose=None): # noqa: D102 logger.info('Extracting edf Parameters from %s...' 
% input_fname) input_fname = os.path.abspath(input_fname) - info, edf_info = _get_edf_info(input_fname, stim_channel, annot, - annotmap, eog, misc, exclude, preload) - logger.info('Creating Raw.info structure...') + info, edf_info = _get_info(input_fname, stim_channel, annot, + annotmap, eog, misc, exclude, preload) + logger.info('Created Raw.info structure...') _check_update_montage(info, montage) if bool(annot) != bool(annotmap): @@ -88,8 +102,7 @@ def __init__(self, input_fname, montage, eog=None, misc=None, last_samps = [edf_info['nsamples'] - 1] super(RawEDF, self).__init__( info, preload, filenames=[input_fname], raw_extras=[edf_info], - last_samps=last_samps, orig_format='int', - verbose=verbose) + last_samps=last_samps, orig_format='int', verbose=verbose) logger.info('Ready.') @@ -104,11 +117,11 @@ def _read_segment_file(self, data, idx, fi, start, stop, cals, mult): raise NotImplementedError('mult is not supported yet') exclude = self._raw_extras[fi]['exclude'] sel = np.arange(self.info['nchan'])[idx] - n_samps = self._raw_extras[fi]['n_samps'] buf_len = int(self._raw_extras[fi]['max_samp']) sfreq = self.info['sfreq'] - data_size = self._raw_extras[fi]['data_size'] + dtype = self._raw_extras[fi]['dtype_np'] + dtype_byte = self._raw_extras[fi]['dtype_byte'] data_offset = self._raw_extras[fi]['data_offset'] stim_channel = self._raw_extras[fi]['stim_channel'] tal_channels = self._raw_extras[fi]['tal_channel'] @@ -116,18 +129,23 @@ def _read_segment_file(self, data, idx, fi, start, stop, cals, mult): annotmap = self._raw_extras[fi]['annotmap'] subtype = self._raw_extras[fi]['subtype'] + if np.size(dtype_byte) > 1: + if len(np.unique(dtype_byte)) > 1: + warn("Multiple data type not supported") + dtype = dtype[0] + dtype_byte = dtype_byte[0] + # gain constructor physical_range = np.array([ch['range'] for ch in self.info['chs']]) cal = np.array([ch['cal'] for ch in self.info['chs']]) - gains = np.atleast_2d(self._raw_extras[fi]['units'] * - (physical_range / 
cal)) + cal = np.atleast_2d(physical_range / cal) # physical / digital + gains = np.atleast_2d(self._raw_extras[fi]['units']) # physical dimension in uV - physical_min = np.atleast_2d(self._raw_extras[fi]['units'] * - self._raw_extras[fi]['physical_min']) + physical_min = self._raw_extras[fi]['physical_min'] digital_min = self._raw_extras[fi]['digital_min'] - offsets = np.atleast_2d(physical_min - (digital_min * gains)).T + offsets = np.atleast_2d(physical_min - (digital_min * cal)).T if tal_channels is not None: for tal_channel in tal_channels: offsets[tal_channel] = 0 @@ -150,18 +168,19 @@ def _read_segment_file(self, data, idx, fi, start, stop, cals, mult): # But to speed it up, we really need to read multiple blocks at once, # Otherwise we can end up with e.g. 18,181 chunks for a 20 MB file! # Let's do ~10 MB chunks: - n_per = max(10 * 1024 * 1024 // (ch_offsets[-1] * data_size), 1) + n_per = max(10 * 1024 * 1024 // (ch_offsets[-1] * dtype_byte), 1) with open(self._filenames[fi], 'rb', buffering=0) as fid: - # extract data + + # Extract data start_offset = (data_offset + - block_start_idx * ch_offsets[-1] * data_size) + block_start_idx * ch_offsets[-1] * dtype_byte) for ai in range(0, len(r_lims), n_per): - block_offset = ai * ch_offsets[-1] * data_size + block_offset = ai * ch_offsets[-1] * dtype_byte n_read = min(len(r_lims) - ai, n_per) fid.seek(start_offset + block_offset, 0) # Read and reshape to (n_chunks_read, ch0_ch1_ch2_ch3...) 
many_chunk = _read_ch(fid, subtype, ch_offsets[-1] * n_read, - data_size).reshape(n_read, -1) + dtype_byte, dtype).reshape(n_read, -1) for ii, ci in enumerate(selection): # This now has size (n_chunks_read, n_samp[ci]) ch_data = many_chunk[:, ch_offsets[ci]:ch_offsets[ci + 1]] @@ -200,8 +219,10 @@ def _read_segment_file(self, data, idx, fi, start, stop, cals, mult): ch_data, buf_len, n_samps[ci], npad=0, axis=-1) assert ch_data.shape == (len(ch_data), buf_len) data[ii, d_sidx:d_eidx] = ch_data.ravel()[r_sidx:r_eidx] - data *= gains.T[sel] - data += offsets[sel] + + data *= cal.T[sel] # scale + data += offsets[sel] # offset + data *= gains.T[sel] # apply units gain last # only try to read the stim channel if it's not None and it's # actually one of the requested channels @@ -233,26 +254,27 @@ def _read_segment_file(self, data, idx, fi, start, stop, cals, mult): stim[n_start:n_stop] += evid data[stim_channel_idx, :] = stim[start:stop] else: - # Allows support for up to 17-bit trigger values (2 ** 17 - 1) stim = np.bitwise_and(data[stim_channel_idx].astype(int), - 131071) + 2**17 - 1) data[stim_channel_idx, :] = stim -def _read_ch(fid, subtype, samp, data_size): +def _read_ch(fid, subtype, samp, dtype_byte, dtype=None): """Read a number of samples for a single channel.""" + # BDF if subtype in ('24BIT', 'bdf'): - ch_data = np.fromfile(fid, dtype=np.uint8, - count=samp * data_size) + ch_data = np.fromfile(fid, dtype=dtype, count=samp * dtype_byte) ch_data = ch_data.reshape(-1, 3).astype(np.int32) ch_data = ((ch_data[:, 0]) + (ch_data[:, 1] << 8) + (ch_data[:, 2] << 16)) # 24th bit determines the sign ch_data[ch_data >= (1 << 23)] -= (1 << 24) - # edf data: 16bit data + + # GDF data and EDF data else: - ch_data = np.fromfile(fid, dtype=' 0: - edf_info['subtype'] = subtype - else: - edf_info['subtype'] = os.path.splitext(fname)[1][1:].lower() - - edf_info['n_records'] = n_records = int(fid.read(8).decode()) - # record length in seconds - record_length = 
float(fid.read(8).decode()) - if record_length == 0: - edf_info['record_length'] = record_length = 1. - warn('Header information is incorrect for record length. Default ' - 'record length set to 1.') - else: - edf_info['record_length'] = record_length - nchan = int(fid.read(4).decode()) - channels = list(range(nchan)) - ch_names = [fid.read(16).strip().decode() for ch in channels] - exclude = [ch_names.index(idx) for idx in exclude] - for ch in channels: - fid.read(80) # transducer - units = [fid.read(8).strip().decode() for ch in channels] - edf_info['units'] = list() - edf_info['exclude'] = exclude - include = list() - for i, unit in enumerate(units): - if i in exclude: - continue - if unit == 'uV': - edf_info['units'].append(1e-6) - else: - edf_info['units'].append(1) - include.append(i) - ch_names = [ch_names[idx] for idx in include] - physical_min = np.array([float(fid.read(8).decode()) - for ch in channels])[include] - edf_info['physical_min'] = physical_min - physical_max = np.array([float(fid.read(8).decode()) - for ch in channels])[include] - digital_min = np.array([float(fid.read(8).decode()) - for ch in channels])[include] - edf_info['digital_min'] = digital_min - digital_max = np.array([float(fid.read(8).decode()) - for ch in channels])[include] - prefiltering = [fid.read(80).strip().decode() for ch in channels][:-1] - highpass = np.ravel([re.findall('HP:\s+(\w+)', filt) - for filt in prefiltering]) - lowpass = np.ravel([re.findall('LP:\s+(\w+)', filt) - for filt in prefiltering]) - - # number of samples per record - n_samps = np.array([int(fid.read(8).decode()) for ch - in channels]) - edf_info['n_samps'] = n_samps - - fid.read(32 * nchan).decode() # reserved - assert fid.tell() == header_nbytes - - fid.seek(0, 2) - n_bytes = fid.tell() - n_data_bytes = n_bytes - header_nbytes - total_samps = (n_data_bytes // 3 if subtype == '24BIT' - else n_data_bytes // 2) - read_records = total_samps // np.sum(n_samps) - if n_records != read_records: - warn('Number 
of records from the header does not match the file ' - 'size (perhaps the recording was not stopped before exiting).' - ' Inferring from the file size.') - edf_info['n_records'] = n_records = read_records - - n_samps = n_samps[include] - physical_ranges = physical_max - physical_min - cals = digital_max - digital_min - - if edf_info['subtype'] in ('24BIT', 'bdf'): - edf_info['data_size'] = 3 # 24-bit (3 byte) integers + # Read header from file + ext = os.path.splitext(fname)[1][1:].lower() + logger.info('%s file detected' % ext.upper()) + if ext in ('bdf', 'edf'): + edf_info = _read_edf_header(fname, stim_channel, annot, annotmap, + exclude) + elif ext in ('gdf'): + if annot is not None: + warn('Annotations not yet supported for GDF files.') + edf_info = _read_gdf_header(fname, stim_channel, exclude) + edf_info['annot'] = None + edf_info['annotmap'] = None else: - edf_info['data_size'] = 2 # 16-bit (2 byte) integers - - # Creates a list of dicts of eeg channels for raw.info - logger.info('Setting channel info structure...') - chs = list() - + raise NotImplementedError( + 'Only GDF, EDF, and BDF files are supported, got %s.' 
% ext) + + include = edf_info['include'] + ch_names = edf_info['ch_names'] + n_samps = edf_info['n_samps'][include] + nchan = edf_info['nchan'] + physical_ranges = edf_info['physical_max'] - edf_info['physical_min'] + cals = edf_info['digital_max'] - edf_info['digital_min'] + if np.any(~np.isfinite(cals)): + idx = np.where(~np.isfinite(cals))[0] + warn('Scaling factor is not defined in following channels:\n' + + ', '.join(ch_names[i] for i in idx)) + cals[idx] = 1 + + # Check that stimulus channel exists in dataset + if stim_channel is not None: + stim_channel = _check_stim_channel(stim_channel, ch_names, include) + + # Annotations tal_ch_name = 'EDF Annotations' tal_chs = np.where(np.array(ch_names) == tal_ch_name)[0] if len(tal_chs) > 0: @@ -422,8 +377,10 @@ def _get_edf_info(fname, stim_channel, annot, annotmap, eog, misc, exclude, raise RuntimeError('%s' % ('EDF+ Annotations (TAL) channel needs to be' ' parsed completely on loading.' ' You must set preload parameter to True.')) - if stim_channel == -1: - stim_channel = len(include) - 1 + + # Creates a list of dicts of eeg channels for raw.info + logger.info('Setting channel info structure...') + chs = list() pick_mask = np.ones(len(ch_names)) for idx, ch_info in enumerate(zip(ch_names, physical_ranges, cals)): ch_name, physical_range, cal = ch_info @@ -476,12 +433,22 @@ def _get_edf_info(fname, stim_channel, annot, annotmap, eog, misc, exclude, edf_info['max_samp'] = max_samp = n_samps[picks].max() else: edf_info['max_samp'] = max_samp = n_samps.max() + + # Info structure + # -------------------------------------------------------------------------- + # sfreq defined as the max sampling rate of eeg - sfreq = n_samps.max() / record_length + sfreq = n_samps.max() * \ + edf_info['record_length'][1] / edf_info['record_length'][0] + info = _empty_info(sfreq) - info['meas_date'] = calendar.timegm(date.utctimetuple()) + info['meas_date'] = edf_info['meas_date'] info['chs'] = chs + info['ch_names'] = ch_names + # 
Filter settings + highpass = edf_info['highpass'] + lowpass = edf_info['lowpass'] if highpass.size == 0: pass elif all(highpass): @@ -495,9 +462,8 @@ def _get_edf_info(fname, stim_channel, annot, annotmap, eog, misc, exclude, info['highpass'] = float(np.max(highpass)) warn('Channels contain different highpass filters. Highest filter ' 'setting will be stored.') - if lowpass.size == 0: - pass + pass # Placeholder for future use. Lowpass set in _empty_info. elif all(lowpass): if lowpass[0] == 'NaN': pass # Placeholder for future use. Lowpass set in _empty_info. @@ -511,16 +477,511 @@ def _get_edf_info(fname, stim_channel, annot, annotmap, eog, misc, exclude, # Some keys to be consistent with FIF measurement info info['description'] = None info['buffer_size_sec'] = 1. - edf_info['nsamples'] = int(n_records * max_samp) + edf_info['nsamples'] = int(edf_info['n_records'] * max_samp) # These are the conditions under which a stim channel will be interpolated if stim_channel is not None and not (annot and annotmap) and \ tal_channel is None and n_samps[stim_channel] != int(max_samp): warn('Interpolating stim channel. 
Events may jitter.') info._update_redundant() + return info, edf_info +def _read_edf_header(fname, stim_channel, annot, annotmap, exclude): + """Read header information from EDF+ or BDF file.""" + edf_info = dict() + edf_info.update(annot=annot, annotmap=annotmap, events=[]) + + with open(fname, 'rb') as fid: + + fid.read(8) # version (unused here) + + # patient ID + pid = fid.read(80).decode() + pid = pid.split(' ', 2) + patient = {} + if len(pid) >= 2: + patient['id'] = pid[0] + patient['name'] = pid[1] + + # Recording ID + meas_id = {} + meas_id['recording_id'] = fid.read(80).decode().strip(' \x00') + + day, month, year = [int(x) for x in + re.findall('(\d+)', fid.read(8).decode())] + hour, minute, sec = [int(x) for x in + re.findall('(\d+)', fid.read(8).decode())] + century = 2000 if year < 50 else 1900 + date = datetime.datetime(year + century, month, day, hour, minute, sec) + + header_nbytes = int(fid.read(8).decode()) + + subtype = fid.read(44).strip().decode()[:5] + if len(subtype) == 0: + subtype = os.path.splitext(fname)[1][1:].lower() + + n_records = int(fid.read(8).decode()) + record_length = np.array([float(fid.read(8)), 1.]) # in seconds + if record_length[0] == 0: + record_length = record_length[0] = 1. + warn('Header information is incorrect for record length. 
Default ' + 'record length set to 1.') + + nchan = int(fid.read(4).decode()) + channels = list(range(nchan)) + ch_names = [fid.read(16).strip().decode() for ch in channels] + exclude = [ch_names.index(idx) for idx in exclude] + for ch in channels: + fid.read(80) # transducer + units = [fid.read(8).strip().decode() for ch in channels] + edf_info['units'] = list() + include = list() + for i, unit in enumerate(units): + if i in exclude: + continue + if unit == 'uV': + edf_info['units'].append(1e-6) + else: + edf_info['units'].append(1) + include.append(i) + ch_names = [ch_names[idx] for idx in include] + + physical_min = np.array([float(fid.read(8).decode()) + for ch in channels])[include] + physical_max = np.array([float(fid.read(8).decode()) + for ch in channels])[include] + digital_min = np.array([float(fid.read(8).decode()) + for ch in channels])[include] + digital_max = np.array([float(fid.read(8).decode()) + for ch in channels])[include] + prefiltering = [fid.read(80).decode().strip(' \x00') + for ch in channels][:-1] + highpass = np.ravel([re.findall('HP:\s+(\w+)', filt) + for filt in prefiltering]) + lowpass = np.ravel([re.findall('LP:\s+(\w+)', filt) + for filt in prefiltering]) + + # number of samples per record + n_samps = np.array([int(fid.read(8).decode()) for ch + in channels]) + + # Populate edf_info + edf_info.update( + ch_names=ch_names, data_offset=header_nbytes, + digital_max=digital_max, digital_min=digital_min, exclude=exclude, + highpass=highpass, include=include, lowpass=lowpass, + meas_date=calendar.timegm(date.utctimetuple()), + n_records=n_records, n_samps=n_samps, nchan=nchan, + subject_info=patient, physical_max=physical_max, + physical_min=physical_min, record_length=record_length, + subtype=subtype) + + fid.read(32 * nchan).decode() # reserved + assert fid.tell() == header_nbytes + + fid.seek(0, 2) + n_bytes = fid.tell() + n_data_bytes = n_bytes - header_nbytes + total_samps = (n_data_bytes // 3 if subtype == '24BIT' + else n_data_bytes 
// 2) + read_records = total_samps // np.sum(n_samps) + if n_records != read_records: + warn('Number of records from the header does not match the file ' + 'size (perhaps the recording was not stopped before exiting).' + ' Inferring from the file size.') + edf_info['n_records'] = n_records = read_records + + if subtype in ('24BIT', 'bdf'): + edf_info['dtype_byte'] = 3 # 24-bit (3 byte) integers + edf_info['dtype_np'] = np.uint8 + else: + edf_info['dtype_byte'] = 2 # 16-bit (2 byte) integers + edf_info['dtype_np'] = np.int16 + + return edf_info + + +def _read_gdf_header(fname, stim_channel, exclude): + """Read GDF 1.x and GDF 2.x header info.""" + edf_info = dict() + # edf_info['annot'] = annot + # edf_info['annotmap'] = annotmap + edf_info['events'] = [] + + with open(fname, 'rb') as fid: + + version = fid.read(8).decode() + + gdftype_np = (None, np.int8, np.uint8, np.int16, np.uint16, np.int32, + np.uint32, np.int64, np.uint64, None, None, None, None, + None, None, None, np.float32, np.float64) + gdftype_byte = [np.dtype(x).itemsize if x is not None else 0 + for x in gdftype_np] + assert sum(gdftype_byte) == 42 + + edf_info['type'] = edf_info['subtype'] = version[:3] + edf_info['number'] = float(version[4:]) + + # GDF 1.x + # ---------------------------------------------------------------------- + if edf_info['number'] < 1.9: + + # patient ID + pid = fid.read(80).decode() + pid = pid.split(' ', 2) + patient = {} + if len(pid) >= 2: + patient['id'] = pid[0] + patient['name'] = pid[1] + + # Recording ID + meas_id = {} + meas_id['recording_id'] = fid.read(80).decode().strip(' \x00') + + # date + tm = fid.read(16).decode().strip(' \x00') + try: + if tm[14:16] == ' ': + tm = tm[:14] + '00' + tm[16:] + date = (datetime.datetime(int(tm[0:4]), int(tm[4:6]), + int(tm[6:8]), int(tm[8:10]), + int(tm[10:12]), int(tm[12:14]), + int(tm[14:16]) * pow(10, 4))) + except Exception: + date = datetime.datetime(2000, 1, 1) + + header_nbytes = np.fromfile(fid, np.int64, 1)[0] + 
meas_id['equipment'] = np.fromfile(fid, np.uint8, 8)[0] + meas_id['hospital'] = np.fromfile(fid, np.uint8, 8)[0] + meas_id['technician'] = np.fromfile(fid, np.uint8, 8)[0] + fid.seek(20, 1) # 20bytes reserved + + n_records = np.fromfile(fid, np.int64, 1)[0] + # record length in seconds + record_length = np.fromfile(fid, np.uint32, 2) + if record_length[0] == 0: + record_length[0] = 1. + warn('Header information is incorrect for record length. ' + 'Default record length set to 1.') + nchan = np.fromfile(fid, np.uint32, 1)[0] + channels = list(range(nchan)) + ch_names = [fid.read(16).decode().strip(' \x00') + for ch in channels] + fid.seek(80 * len(channels), 1) # transducer + units = [fid.read(8).decode().strip(' \x00') for ch in channels] + + exclude = [ch_names.index(idx) for idx in exclude] + include = list() + for i, unit in enumerate(units): + if unit[:2] == 'uV': + units[i] = 1e-6 + else: + units[i] = 1 + include.append(i) + + ch_names = [ch_names[idx] for idx in include] + physical_min = np.fromfile(fid, np.float64, len(channels)) + physical_max = np.fromfile(fid, np.float64, len(channels)) + digital_min = np.fromfile(fid, np.int64, len(channels)) + digital_max = np.fromfile(fid, np.int64, len(channels)) + prefiltering = [fid.read(80).decode().strip(' \x00') + for ch in channels][:-1] + highpass = np.ravel([re.findall('HP:\s+(\w+)', filt) + for filt in prefiltering]) + lowpass = np.ravel([re.findall('LP:\s+(\w+)', filt) + for filt in prefiltering]) + + # n samples per record + n_samps = np.fromfile(fid, np.int32, len(channels)) + + # channel data type + dtype = np.fromfile(fid, np.int32, len(channels)) + + # total number of bytes for data + bytes_tot = np.sum([gdftype_byte[t] * n_samps[i] + for i, t in enumerate(dtype)]) + + # Populate edf_info + edf_info.update( + bytes_tot=bytes_tot, ch_names=ch_names, + data_offset=header_nbytes, digital_min=digital_min, + digital_max=digital_max, + dtype_byte=[gdftype_byte[t] for t in dtype], + dtype_np=[gdftype_np[t] for 
t in dtype], exclude=exclude, + highpass=highpass, include=include, lowpass=lowpass, + meas_date=calendar.timegm(date.utctimetuple()), + meas_id=meas_id, n_records=n_records, n_samps=n_samps, + nchan=nchan, subject_info=patient, physical_max=physical_max, + physical_min=physical_min, record_length=record_length, + units=units) + + fid.seek(32 * edf_info['nchan'], 1) # reserved + assert fid.tell() == header_nbytes + + # Event table + # ------------------------------------------------------------------ + etp = header_nbytes + n_records * edf_info['bytes_tot'] + # skip data to go to event table + fid.seek(etp) + etmode = fid.read(1).decode() + if etmode != '': + etmode = np.fromfile(fid, np.uint8, 1).tolist() + sr = np.fromfile(fid, np.uint8, 3) + event_sr = sr[0] + events = [] + for i in range(1, len(sr)): + event_sr = event_sr + sr[i] * 256 ** i + n_events = np.fromfile(fid, np.uint32, 1)[0] + pos = np.fromfile(fid, np.uint32, n_events * 4) + typ = np.fromfile(fid, np.uint16, n_events * 2) + + if etmode == 3: + chn = np.fromfile(fid, np.uint16, n_events * 2) + dur = np.fromfile(fid, np.uint32, n_events * 4) + events.append([n_events, pos, typ, chn, dur]) + else: + dur = np.zeros(n_events, dtype=np.uint32) + events.append([n_events, pos, typ]) + + edf_info['events'] = events + # info['events'] = np.c_[pos, dur, typ] + + # GDF 2.x + # ---------------------------------------------------------------------- + else: + # FIXED HEADER + handedness = ('Unknown', 'Right', 'Left', 'Equal') + gender = ('Unknown', 'Male', 'Female') + scale = ('Unknown', 'No', 'Yes', 'Corrected') + + # date + pid = fid.read(66).decode() + pid = pid.split(' ', 2) + patient = {} + if len(pid) >= 2: + patient['id'] = pid[0] + patient['name'] = pid[1] + fid.seek(10, 1) # 10bytes reserved + + # Smoking / Alcohol abuse / drug abuse / medication + sadm = np.fromfile(fid, np.uint8, 1)[0] + patient['smoking'] = scale[sadm % 4] + patient['alcohol_abuse'] = scale[(sadm >> 2) % 4] + patient['drug_abuse'] = 
scale[(sadm >> 4) % 4] + patient['medication'] = scale[(sadm >> 6) % 4] + patient['weight'] = np.fromfile(fid, np.uint8, 1)[0] + if patient['weight'] == 0 or patient['weight'] == 255: + patient['weight'] = None + patient['height'] = np.fromfile(fid, np.uint8, 1)[0] + if patient['height'] == 0 or patient['height'] == 255: + patient['height'] = None + + # Gender / Handedness / Visual Impairment + ghi = np.fromfile(fid, np.uint8, 1)[0] + patient['sex'] = gender[ghi % 4] + patient['handedness'] = handedness[(ghi >> 2) % 4] + patient['visual'] = scale[(ghi >> 4) % 4] + + # Recording identification + meas_id = {} + meas_id['recording_id'] = fid.read(64).decode().strip(' \x00') + vhsv = np.fromfile(fid, np.uint8, 4) + loc = {} + if vhsv[3] == 0: + loc['vertpre'] = 10 * int(vhsv[0] >> 4) + int(vhsv[0] % 16) + loc['horzpre'] = 10 * int(vhsv[1] >> 4) + int(vhsv[1] % 16) + loc['size'] = 10 * int(vhsv[2] >> 4) + int(vhsv[2] % 16) + else: + loc['vertpre'] = 29 + loc['horzpre'] = 29 + loc['size'] = 29 + loc['version'] = 0 + loc['latitude'] = \ + float(np.fromfile(fid, np.uint32, 1)[0]) / 3600000 + loc['longitude'] = \ + float(np.fromfile(fid, np.uint32, 1)[0]) / 3600000 + loc['altitude'] = float(np.fromfile(fid, np.int32, 1)[0]) / 100 + meas_id['loc'] = loc + + date = np.fromfile(fid, np.uint64, 1)[0] + if date == 0: + date = datetime.datetime(1, 1, 1) + else: + date = datetime.datetime(1, 1, 1) + \ + datetime.timedelta(date * pow(2, -32) - 367) + + birthday = np.fromfile(fid, np.uint64, 1).tolist()[0] + if birthday == 0: + birthday = datetime.datetime(1, 1, 1) + else: + birthday = (datetime.datetime(1, 1, 1) + + datetime.timedelta(birthday * pow(2, -32) - 367)) + patient['birthday'] = birthday + if patient['birthday'] != datetime.datetime(1, 1, 1, 0, 0): + today = datetime.datetime.today() + patient['age'] = today.year - patient['birthday'].year + today = today.replace(year=patient['birthday'].year) + if today < patient['birthday']: + patient['age'] -= 1 + else: + 
patient['age'] = None + + header_nbytes = np.fromfile(fid, np.uint16, 1)[0] * 256 + + fid.seek(6, 1) # 6 bytes reserved + meas_id['equipment'] = np.fromfile(fid, np.uint8, 8) + meas_id['ip'] = np.fromfile(fid, np.uint8, 6) + patient['headsize'] = np.fromfile(fid, np.uint16, 3) + patient['headsize'] = np.asarray(patient['headsize'], np.float32) + patient['headsize'] = np.ma.masked_array( + patient['headsize'], + np.equal(patient['headsize'], 0), None).filled() + ref = np.fromfile(fid, np.float32, 3) + gnd = np.fromfile(fid, np.float32, 3) + n_records = np.fromfile(fid, np.int64, 1)[0] + + # record length in seconds + record_length = np.fromfile(fid, np.uint32, 2) + if record_length[0] == 0: + record_length[0] = 1. + warn('Header information is incorrect for record length. ' + 'Default record length set to 1.') + + nchan = np.fromfile(fid, np.uint16, 1)[0] + fid.seek(2, 1) # 2bytes reserved + + # Channels (variable header) + channels = list(range(nchan)) + ch_names = [fid.read(16).decode().strip(' \x00') + for ch in channels] + exclude = [ch_names.index(idx) for idx in exclude] + + fid.seek(80 * len(channels), 1) # reserved space + fid.seek(6 * len(channels), 1) # phys_dim, obsolete + + """The Physical Dimensions are encoded as int16, according to: + - Units codes : + https://sourceforge.net/p/biosig/svn/HEAD/tree/trunk/biosig/doc/units.csv + - Decimal factors codes: + https://sourceforge.net/p/biosig/svn/HEAD/tree/trunk/biosig/doc/DecimalFactors.txt + """ # noqa + units = np.fromfile(fid, np.uint16, len(channels)).tolist() + unitcodes = np.array(units[:]) + include = list() + for i, unit in enumerate(units): + if unit == 4275: # microvolts + units[i] = 1e-6 + elif unit == 512: # dimensionless + units[i] = 1 + elif unit == 0: + units[i] = 1 # unrecognized + else: + warn('Unsupported physical dimension for channel %d ' + '(assuming dimensionless). Please contact the ' + 'MNE-Python developers for support.' 
% i) + units[i] = 1 + include.append(i) + + ch_names = [ch_names[idx] for idx in include] + physical_min = np.fromfile(fid, np.float64, len(channels)) + physical_max = np.fromfile(fid, np.float64, len(channels)) + digital_min = np.fromfile(fid, np.float64, len(channels)) + digital_max = np.fromfile(fid, np.float64, len(channels)) + + fid.seek(68 * len(channels), 1) # obsolete + lowpass = np.fromfile(fid, np.float32, len(channels)) + highpass = np.fromfile(fid, np.float32, len(channels)) + notch = np.fromfile(fid, np.float32, len(channels)) + + # number of samples per record + n_samps = np.fromfile(fid, np.int32, len(channels)) + + # data type + dtype = np.fromfile(fid, np.int32, len(channels)) + + channel = {} + channel['xyz'] = [np.fromfile(fid, np.float32, 3)[0] + for ch in channels] + + if edf_info['number'] < 2.19: + impedance = np.fromfile(fid, np.uint8, len(channels)) + impedance[impedance == 255] = np.nan + channel['impedance'] = pow(2, impedance.astype(float) / 8) + fid.seek(19 * len(channels), 1) # reserved + else: + tmp = np.fromfile(fid, np.float32, 5 * len(channels)) + tmp = tmp[::5] + fZ = tmp[:] + impedance = tmp[:] + # channels with no voltage (code 4256) data + ch = [unitcodes & 65504 != 4256][0] + impedance[np.where(ch)] = None + # channel with no impedance (code 4288) data + ch = [unitcodes & 65504 != 4288][0] + fZ[np.where(ch)[0]] = None + + assert fid.tell() == header_nbytes + + # total number of bytes for data + bytes_tot = np.sum([gdftype_byte[t] * n_samps[i] + for i, t in enumerate(dtype)]) + + # Populate edf_info + edf_info.update( + bytes_tot=bytes_tot, ch_names=ch_names, + data_offset=header_nbytes, + dtype_byte=[gdftype_byte[t] for t in dtype], + dtype_np=[gdftype_np[t] for t in dtype], + digital_min=digital_min, digital_max=digital_max, + exclude=exclude, gnd=gnd, highpass=highpass, include=include, + impedance=impedance, lowpass=lowpass, + meas_date=calendar.timegm(date.utctimetuple()), + meas_id=meas_id, n_records=n_records, 
n_samps=n_samps, + nchan=nchan, notch=notch, subject_info=patient, + physical_max=physical_max, physical_min=physical_min, + record_length=record_length, ref=ref, units=units) + + # EVENT TABLE + # ------------------------------------------------------------------ + etp = edf_info['data_offset'] + edf_info['n_records'] * \ + edf_info['bytes_tot'] + fid.seek(etp) # skip data to go to event table + events = [] + etmode = fid.read(1).decode() + if etmode != '': + etmode = np.fromstring(etmode, np.uint8).tolist()[0] + + if edf_info['number'] < 1.94: + sr = np.fromfile(fid, np.uint8, 3) + event_sr = sr[0] + for i in range(1, len(sr)): + event_sr = event_sr + sr[i] * 2**i + n_events = np.fromfile(fid, np.uint32, 1)[0] + else: + ne = np.fromfile(fid, np.uint8, 3) + n_events = ne[0] + for i in range(1, len(ne)): + n_events = n_events + ne[i] * 2**i + event_sr = np.fromfile(fid, np.float32, 1)[0] + + pos = np.fromfile(fid, np.uint32, n_events * 4) + typ = np.fromfile(fid, np.uint16, n_events * 2) + + if etmode == 3: + chn = np.fromfile(fid, np.uint16, n_events * 2) + dur = np.fromfile(fid, np.uint32, n_events * 4) + events.append([n_events, pos, typ, chn, dur]) + else: + dur = np.zeros(n_events, dtype=np.uint32) + events.append([n_events, pos, typ]) + + edf_info['events'] = events + + return edf_info + + def _read_annot(annot, annotmap, sfreq, data_length): """Annotation File Reader. @@ -561,15 +1022,38 @@ def _read_annot(annot, annotmap, sfreq, data_length): return stim_channel +def _check_stim_channel(stim_channel, ch_names, include): + """Check that the stimulus channel exists in the current datafile.""" + if isinstance(stim_channel, str): + if stim_channel not in ch_names: + err = 'Could not find a channel named "{}" in datafile.' 
\ + .format(stim_channel) + casematch = [ch for ch in ch_names + if stim_channel.lower().replace(' ', '') == + ch.lower().replace(' ', '')] + if casematch: + err += ' Closest match is "{}".'.format(casematch[0]) + raise ValueError(err) + else: + if stim_channel == -1: + stim_channel = len(include) - 1 + elif stim_channel > len(ch_names): + raise ValueError('Requested stim_channel index ({}) exceeds total ' + 'number of channels in datafile ({})' + .format(stim_channel, len(ch_names))) + + return stim_channel + + def read_raw_edf(input_fname, montage=None, eog=None, misc=None, stim_channel=-1, annot=None, annotmap=None, exclude=(), preload=False, verbose=None): - """Reader function for EDF+, BDF conversion to FIF. + """Reader function for EDF+, BDF, GDF conversion to FIF. Parameters ---------- input_fname : str - Path to the EDF+,BDF file. + Path to the EDF+, BDF, or GDF file. montage : str | None | instance of Montage Path or instance of montage containing electrode positions. If None, sensor locations are (0,0,0). See the documentation of @@ -611,6 +1095,19 @@ def read_raw_edf(input_fname, montage=None, eog=None, misc=None, raw : Instance of RawEDF A Raw object containing EDF data. + Notes + ----- + Biosemi devices trigger codes are encoded in bits 1-16 of the status + channel, whereas system codes (CMS in/out-of range, battery low, etc.) are + coded in bits 16-23 (see http://www.biosemi.com/faq/trigger_signals.htm). + To retrieve correct event values (bits 1-16), one could do: + + >>> events = mne.find_events(...) # doctest:+SKIP + >>> events[:, 2] >>= 8 # doctest:+SKIP + + It is also possible to retrieve system codes, but no particular effort has + been made to decode these in MNE. + See Also -------- mne.io.Raw : Documentation of attribute and methods. 
diff --git a/mne/io/edf/tests/test_gdf.py b/mne/io/edf/tests/test_gdf.py new file mode 100644 index 00000000000..32700c90770 --- /dev/null +++ b/mne/io/edf/tests/test_gdf.py @@ -0,0 +1,68 @@ +"""Data Equivalence Tests""" +from __future__ import print_function + +# Authors: Alexandre Barachant +# Nicolas Barascud +# +# License: BSD (3-clause) + +import os.path as op +import warnings + +from numpy.testing import assert_array_almost_equal, assert_array_equal +import numpy as np +import scipy.io as sio + +from mne.datasets import testing +from mne.io import read_raw_edf +from mne.utils import run_tests_if_main +from mne import pick_types, find_events + +warnings.simplefilter('always') + +data_path = testing.data_path(download=False) +gdf1_path = op.join(data_path, 'GDF', 'test_gdf_1.25') +gdf2_path = op.join(data_path, 'GDF', 'test_gdf_2.20') + + +@testing.requires_testing_data +def test_gdf_data(): + """Test reading raw GDF 1.x files.""" + raw = read_raw_edf(gdf1_path + '.gdf', eog=None, + misc=None, preload=True, stim_channel=None) + picks = pick_types(raw.info, meg=False, eeg=True, exclude='bads') + data, _ = raw[picks] + + # this .npy was generated using the official biosig python package + raw_biosig = np.load(gdf1_path + '_biosig.npy') + raw_biosig = raw_biosig * 1e-6 # data are stored in microvolts + data_biosig = raw_biosig[picks] + + # Assert data are almost equal + assert_array_almost_equal(data, data_biosig, 8) + + +@testing.requires_testing_data +def test_gdf2_data(): + """Test reading raw GDF 2.x files.""" + raw = read_raw_edf(gdf2_path + '.gdf', eog=None, misc=None, preload=True, + stim_channel='STATUS') + picks = pick_types(raw.info, meg=False, eeg=True, exclude='bads') + data, _ = raw[picks] + + # This .mat was generated using the official biosig matlab package + mat = sio.loadmat(gdf2_path + '_biosig.mat') + data_biosig = mat['dat'] * 1e-6 # data are stored in microvolts + data_biosig = data_biosig[picks] + + # Assert data are almost equal + 
assert_array_almost_equal(data, data_biosig, 8)
+
+    # Find events
+    events = find_events(raw, verbose=1)
+    events[:, 2] >>= 8  # low-order 8 bits carry BioSemi-style system events
+    assert(events.shape[0] == 2)  # 2 events in file
+    assert_array_equal(events[:, 2], [20, 28])
+
+
+run_tests_if_main()