diff --git a/backend/app.py b/backend/app.py index 226311b3..7389aadf 100644 --- a/backend/app.py +++ b/backend/app.py @@ -3,7 +3,7 @@ from waitress import serve from http import HTTPStatus -from classification.file_loading import get_raw_array +from classification.parser import get_raw_array from classification.exceptions import ClassificationError from classification.config.constants import Sex, ALLOWED_FILE_EXTENSIONS from classification.model import SleepStagesClassifier diff --git a/backend/classification/file_loading.py b/backend/classification/parser/__init__.py similarity index 52% rename from backend/classification/file_loading.py rename to backend/classification/parser/__init__.py index 2ff67d23..99a014ad 100644 --- a/backend/classification/file_loading.py +++ b/backend/classification/parser/__init__.py @@ -1,6 +1,5 @@ """ -Function utilities to convert data acquired on an OpenBCI -Cyton board using the SD card logging strategy. +Function utilities to convert data acquired on an OpenBCI board TODO: Consider cropping file (from bed to wake up time) here, before the for loop. Have to consider not all lines hold sample values (i.e. first line with comment and second line with a single timestamp). @@ -14,44 +13,27 @@ from mne import create_info from mne.io import RawArray import numpy as np -import pandas as pd -from classification.exceptions import ClassificationError -from classification.config.constants import ( - EEG_CHANNELS, - OPENBCI_CYTON_SAMPLE_RATE, -) - -ADS1299_Vref = 4.5 -ADS1299_gain = 24. 
-SCALE_uV_PER_COUNT = ADS1299_Vref / ((2**23) - 1) / ADS1299_gain * 1000000 -SCALE_V_PER_COUNT = SCALE_uV_PER_COUNT / 1e6 - -FILE_COLUMN_OFFSET = 1 -CYTON_TOTAL_NB_CHANNELS = 8 -SKIP_ROWS = 2 +from classification.config.constants import OPENBCI_CYTON_SAMPLE_RATE, EEG_CHANNELS +from classification.parser.constants import SCALE_V_PER_COUNT +from classification.parser.file_type import FileType, detect_file_type def get_raw_array(file): - """Converts a file following the Cyton board SD card logging format into a mne.RawArray + """Converts a file following a logging format into a mne.RawArray Input: - file: received as an input file Returns: - mne.RawArray of the two EEG channels of interest """ - retained_columns = tuple(range(1, len(EEG_CHANNELS) + 1)) - - try: - eeg_raw = pd.read_csv(file, - skiprows=SKIP_ROWS, - usecols=retained_columns - ).to_numpy() - except Exception: - raise ClassificationError() + filetype = detect_file_type(file) + print(f""" + Detected {filetype.name} format. + """) - hexstr_to_int = np.vectorize(_hexstr_to_int) - eeg_raw = hexstr_to_int(eeg_raw) + parse = filetype.parser + eeg_raw = parse(file) raw_object = RawArray( SCALE_V_PER_COUNT * np.transpose(eeg_raw), @@ -61,6 +43,7 @@ def get_raw_array(file): ch_types='eeg'), verbose=False, ) + print(f""" First sample values: {raw_object[:, 0]} Second sample values: {raw_object[:, 1]} @@ -70,13 +53,3 @@ def get_raw_array(file): """) return raw_object - - -def _hexstr_to_int(hexstr): - """Converts a two complement hexadecimal value in a string to a signed float - Input: - - hex_value: signed hexadecimal value - Returns: - - decimal value - """ - return int.from_bytes(bytes.fromhex(hexstr), byteorder='big', signed=True) diff --git a/backend/classification/parser/constants.py b/backend/classification/parser/constants.py new file mode 100644 index 00000000..3c95276a --- /dev/null +++ b/backend/classification/parser/constants.py @@ -0,0 +1,10 @@ +from classification.config.constants import EEG_CHANNELS + 
+ADS1299_Vref = 4.5 +ADS1299_gain = 24. +SCALE_uV_PER_COUNT = ADS1299_Vref / ((2**23) - 1) / ADS1299_gain * 1000000 +SCALE_V_PER_COUNT = SCALE_uV_PER_COUNT / 1e6 + +FILE_COLUMN_OFFSET = 1 + +RETAINED_COLUMNS = tuple(range(FILE_COLUMN_OFFSET, len(EEG_CHANNELS) + 1)) diff --git a/backend/classification/parser/csv.py b/backend/classification/parser/csv.py new file mode 100644 index 00000000..106f2ab5 --- /dev/null +++ b/backend/classification/parser/csv.py @@ -0,0 +1,14 @@ +import pandas as pd + +from classification.exceptions import ClassificationError + +def read_csv(file, rows_to_skip=0, columns_to_read=None): + try: + raw_array = pd.read_csv(file, + skiprows=rows_to_skip, + usecols=columns_to_read + ).to_numpy() + except Exception: + raise ClassificationError() + + return raw_array diff --git a/backend/classification/parser/file_type.py b/backend/classification/parser/file_type.py new file mode 100644 index 00000000..3e62e4dc --- /dev/null +++ b/backend/classification/parser/file_type.py @@ -0,0 +1,20 @@ +from enum import Enum + +from classification.parser.sd_file import parse_sd_file +from classification.parser.session_file import parse_session_file + +class FileType(Enum): + SDFile = (parse_sd_file,) + SessionFile = (parse_session_file,) + def __init__(self, parser): + self.parser = parser + + +def detect_file_type(file) -> FileType: + """Detects file type by reading the first line of the file (the stream is not rewound) + - file: received as an input file + Returns: + - FileType of the input file + """ + first_line = file.readline().decode("utf-8") + return FileType.SessionFile if "EEG Data" in first_line else FileType.SDFile diff --git a/backend/classification/parser/sd_file.py b/backend/classification/parser/sd_file.py new file mode 100644 index 00000000..ea608003 --- /dev/null +++ b/backend/classification/parser/sd_file.py @@ -0,0 +1,30 @@ +import numpy as np + +from classification.parser.constants import RETAINED_COLUMNS +from classification.parser.csv import read_csv + +ROWS_TO_SKIP = 2 + + +def _hexstr_to_int(hexstr): + 
"""Converts a two's complement hexadecimal value in a string to a signed integer + Input: + - hexstr: signed hexadecimal value + Returns: + - decimal value + """ + return int.from_bytes(bytes.fromhex(hexstr), byteorder='big', signed=True) + + +def parse_sd_file(file): + """Converts a file following SD File logging format into a np.array + Input: + - file: received as an input file + Returns: + - np.array of the two EEG channels of interest + """ + eeg_raw = read_csv(file, ROWS_TO_SKIP, RETAINED_COLUMNS) + hexstr_to_int = np.vectorize(_hexstr_to_int) + eeg_raw = hexstr_to_int(eeg_raw) + + return eeg_raw diff --git a/backend/classification/parser/session_file.py b/backend/classification/parser/session_file.py new file mode 100644 index 00000000..eb5d77d7 --- /dev/null +++ b/backend/classification/parser/session_file.py @@ -0,0 +1,17 @@ +import pandas as pd + +from classification.parser.constants import RETAINED_COLUMNS +from classification.parser.csv import read_csv + + +ROWS_TO_SKIP = 5 + + +def parse_session_file(file): + """Converts a file following Session File logging format into a np.array + Input: + - file: received as an input file + Returns: + - np.array of the two EEG channels of interest + """ + return read_csv(file, ROWS_TO_SKIP, RETAINED_COLUMNS)