From 6ad1ac335e0c5b42807cd2971e091cf82e198a2e Mon Sep 17 00:00:00 2001
From: Anes Belfodil <ans.belfodil@gmail.com>
Date: Mon, 9 Nov 2020 15:14:16 -0500
Subject: [PATCH 1/4] Add session file parser

---
 backend/app.py                                |  2 +-
 .../{file_loading.py => parser/__init__.py}   | 55 ++++++-------------
 backend/classification/parser/constants.py    | 11 ++++
 backend/classification/parser/file_type.py    | 16 ++++++
 .../classification/parser/sd_file_parser.py   | 38 +++++++++++++
 .../parser/session_file_parser.py             | 25 +++++++++
 6 files changed, 109 insertions(+), 38 deletions(-)
 rename backend/classification/{file_loading.py => parser/__init__.py} (52%)
 create mode 100644 backend/classification/parser/constants.py
 create mode 100644 backend/classification/parser/file_type.py
 create mode 100644 backend/classification/parser/sd_file_parser.py
 create mode 100644 backend/classification/parser/session_file_parser.py

diff --git a/backend/app.py b/backend/app.py
index 226311b3..7389aadf 100644
--- a/backend/app.py
+++ b/backend/app.py
@@ -3,7 +3,7 @@
 from waitress import serve
 from http import HTTPStatus
 
-from classification.file_loading import get_raw_array
+from classification.parser import get_raw_array
 from classification.exceptions import ClassificationError
 from classification.config.constants import Sex, ALLOWED_FILE_EXTENSIONS
 from classification.model import SleepStagesClassifier
diff --git a/backend/classification/file_loading.py b/backend/classification/parser/__init__.py
similarity index 52%
rename from backend/classification/file_loading.py
rename to backend/classification/parser/__init__.py
index 2ff67d23..4b2b299a 100644
--- a/backend/classification/file_loading.py
+++ b/backend/classification/parser/__init__.py
@@ -1,6 +1,5 @@
 """
-Function utilities to convert data acquired on an OpenBCI
-Cyton board using the SD card logging strategy.
+Function utilities to convert data acquired on an OpenBCI board
 
 TODO: Consider cropping file (from bed to wake up time) here, before the for loop. Have to consider
 not all lines hold sample values (i.e. first line with comment and second line with a single timestamp).
@@ -14,44 +13,35 @@
 from mne import create_info
 from mne.io import RawArray
 import numpy as np
-import pandas as pd
 
-from classification.exceptions import ClassificationError
-from classification.config.constants import (
-    EEG_CHANNELS,
-    OPENBCI_CYTON_SAMPLE_RATE,
-)
+from classification.config.constants import OPENBCI_CYTON_SAMPLE_RATE, EEG_CHANNELS
+from classification.parser.constants import SCALE_V_PER_COUNT
+from classification.parser.sd_file_parser import parse_sd_file
+from classification.parser.session_file_parser import parse_session_file
+from classification.parser.file_type import FileType, detect_file_type
 
-ADS1299_Vref = 4.5
-ADS1299_gain = 24.
-SCALE_uV_PER_COUNT = ADS1299_Vref / ((2**23) - 1) / ADS1299_gain * 1000000
-SCALE_V_PER_COUNT = SCALE_uV_PER_COUNT / 1e6
 
-FILE_COLUMN_OFFSET = 1
-CYTON_TOTAL_NB_CHANNELS = 8
-SKIP_ROWS = 2
+_FILETYPE_PARSER_MAP = {
+    FileType.SDFile: parse_sd_file,
+    FileType.SessionFile: parse_session_file
+}
 
 
 def get_raw_array(file):
-    """Converts a file following the Cyton board SD card logging format into a mne.RawArray
+    """Converts a file following a logging format into a mne.RawArray
     Input:
     - file: received as an input file
     Returns:
     - mne.RawArray of the two EEG channels of interest
     """
 
-    retained_columns = tuple(range(1, len(EEG_CHANNELS) + 1))
-
-    try:
-        eeg_raw = pd.read_csv(file,
-                              skiprows=SKIP_ROWS,
-                              usecols=retained_columns
-                              ).to_numpy()
-    except Exception:
-        raise ClassificationError()
+    filetype = detect_file_type(file)
+    print(f"""
+    Detected {filetype.name} format.
+    """)
 
-    hexstr_to_int = np.vectorize(_hexstr_to_int)
-    eeg_raw = hexstr_to_int(eeg_raw)
+    parse = _FILETYPE_PARSER_MAP[filetype]
+    eeg_raw = parse(file)
 
     raw_object = RawArray(
         SCALE_V_PER_COUNT * np.transpose(eeg_raw),
@@ -61,6 +51,7 @@ def get_raw_array(file):
             ch_types='eeg'),
         verbose=False,
     )
+
     print(f"""
         First sample values: {raw_object[:, 0]}
         Second sample values: {raw_object[:, 1]}
@@ -70,13 +61,3 @@ def get_raw_array(file):
     """)
 
     return raw_object
-
-
-def _hexstr_to_int(hexstr):
-    """Converts a two complement hexadecimal value in a string to a signed float
-    Input:
-    - hex_value: signed hexadecimal value
-    Returns:
-    - decimal value
-    """
-    return int.from_bytes(bytes.fromhex(hexstr), byteorder='big', signed=True)
diff --git a/backend/classification/parser/constants.py b/backend/classification/parser/constants.py
new file mode 100644
index 00000000..c9e947b0
--- /dev/null
+++ b/backend/classification/parser/constants.py
@@ -0,0 +1,11 @@
+from classification.config.constants import EEG_CHANNELS
+
+ADS1299_Vref = 4.5
+ADS1299_gain = 24.
+SCALE_uV_PER_COUNT = ADS1299_Vref / ((2**23) - 1) / ADS1299_gain * 1000000
+SCALE_V_PER_COUNT = SCALE_uV_PER_COUNT / 1e6
+
+FILE_COLUMN_OFFSET = 1
+CYTON_TOTAL_NB_CHANNELS = 8
+
+RETAINED_COLUMNS = tuple(range(FILE_COLUMN_OFFSET, len(EEG_CHANNELS) + 1))
diff --git a/backend/classification/parser/file_type.py b/backend/classification/parser/file_type.py
new file mode 100644
index 00000000..c0422f74
--- /dev/null
+++ b/backend/classification/parser/file_type.py
@@ -0,0 +1,16 @@
+from enum import Enum, auto
+
+
+class FileType(Enum):
+    SDFile = auto()
+    SessionFile = auto()
+
+
+def detect_file_type(file) -> FileType:
+    """Detects file type from the first line of the file (that line is consumed)
+    - file: received as an input file; its readline() must return bytes
+    Returns:
+    - FileType.SessionFile if that line contains "EEG Data", else FileType.SDFile
+    """
+    first_line = file.readline().decode("utf-8")
+    return FileType.SessionFile if "EEG Data" in first_line else FileType.SDFile
diff --git a/backend/classification/parser/sd_file_parser.py b/backend/classification/parser/sd_file_parser.py
new file mode 100644
index 00000000..ebdc182d
--- /dev/null
+++ b/backend/classification/parser/sd_file_parser.py
@@ -0,0 +1,38 @@
+import pandas as pd
+import numpy as np
+
+from classification.exceptions import ClassificationError
+from classification.parser.constants import RETAINED_COLUMNS
+
+ROWS_TO_SKIP = 2
+
+
+def _hexstr_to_int(hexstr):
+    """Converts a two's complement hexadecimal value in a string to a signed int
+    Input:
+    - hexstr: big-endian two's complement hexadecimal string
+    Returns:
+    - signed integer value
+    """
+    return int.from_bytes(bytes.fromhex(hexstr), byteorder='big', signed=True)
+
+
+def parse_sd_file(file):
+    """Converts a file following SD File logging format into a np.array
+    Input:
+    - file: received as an input file
+    Returns:
+    - np.array of the two EEG channels of interest
+    """
+    try:
+        eeg_raw = pd.read_csv(file,
+                              skiprows=ROWS_TO_SKIP,
+                              usecols=RETAINED_COLUMNS
+                              ).to_numpy()
+    except Exception:
+        raise ClassificationError()
+
+    hexstr_to_int = np.vectorize(_hexstr_to_int)
+    eeg_raw = hexstr_to_int(eeg_raw)
+
+    return eeg_raw
diff --git a/backend/classification/parser/session_file_parser.py b/backend/classification/parser/session_file_parser.py
new file mode 100644
index 00000000..b204019d
--- /dev/null
+++ b/backend/classification/parser/session_file_parser.py
@@ -0,0 +1,25 @@
+import pandas as pd
+
+from classification.exceptions import ClassificationError
+from classification.parser.constants import RETAINED_COLUMNS
+
+ROWS_TO_SKIP = 5
+
+
+def parse_session_file(file):
+    """Converts a file following Session File logging format into a np.array
+    Input:
+    - file: received as an input file
+    Returns:
+    - np.array of the two EEG channels of interest
+    """
+
+    try:
+        eeg_raw = pd.read_csv(file,
+                              skiprows=ROWS_TO_SKIP,
+                              usecols=RETAINED_COLUMNS
+                              ).to_numpy()
+    except Exception:
+        raise ClassificationError()
+
+    return eeg_raw

From 1a79e2d5cb27c57cfecfe3bc1dc3ee9cb2c09575 Mon Sep 17 00:00:00 2001
From: Anes Belfodil <ans.belfodil@gmail.com>
Date: Mon, 9 Nov 2020 21:15:32 -0500
Subject: [PATCH 2/4] Remove unused constant

---
 backend/classification/parser/constants.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/backend/classification/parser/constants.py b/backend/classification/parser/constants.py
index c9e947b0..3c95276a 100644
--- a/backend/classification/parser/constants.py
+++ b/backend/classification/parser/constants.py
@@ -6,6 +6,5 @@
 SCALE_V_PER_COUNT = SCALE_uV_PER_COUNT / 1e6
 
 FILE_COLUMN_OFFSET = 1
-CYTON_TOTAL_NB_CHANNELS = 8
 
 RETAINED_COLUMNS = tuple(range(FILE_COLUMN_OFFSET, len(EEG_CHANNELS) + 1))

From f0fd58bc20c33617a6203535c3f3348475f311ba Mon Sep 17 00:00:00 2001
From: Anes Belfodil <ans.belfodil@gmail.com>
Date: Mon, 9 Nov 2020 21:20:04 -0500
Subject: [PATCH 3/4] Extract csv reading to function

---
 backend/classification/parser/csv.py               | 14 ++++++++++++++
 backend/classification/parser/sd_file_parser.py    | 12 ++----------
 .../classification/parser/session_file_parser.py   | 14 +++-----------
 3 files changed, 19 insertions(+), 21 deletions(-)
 create mode 100644 backend/classification/parser/csv.py

diff --git a/backend/classification/parser/csv.py b/backend/classification/parser/csv.py
new file mode 100644
index 00000000..106f2ab5
--- /dev/null
+++ b/backend/classification/parser/csv.py
@@ -0,0 +1,27 @@
+import pandas as pd
+
+from classification.exceptions import ClassificationError
+
+
+def read_csv(file, rows_to_skip=0, columns_to_read=None):
+    """Reads a CSV file into a np.array
+    Input:
+    - file: path or file-like object, handed as is to pandas.read_csv
+    - rows_to_skip: forwarded to pandas.read_csv as `skiprows`
+    - columns_to_read: forwarded to pandas.read_csv as `usecols` (None reads all)
+    Returns:
+    - np.array holding the parsed rows (result of DataFrame.to_numpy())
+    Raises:
+    - ClassificationError if the file cannot be read or converted
+    """
+    try:
+        data_frame = pd.read_csv(
+            file,
+            skiprows=rows_to_skip,
+            usecols=columns_to_read,
+        )
+        return data_frame.to_numpy()
+    except Exception as error:
+        # Chain the underlying failure explicitly so the real parsing problem
+        # stays attached to the generic domain error callers catch.
+        raise ClassificationError() from error
diff --git a/backend/classification/parser/sd_file_parser.py b/backend/classification/parser/sd_file_parser.py
index ebdc182d..ea608003 100644
--- a/backend/classification/parser/sd_file_parser.py
+++ b/backend/classification/parser/sd_file_parser.py
@@ -1,8 +1,7 @@
-import pandas as pd
 import numpy as np
 
-from classification.exceptions import ClassificationError
 from classification.parser.constants import RETAINED_COLUMNS
+from classification.parser.csv import read_csv
 
 ROWS_TO_SKIP = 2
 
@@ -24,14 +23,7 @@ def parse_sd_file(file):
     Returns:
     - np.array of the two EEG channels of interest
     """
-    try:
-        eeg_raw = pd.read_csv(file,
-                              skiprows=ROWS_TO_SKIP,
-                              usecols=RETAINED_COLUMNS
-                              ).to_numpy()
-    except Exception:
-        raise ClassificationError()
-
+    eeg_raw = read_csv(file, ROWS_TO_SKIP, RETAINED_COLUMNS)
     hexstr_to_int = np.vectorize(_hexstr_to_int)
     eeg_raw = hexstr_to_int(eeg_raw)
 
diff --git a/backend/classification/parser/session_file_parser.py b/backend/classification/parser/session_file_parser.py
index b204019d..eb5d77d7 100644
--- a/backend/classification/parser/session_file_parser.py
+++ b/backend/classification/parser/session_file_parser.py
@@ -1,7 +1,8 @@
 import pandas as pd
 
-from classification.exceptions import ClassificationError
 from classification.parser.constants import RETAINED_COLUMNS
+from classification.parser.csv import read_csv
+
 
 ROWS_TO_SKIP = 5
 
@@ -13,13 +14,4 @@ def parse_session_file(file):
     Returns:
     - np.array of the two EEG channels of interest
     """
-
-    try:
-        eeg_raw = pd.read_csv(file,
-                              skiprows=ROWS_TO_SKIP,
-                              usecols=RETAINED_COLUMNS
-                              ).to_numpy()
-    except Exception:
-        raise ClassificationError()
-
-    return eeg_raw
+    return read_csv(file, ROWS_TO_SKIP, RETAINED_COLUMNS)

From f282eeedbb43a03c4b974d281c0d1c5a276a52b6 Mon Sep 17 00:00:00 2001
From: Anes Belfodil <ans.belfodil@gmail.com>
Date: Mon, 9 Nov 2020 21:22:45 -0500
Subject: [PATCH 4/4] Bind parser functions to FileType enum members

---
 backend/classification/parser/__init__.py              | 10 +---------
 backend/classification/parser/file_type.py             | 10 +++++++---
 .../parser/{sd_file_parser.py => sd_file.py}           |  0
 .../parser/{session_file_parser.py => session_file.py} |  0
 4 files changed, 8 insertions(+), 12 deletions(-)
 rename backend/classification/parser/{sd_file_parser.py => sd_file.py} (100%)
 rename backend/classification/parser/{session_file_parser.py => session_file.py} (100%)

diff --git a/backend/classification/parser/__init__.py b/backend/classification/parser/__init__.py
index 4b2b299a..99a014ad 100644
--- a/backend/classification/parser/__init__.py
+++ b/backend/classification/parser/__init__.py
@@ -16,17 +16,9 @@
 
 from classification.config.constants import OPENBCI_CYTON_SAMPLE_RATE, EEG_CHANNELS
 from classification.parser.constants import SCALE_V_PER_COUNT
-from classification.parser.sd_file_parser import parse_sd_file
-from classification.parser.session_file_parser import parse_session_file
 from classification.parser.file_type import FileType, detect_file_type
 
 
-_FILETYPE_PARSER_MAP = {
-    FileType.SDFile: parse_sd_file,
-    FileType.SessionFile: parse_session_file
-}
-
-
 def get_raw_array(file):
     """Converts a file following a logging format into a mne.RawArray
     Input:
@@ -40,7 +32,7 @@ def get_raw_array(file):
     Detected {filetype.name} format.
     """)
 
-    parse = _FILETYPE_PARSER_MAP[filetype]
+    parse = filetype.parser
     eeg_raw = parse(file)
 
     raw_object = RawArray(
diff --git a/backend/classification/parser/file_type.py b/backend/classification/parser/file_type.py
index c0422f74..3e62e4dc 100644
--- a/backend/classification/parser/file_type.py
+++ b/backend/classification/parser/file_type.py
@@ -1,9 +1,19 @@
-from enum import Enum, auto
+from enum import Enum
 
+from classification.parser.sd_file import parse_sd_file
+from classification.parser.session_file import parse_session_file
 
+
 class FileType(Enum):
-    SDFile = auto()
-    SessionFile = auto()
+    """Supported input file formats, each carrying its own parser function."""
+
+    # Each value is a 1-tuple: Enum unpacks a tuple value into __init__ arguments
+    SDFile = (parse_sd_file,)
+    SessionFile = (parse_session_file,)
+
+    def __init__(self, parser):
+        # Function that parses a file of this type, read as `filetype.parser`
+        self.parser = parser
 
 
 def detect_file_type(file) -> FileType:
diff --git a/backend/classification/parser/sd_file_parser.py b/backend/classification/parser/sd_file.py
similarity index 100%
rename from backend/classification/parser/sd_file_parser.py
rename to backend/classification/parser/sd_file.py
diff --git a/backend/classification/parser/session_file_parser.py b/backend/classification/parser/session_file.py
similarity index 100%
rename from backend/classification/parser/session_file_parser.py
rename to backend/classification/parser/session_file.py