Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -125,3 +125,7 @@ dmypy.json
.pyre/

.vscode/

# Test Data
physutils/tests/data/bids-dir
tmp.*
124 changes: 122 additions & 2 deletions physutils/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,143 @@

import importlib
import json
import os
import os.path as op

import numpy as np
from bids import BIDSLayout
from loguru import logger

from physutils import physio

EXPECTED = ["data", "fs", "history", "metadata"]


def load_from_bids(
    bids_path,
    subject,
    session=None,
    task=None,
    run=None,
    recording=None,
    extension="tsv.gz",
    suffix="physio",
):
    """
    Load physiological data from BIDS-formatted directory

    Parameters
    ----------
    bids_path : str
        Path to BIDS-formatted directory
    subject : str
        Subject identifier
    session : str, optional
        Session identifier. Default: None
    task : str, optional
        Task identifier. Default: None
    run : str, optional
        Run identifier. Default: None
    recording : str, optional
        Recording identifier; the BIDS specification uses the ``recording``
        entity to distinguish physio files with different sampling
        frequencies. Default: None
    extension : str, optional
        Extension of the file to load. Default: "tsv.gz"
    suffix : str, optional
        Suffix of file to load. Default: "physio"

    Returns
    -------
    physio_objects : dict
        Mapping of column name to loaded :class:`physutils.Physio` object

    Raises
    ------
    FileNotFoundError
        If `bids_path` does not exist, or no file matches the given entities
    ValueError
        If more than one file matches the given entities
    """

    # check if file exists and is in BIDS format
    if not op.exists(bids_path):
        raise FileNotFoundError(f"Provided path {bids_path} does not exist")

    # validate=False: validation is not required just to locate the file
    layout = BIDSLayout(bids_path, validate=False)
    bids_file = layout.get(
        subject=subject,
        session=session,
        task=task,
        run=run,
        suffix=suffix,
        extension=extension,
        recording=recording,
    )
    logger.debug(f"BIDS file found: {bids_file}")
    if len(bids_file) == 0:
        raise FileNotFoundError(
            f"No files found for subject {subject}, session {session}, task {task}, run {run}, recording {recording}"
        )
    if len(bids_file) > 1:
        raise ValueError(
            f"Multiple files found for subject {subject}, session {session}, task {task}, run {run}, recording {recording}"
        )

    # Sidecar JSON metadata: sampling frequency, start time, column names
    config_file = bids_file[0].get_metadata()
    fs = config_file["SamplingFrequency"]
    t_start = config_file.get("StartTime", 0)
    columns = config_file["Columns"]
    logger.debug(f"Loaded structure contains columns: {columns}")

    physio_objects = {}
    data = np.loadtxt(bids_file[0].path)

    if "time" in columns:
        # Index of the first sample at or after t_start. NOTE(review):
        # np.argmax returns 0 when no sample satisfies the condition, in
        # which case all samples are kept.
        idx_0 = np.argmax(data[:, columns.index("time")] >= t_start)
    else:
        idx_0 = 0
        logger.warning(
            "No time column found in file. Assuming data starts at the beginning of the file"
        )

    # enumerate() gives each column its own index even if two columns share
    # a name (columns.index(col) would always return the first match)
    for col_idx, col in enumerate(columns):
        col_lower = col.lower()
        col_physio_type = None
        if any(key in col_lower for key in ["cardiac", "ppg", "ecg", "card", "pulse"]):
            col_physio_type = "cardiac"
        elif any(
            key in col_lower
            for key in ["respiratory", "rsp", "resp", "breath", "co2", "o2"]
        ):
            col_physio_type = "respiratory"
        elif any(key in col_lower for key in ["trigger", "tr"]):
            col_physio_type = "trigger"
        elif "time" in col_lower:
            # The time column was only needed to locate t_start above
            continue
        else:
            logger.warning(
                f"Column {col}'s type cannot be determined. Additional features may be missing."
            )

        if col_physio_type in ["cardiac", "respiratory"]:
            physio_objects[col] = physio.Physio(
                data[idx_0:, col_idx],
                fs=fs,
                history=[physio._get_call(exclude=[])],
            )
            physio_objects[col]._physio_type = col_physio_type
            # Label is the BIDS filename stem without the _physio suffix
            physio_objects[col]._label = (
                bids_file[0].filename.split(".")[0].replace("_physio", "")
            )

        if col_physio_type == "trigger":
            # TODO: Implement trigger loading using the MRI data object
            logger.warning("MRI trigger characteristics extraction not yet implemented")
            physio_objects[col] = physio.Physio(
                data[idx_0:, col_idx],
                fs=fs,
                history=[physio._get_call(exclude=[])],
            )

    return physio_objects


def load_physio(data, *, fs=None, dtype=None, history=None, allow_pickle=False):
"""
Returns `Physio` object with provided data

Parameters
----------
data : str or array_like or Physio_like
data : str, os.path.PathLike or array_like or Physio_like
Input physiological data. If array_like, should be one-dimensional
fs : float, optional
Sampling rate of `data`. Default: None
Expand All @@ -46,7 +166,7 @@ def load_physio(data, *, fs=None, dtype=None, history=None, allow_pickle=False):

# first check if the file was made with `save_physio`; otherwise, try to
# load it as a plain text file and instantiate a history
if isinstance(data, str):
if isinstance(data, str) or isinstance(data, os.PathLike):
try:
inp = dict(np.load(data, allow_pickle=allow_pickle))
for attr in EXPECTED:
Expand Down
54 changes: 51 additions & 3 deletions physutils/physio.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,10 +220,12 @@ def new_physio_like(

if suppdata is None:
suppdata = ref_physio._suppdata if copy_suppdata else None

label = ref_physio.label if copy_label else None
physio_type = ref_physio.physio_type if copy_physio_type else None
computed_metrics = list(ref_physio.computed_metrics) if copy_computed_metrics else []
computed_metrics = (
dict(ref_physio.computed_metrics) if copy_computed_metrics else {}
)

# make new class
out = ref_physio.__class__(
Expand Down Expand Up @@ -340,7 +342,7 @@ def __init__(
reject=np.empty(0, dtype=int),
)
self._suppdata = None if suppdata is None else np.asarray(suppdata).squeeze()
self._computed_metrics = []
self._computed_metrics = dict()

def __array__(self):
return self.data
Expand Down Expand Up @@ -542,3 +544,49 @@ def neurokit2phys(
metadata = dict(peaks=peaks)

return cls(data, fs=fs, metadata=metadata, **kwargs)


class MRIConfig:
    """
    Class to hold MRI configuration information

    Parameters
    ----------
    slice_timings : 1D array_like, optional
        Slice timings in seconds. Default: None
    n_scans : int, optional
        Number of volumes in the MRI scan. Default: None
    tr : float, optional
        Repetition time in seconds. Default: None

    Raises
    ------
    ValueError
        If `slice_timings` has more than one dimension
    """

    def __init__(self, slice_timings=None, n_scans=None, tr=None):
        if slice_timings is not None and np.ndim(slice_timings) > 1:
            raise ValueError("Slice timings must be a 1-dimensional array.")

        # Preserve None for unset values: previously int(None)/float(None)
        # raised TypeError, making the documented defaults unusable.
        self._slice_timings = (
            None if slice_timings is None else np.asarray(slice_timings)
        )
        self._n_scans = None if n_scans is None else int(n_scans)
        self._tr = None if tr is None else float(tr)
        logger.debug(f"Initializing new MRIConfig object: {self}")

    def __str__(self):
        return "{name}(n_scans={n_scans}, tr={tr})".format(
            name=self.__class__.__name__,
            n_scans=self._n_scans,
            tr=self._tr,
        )

    @property
    def slice_timings(self):
        """Slice timings in seconds"""
        return self._slice_timings

    @property
    def n_scans(self):
        """Number of volumes in the MRI scan"""
        return self._n_scans

    @property
    def tr(self):
        """Repetition time in seconds"""
        return self._tr
43 changes: 42 additions & 1 deletion physutils/tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,11 @@
import pytest

from physutils import io, physio
from physutils.tests.utils import filter_physio, get_test_data_path
from physutils.tests.utils import (
create_random_bids_structure,
filter_physio,
get_test_data_path,
)


def test_load_physio(caplog):
Expand Down Expand Up @@ -46,6 +50,43 @@ def test_load_physio(caplog):
io.load_physio([1, 2, 3])


def test_load_from_bids():
    create_random_bids_structure("physutils/tests/data", recording_id="cardiac")
    phys_array = io.load_from_bids(
        "physutils/tests/data/bids-dir",
        subject="01",
        session="01",
        task="rest",
        run="01",
        recording="cardiac",
    )

    for signal in phys_array.values():
        assert isinstance(signal, physio.Physio)
        # The data saved are the ones after t_0 = -3s
        assert signal.data.size == 80000
        assert signal.fs == 10000.0
        assert signal.history[0][0] == "physutils.io.load_from_bids"


def test_load_from_bids_no_rec():
    create_random_bids_structure("physutils/tests/data")
    phys_array = io.load_from_bids(
        "physutils/tests/data/bids-dir",
        subject="01",
        session="01",
        task="rest",
        run="01",
    )

    for signal in phys_array.values():
        assert isinstance(signal, physio.Physio)
        # The data saved are the ones after t_0 = -3s
        assert signal.data.size == 80000
        assert signal.fs == 10000.0
        assert signal.history[0][0] == "physutils.io.load_from_bids"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it be better to delete the bids dataset at the end of the function or just to add the bids-dir directory in the .gitignore ?



def test_save_physio(tmpdir):
pckl = io.load_physio(get_test_data_path("ECG.phys"), allow_pickle=True)
out = io.save_physio(tmpdir.join("tmp").purebasename, pckl)
Expand Down
91 changes: 91 additions & 0 deletions physutils/tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,12 @@
Utilities for testing
"""

import json
from os import makedirs
from os.path import join as pjoin

import numpy as np
import pandas as pd
from pkg_resources import resource_filename
from scipy import signal

Expand Down Expand Up @@ -77,3 +80,91 @@ def filter_physio(data, cutoffs, method, *, order=3):
filtered = physio.new_physio_like(data, signal.filtfilt(b, a, data))

return filtered


def create_random_bids_structure(data_dir, recording_id=None):
    """
    Create a minimal BIDS-like dataset populated with random physio data.

    Parameters
    ----------
    data_dir : str
        Directory under which the ``bids-dir`` dataset is created
    recording_id : str, optional
        Value for the BIDS ``recording`` entity; when provided it is
        inserted into the generated filenames. Default: None

    Returns
    -------
    bids_dir : str
        Path to the generated ``func`` directory containing the physio files
    """

    dataset_description = {
        "Name": "Example BIDS Dataset",
        "BIDSVersion": "1.7.0",
        "License": "",
        "Authors": ["Author1", "Author2"],
        "Acknowledgements": "",
        "HowToAcknowledge": "",
        "Funding": "",
        "ReferencesAndLinks": "",
        "DatasetDOI": "",
    }

    physio_json = {
        "SamplingFrequency": 10000.0,
        "StartTime": -3,
        "Columns": [
            "time",
            "respiratory_chest",
            "trigger",
            "cardiac",
            "respiratory_CO2",
            "respiratory_O2",
        ],
    }

    # Fixed BIDS entities for the generated dataset
    subject_id = "01"
    session_id = "01"
    task_id = "rest"
    run_id = "01"

    bids_dir = pjoin(
        data_dir, "bids-dir", f"sub-{subject_id}", f"ses-{session_id}", "func"
    )
    makedirs(bids_dir, exist_ok=True)

    # Create dataset_description.json
    with open(pjoin(data_dir, "bids-dir", "dataset_description.json"), "w") as f:
        json.dump(dataset_description, f, indent=4)

    # Build the filename once; append the optional recording entity
    filename_body = f"sub-{subject_id}_ses-{session_id}_task-{task_id}_run-{run_id}"
    if recording_id is not None:
        filename_body += f"_recording-{recording_id}"

    # Create physio.json sidecar
    with open(pjoin(bids_dir, f"{filename_body}_physio.json"), "w") as f:
        json.dump(physio_json, f, indent=4)

    # Initialize tsv file with random data columns and a time column.
    # Column count is derived from the sidecar so the two stay in sync
    # (previously hard-coded to 6).
    num_rows = 100000
    num_cols = len(physio_json["Columns"])
    # Data begins 2 s before StartTime so loaders must trim to t_start
    time_offset = 2
    time = (
        np.arange(num_rows) / physio_json["SamplingFrequency"]
        + physio_json["StartTime"]
        - time_offset
    )
    data = np.column_stack((time, np.random.rand(num_rows, num_cols - 1).round(8)))
    df = pd.DataFrame(data)

    # Write dataframe directly as compressed tsv.gz (BIDS physio format)
    tsv_gz_file = pjoin(bids_dir, f"{filename_body}_physio.tsv.gz")
    df.to_csv(
        tsv_gz_file,
        sep="\t",
        index=False,
        header=False,
        float_format="%.8e",
        compression="gzip",
    )

    return bids_dir
Loading