Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -125,3 +125,7 @@ dmypy.json
.pyre/

.vscode/

# Test Data
physutils/tests/data/bids-dir
tmp.*
124 changes: 122 additions & 2 deletions physutils/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,143 @@

import importlib
import json
import os
import os.path as op

import numpy as np
from bids import BIDSLayout
from loguru import logger

from physutils import physio

EXPECTED = ["data", "fs", "history", "metadata"]


def load_from_bids(
    bids_path,
    subject,
    session=None,
    task=None,
    run=None,
    recording=None,
    extension="tsv.gz",
    suffix="physio",
):
    """
    Load physiological data from BIDS-formatted directory

    Parameters
    ----------
    bids_path : str
        Path to BIDS-formatted directory
    subject : str
        Subject identifier
    session : str, optional
        Session identifier. Default: None
    task : str, optional
        Task identifier. Default: None
    run : str, optional
        Run identifier. Default: None
    recording : str, optional
        Recording identifier; the BIDS specification uses the ``recording``
        entity to distinguish physio files with different sampling
        frequencies. Default: None
    extension : str, optional
        Extension of the file to load. Default: "tsv.gz"
    suffix : str, optional
        Suffix of file to load. Default: "physio"

    Returns
    -------
    physio_objects : dict
        Mapping of column name to loaded :class:`physutils.Physio` object

    Raises
    ------
    FileNotFoundError
        If `bids_path` does not exist, or no file matches the given entities
    ValueError
        If more than one file matches the given entities
    """

    # check if file exists and is in BIDS format
    if not op.exists(bids_path):
        raise FileNotFoundError(f"Provided path {bids_path} does not exist")

    # validate=False: validation is not required just to locate the file
    layout = BIDSLayout(bids_path, validate=False)
    bids_file = layout.get(
        subject=subject,
        session=session,
        task=task,
        run=run,
        suffix=suffix,
        extension=extension,
        recording=recording,
    )
    logger.debug(f"BIDS file found: {bids_file}")
    if len(bids_file) == 0:
        raise FileNotFoundError(
            f"No files found for subject {subject}, session {session}, task {task}, run {run}, recording {recording}"
        )
    if len(bids_file) > 1:
        raise ValueError(
            f"Multiple files found for subject {subject}, session {session}, task {task}, run {run}, recording {recording}"
        )

    # Sidecar JSON metadata: sampling frequency, start time, column names
    config_file = bids_file[0].get_metadata()
    fs = config_file["SamplingFrequency"]
    t_start = config_file.get("StartTime", 0)
    columns = config_file["Columns"]
    logger.debug(f"Loaded structure contains columns: {columns}")

    physio_objects = {}
    data = np.loadtxt(bids_file[0].path)

    if "time" in columns:
        # Index of the first sample at or after t_start. NOTE(review):
        # np.argmax returns 0 when no sample satisfies the condition, in
        # which case all samples are kept.
        idx_0 = np.argmax(data[:, columns.index("time")] >= t_start)
    else:
        idx_0 = 0
        logger.warning(
            "No time column found in file. Assuming data starts at the beginning of the file"
        )

    # enumerate() gives each column its own index even if two columns share
    # a name (columns.index(col) would always return the first match)
    for col_idx, col in enumerate(columns):
        col_lower = col.lower()
        col_physio_type = None
        if any(key in col_lower for key in ["cardiac", "ppg", "ecg", "card", "pulse"]):
            col_physio_type = "cardiac"
        elif any(
            key in col_lower
            for key in ["respiratory", "rsp", "resp", "breath", "co2", "o2"]
        ):
            col_physio_type = "respiratory"
        elif any(key in col_lower for key in ["trigger", "tr"]):
            col_physio_type = "trigger"
        elif "time" in col_lower:
            # The time column was only needed to locate t_start above
            continue
        else:
            logger.warning(
                f"Column {col}'s type cannot be determined. Additional features may be missing."
            )

        if col_physio_type in ["cardiac", "respiratory"]:
            physio_objects[col] = physio.Physio(
                data[idx_0:, col_idx],
                fs=fs,
                history=[physio._get_call(exclude=[])],
            )
            physio_objects[col]._physio_type = col_physio_type
            # Label is the BIDS filename stem without the _physio suffix
            physio_objects[col]._label = (
                bids_file[0].filename.split(".")[0].replace("_physio", "")
            )

        if col_physio_type == "trigger":
            # TODO: Implement trigger loading using the MRI data object
            logger.warning("MRI trigger characteristics extraction not yet implemented")
            physio_objects[col] = physio.Physio(
                data[idx_0:, col_idx],
                fs=fs,
                history=[physio._get_call(exclude=[])],
            )

    return physio_objects


def load_physio(data, *, fs=None, dtype=None, history=None, allow_pickle=False):
"""
Returns `Physio` object with provided data

Parameters
----------
data : str or array_like or Physio_like
data : str, os.path.PathLike or array_like or Physio_like
Input physiological data. If array_like, should be one-dimensional
fs : float, optional
Sampling rate of `data`. Default: None
Expand All @@ -46,7 +166,7 @@ def load_physio(data, *, fs=None, dtype=None, history=None, allow_pickle=False):

# first check if the file was made with `save_physio`; otherwise, try to
# load it as a plain text file and instantiate a history
if isinstance(data, str):
if isinstance(data, str) or isinstance(data, os.PathLike):
try:
inp = dict(np.load(data, allow_pickle=allow_pickle))
for attr in EXPECTED:
Expand Down
54 changes: 51 additions & 3 deletions physutils/physio.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,10 +220,12 @@ def new_physio_like(

if suppdata is None:
suppdata = ref_physio._suppdata if copy_suppdata else None

label = ref_physio.label if copy_label else None
physio_type = ref_physio.physio_type if copy_physio_type else None
computed_metrics = list(ref_physio.computed_metrics) if copy_computed_metrics else []
computed_metrics = (
dict(ref_physio.computed_metrics) if copy_computed_metrics else {}
)

# make new class
out = ref_physio.__class__(
Expand Down Expand Up @@ -340,7 +342,7 @@ def __init__(
reject=np.empty(0, dtype=int),
)
self._suppdata = None if suppdata is None else np.asarray(suppdata).squeeze()
self._computed_metrics = []
self._computed_metrics = dict()

def __array__(self):
return self.data
Expand Down Expand Up @@ -542,3 +544,49 @@ def neurokit2phys(
metadata = dict(peaks=peaks)

return cls(data, fs=fs, metadata=metadata, **kwargs)


class MRIConfig:
    """
    Class to hold MRI configuration information

    Parameters
    ----------
    slice_timings : 1D array_like, optional
        Slice timings in seconds. Default: None
    n_scans : int, optional
        Number of volumes in the MRI scan. Default: None
    tr : float, optional
        Repetition time in seconds. Default: None

    Raises
    ------
    ValueError
        If `slice_timings` has more than one dimension
    """

    def __init__(self, slice_timings=None, n_scans=None, tr=None):
        if slice_timings is not None and np.ndim(slice_timings) > 1:
            raise ValueError("Slice timings must be a 1-dimensional array.")

        # Preserve None for unset values: previously int(None)/float(None)
        # raised TypeError, making the documented defaults unusable.
        self._slice_timings = (
            None if slice_timings is None else np.asarray(slice_timings)
        )
        self._n_scans = None if n_scans is None else int(n_scans)
        self._tr = None if tr is None else float(tr)
        logger.debug(f"Initializing new MRIConfig object: {self}")

    def __str__(self):
        return "{name}(n_scans={n_scans}, tr={tr})".format(
            name=self.__class__.__name__,
            n_scans=self._n_scans,
            tr=self._tr,
        )

    @property
    def slice_timings(self):
        """Slice timings in seconds"""
        return self._slice_timings

    @property
    def n_scans(self):
        """Number of volumes in the MRI scan"""
        return self._n_scans

    @property
    def tr(self):
        """Repetition time in seconds"""
        return self._tr
43 changes: 42 additions & 1 deletion physutils/tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,11 @@
import pytest

from physutils import io, physio
from physutils.tests.utils import filter_physio, get_test_data_path
from physutils.tests.utils import (
create_random_bids_structure,
filter_physio,
get_test_data_path,
)


def test_load_physio(caplog):
Expand Down Expand Up @@ -46,6 +50,43 @@ def test_load_physio(caplog):
io.load_physio([1, 2, 3])


def test_load_from_bids():
    create_random_bids_structure("physutils/tests/data", recording_id="cardiac")
    phys_array = io.load_from_bids(
        "physutils/tests/data/bids-dir",
        subject="01",
        session="01",
        task="rest",
        run="01",
        recording="cardiac",
    )

    for signal in phys_array.values():
        assert isinstance(signal, physio.Physio)
        # The data saved are the ones after t_0 = -3s
        assert signal.data.size == 80000
        assert signal.fs == 10000.0
        assert signal.history[0][0] == "physutils.io.load_from_bids"


def test_load_from_bids_no_rec():
    create_random_bids_structure("physutils/tests/data")
    phys_array = io.load_from_bids(
        "physutils/tests/data/bids-dir",
        subject="01",
        session="01",
        task="rest",
        run="01",
    )

    for signal in phys_array.values():
        assert isinstance(signal, physio.Physio)
        # The data saved are the ones after t_0 = -3s
        assert signal.data.size == 80000
        assert signal.fs == 10000.0
        assert signal.history[0][0] == "physutils.io.load_from_bids"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it be better to delete the bids dataset at the end of the function or just to add the bids-dir directory in the .gitignore ?



def test_save_physio(tmpdir):
pckl = io.load_physio(get_test_data_path("ECG.phys"), allow_pickle=True)
out = io.save_physio(tmpdir.join("tmp").purebasename, pckl)
Expand Down
91 changes: 91 additions & 0 deletions physutils/tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,12 @@
Utilities for testing
"""

import json
from os import makedirs
from os.path import join as pjoin

import numpy as np
import pandas as pd
from pkg_resources import resource_filename
from scipy import signal

Expand Down Expand Up @@ -77,3 +80,91 @@ def filter_physio(data, cutoffs, method, *, order=3):
filtered = physio.new_physio_like(data, signal.filtfilt(b, a, data))

return filtered


def create_random_bids_structure(data_dir, recording_id=None):
    """
    Create a minimal BIDS-like dataset populated with random physio data.

    Parameters
    ----------
    data_dir : str
        Directory under which the ``bids-dir`` dataset is created
    recording_id : str, optional
        Value for the BIDS ``recording`` entity; when provided it is
        inserted into the generated filenames. Default: None

    Returns
    -------
    bids_dir : str
        Path to the generated ``func`` directory containing the physio files
    """

    dataset_description = {
        "Name": "Example BIDS Dataset",
        "BIDSVersion": "1.7.0",
        "License": "",
        "Authors": ["Author1", "Author2"],
        "Acknowledgements": "",
        "HowToAcknowledge": "",
        "Funding": "",
        "ReferencesAndLinks": "",
        "DatasetDOI": "",
    }

    physio_json = {
        "SamplingFrequency": 10000.0,
        "StartTime": -3,
        "Columns": [
            "time",
            "respiratory_chest",
            "trigger",
            "cardiac",
            "respiratory_CO2",
            "respiratory_O2",
        ],
    }

    # Fixed BIDS entities for the generated dataset
    subject_id = "01"
    session_id = "01"
    task_id = "rest"
    run_id = "01"

    bids_dir = pjoin(
        data_dir, "bids-dir", f"sub-{subject_id}", f"ses-{session_id}", "func"
    )
    makedirs(bids_dir, exist_ok=True)

    # Create dataset_description.json
    with open(pjoin(data_dir, "bids-dir", "dataset_description.json"), "w") as f:
        json.dump(dataset_description, f, indent=4)

    # Build the filename once; append the optional recording entity
    filename_body = f"sub-{subject_id}_ses-{session_id}_task-{task_id}_run-{run_id}"
    if recording_id is not None:
        filename_body += f"_recording-{recording_id}"

    # Create physio.json sidecar
    with open(pjoin(bids_dir, f"{filename_body}_physio.json"), "w") as f:
        json.dump(physio_json, f, indent=4)

    # Initialize tsv file with random data columns and a time column.
    # Column count is derived from the sidecar so the two stay in sync
    # (previously hard-coded to 6).
    num_rows = 100000
    num_cols = len(physio_json["Columns"])
    # Data begins 2 s before StartTime so loaders must trim to t_start
    time_offset = 2
    time = (
        np.arange(num_rows) / physio_json["SamplingFrequency"]
        + physio_json["StartTime"]
        - time_offset
    )
    data = np.column_stack((time, np.random.rand(num_rows, num_cols - 1).round(8)))
    df = pd.DataFrame(data)

    # Write dataframe directly as compressed tsv.gz (BIDS physio format)
    tsv_gz_file = pjoin(bids_dir, f"{filename_body}_physio.tsv.gz")
    df.to_csv(
        tsv_gz_file,
        sep="\t",
        index=False,
        header=False,
        float_format="%.8e",
        compression="gzip",
    )

    return bids_dir
Loading