Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@
'vcs_pageview_mode': '',
'style_nav_header_background': 'LightSlateGray',
# Toc options
'collapse_navigation': True,
'collapse_navigation': False,
'sticky_navigation': True,
'navigation_depth': 4,
'includehidden': True,
Expand Down
2 changes: 1 addition & 1 deletion docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ Note: this is a work in progress. More information is coming.
user_guide.rst

.. toctree::
:maxdepth: 3
:maxdepth: 4
:caption: HED Python API:

api2.rst
Expand Down
95 changes: 65 additions & 30 deletions hed/tools/remodeling/cli/run_remodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import json
import argparse
from hed.errors.exceptions import HedFileError
from hed.tools.util.io_util import get_file_list
from hed.tools.util.io_util import get_file_list, get_task_from_file
from hed.tools.bids.bids_dataset import BidsDataset
from hed.tools.remodeling.dispatcher import Dispatcher
from hed.tools.remodeling.backup_manager import BackupManager
Expand All @@ -19,7 +19,7 @@ def get_parser():
"""
parser = argparse.ArgumentParser(description="Converts event files based on a json file specifying operations.")
parser.add_argument("data_dir", help="Full path of dataset root directory.")
parser.add_argument("remodel_path", help="Full path of the file with remodeling instructions.")
parser.add_argument("model_path", help="Full path of the file with remodeling instructions.")
parser.add_argument("-b", "--bids-format", action='store_true', dest="use_bids",
help="If present, the dataset is in BIDS format with sidecars. HED analysis is available.")
parser.add_argument("-e", "--extensions", nargs="*", default=['.tsv'], dest="extensions",
Expand All @@ -44,7 +44,9 @@ def get_parser():
parser.add_argument("-s", "--save-formats", nargs="*", default=['.json', '.txt'], dest="save_formats",
help="Format for saving any summaries, if any. If no summaries are to be written," +
"use the -ns option.")
parser.add_argument("-t", "--task-names", dest="task_names", nargs="*", default=[], help="The names of the task.")
parser.add_argument("-t", "--task-names", dest="task_names", nargs="*", default=[],
help="The names of the task. If an empty list is given, all tasks are lumped together." +
" If * is given, then tasks are found and reported individually.")
parser.add_argument("-v", "--verbose", action='store_true',
help="If present, output informative messages as computation progresses.")
parser.add_argument("-w", "--work-dir", default="", dest="work_dir",
Expand All @@ -54,6 +56,28 @@ def get_parser():
return parser


def handle_backup(args):
    """ Restore the backup if applicable.

    Parameters:
        args (obj): parsed arguments as an object.

    Returns:
        str or None: backup name if there was a backup done.

    Raises:
        HedFileError: If a backup was requested but the named backup does not exist.

    """
    # No backup requested -- nothing to restore.
    if args.no_backup:
        return None
    manager = BackupManager(args.data_dir)
    if not manager.get_backup(args.backup_name):
        raise HedFileError("BackupDoesNotExist", f"Backup {args.backup_name} does not exist. "
                           f"Please run_remodel_backup first", "")
    manager.restore_backup(args.backup_name, args.task_names, verbose=args.verbose)
    return args.backup_name


def parse_arguments(arg_list=None):
""" Parse the command line arguments or arg_list if given.

Expand All @@ -76,10 +100,10 @@ def parse_arguments(arg_list=None):
args.extensions = None
args.data_dir = os.path.realpath(args.data_dir)
args.exclude_dirs = args.exclude_dirs + ['remodel']
args.model_path = os.path.realpath(args.remodel_path)
args.model_path = os.path.realpath(args.model_path)
if args.verbose:
print(f"Data directory: {args.data_dir}\nRemodel path: {args.remodel_path}")
with open(args.remodel_path, 'r') as fp:
print(f"Data directory: {args.data_dir}\nModel path: {args.model_path}")
with open(args.model_path, 'r') as fp:
operations = json.load(fp)
parsed_operations, errors = Dispatcher.parse_operations(operations)
if errors:
Expand All @@ -88,12 +112,30 @@ def parse_arguments(arg_list=None):
return args, operations


def run_bids_ops(dispatch, args):
def parse_tasks(files, task_args):
    """ Group a list of file paths by their task name.

    Parameters:
        files (list): Paths of the tabular files to be grouped.
        task_args (str or list): Task specification: an empty value lumps all files
            together under the key ""; "*" (or a list whose first element is "*")
            groups by every task found; otherwise a list of task names restricts
            the result to those tasks.

    Returns:
        dict: Maps task name to the list of file paths for that task. Files with
            no detectable task entity are omitted unless task_args is empty.

    """
    if not task_args:
        return {"": files}
    task_dict = {}
    for my_file in files:
        task = get_task_from_file(my_file)
        if not task:
            # Skip files whose names carry no task entity.
            continue
        task_dict.setdefault(task, []).append(my_file)
    # Explicit parentheses clarify the original and/or precedence.
    if task_args == "*" or (isinstance(task_args, list) and task_args[0] == "*"):
        return task_dict
    return {key: task_dict[key] for key in task_args if key in task_dict}


def run_bids_ops(dispatch, args, tabular_files):
""" Run the remodeler on a BIDS dataset.

Parameters:
dispatch (Dispatcher): Manages the execution of the operations.
args (Object): The command-line arguments as an object.
tabular_files (list): List of tabular files to run the ops on.

"""
bids = BidsDataset(dispatch.data_root, tabular_types=['events'], exclude_dirs=args.exclude_dirs)
Expand All @@ -103,9 +145,8 @@ def run_bids_ops(dispatch, args):
events = bids.get_tabular_group(args.file_suffix)
if args.verbose:
print(f"Processing {dispatch.data_root}")
for events_obj in events.datafile_dict.values():
if args.task_names and events_obj.get_entity('task') not in args.task_names:
continue
filtered_events = [events.datafile_dict[key] for key in tabular_files]
for events_obj in filtered_events:
sidecar_list = events.get_sidecars_from_path(events_obj)
if sidecar_list:
sidecar = events.sidecar_dict[sidecar_list[-1]].contents
Expand All @@ -118,26 +159,23 @@ def run_bids_ops(dispatch, args):
df.to_csv(events_obj.file_path, sep='\t', index=False, header=True)


def run_direct_ops(dispatch, args, tabular_files):
    """ Run the remodeler on files of a specified form in a directory tree.

    Parameters:
        dispatch (Dispatcher): Controls the application of the operations and backup.
        args (argparse.Namespace): Dictionary of arguments and their values.
        tabular_files (list): List of files to include in this run.

    """
    if args.verbose:
        print(f"Found {len(tabular_files)} files with suffix {args.file_suffix} and extensions {str(args.extensions)}")
    # Equivalent to the hasattr/else branch: missing attribute yields None.
    sidecar = getattr(args, 'json_sidecar', None)
    for file_path in tabular_files:
        frame = dispatch.run_operations(file_path, verbose=args.verbose, sidecar=sidecar)
        if not args.no_update:
            frame.to_csv(file_path, sep='\t', index=False, header=True)
Expand All @@ -158,25 +196,22 @@ def main(arg_list=None):
args, operations = parse_arguments(arg_list)
if not os.path.isdir(args.data_dir):
raise HedFileError("DataDirectoryDoesNotExist", f"The root data directory {args.data_dir} does not exist", "")
if args.no_backup:
backup_name = None
else:
backup_man = BackupManager(args.data_dir)
if not backup_man.get_backup(args.backup_name):
raise HedFileError("BackupDoesNotExist", f"Backup {args.backup_name} does not exist. "
f"Please run_remodel_backup first", "")
backup_man.restore_backup(args.backup_name, args.task_names, verbose=args.verbose)
backup_name = args.backup_name
backup_name = handle_backup(args)
dispatch = Dispatcher(operations, data_root=args.data_dir, backup_name=backup_name, hed_versions=args.hed_versions)
if args.use_bids:
run_bids_ops(dispatch, args)
else:
run_direct_ops(dispatch, args)
save_dir = None
if args.work_dir:
save_dir = os.path.realpath(os.path.join(args.work_dir, Dispatcher.REMODELING_SUMMARY_PATH))
if not args.no_summaries:
dispatch.save_summaries(args.save_formats, individual_summaries=args.individual_summaries, summary_dir=save_dir)
files = get_file_list(dispatch.data_root, name_suffix=args.file_suffix, extensions=args.extensions,
exclude_dirs=args.exclude_dirs)
task_dict = parse_tasks(files, args.task_names)
for task, files in task_dict.items():
if args.use_bids:
run_bids_ops(dispatch, args, files)
else:
run_direct_ops(dispatch, args, files)
if not args.no_summaries:
dispatch.save_summaries(args.save_formats, individual_summaries=args.individual_summaries,
summary_dir=save_dir, task_name=task)


if __name__ == '__main__':
Expand Down
20 changes: 14 additions & 6 deletions hed/tools/remodeling/dispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ def get_data_file(self, file_designator):
In this case, the corresponding backup file is read and returned.
- If a string is passed and there is no backup manager,
the data file corresponding to the file_designator is read and returned.
- If a Pandas DataFrame is passed, return a copy.
- If a Pandas DataFrame, return a copy.

"""
if isinstance(file_designator, pd.DataFrame):
Expand Down Expand Up @@ -154,25 +154,32 @@ def run_operations(self, file_path, sidecar=None, verbose=False):
df = self.post_proc_data(df)
return df

def save_summaries(self, save_formats=('.json', '.txt'), individual_summaries="separate",
                   summary_dir=None, task_name=""):
    """ Save the summary files in the specified formats.

    Parameters:
        save_formats (list): A list of formats [".txt", ".json"].
        individual_summaries (str): "consolidated", "individual", or "none".
        summary_dir (str or None): Directory for saving summaries.
        task_name (str): Name of task if summaries separated by task or "" if not separated.

    Notes:
        - The summaries are saved in the dataset derivatives/remodeling folder if no summary_dir is provided.
        - "consolidated" means that the overall summary and summaries of individual files are in one summary file.
        - "individual" means that the summaries of individual files are in separate files.
        - "none" means that only the overall summary is produced.

    """
    # Immutable tuple default replaces the original mutable list default (shared-default pitfall).
    if not save_formats:
        return
    if not summary_dir:
        summary_dir = self.get_summary_save_dir()
    os.makedirs(summary_dir, exist_ok=True)
    for summary_name, summary_item in self.summary_dicts.items():
        summary_item.save(summary_dir, save_formats, individual_summaries=individual_summaries,
                          task_name=task_name)

@staticmethod
def parse_operations(operation_list):
Expand Down Expand Up @@ -242,7 +249,8 @@ def errors_to_str(messages, title="", sep='\n'):
return title + sep + errors
return errors

def get_schema(self, hed_versions):
@staticmethod
def get_schema(hed_versions):
if not hed_versions:
return None
elif isinstance(hed_versions, str) or isinstance(hed_versions, list):
Expand Down
20 changes: 13 additions & 7 deletions hed/tools/remodeling/operations/base_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ def get_text_summary(self, individual_summaries="separate"):

return summary

def save(self, save_dir, file_formats=['.txt'], individual_summaries="separate"):
def save(self, save_dir, file_formats=['.txt'], individual_summaries="separate", task_name=""):

for file_format in file_formats:
if file_format == '.txt':
Expand All @@ -126,24 +126,29 @@ def save(self, save_dir, file_formats=['.txt'], individual_summaries="separate")
summary = self.get_summary(individual_summaries=individual_summaries)
else:
continue
self._save_summary_files(save_dir, file_format, summary, individual_summaries)
self._save_summary_files(save_dir, file_format, summary, individual_summaries, task_name=task_name)

def _save_summary_files(self, save_dir, file_format, summary, individual_summaries):
def _save_summary_files(self, save_dir, file_format, summary, individual_summaries, task_name=''):
""" Save the files in the appropriate format.

Parameters:
save_dir (str): Path to the directory in which the summaries will be saved.
file_format (str): string representing the extension (including .), '.txt' or '.json'.
summary (dictionary): Dictionary of summaries (has "Dataset" and "Individual files" keys).
individual_summaries (str): "consolidated", "individual", or "none".
task_name (str): Name of task to be included in file name if multiple tasks.

"""
if self.op.append_timecode:
time_stamp = '_' + get_timestamp()
else:
time_stamp = ''
if task_name:
task_name = "_" + task_name
this_save = os.path.join(save_dir, self.op.summary_name + '/')
os.makedirs(os.path.realpath(this_save), exist_ok=True)
filename = os.path.realpath(os.path.join(this_save, self.op.summary_filename + time_stamp + file_format))
filename = os.path.realpath(os.path.join(this_save,
self.op.summary_filename + task_name + time_stamp + file_format))
individual = summary.get("Individual files", {})
if individual_summaries == "none" or not individual:
self.dump_summary(filename, summary["Dataset"])
Expand All @@ -155,15 +160,16 @@ def _save_summary_files(self, save_dir, file_format, summary, individual_summari
individual_dir = os.path.join(this_save, self.INDIVIDUAL_SUMMARIES_PATH + '/')
os.makedirs(os.path.realpath(individual_dir), exist_ok=True)
for name, sum_str in individual.items():
filename = self._get_summary_filepath(individual_dir, name, time_stamp, file_format)
filename = self._get_summary_filepath(individual_dir, name, task_name, time_stamp, file_format)
self.dump_summary(filename, sum_str)

def _get_summary_filepath(self, individual_dir, name, time_stamp, file_format):
def _get_summary_filepath(self, individual_dir, name, task_name, time_stamp, file_format):
""" Return the filepath for the summary including the timestamp

Parameters:
individual_dir (str): path of the directory in which the summary should be stored.
name (str): Path of the original file from which the summary was extracted.
task_name (str): Task name if separate summaries for different tasks or the empty string if not separated.
time_stamp (str): Formatted date-time string to be included in the filename of the summary.

Returns:
Expand All @@ -176,7 +182,7 @@ def _get_summary_filepath(self, individual_dir, name, time_stamp, file_format):
match = True
filename = None
while match:
filename = f"{self.op.summary_filename}_{this_name}_{count}{time_stamp}{file_format}"
filename = f"{self.op.summary_filename}_{this_name}{task_name}_{count}{time_stamp}{file_format}"
filename = os.path.realpath(os.path.join(individual_dir, filename))
if not os.path.isfile(filename):
break
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ def _get_categorical_string(self, result, offset="", indent=" "):
str: Formatted string suitable for saving in a file or printing.

"""
cat_dict = result.get('Categorical columns', {})
cat_dict = result.get('Categorical column summaries', {})
if not cat_dict:
return ""
count_dict = result['Categorical counts']
Expand Down
10 changes: 10 additions & 0 deletions hed/tools/util/io_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,3 +318,13 @@ def _split_entity(piece):
return {"key": split_piece[0].strip(), "value": split_piece[1].strip()}
else:
return {"bad": piece}


def get_task_from_file(file_path):
    """ Extract the BIDS task name from a file path.

    Parameters:
        file_path (str): Path of a file whose name may contain a "task-" entity.

    Returns:
        str: The task name (text after "task-" up to the next "_" or "."), or "" if not found.

    Notes:
        Matching is case-insensitive, but the returned task name preserves the original case.

    """
    basename = os.path.splitext(os.path.basename(file_path))[0].strip()
    position = basename.lower().find("task-")
    if position == -1:
        return ""
    # NOTE(review): find() also matches "task-" embedded in a longer word (e.g. "subtask-") -- confirm intended.
    splits = re.split(r'[_.]', basename[position + 5:])
    return splits[0]
1 change: 0 additions & 1 deletion readthedocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,3 @@ sphinx:
python:
install:
- requirements: docs/requirements.txt
system_packages: true
Loading