Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@
'vcs_pageview_mode': '',
'style_nav_header_background': 'LightSlateGray',
# Toc options
'collapse_navigation': True,
'collapse_navigation': False,
'sticky_navigation': True,
'navigation_depth': 4,
'includehidden': True,
Expand Down
2 changes: 1 addition & 1 deletion docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ Note: this is a work in progress. More information is coming.
user_guide.rst

.. toctree::
:maxdepth: 3
:maxdepth: 4
:caption: HED Python API:

api2.rst
Expand Down
95 changes: 65 additions & 30 deletions hed/tools/remodeling/cli/run_remodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import json
import argparse
from hed.errors.exceptions import HedFileError
from hed.tools.util.io_util import get_file_list
from hed.tools.util.io_util import get_file_list, get_task_from_file
from hed.tools.bids.bids_dataset import BidsDataset
from hed.tools.remodeling.dispatcher import Dispatcher
from hed.tools.remodeling.backup_manager import BackupManager
Expand All @@ -19,7 +19,7 @@ def get_parser():
"""
parser = argparse.ArgumentParser(description="Converts event files based on a json file specifying operations.")
parser.add_argument("data_dir", help="Full path of dataset root directory.")
parser.add_argument("remodel_path", help="Full path of the file with remodeling instructions.")
parser.add_argument("model_path", help="Full path of the file with remodeling instructions.")
parser.add_argument("-b", "--bids-format", action='store_true', dest="use_bids",
help="If present, the dataset is in BIDS format with sidecars. HED analysis is available.")
parser.add_argument("-e", "--extensions", nargs="*", default=['.tsv'], dest="extensions",
Expand All @@ -44,7 +44,9 @@ def get_parser():
parser.add_argument("-s", "--save-formats", nargs="*", default=['.json', '.txt'], dest="save_formats",
help="Format for saving any summaries, if any. If no summaries are to be written," +
"use the -ns option.")
parser.add_argument("-t", "--task-names", dest="task_names", nargs="*", default=[], help="The names of the task.")
parser.add_argument("-t", "--task-names", dest="task_names", nargs="*", default=[],
help="The names of the task. If an empty list is given, all tasks are lumped together." +
" If * is given, then tasks are found and reported individually.")
parser.add_argument("-v", "--verbose", action='store_true',
help="If present, output informative messages as computation progresses.")
parser.add_argument("-w", "--work-dir", default="", dest="work_dir",
Expand All @@ -54,6 +56,28 @@ def get_parser():
return parser


def handle_backup(args):
    """ Restore the backup if applicable.

    Parameters:
        args (obj): parsed arguments as an object.

    Returns:
        str or None: backup name if there was a backup done.

    Raises:
        HedFileError: If a backup was requested but the named backup does not exist.

    """
    # No backup requested -- nothing to restore.
    if args.no_backup:
        return None
    manager = BackupManager(args.data_dir)
    if not manager.get_backup(args.backup_name):
        raise HedFileError("BackupDoesNotExist", f"Backup {args.backup_name} does not exist. "
                           f"Please run_remodel_backup first", "")
    manager.restore_backup(args.backup_name, args.task_names, verbose=args.verbose)
    return args.backup_name


def parse_arguments(arg_list=None):
""" Parse the command line arguments or arg_list if given.

Expand All @@ -76,10 +100,10 @@ def parse_arguments(arg_list=None):
args.extensions = None
args.data_dir = os.path.realpath(args.data_dir)
args.exclude_dirs = args.exclude_dirs + ['remodel']
args.model_path = os.path.realpath(args.remodel_path)
args.model_path = os.path.realpath(args.model_path)
if args.verbose:
print(f"Data directory: {args.data_dir}\nRemodel path: {args.remodel_path}")
with open(args.remodel_path, 'r') as fp:
print(f"Data directory: {args.data_dir}\nModel path: {args.model_path}")
with open(args.model_path, 'r') as fp:
operations = json.load(fp)
parsed_operations, errors = Dispatcher.parse_operations(operations)
if errors:
Expand All @@ -88,12 +112,30 @@ def parse_arguments(arg_list=None):
return args, operations


def run_bids_ops(dispatch, args):
def parse_tasks(files, task_args):
    """ Group a list of file paths by their task name.

    Parameters:
        files (list): Paths of the tabular files to be grouped.
        task_args (str or list): Task specification: an empty value lumps all files
            together under the key ""; "*" (or a list whose first element is "*")
            groups by every task found; otherwise a list of task names restricts
            the result to those tasks.

    Returns:
        dict: Maps task name to the list of file paths for that task. Files with
            no detectable task entity are omitted unless task_args is empty.

    """
    if not task_args:
        return {"": files}
    task_dict = {}
    for my_file in files:
        task = get_task_from_file(my_file)
        if not task:
            # Skip files whose names carry no task entity.
            continue
        task_dict.setdefault(task, []).append(my_file)
    # Explicit parentheses clarify the original and/or precedence.
    if task_args == "*" or (isinstance(task_args, list) and task_args[0] == "*"):
        return task_dict
    return {key: task_dict[key] for key in task_args if key in task_dict}


def run_bids_ops(dispatch, args, tabular_files):
""" Run the remodeler on a BIDS dataset.

Parameters:
dispatch (Dispatcher): Manages the execution of the operations.
args (Object): The command-line arguments as an object.
tabular_files (list): List of tabular files to run the ops on.

"""
bids = BidsDataset(dispatch.data_root, tabular_types=['events'], exclude_dirs=args.exclude_dirs)
Expand All @@ -103,9 +145,8 @@ def run_bids_ops(dispatch, args):
events = bids.get_tabular_group(args.file_suffix)
if args.verbose:
print(f"Processing {dispatch.data_root}")
for events_obj in events.datafile_dict.values():
if args.task_names and events_obj.get_entity('task') not in args.task_names:
continue
filtered_events = [events.datafile_dict[key] for key in tabular_files]
for events_obj in filtered_events:
sidecar_list = events.get_sidecars_from_path(events_obj)
if sidecar_list:
sidecar = events.sidecar_dict[sidecar_list[-1]].contents
Expand All @@ -118,26 +159,23 @@ def run_bids_ops(dispatch, args):
df.to_csv(events_obj.file_path, sep='\t', index=False, header=True)


def run_direct_ops(dispatch, args, tabular_files):
    """ Run the remodeler on files of a specified form in a directory tree.

    Parameters:
        dispatch (Dispatcher): Controls the application of the operations and backup.
        args (argparse.Namespace): Dictionary of arguments and their values.
        tabular_files (list): List of files to include in this run.

    """
    if args.verbose:
        print(f"Found {len(tabular_files)} files with suffix {args.file_suffix} and extensions {str(args.extensions)}")
    # Equivalent to the hasattr/else branch: missing attribute yields None.
    sidecar = getattr(args, 'json_sidecar', None)
    for file_path in tabular_files:
        frame = dispatch.run_operations(file_path, verbose=args.verbose, sidecar=sidecar)
        if not args.no_update:
            frame.to_csv(file_path, sep='\t', index=False, header=True)
Expand All @@ -158,25 +196,22 @@ def main(arg_list=None):
args, operations = parse_arguments(arg_list)
if not os.path.isdir(args.data_dir):
raise HedFileError("DataDirectoryDoesNotExist", f"The root data directory {args.data_dir} does not exist", "")
if args.no_backup:
backup_name = None
else:
backup_man = BackupManager(args.data_dir)
if not backup_man.get_backup(args.backup_name):
raise HedFileError("BackupDoesNotExist", f"Backup {args.backup_name} does not exist. "
f"Please run_remodel_backup first", "")
backup_man.restore_backup(args.backup_name, args.task_names, verbose=args.verbose)
backup_name = args.backup_name
backup_name = handle_backup(args)
dispatch = Dispatcher(operations, data_root=args.data_dir, backup_name=backup_name, hed_versions=args.hed_versions)
if args.use_bids:
run_bids_ops(dispatch, args)
else:
run_direct_ops(dispatch, args)
save_dir = None
if args.work_dir:
save_dir = os.path.realpath(os.path.join(args.work_dir, Dispatcher.REMODELING_SUMMARY_PATH))
if not args.no_summaries:
dispatch.save_summaries(args.save_formats, individual_summaries=args.individual_summaries, summary_dir=save_dir)
files = get_file_list(dispatch.data_root, name_suffix=args.file_suffix, extensions=args.extensions,
exclude_dirs=args.exclude_dirs)
task_dict = parse_tasks(files, args.task_names)
for task, files in task_dict.items():
if args.use_bids:
run_bids_ops(dispatch, args, files)
else:
run_direct_ops(dispatch, args, files)
if not args.no_summaries:
dispatch.save_summaries(args.save_formats, individual_summaries=args.individual_summaries,
summary_dir=save_dir, task_name=task)


if __name__ == '__main__':
Expand Down
20 changes: 14 additions & 6 deletions hed/tools/remodeling/dispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ def get_data_file(self, file_designator):
In this case, the corresponding backup file is read and returned.
- If a string is passed and there is no backup manager,
the data file corresponding to the file_designator is read and returned.
- If a Pandas DataFrame is passed, return a copy.
- If a Pandas DataFrame, return a copy.

"""
if isinstance(file_designator, pd.DataFrame):
Expand Down Expand Up @@ -154,25 +154,32 @@ def run_operations(self, file_path, sidecar=None, verbose=False):
df = self.post_proc_data(df)
return df

def save_summaries(self, save_formats=('.json', '.txt'), individual_summaries="separate",
                   summary_dir=None, task_name=""):
    """ Save the summary files in the specified formats.

    Parameters:
        save_formats (list): A list of formats [".txt", ".json"].
        individual_summaries (str): "consolidated", "individual", or "none".
        summary_dir (str or None): Directory for saving summaries.
        task_name (str): Name of task if summaries separated by task or "" if not separated.

    Notes:
        - The summaries are saved in the dataset derivatives/remodeling folder if no summary_dir is provided.
        - "consolidated" means that the overall summary and summaries of individual files are in one summary file.
        - "individual" means that the summaries of individual files are in separate files.
        - "none" means that only the overall summary is produced.

    """
    # Immutable tuple default replaces the original mutable list default (shared-default pitfall).
    if not save_formats:
        return
    if not summary_dir:
        summary_dir = self.get_summary_save_dir()
    os.makedirs(summary_dir, exist_ok=True)
    for summary_name, summary_item in self.summary_dicts.items():
        summary_item.save(summary_dir, save_formats, individual_summaries=individual_summaries,
                          task_name=task_name)

@staticmethod
def parse_operations(operation_list):
Expand Down Expand Up @@ -242,7 +249,8 @@ def errors_to_str(messages, title="", sep='\n'):
return title + sep + errors
return errors

def get_schema(self, hed_versions):
@staticmethod
def get_schema(hed_versions):
if not hed_versions:
return None
elif isinstance(hed_versions, str) or isinstance(hed_versions, list):
Expand Down
20 changes: 13 additions & 7 deletions hed/tools/remodeling/operations/base_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ def get_text_summary(self, individual_summaries="separate"):

return summary

def save(self, save_dir, file_formats=['.txt'], individual_summaries="separate"):
def save(self, save_dir, file_formats=['.txt'], individual_summaries="separate", task_name=""):

for file_format in file_formats:
if file_format == '.txt':
Expand All @@ -126,24 +126,29 @@ def save(self, save_dir, file_formats=['.txt'], individual_summaries="separate")
summary = self.get_summary(individual_summaries=individual_summaries)
else:
continue
self._save_summary_files(save_dir, file_format, summary, individual_summaries)
self._save_summary_files(save_dir, file_format, summary, individual_summaries, task_name=task_name)

def _save_summary_files(self, save_dir, file_format, summary, individual_summaries):
def _save_summary_files(self, save_dir, file_format, summary, individual_summaries, task_name=''):
""" Save the files in the appropriate format.

Parameters:
save_dir (str): Path to the directory in which the summaries will be saved.
file_format (str): string representing the extension (including .), '.txt' or '.json'.
summary (dictionary): Dictionary of summaries (has "Dataset" and "Individual files" keys).
individual_summaries (str): "consolidated", "individual", or "none".
task_name (str): Name of task to be included in file name if multiple tasks.

"""
if self.op.append_timecode:
time_stamp = '_' + get_timestamp()
else:
time_stamp = ''
if task_name:
task_name = "_" + task_name
this_save = os.path.join(save_dir, self.op.summary_name + '/')
os.makedirs(os.path.realpath(this_save), exist_ok=True)
filename = os.path.realpath(os.path.join(this_save, self.op.summary_filename + time_stamp + file_format))
filename = os.path.realpath(os.path.join(this_save,
self.op.summary_filename + task_name + time_stamp + file_format))
individual = summary.get("Individual files", {})
if individual_summaries == "none" or not individual:
self.dump_summary(filename, summary["Dataset"])
Expand All @@ -155,15 +160,16 @@ def _save_summary_files(self, save_dir, file_format, summary, individual_summari
individual_dir = os.path.join(this_save, self.INDIVIDUAL_SUMMARIES_PATH + '/')
os.makedirs(os.path.realpath(individual_dir), exist_ok=True)
for name, sum_str in individual.items():
filename = self._get_summary_filepath(individual_dir, name, time_stamp, file_format)
filename = self._get_summary_filepath(individual_dir, name, task_name, time_stamp, file_format)
self.dump_summary(filename, sum_str)

def _get_summary_filepath(self, individual_dir, name, time_stamp, file_format):
def _get_summary_filepath(self, individual_dir, name, task_name, time_stamp, file_format):
""" Return the filepath for the summary including the timestamp

Parameters:
individual_dir (str): path of the directory in which the summary should be stored.
name (str): Path of the original file from which the summary was extracted.
task_name (str): Task name if separate summaries for different tasks or the empty string if not separated.
time_stamp (str): Formatted date-time string to be included in the filename of the summary.

Returns:
Expand All @@ -176,7 +182,7 @@ def _get_summary_filepath(self, individual_dir, name, time_stamp, file_format):
match = True
filename = None
while match:
filename = f"{self.op.summary_filename}_{this_name}_{count}{time_stamp}{file_format}"
filename = f"{self.op.summary_filename}_{this_name}{task_name}_{count}{time_stamp}{file_format}"
filename = os.path.realpath(os.path.join(individual_dir, filename))
if not os.path.isfile(filename):
break
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ def _get_categorical_string(self, result, offset="", indent=" "):
str: Formatted string suitable for saving in a file or printing.

"""
cat_dict = result.get('Categorical columns', {})
cat_dict = result.get('Categorical column summaries', {})
if not cat_dict:
return ""
count_dict = result['Categorical counts']
Expand Down
10 changes: 10 additions & 0 deletions hed/tools/util/io_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,3 +318,13 @@ def _split_entity(piece):
return {"key": split_piece[0].strip(), "value": split_piece[1].strip()}
else:
return {"bad": piece}


def get_task_from_file(file_path):
    """ Extract the BIDS task name from a file path.

    Parameters:
        file_path (str): Path of a file whose name may contain a "task-" entity.

    Returns:
        str: The task name (text after "task-" up to the next "_" or "."), or "" if not found.

    Notes:
        Matching is case-insensitive, but the returned task name preserves the original case.

    """
    basename = os.path.splitext(os.path.basename(file_path))[0].strip()
    position = basename.lower().find("task-")
    if position == -1:
        return ""
    # NOTE(review): find() also matches "task-" embedded in a longer word (e.g. "subtask-") -- confirm intended.
    splits = re.split(r'[_.]', basename[position + 5:])
    return splits[0]
1 change: 0 additions & 1 deletion readthedocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,3 @@ sphinx:
python:
install:
- requirements: docs/requirements.txt
system_packages: true
Loading