From c8b5eb1d7218ddcab9543af8ce2a2719286f8db2 Mon Sep 17 00:00:00 2001
From: Kay Robbins <1189050+VisLab@users.noreply.github.com>
Date: Thu, 6 Apr 2023 15:02:26 -0500
Subject: [PATCH] Updated docs on summary ops
---
docs/source/_templates/layout.html | 2 +-
.../remodeling/operations/base_context.py | 79 ++++++++++++++++--
.../operations/summarize_column_names_op.py | 51 +++++++++++-
.../operations/summarize_column_values_op.py | 67 ++++++++++++++-
.../operations/summarize_definitions_op.py | 58 ++++++++++++-
.../operations/summarize_hed_tags_op.py | 70 +++++++++++++---
.../operations/summarize_hed_type_op.py | 62 +++++++++++---
.../operations/summarize_hed_validation_op.py | 43 ++++++++--
.../summarize_sidecar_from_events_op.py | 82 ++++++++++++++++---
tests/tools/analysis/test_tabular_summary.py | 17 ++++
.../tools/remodeling/cli/test_run_remodel.py | 6 ++
.../operations/test_base_context.py | 2 +-
tests/tools/util/test_schema_util.py | 29 +++++++
13 files changed, 513 insertions(+), 55 deletions(-)
create mode 100644 tests/tools/util/test_schema_util.py
diff --git a/docs/source/_templates/layout.html b/docs/source/_templates/layout.html
index 90f37d446..7b917c7e1 100644
--- a/docs/source/_templates/layout.html
+++ b/docs/source/_templates/layout.html
@@ -12,7 +12,7 @@
Main resource page
Project home page
Schema viewer
- Library schema viewer
+ Prerelease schema viewer
Specification
Examples and tutorials
Online tools
diff --git a/hed/tools/remodeling/operations/base_context.py b/hed/tools/remodeling/operations/base_context.py
index 9f6f5e691..1815ebd0d 100644
--- a/hed/tools/remodeling/operations/base_context.py
+++ b/hed/tools/remodeling/operations/base_context.py
@@ -26,20 +26,49 @@ def __init__(self, context_type, context_name, context_filename):
self.summary_dict = {}
def get_summary_details(self, include_individual=True):
+ """ Return a dictionary with the details for individual files and the overall dataset.
+
+ Parameters:
+ include_individual (bool): If True, summaries for individual files are included.
+
+ Returns:
+ dict - a dictionary with 'Dataset' and 'Individual files' keys.
+
+ Notes:
+ - The 'Dataset' value is either a string or a dictionary with the overall summary.
+ - The 'Individual files' value is a dictionary whose keys are file names and values are
+ their corresponding summaries.
+
+ Subclasses are expected to implement _merge_all and _get_details_dict to support this.
+
+ """
merged_summary = self._merge_all()
if merged_summary:
- details = self._get_summary_details(merged_summary)
+ details = self._get_details_dict(merged_summary)
else:
details = "Overall summary unavailable"
summary_details = {"Dataset": details, "Individual files": {}}
if include_individual:
for name, count in self.summary_dict.items():
- summary_details["Individual files"][name] = self._get_summary_details(count)
+ summary_details["Individual files"][name] = self._get_details_dict(count)
return summary_details
def get_summary(self, individual_summaries="separate"):
+ """ Return a summary dictionary with the information.
+
+ Parameters:
+ individual_summaries (str): "separate", "consolidated", or "none"
+ Returns:
+ dict - dictionary with "Dataset" and "Individual files" keys.
+
+ Notes: The individual_summaries value is processed as follows
+ - "separate" means that the individual summaries are to be in separate files.
+ - "consolidated" means that the individual summaries are in same file as overall summary
+ - "none" means that only the overall summary is produced.
+
+ """
include_individual = individual_summaries == "separate" or individual_summaries == "consolidated"
summary_details = self.get_summary_details(include_individual=include_individual)
dataset_summary = {"Context name": self.context_name, "Context type": self.context_type,
@@ -99,9 +128,17 @@ def save(self, save_dir, file_formats=['.txt'], individual_summaries="separate")
summary = self.get_summary(individual_summaries=individual_summaries)
else:
continue
- self._save_separate(save_dir, file_format, summary, individual_summaries)
+ self._save_summary_files(save_dir, file_format, summary, individual_summaries)
- def _save_separate(self, save_dir, file_format, summary, individual_summaries):
+ def _save_summary_files(self, save_dir, file_format, summary, individual_summaries):
+ """ Save the files in the appropriate format.
+
+ Parameters:
+ save_dir (str): Path to the directory in which the summaries will be saved.
+ file_format (str): string representing the extension (including .), '.txt' or '.json'.
+ summary (dict): Dictionary of summaries (has "Dataset" and "Individual files" keys).
+
+ """
time_stamp = '_' + get_timestamp()
this_save = os.path.join(save_dir, self.context_name + '/')
os.makedirs(os.path.realpath(this_save), exist_ok=True)
@@ -117,10 +154,21 @@ def _save_separate(self, save_dir, file_format, summary, individual_summaries):
individual_dir = os.path.join(this_save, self.INDIVIDUAL_SUMMARIES_PATH + '/')
os.makedirs(os.path.realpath(individual_dir), exist_ok=True)
for name, sum_str in individual.items():
- filename = self._get_individual_filename(individual_dir, name, time_stamp, file_format)
+ filename = self._get_summary_filepath(individual_dir, name, time_stamp, file_format)
self.dump_summary(filename, sum_str)
- def _get_individual_filename(self, individual_dir, name, time_stamp, file_format):
+ def _get_summary_filepath(self, individual_dir, name, time_stamp, file_format):
+ """ Return the filepath for the summary including the timestamp
+
+ Parameters:
+ individual_dir (str): path of the directory in which the summary should be stored.
+ name (str): Path of the original file from which the summary was extracted.
+ time_stamp (str): Formatted date-time string to be included in the filename of the summary.
+
+ Returns:
+ str: Full path name of the summary.
+
+ """
this_name = os.path.basename(name)
this_name = os.path.splitext(this_name)[0]
count = 1
@@ -135,6 +183,20 @@ def _get_individual_filename(self, individual_dir, name, time_stamp, file_format
return filename
def _get_result_string(self, name, result, indent=DISPLAY_INDENT):
+ """ Return a formatted string with the summary for the indicated name.
+
+ Parameters:
+ name (str): Identifier (usually the filename) of the individual file.
+ result (dict): The dictionary of the summary results indexed by name.
+ indent (str): A string containing spaces used for indentation (usually 3 spaces).
+
+ Returns:
+ str - The results in a printable format ready to be saved to a text file.
+
+ Notes:
+ This method should be overridden by each summary context.
+
+ """
return f"\n{name}\n{indent}{str(result)}"
@staticmethod
@@ -145,12 +207,15 @@ def dump_summary(filename, summary):
text_file.write(summary)
@abstractmethod
- def _get_summary_details(self, summary_info):
+ def _get_details_dict(self, summary_info):
""" Return the summary-specific information.
Parameters:
summary_info (object): Summary to return info from
+ Returns:
+ dict: dictionary with the results.
+
Notes:
Abstract method be implemented by each individual context summary.
diff --git a/hed/tools/remodeling/operations/summarize_column_names_op.py b/hed/tools/remodeling/operations/summarize_column_names_op.py
index f8b2e55c6..be2699066 100644
--- a/hed/tools/remodeling/operations/summarize_column_names_op.py
+++ b/hed/tools/remodeling/operations/summarize_column_names_op.py
@@ -78,15 +78,40 @@ def __init__(self, sum_op):
super().__init__(sum_op.SUMMARY_TYPE, sum_op.summary_name, sum_op.summary_filename)
def update_context(self, new_context):
+ """ Update the summary for a given tabular input file.
+
+ Parameters:
+ new_context (dict): A dictionary with the parameters needed to update a summary.
+
+ Notes:
+ - The summary information is kept in separate ColumnNameSummary objects for each file.
+ - The summary needs a "name" str and a "column_names" list.
+ - The summary uses ColumnNameSummary as the summary object.
+ """
name = new_context['name']
if name not in self.summary_dict:
self.summary_dict[name] = ColumnNameSummary(name=name)
self.summary_dict[name].update(name, new_context["column_names"])
- def _get_summary_details(self, column_summary):
+ def _get_details_dict(self, column_summary):
+ """ Return the summary dictionary extracted from a ColumnNameSummary.
+
+ Parameters:
+ column_summary (ColumnNameSummary): A column name summary for the data file.
+
+ Returns:
+ dict - a dictionary with the summary information for column names.
+
+ """
return column_summary.get_summary()
def _merge_all(self):
+ """ Create a ColumnNameSummary containing the overall dataset summary.
+
+ Returns:
+ ColumnNameSummary - the overall summary object for column names.
+
+ """
all_sum = ColumnNameSummary(name='Dataset')
for key, counts in self.summary_dict.items():
for name, pos in counts.file_dict.items():
@@ -94,6 +119,20 @@ def _merge_all(self):
return all_sum
def _get_result_string(self, name, result, indent=BaseContext.DISPLAY_INDENT):
+ """ Return a formatted string with the summary for the indicated name.
+
+ Parameters:
+ name (str): Identifier (usually the filename) of the individual file.
+ result (dict): The dictionary of the summary results indexed by name.
+ indent (str): A string containing spaces used for indentation (usually 3 spaces).
+
+ Returns:
+ str - The results in a printable format ready to be saved to a text file.
+
+ Notes:
+ This calls _get_dataset_string to get the overall summary string.
+
+ """
if name == "Dataset":
return self._get_dataset_string(result, indent)
columns = result["Columns"][0]
@@ -101,6 +140,16 @@ def _get_result_string(self, name, result, indent=BaseContext.DISPLAY_INDENT):
@staticmethod
def _get_dataset_string(result, indent=BaseContext.DISPLAY_INDENT):
+ """ Return a string with the overall summary for all of the tabular files.
+
+ Parameters:
+ result (dict): Dictionary of merged summary information.
+ indent (str): String of blanks used as the amount to indent for readability.
+
+ Returns:
+ str: Formatted string suitable for saving in a file or printing.
+
+ """
sum_list = [f"Dataset: Number of files={result.get('Number files', 0)}"]
for element in result.get("Columns", []):
sum_list.append(f"{indent}Columns: {str(element['Column names'])}")
diff --git a/hed/tools/remodeling/operations/summarize_column_values_op.py b/hed/tools/remodeling/operations/summarize_column_values_op.py
index ed0166dd5..000346565 100644
--- a/hed/tools/remodeling/operations/summarize_column_values_op.py
+++ b/hed/tools/remodeling/operations/summarize_column_values_op.py
@@ -88,28 +88,79 @@ def __init__(self, sum_op):
self.skip_columns = sum_op.skip_columns
def update_context(self, new_context):
+ """ Update the summary for a given tabular input file.
+
+ Parameters:
+ new_context (dict): A dictionary with the parameters needed to update a summary.
+
+ Notes:
+ - The summary information is kept in separate TabularSummary objects for each file.
+ - The summary needs a "name" str and a "df".
+
+ """
name = new_context['name']
if name not in self.summary_dict:
self.summary_dict[name] = \
TabularSummary(value_cols=self.value_columns, skip_cols=self.skip_columns, name=name)
self.summary_dict[name].update(new_context['df'])
- def _get_summary_details(self, summary):
+ def _get_details_dict(self, summary):
+ """ Return a dictionary with the summary contained in a TabularSummary
+
+ Parameters:
+ summary (TabularSummary): Summary object whose get_summary provides the details.
+
+ Returns:
+ dict: Dictionary with the information suitable for extracting printout.
+
+ """
return summary.get_summary(as_json=False)
def _merge_all(self):
+ """ Create a TabularSummary containing the overall dataset summary.
+
+ Returns:
+ TabularSummary - the summary object for column values.
+
+ """
all_sum = TabularSummary(value_cols=self.value_columns, skip_cols=self.skip_columns, name='Dataset')
for key, counts in self.summary_dict.items():
all_sum.update_summary(counts)
return all_sum
def _get_result_string(self, name, result, indent=BaseContext.DISPLAY_INDENT):
+ """ Return a formatted string with the summary for the indicated name.
+
+ Parameters:
+ name (str): Identifier (usually the filename) of the individual file.
+ result (dict): The dictionary of the summary results indexed by name.
+ indent (str): A string containing spaces used for indentation (usually 3 spaces).
+
+ Returns:
+ str - The results in a printable format ready to be saved to a text file.
+
+ Notes:
+ This calls _get_dataset_string to get the overall summary string and
+ _get_individual_string to get an individual summary string.
+
+ """
+
if name == "Dataset":
return self._get_dataset_string(result, indent=indent)
- return self._get_individual_string(name, result, indent=indent)
+ return self._get_individual_string(result, indent=indent)
@staticmethod
def _get_dataset_string(result, indent=BaseContext.DISPLAY_INDENT):
+ """ Return a string with the overall summary for all of the tabular files.
+
+ Parameters:
+ result (dict): Dictionary of merged summary information.
+ indent (str): String of blanks used as the amount to indent for readability.
+
+ Returns:
+ str: Formatted string suitable for saving in a file or printing.
+
+ """
sum_list = [f"Dataset: Total events={result.get('Total events', 0)} "
f"Total files={result.get('Total files', 0)}"]
cat_cols = result.get("Categorical columns", {})
@@ -121,7 +172,17 @@ def _get_dataset_string(result, indent=BaseContext.DISPLAY_INDENT):
return "\n".join(sum_list)
@staticmethod
- def _get_individual_string(name, result, indent=BaseContext.DISPLAY_INDENT):
+ def _get_individual_string(result, indent=BaseContext.DISPLAY_INDENT):
+ """ Return a string with the summary for an individual tabular file.
+
+ Parameters:
+ result (dict): Dictionary of summary information for a particular tabular file.
+ indent (str): String of blanks used as the amount to indent for readability.
+
+ Returns:
+ str: Formatted string suitable for saving in a file or printing.
+
+ """
sum_list = [f"Total events={result.get('Total events', 0)}"]
cat_cols = result.get("Categorical columns", {})
if cat_cols:
diff --git a/hed/tools/remodeling/operations/summarize_definitions_op.py b/hed/tools/remodeling/operations/summarize_definitions_op.py
index 26f9b7ab6..45fda3d82 100644
--- a/hed/tools/remodeling/operations/summarize_definitions_op.py
+++ b/hed/tools/remodeling/operations/summarize_definitions_op.py
@@ -86,7 +86,15 @@ def __init__(self, sum_op):
self.errors = {}
def update_context(self, new_context):
- name = new_context['name']
+ """ Update the summary for a given tabular input file.
+
+ Parameters:
+ new_context (dict): A dictionary with the parameters needed to update a summary.
+
+ Notes:
+ - The summary needs a "name" str, a "schema", a "df", and a "sidecar".
+
+ """
data_input = TabularInput(new_context['df'], sidecar=new_context['sidecar'], name=new_context['name'])
sidecar = Sidecar(new_context['sidecar'])
df, _ = assemble_hed(data_input, sidecar, new_context['schema'],
@@ -96,21 +104,63 @@ def update_context(self, new_context):
known_defs=self.defs, ambiguous_defs=self.unresolved)
self.errors.update(errors)
- def _get_summary_details(self, summary):
+ def _get_details_dict(self, summary):
return None
def _merge_all(self):
+ """ Return the overall definition summary (not yet implemented).
+
+ Returns:
+ None - an overall summary object for definitions is not currently produced.
+
+ """
+
return None
def _get_result_string(self, name, result, indent=BaseContext.DISPLAY_INDENT):
+ """ Return a formatted string with the summary for the indicated name.
+
+ Parameters:
+ name (str): Identifier (usually the filename) of the individual file.
+ result (dict): The dictionary of the summary results indexed by name.
+ indent (str): A string containing spaces used for indentation (usually 3 spaces).
+
+ Returns:
+ str - The results in a printable format ready to be saved to a text file.
+
+ Notes:
+ This calls _get_dataset_string to get the overall summary string and
+ _get_individual_string to get an individual summary string.
+
+ """
if name == "Dataset":
return self._get_dataset_string(result, indent=indent)
- return self._get_individual_string(name, result, indent=indent)
+ return self._get_individual_string(result, indent=indent)
@staticmethod
def _get_dataset_string(result, indent=BaseContext.DISPLAY_INDENT):
+ """ Return a string with the overall summary for all of the tabular files.
+
+ Parameters:
+ result (dict): Dictionary of merged summary information.
+ indent (str): String of blanks used as the amount to indent for readability.
+
+ Returns:
+ str: Formatted string suitable for saving in a file or printing.
+
+ """
return ""
@staticmethod
- def _get_individual_string(name, result, indent=BaseContext.DISPLAY_INDENT):
+ def _get_individual_string(result, indent=BaseContext.DISPLAY_INDENT):
+ """ Return a string with the summary for an individual tabular file.
+
+ Parameters:
+ result (dict): Dictionary of summary information for a particular tabular file.
+ indent (str): String of blanks used as the amount to indent for readability.
+
+ Returns:
+ str: Formatted string suitable for saving in a file or printing.
+
+ """
return ""
diff --git a/hed/tools/remodeling/operations/summarize_hed_tags_op.py b/hed/tools/remodeling/operations/summarize_hed_tags_op.py
index a8d220df8..89e494338 100644
--- a/hed/tools/remodeling/operations/summarize_hed_tags_op.py
+++ b/hed/tools/remodeling/operations/summarize_hed_tags_op.py
@@ -16,11 +16,10 @@ class SummarizeHedTagsOp(BaseOp):
- **summary_name** (*str*): The name of the summary.
- **summary_filename** (*str*): Base filename of the summary.
- **tags** (*dict*): Type tag to get_summary separately (e.g. 'condition-variable' or 'task').
-
+
Optional remodeling parameters:
- **expand_context** (*bool*): If True, include counts from expanded context (not supported).
-
The purpose of this op is to produce a summary of the occurrences of specified tag. This summary
is often used with 'condition-variable' to produce a summary of the experimental design.
@@ -95,6 +94,15 @@ def __init__(self, sum_op):
self.expand_context = sum_op.expand_context
def update_context(self, new_context):
+ """ Update the summary for a given tabular input file.
+
+ Parameters:
+ new_context (dict): A dictionary with the parameters needed to update a summary.
+
+ Notes:
+ - The summary needs a "name" str, a "schema", a "df", and a "sidecar".
+
+ """
counts = HedTagCounts(new_context['name'], total_events=len(new_context['df']))
sidecar = new_context['sidecar']
if sidecar and not isinstance(sidecar, Sidecar):
@@ -108,7 +116,7 @@ def update_context(self, new_context):
counts.update_event_counts(hed, new_context['name'])
self.summary_dict[new_context["name"]] = counts
- def _get_summary_details(self, merge_counts):
+ def _get_details_dict(self, merge_counts):
template, unmatched = merge_counts.organize_tags(self.tags)
details = {}
for key, key_list in self.tags.items():
@@ -119,11 +127,33 @@ def _get_summary_details(self, merge_counts):
"Main tags": details, "Other tags": leftovers}
def _get_result_string(self, name, result, indent=BaseContext.DISPLAY_INDENT):
+ """ Return a formatted string with the summary for the indicated name.
+
+ Parameters:
+ name (str): Identifier (usually the filename) of the individual file.
+ result (dict): The dictionary of the summary results indexed by name.
+ indent (str): A string containing spaces used for indentation (usually 3 spaces).
+
+ Returns:
+ str - The results in a printable format ready to be saved to a text file.
+
+ Notes:
+ This calls _get_dataset_string to get the overall summary string and
+ _get_individual_string to get an individual summary string.
+
+ """
if name == 'Dataset':
return self._get_dataset_string(result, indent=indent)
- return self._get_individual_string(name, result, indent=indent)
+ return self._get_individual_string(result, indent=indent)
def _merge_all(self):
+ """ Create a HedTagCounts containing the overall dataset HED tag summary.
+
+ Returns:
+ HedTagCounts - the overall dataset summary object for HED tag counts.
+
+ """
+
all_counts = HedTagCounts('Dataset')
for key, counts in self.summary_dict.items():
all_counts.merge_tag_dicts(counts.tag_dict)
@@ -131,27 +161,47 @@ def _merge_all(self):
all_counts.files[file_name] = ""
all_counts.total_events = all_counts.total_events + counts.total_events
return all_counts
-
+
@staticmethod
def _get_dataset_string(result, indent=BaseContext.DISPLAY_INDENT):
+ """ Return a string with the overall summary for all of the tabular files.
+
+ Parameters:
+ result (dict): Dictionary of merged summary information.
+ indent (str): String of blanks used as the amount to indent for readability.
+
+ Returns:
+ str: Formatted string suitable for saving in a file or printing.
+
+ """
sum_list = [f"Dataset: Total events={result.get('total_events', 0)} "
f"Total files={len(result.get('files', []))}"]
sum_list = sum_list + HedTagSummaryContext._get_tag_list(result, indent=indent)
return "\n".join(sum_list)
-
+
@staticmethod
- def _get_individual_string(name, result, indent=BaseContext.DISPLAY_INDENT):
+ def _get_individual_string(result, indent=BaseContext.DISPLAY_INDENT):
+ """ Return a string with the summary for an individual tabular file.
+
+ Parameters:
+ result (dict): Dictionary of summary information for a particular tabular file.
+ indent (str): String of blanks used as the amount to indent for readability.
+
+ Returns:
+ str: Formatted string suitable for saving in a file or printing.
+
+ """
sum_list = [f"Total events={result.get('total_events', 0)}"]
sum_list = sum_list + HedTagSummaryContext._get_tag_list(result, indent=indent)
return "\n".join(sum_list)
-
+
@staticmethod
def _tag_details(tags):
tag_list = []
for tag in tags:
tag_list.append(f"{tag['tag']}[{tag['events']},{len(tag['files'])}]")
return tag_list
-
+
@staticmethod
def _get_tag_list(tag_info, indent=BaseContext.DISPLAY_INDENT):
sum_list = [f"\n{indent}Main tags[events,files]:"]
@@ -171,5 +221,3 @@ def _get_details(key_list, template, verbose=False):
for tag_cnt in template[item.lower()]:
key_details.append(tag_cnt.get_info(verbose=verbose))
return key_details
-
-
diff --git a/hed/tools/remodeling/operations/summarize_hed_type_op.py b/hed/tools/remodeling/operations/summarize_hed_type_op.py
index 85ea41d7d..7b3993357 100644
--- a/hed/tools/remodeling/operations/summarize_hed_type_op.py
+++ b/hed/tools/remodeling/operations/summarize_hed_type_op.py
@@ -88,6 +88,16 @@ def __init__(self, sum_op):
self.type_tag = sum_op.type_tag
def update_context(self, new_context):
+ """ Update the summary for a given tabular input file.
+
+ Parameters:
+ new_context (dict): A dictionary with the parameters needed to update a summary.
+
+ Notes:
+ - The summary needs a "name" str, a "schema", a "df", and a "sidecar".
+
+ """
+
sidecar = new_context['sidecar']
if sidecar and not isinstance(sidecar, Sidecar):
sidecar = Sidecar(sidecar)
@@ -102,17 +112,14 @@ def update_context(self, new_context):
counts.add_descriptions(type_values.definitions)
self.summary_dict[new_context["name"]] = counts
- def _get_summary_details(self, counts):
+ def _get_details_dict(self, counts):
return counts.get_summary()
def _merge_all(self):
- """ Return merged information.
+ """ Create a HedTypeCounts containing the overall dataset HED type summary.
Returns:
- object: Consolidated summary of information.
-
- Notes:
- Abstract method be implemented by each individual context summary.
+ HedTypeCounts - the overall dataset summary object for HED type summary.
"""
all_counts = HedTypeCounts('Dataset', self.type_tag)
@@ -121,12 +128,37 @@ def _merge_all(self):
return all_counts
def _get_result_string(self, name, result, indent=BaseContext.DISPLAY_INDENT):
+ """ Return a formatted string with the summary for the indicated name.
+
+ Parameters:
+ name (str): Identifier (usually the filename) of the individual file.
+ result (dict): The dictionary of the summary results indexed by name.
+ indent (str): A string containing spaces used for indentation (usually 3 spaces).
+
+ Returns:
+ str - The results in a printable format ready to be saved to a text file.
+
+ Notes:
+ This calls _get_dataset_string to get the overall summary string and
+ _get_individual_string to get an individual summary string.
+
+ """
if name == "Dataset":
return self._get_dataset_string(result, indent=indent)
- return self._get_individual_string(name, result, indent=indent)
+ return self._get_individual_string(result, indent=indent)
@staticmethod
def _get_dataset_string(result, indent=BaseContext.DISPLAY_INDENT):
+ """ Return a string with the overall summary for all of the tabular files.
+
+ Parameters:
+ result (dict): Dictionary of merged summary information.
+ indent (str): String of blanks used as the amount to indent for readability.
+
+ Returns:
+ str: Formatted string suitable for saving in a file or printing.
+
+ """
details = result.get('details', {})
sum_list = [f"Dataset: Type={result['type_tag']} Type values={len(details)} "
f"Total events={result.get('total_events', 0)} Total files={len(result.get('files', []))}"]
@@ -146,11 +178,21 @@ def _get_dataset_string(result, indent=BaseContext.DISPLAY_INDENT):
return "\n".join(sum_list)
@staticmethod
- def _get_individual_string(name, result, indent=BaseContext.DISPLAY_INDENT):
+ def _get_individual_string(result, indent=BaseContext.DISPLAY_INDENT):
+ """ Return a string with the summary for an individual tabular file.
+
+ Parameters:
+ result (dict): Dictionary of summary information for a particular tabular file.
+ indent (str): String of blanks used as the amount to indent for readability.
+
+ Returns:
+ str: Formatted string suitable for saving in a file or printing.
+
+ """
details = result.get('details', {})
sum_list = [f"Type={result['type_tag']} Type values={len(details)} "
f"Total events={result.get('total_events', 0)}"]
-
+
for key, item in details.items():
sum_list.append(f"{indent*2}{key}: {item['levels']} levels in {item['events']} events")
str1 = ""
@@ -175,4 +217,4 @@ def _level_details(level_counts, offset="", indent=""):
level_list.append(f"{offset}{indent*3}Tags: {str(details['tags'])}")
if details['description']:
level_list.append(f"{offset}{indent*3}Description: {details['description']}")
- return level_list
\ No newline at end of file
+ return level_list
diff --git a/hed/tools/remodeling/operations/summarize_hed_validation_op.py b/hed/tools/remodeling/operations/summarize_hed_validation_op.py
index d1bd8f53e..8120371ad 100644
--- a/hed/tools/remodeling/operations/summarize_hed_validation_op.py
+++ b/hed/tools/remodeling/operations/summarize_hed_validation_op.py
@@ -47,7 +47,7 @@ def __init__(self, parameters):
TypeError
- If a parameter has the wrong type.
-
+
"""
super().__init__(self.PARAMS, parameters)
self.summary_name = parameters['summary_name']
@@ -86,6 +86,20 @@ def __init__(self, sum_op):
self.check_for_warnings = sum_op.check_for_warnings
def _get_result_string(self, name, result, indent=BaseContext.DISPLAY_INDENT):
+ """ Return a formatted string with the summary for the indicated name.
+
+ Parameters:
+ name (str): Identifier (usually the filename) of the individual file.
+ result (dict): The dictionary of the summary results indexed by name.
+ indent (str): A string containing spaces used for indentation (usually 3 spaces).
+
+ Returns:
+ str - The results in a printable format ready to be saved to a text file.
+
+ Notes:
+ This gets the error list from "sidecar_issues" and "event_issues".
+
+ """
if result["is_merged"]:
sum_list = [f"{name}: [{result['total_sidecar_files']} sidecar files, "
@@ -102,6 +116,15 @@ def _get_result_string(self, name, result, indent=BaseContext.DISPLAY_INDENT):
return "\n".join(sum_list)
def update_context(self, new_context):
+ """ Update the summary for a given tabular input file.
+
+ Parameters:
+ new_context (dict): A dictionary with the parameters needed to update a summary.
+
+ Notes:
+ - The summary needs a "name" str, a schema, a "df", and a "Sidecar".
+ """
+
results = self.get_empty_results()
results["total_event_files"] = 1
results["event_issues"][new_context["name"]] = []
@@ -128,17 +151,23 @@ def update_context(self, new_context):
results['event_issues'][new_context["name"]] = issues
results['total_event_issues'] = len(issues)
- def _get_summary_details(self, summary_info):
+ def _get_details_dict(self, summary_info):
+ """Return the summary details from the summary_info.
+
+ Parameters:
+ summary_info (dict): Dictionary of issues
+
+ Returns:
+ dict: Same summary_info as was passed in.
+
+ """
return summary_info
def _merge_all(self):
- """ Return merged information.
+ """ Create a dictionary containing all of the errors in the dataset.
Returns:
- object: Consolidated summary of information.
-
- Notes:
- Abstract method be implemented by each individual context summary.
+ dict - dictionary of issues organized into sidecar_issues and event_issues.
"""
diff --git a/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py b/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py
index 95fc0eca6..0a403ac4b 100644
--- a/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py
+++ b/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py
@@ -43,7 +43,7 @@ def __init__(self, parameters):
KeyError
- If a required parameter is missing.
- If an unexpected parameter is provided.
-
+
TypeError
- If a parameter has the wrong type.
@@ -88,15 +88,24 @@ def __init__(self, sum_op):
self.skip_cols = sum_op.skip_columns
def update_context(self, new_context):
+ """ Update the summary for a given tabular input file.
+
+ Parameters:
+ new_context (dict): A dictionary with the parameters needed to update a summary.
+
+ Notes:
+ - The summary needs a "name" str and a "df".
+ """
+
tab_sum = TabularSummary(value_cols=self.value_cols, skip_cols=self.skip_cols, name=new_context["name"])
tab_sum.update(new_context['df'], new_context['name'])
self.summary_dict[new_context["name"]] = tab_sum
- def _get_summary_details(self, summary_info):
+ def _get_details_dict(self, summary_info):
""" Return the summary-specific information.
Parameters:
- summary_info (Object): Summary to return info from
+ summary_info (TabularSummary): Summary to return info from
Notes:
Abstract method be implemented by each individual context summary.
@@ -111,16 +120,69 @@ def _merge_all(self):
""" Merge summary information from all of the files
Returns:
- object: Consolidated summary of information.
+ TabularSummary: Consolidated summary of information.
+
+ """
+
+ all_sum = TabularSummary(name='Dataset')
+ for key, tab_sum in self.summary_dict.items():
+ all_sum.update_summary(tab_sum)
+ return all_sum
+
+ def _get_result_string(self, name, result, indent=BaseContext.DISPLAY_INDENT):
+ """ Return a formatted string with the summary for the indicated name.
+
+ Parameters:
+ name (str): Identifier (usually the filename) of the individual file.
+ result (dict): The dictionary of the summary results indexed by name.
+ indent (str): A string containing spaces used for indentation (usually 3 spaces).
+
+ Returns:
+ str - The results in a printable format ready to be saved to a text file.
Notes:
- Abstract method be implemented by each individual context summary.
+ This calls _get_dataset_string to get the overall summary string and
+ _get_individual_string to get an individual summary string.
"""
- return {}
- def _get_result_string(self, name, result, indent=BaseContext.DISPLAY_INDENT):
if name == "Dataset":
- return "Dataset: Currently no overall sidecar extraction is available"
- json_str = f"\nSidecar:\n{json.dumps(result['sidecar'], indent=4)}"
- return f"{name}: Total events={result['total_events']} Skip columns: {str(result['skip_cols'])}{json_str}"
+ return self._get_dataset_string(result, indent=indent)
+ return self._get_individual_string(result, indent=indent)
+
+ @staticmethod
+ def _get_dataset_string(result, indent=BaseContext.DISPLAY_INDENT):
+ """ Return a string with the overall summary for all of the tabular files.
+
+ Parameters:
+ result (dict): Dictionary of merged summary information.
+ indent (str): String of blanks used as the amount to indent for readability.
+
+ Returns:
+ str: Formatted string suitable for saving in a file or printing.
+
+ """
+ sum_list = [f"Dataset: Total events={result.get('total_events', 0)} "
+ f"Total files={result.get('total_files', 0)}",
+ f"Skip columns: {str(result.get('skip_cols', []))}",
+ f"Value columns: {str(result.get('value_cols', []))}",
+ f"Sidecar:\n{json.dumps(result['sidecar'], indent=indent)}"]
+ return "\n".join(sum_list)
+
+ @staticmethod
+ def _get_individual_string(result, indent=BaseContext.DISPLAY_INDENT):
+ """ Return a string with the summary for an individual tabular file.
+
+ Parameters:
+ result (dict): Dictionary of summary information for a particular tabular file.
+ indent (str): String of blanks used as the amount to indent for readability.
+
+ Returns:
+ str: Formatted string suitable for saving in a file or printing.
+
+ """
+ sum_list = [f"Total events={result.get('total_events', 0)}",
+ f"Skip columns: {str(result.get('skip_cols', []))}",
+ f"Value columns: {str(result.get('value_cols', []))}",
+ f"Sidecar:\n{json.dumps(result['sidecar'], indent=indent)}"]
+ return "\n".join(sum_list)
diff --git a/tests/tools/analysis/test_tabular_summary.py b/tests/tools/analysis/test_tabular_summary.py
index cefecebfd..1a35dabec 100644
--- a/tests/tools/analysis/test_tabular_summary.py
+++ b/tests/tools/analysis/test_tabular_summary.py
@@ -189,6 +189,23 @@ def test_make_combined_dicts(self):
self.assertEqual(len(dicts_all2.categorical_info), 7,
"make_combined_dicts should return right number of entries")
+    def test_update_summary(self):
+        """ Merging per-file summaries with update_summary should accumulate the file and event totals. """
+        files_bids = get_file_list(self.bids_base_dir, extensions=[".tsv"], name_suffix="_events")
+        skip_cols = ['onset', 'duration', 'sample', 'value']
+        value_cols = ['stim_file', 'trial']
+        tab_all = TabularSummary(skip_cols=skip_cols, value_cols=value_cols)
+        for name in files_bids:
+            # Summarize each file on its own and check it before folding it into the combined summary.
+            tab = TabularSummary(skip_cols=skip_cols, value_cols=value_cols)
+            df = get_new_dataframe(name)
+            tab.update(df, name=name)
+            self.assertEqual(tab.total_events, 200)
+            self.assertEqual(tab.total_files, 1)
+            tab_all.update_summary(tab)
+        self.assertEqual(len(files_bids), tab_all.total_files)
+        self.assertEqual(len(files_bids)*200, tab_all.total_events)
+
if __name__ == '__main__':
unittest.main()
diff --git a/tests/tools/remodeling/cli/test_run_remodel.py b/tests/tools/remodeling/cli/test_run_remodel.py
index 099f80252..18ae48734 100644
--- a/tests/tools/remodeling/cli/test_run_remodel.py
+++ b/tests/tools/remodeling/cli/test_run_remodel.py
@@ -166,6 +166,12 @@ def test_run_bids_ops_verbose(self):
main(arg_list)
self.assertFalse(fp.getvalue())
+    def test_temp(self):
+        """ Run the remodeler on a developer-local dataset, skipping when that data is not present.
+
+        The dataset root and the remodel file are hard-coded paths on a local G: drive, so the
+        guard below skips the test on machines that do not have them.
+
+        """
+        import os  # Local import so the guard does not rely on the module's import block.
+        data_root = "g:/ds002718OpenNeuro"
+        model_path = 'G:/wh_excerpt_rmdl.json'
+        if not (os.path.isdir(data_root) and os.path.isfile(model_path)):
+            self.skipTest(f"Local test data not available: {data_root}, {model_path}")
+        arg_list = [data_root, model_path, '-x', 'derivatives', 'code', 'stimuli', '-b', '-n', '']
+        main(arg_list)
+
if __name__ == '__main__':
unittest.main()
diff --git a/tests/tools/remodeling/operations/test_base_context.py b/tests/tools/remodeling/operations/test_base_context.py
index 0ac8bb548..68e8ddc0f 100644
--- a/tests/tools/remodeling/operations/test_base_context.py
+++ b/tests/tools/remodeling/operations/test_base_context.py
@@ -11,7 +11,7 @@ def __init__(self):
self.summary_dict["data1"] = "test data 1"
self.summary_dict["data2"] = "test data 2"
- def _get_summary_details(self, include_individual=True):
+ def _get_details_dict(self, include_individual=True):
summary = {"name": self.context_name}
if include_individual:
summary["more"] = "more stuff"
diff --git a/tests/tools/util/test_schema_util.py b/tests/tools/util/test_schema_util.py
new file mode 100644
index 000000000..8ee8d1210
--- /dev/null
+++ b/tests/tools/util/test_schema_util.py
@@ -0,0 +1,29 @@
+import os
+import pandas as pd
+import unittest
+from hed.errors.exceptions import HedFileError
+from hed.schema.hed_schema_io import load_schema_version
+from hed.tools.util.schema_util import flatten_schema
+
+
+class Test(unittest.TestCase):
+    """ Tests for flatten_schema from hed.tools.util.schema_util. """
+
+    @classmethod
+    def setUpClass(cls):
+        """ No class-level fixtures are needed. """
+
+    @classmethod
+    def tearDownClass(cls):
+        """ No class-level cleanup is needed. """
+
+    def test_flatten_schema(self):
+        """ Flattening schema 8.1.0 with skip_non_tag=True should give a DataFrame of 1037 rows and 3 columns. """
+        schema = load_schema_version('8.1.0')
+        flattened = flatten_schema(schema, skip_non_tag=True)
+        self.assertIsInstance(flattened, pd.DataFrame)
+        self.assertEqual(flattened.shape, (1037, 3))
+
+
+if __name__ == '__main__':  # Allow this test module to be run directly as a script.
+ unittest.main()