diff --git a/docs/source/_templates/layout.html b/docs/source/_templates/layout.html index 90f37d446..7b917c7e1 100644 --- a/docs/source/_templates/layout.html +++ b/docs/source/_templates/layout.html @@ -12,7 +12,7 @@
  • Main resource page
  • Project home page
  • Schema viewer
  • -
  • Library schema viewer
  • +
  • Prerelease schema viewer
  • Specification
  • Examples and tutorials
  • Online tools
  • diff --git a/hed/tools/remodeling/operations/base_context.py b/hed/tools/remodeling/operations/base_context.py index 9f6f5e691..1815ebd0d 100644 --- a/hed/tools/remodeling/operations/base_context.py +++ b/hed/tools/remodeling/operations/base_context.py @@ -26,20 +26,49 @@ def __init__(self, context_type, context_name, context_filename): self.summary_dict = {} def get_summary_details(self, include_individual=True): + """ Return a dictionary with the details for individual files and the overall dataset. + + Parameters: + include_individual (bool): If True, summaries for individual files are included. + + Returns: + dict - a dictionary with 'Dataset' and 'Individual files' keys. + + Notes: + - The 'Dataset' value is either a string or a dictionary with the overall summary. + - The 'Individual files' value is dictionary whose keys are file names and values are + their corresponding summaries. + + Users are expected to provide _merge_all and _get_details_dict to support this. + + """ merged_summary = self._merge_all() if merged_summary: - details = self._get_summary_details(merged_summary) + details = self._get_details_dict(merged_summary) else: details = "Overall summary unavailable" summary_details = {"Dataset": details, "Individual files": {}} if include_individual: for name, count in self.summary_dict.items(): - summary_details["Individual files"][name] = self._get_summary_details(count) + summary_details["Individual files"][name] = self._get_details_dict(count) return summary_details def get_summary(self, individual_summaries="separate"): + """ Return a summary dictionary with the information. + + Parameters: + individual_summaries (str): "separate", "consolidated", or "none" + Returns: + dict - dictionary with "Dataset" and "Individual files" keys. 
+ + Notes: The individual_summaries value is processed as follows + - "separate" individual summaries are to be in separate files + - "consolidated" means that the individual summaries are in the same file as overall summary + - "none" means that only the overall summary is produced. + + """ include_individual = individual_summaries == "separate" or individual_summaries == "consolidated" summary_details = self.get_summary_details(include_individual=include_individual) dataset_summary = {"Context name": self.context_name, "Context type": self.context_type, @@ -99,9 +128,17 @@ def save(self, save_dir, file_formats=['.txt'], individual_summaries="separate") summary = self.get_summary(individual_summaries=individual_summaries) else: continue - self._save_separate(save_dir, file_format, summary, individual_summaries) + self._save_summary_files(save_dir, file_format, summary, individual_summaries) - def _save_separate(self, save_dir, file_format, summary, individual_summaries): + def _save_summary_files(self, save_dir, file_format, summary, individual_summaries): + """ Save the files in the appropriate format. + + Parameters: + save_dir (str): Path to the directory in which the summaries will be saved. + file_format (str): string representing the extension (including .), '.txt' or '.json'. + summary (dictionary): Dictionary of summaries (has "Dataset" and "Individual files" keys).
+ + """ time_stamp = '_' + get_timestamp() this_save = os.path.join(save_dir, self.context_name + '/') os.makedirs(os.path.realpath(this_save), exist_ok=True) @@ -117,10 +154,21 @@ def _save_separate(self, save_dir, file_format, summary, individual_summaries): individual_dir = os.path.join(this_save, self.INDIVIDUAL_SUMMARIES_PATH + '/') os.makedirs(os.path.realpath(individual_dir), exist_ok=True) for name, sum_str in individual.items(): - filename = self._get_individual_filename(individual_dir, name, time_stamp, file_format) + filename = self._get_summary_filepath(individual_dir, name, time_stamp, file_format) self.dump_summary(filename, sum_str) - def _get_individual_filename(self, individual_dir, name, time_stamp, file_format): + def _get_summary_filepath(self, individual_dir, name, time_stamp, file_format): + """ Return the filepath for the summary including the timestamp. + + Parameters: + individual_dir (str): path of the directory in which the summary should be stored. + name (str): Path of the original file from which the summary was extracted. + time_stamp (str): Formatted date-time string to be included in the filename of the summary. + + Returns: + str: Full path name of the summary. + + """ this_name = os.path.basename(name) this_name = os.path.splitext(this_name)[0] count = 1 @@ -135,6 +183,20 @@ def _get_individual_filename(self, individual_dir, name, time_stamp, file_format return filename def _get_result_string(self, name, result, indent=DISPLAY_INDENT): + """ Return a formatted string with the summary for the indicated name. + + Parameters: + name (str): Identifier (usually the filename) of the individual file. + result (dict): The dictionary of the summary results indexed by name. + indent (str): A string containing spaces used for indentation (usually 3 spaces). + + Returns: + str - The results in a printable format ready to be saved to a text file. + + Notes: + This method should be overridden by each summary.
+ + """ return f"\n{name}\n{indent}{str(result)}" @staticmethod @@ -145,12 +207,15 @@ def dump_summary(filename, summary): text_file.write(summary) @abstractmethod - def _get_summary_details(self, summary_info): + def _get_details_dict(self, summary_info): """ Return the summary-specific information. Parameters: summary_info (object): Summary to return info from + Returns: + dict: dictionary with the results. + Notes: Abstract method be implemented by each individual context summary. diff --git a/hed/tools/remodeling/operations/summarize_column_names_op.py b/hed/tools/remodeling/operations/summarize_column_names_op.py index f8b2e55c6..be2699066 100644 --- a/hed/tools/remodeling/operations/summarize_column_names_op.py +++ b/hed/tools/remodeling/operations/summarize_column_names_op.py @@ -78,15 +78,40 @@ def __init__(self, sum_op): super().__init__(sum_op.SUMMARY_TYPE, sum_op.summary_name, sum_op.summary_filename) def update_context(self, new_context): + """ Update the summary for a given tabular input file. + + Parameters: + new_context (dict): A dictionary with the parameters needed to update a summary. + + Notes: + - The summary information is kept in separate ColumnNameSummary objects for each file. + - The summary needs a "name" str and a "column_names" list. + - The summary uses ColumnNameSummary as the summary object. + """ name = new_context['name'] if name not in self.summary_dict: self.summary_dict[name] = ColumnNameSummary(name=name) self.summary_dict[name].update(name, new_context["column_names"]) - def _get_summary_details(self, column_summary): + def _get_details_dict(self, column_summary): + """ Return the summary dictionary extracted from a ColumnNameSummary. + + Parameters: + column_summary (ColumnNameSummary): A column name summary for the data file. + + Returns: + dict - a dictionary with the summary information for column names. 
+ + """ return column_summary.get_summary() def _merge_all(self): + """ Create a ColumnNameSummary containing the overall dataset summary. + + Returns: + ColumnNameSummary - the overall summary object for column names. + + """ all_sum = ColumnNameSummary(name='Dataset') for key, counts in self.summary_dict.items(): for name, pos in counts.file_dict.items(): @@ -94,6 +119,20 @@ def _merge_all(self): return all_sum def _get_result_string(self, name, result, indent=BaseContext.DISPLAY_INDENT): + """ Return a formatted string with the summary for the indicated name. + + Parameters: + name (str): Identifier (usually the filename) of the individual file. + result (dict): The dictionary of the summary results indexed by name. + indent (str): A string containing spaces used for indentation (usually 3 spaces). + + Returns: + str - The results in a printable format ready to be saved to a text file. + + Notes: + This calls _get_dataset_string to get the overall summary string. + + """ if name == "Dataset": return self._get_dataset_string(result, indent) columns = result["Columns"][0] @@ -101,6 +140,16 @@ def _get_result_string(self, name, result, indent=BaseContext.DISPLAY_INDENT): @staticmethod def _get_dataset_string(result, indent=BaseContext.DISPLAY_INDENT): + """ Return a string with the overall summary for all of the tabular files. + + Parameters: + result (dict): Dictionary of merged summary information. + indent (str): String of blanks used as the amount to indent for readability. + + Returns: + str: Formatted string suitable for saving in a file or printing. 
+ + """ sum_list = [f"Dataset: Number of files={result.get('Number files', 0)}"] for element in result.get("Columns", []): sum_list.append(f"{indent}Columns: {str(element['Column names'])}") diff --git a/hed/tools/remodeling/operations/summarize_column_values_op.py b/hed/tools/remodeling/operations/summarize_column_values_op.py index ed0166dd5..000346565 100644 --- a/hed/tools/remodeling/operations/summarize_column_values_op.py +++ b/hed/tools/remodeling/operations/summarize_column_values_op.py @@ -88,28 +88,79 @@ def __init__(self, sum_op): self.skip_columns = sum_op.skip_columns def update_context(self, new_context): + """ Update the summary for a given tabular input file. + + Parameters: + new_context (dict): A dictionary with the parameters needed to update a summary. + + Notes: + - The summary information is kept in separate TabularSummary objects for each file. + - The summary needs a "name" str and a "df". + + """ name = new_context['name'] if name not in self.summary_dict: self.summary_dict[name] = \ TabularSummary(value_cols=self.value_columns, skip_cols=self.skip_columns, name=name) self.summary_dict[name].update(new_context['df']) - def _get_summary_details(self, summary): + def _get_details_dict(self, summary): + """ Return a dictionary with the summary contained in a TabularSummary. + + Parameters: + summary (TabularSummary): Summary object from which the details are extracted. + + Returns: + dict: Dictionary with the information suitable for extracting printout. + + """ return summary.get_summary(as_json=False) def _merge_all(self): + """ Create a TabularSummary containing the overall dataset summary. + + Returns: + TabularSummary - the summary object for column values.
+ + """ all_sum = TabularSummary(value_cols=self.value_columns, skip_cols=self.skip_columns, name='Dataset') for key, counts in self.summary_dict.items(): all_sum.update_summary(counts) return all_sum def _get_result_string(self, name, result, indent=BaseContext.DISPLAY_INDENT): + """ Return a formatted string with the summary for the indicated name. + + Parameters: + name (str): Identifier (usually the filename) of the individual file. + result (dict): The dictionary of the summary results indexed by name. + indent (str): A string containing spaces used for indentation (usually 3 spaces). + + Returns: + str - The results in a printable format ready to be saved to a text file. + + Notes: + This calls _get_dataset_string to get the overall summary string and + _get_individual_string to get an individual summary string. + + """ + if name == "Dataset": return self._get_dataset_string(result, indent=indent) - return self._get_individual_string(name, result, indent=indent) + return self._get_individual_string(result, indent=indent) @staticmethod def _get_dataset_string(result, indent=BaseContext.DISPLAY_INDENT): + """ Return a string with the overall summary for all of the tabular files. + + Parameters: + result (dict): Dictionary of merged summary information. + indent (str): String of blanks used as the amount to indent for readability. + + Returns: + str: Formatted string suitable for saving in a file or printing. + + """ sum_list = [f"Dataset: Total events={result.get('Total events', 0)} " f"Total files={result.get('Total files', 0)}"] cat_cols = result.get("Categorical columns", {}) @@ -121,7 +172,17 @@ def _get_dataset_string(result, indent=BaseContext.DISPLAY_INDENT): return "\n".join(sum_list) @staticmethod - def _get_individual_string(name, result, indent=BaseContext.DISPLAY_INDENT): + def _get_individual_string(result, indent=BaseContext.DISPLAY_INDENT): + """ Return a string with the summary for an individual tabular file. 
+ + Parameters: + result (dict): Dictionary of summary information for a particular tabular file. + indent (str): String of blanks used as the amount to indent for readability. + + Returns: + str: Formatted string suitable for saving in a file or printing. + + """ sum_list = [f"Total events={result.get('Total events', 0)}"] cat_cols = result.get("Categorical columns", {}) if cat_cols: diff --git a/hed/tools/remodeling/operations/summarize_definitions_op.py b/hed/tools/remodeling/operations/summarize_definitions_op.py index 26f9b7ab6..45fda3d82 100644 --- a/hed/tools/remodeling/operations/summarize_definitions_op.py +++ b/hed/tools/remodeling/operations/summarize_definitions_op.py @@ -86,7 +86,15 @@ def __init__(self, sum_op): self.errors = {} def update_context(self, new_context): - name = new_context['name'] + """ Update the summary for a given tabular input file. + + Parameters: + new_context (dict): A dictionary with the parameters needed to update a summary. + + Notes: + - The summary needs a "name" str, a "schema" and a "Sidecar". + + """ data_input = TabularInput(new_context['df'], sidecar=new_context['sidecar'], name=new_context['name']) sidecar = Sidecar(new_context['sidecar']) df, _ = assemble_hed(data_input, sidecar, new_context['schema'], @@ -96,21 +104,63 @@ def update_context(self, new_context): known_defs=self.defs, ambiguous_defs=self.unresolved) self.errors.update(errors) - def _get_summary_details(self, summary): + def _get_details_dict(self, summary): return None def _merge_all(self): + """ Create an Object containing the definition summary. + + Returns: + Object - the overall summary object for definitions. + + """ + return None def _get_result_string(self, name, result, indent=BaseContext.DISPLAY_INDENT): + """ Return a formatted string with the summary for the indicated name. + + Parameters: + name (str): Identifier (usually the filename) of the individual file. + result (dict): The dictionary of the summary results indexed by name. 
+ indent (str): A string containing spaces used for indentation (usually 3 spaces). + + Returns: + str - The results in a printable format ready to be saved to a text file. + + Notes: + This calls _get_dataset_string to get the overall summary string and + _get_individual_string to get an individual summary string. + + """ if name == "Dataset": return self._get_dataset_string(result, indent=indent) - return self._get_individual_string(name, result, indent=indent) + return self._get_individual_string(result, indent=indent) @staticmethod def _get_dataset_string(result, indent=BaseContext.DISPLAY_INDENT): + """ Return a string with the overall summary for all of the tabular files. + + Parameters: + result (dict): Dictionary of merged summary information. + indent (str): String of blanks used as the amount to indent for readability. + + Returns: + str: Formatted string suitable for saving in a file or printing. + + """ return "" @staticmethod - def _get_individual_string(name, result, indent=BaseContext.DISPLAY_INDENT): + def _get_individual_string(result, indent=BaseContext.DISPLAY_INDENT): + """ Return a string with the summary for an individual tabular file. + + Parameters: + result (dict): Dictionary of summary information for a particular tabular file. + indent (str): String of blanks used as the amount to indent for readability. + + Returns: + str: Formatted string suitable for saving in a file or printing. + + """ return "" diff --git a/hed/tools/remodeling/operations/summarize_hed_tags_op.py b/hed/tools/remodeling/operations/summarize_hed_tags_op.py index a8d220df8..89e494338 100644 --- a/hed/tools/remodeling/operations/summarize_hed_tags_op.py +++ b/hed/tools/remodeling/operations/summarize_hed_tags_op.py @@ -16,11 +16,10 @@ class SummarizeHedTagsOp(BaseOp): - **summary_name** (*str*): The name of the summary. - **summary_filename** (*str*): Base filename of the summary. - **tags** (*dict*): Type tag to get_summary separately (e.g. 
'condition-variable' or 'task'). - + Optional remodeling parameters: - **expand_context** (*bool*): If True, include counts from expanded context (not supported). - The purpose of this op is to produce a summary of the occurrences of specified tag. This summary is often used with 'condition-variable' to produce a summary of the experimental design. @@ -95,6 +94,15 @@ def __init__(self, sum_op): self.expand_context = sum_op.expand_context def update_context(self, new_context): + """ Update the summary for a given tabular input file. + + Parameters: + new_context (dict): A dictionary with the parameters needed to update a summary. + + Notes: + - The summary needs a "name" str, a "schema", a "df", and a "Sidecar". + + """ counts = HedTagCounts(new_context['name'], total_events=len(new_context['df'])) sidecar = new_context['sidecar'] if sidecar and not isinstance(sidecar, Sidecar): @@ -108,7 +116,7 @@ def update_context(self, new_context): counts.update_event_counts(hed, new_context['name']) self.summary_dict[new_context["name"]] = counts - def _get_summary_details(self, merge_counts): + def _get_details_dict(self, merge_counts): template, unmatched = merge_counts.organize_tags(self.tags) details = {} for key, key_list in self.tags.items(): @@ -119,11 +127,33 @@ def _get_summary_details(self, merge_counts): "Main tags": details, "Other tags": leftovers} def _get_result_string(self, name, result, indent=BaseContext.DISPLAY_INDENT): + """ Return a formatted string with the summary for the indicated name. + + Parameters: + name (str): Identifier (usually the filename) of the individual file. + result (dict): The dictionary of the summary results indexed by name. + indent (str): A string containing spaces used for indentation (usually 3 spaces). + + Returns: + str - The results in a printable format ready to be saved to a text file. + + Notes: + This calls _get_dataset_string to get the overall summary string and + _get_individual_string to get an individual summary string.
+ + """ if name == 'Dataset': return self._get_dataset_string(result, indent=indent) - return self._get_individual_string(name, result, indent=indent) + return self._get_individual_string(result, indent=indent) def _merge_all(self): + """ Create a HedTagCounts containing the overall dataset HED tag summary. + + Returns: + HedTagCounts - the overall dataset summary object for HED tag counts. + + """ + all_counts = HedTagCounts('Dataset') for key, counts in self.summary_dict.items(): all_counts.merge_tag_dicts(counts.tag_dict) @@ -131,27 +161,47 @@ def _merge_all(self): all_counts.files[file_name] = "" all_counts.total_events = all_counts.total_events + counts.total_events return all_counts - + @staticmethod def _get_dataset_string(result, indent=BaseContext.DISPLAY_INDENT): + """ Return a string with the overall summary for all of the tabular files. + + Parameters: + result (dict): Dictionary of merged summary information. + indent (str): String of blanks used as the amount to indent for readability. + + Returns: + str: Formatted string suitable for saving in a file or printing. + + """ sum_list = [f"Dataset: Total events={result.get('total_events', 0)} " f"Total files={len(result.get('files', []))}"] sum_list = sum_list + HedTagSummaryContext._get_tag_list(result, indent=indent) return "\n".join(sum_list) - + @staticmethod - def _get_individual_string(name, result, indent=BaseContext.DISPLAY_INDENT): + def _get_individual_string(result, indent=BaseContext.DISPLAY_INDENT): + """ Return a string with the summary for an individual tabular file. + + Parameters: + result (dict): Dictionary of summary information for a particular tabular file. + indent (str): String of blanks used as the amount to indent for readability. + + Returns: + str: Formatted string suitable for saving in a file or printing. 
+ + """ sum_list = [f"Total events={result.get('total_events', 0)}"] sum_list = sum_list + HedTagSummaryContext._get_tag_list(result, indent=indent) return "\n".join(sum_list) - + @staticmethod def _tag_details(tags): tag_list = [] for tag in tags: tag_list.append(f"{tag['tag']}[{tag['events']},{len(tag['files'])}]") return tag_list - + @staticmethod def _get_tag_list(tag_info, indent=BaseContext.DISPLAY_INDENT): sum_list = [f"\n{indent}Main tags[events,files]:"] @@ -171,5 +221,3 @@ def _get_details(key_list, template, verbose=False): for tag_cnt in template[item.lower()]: key_details.append(tag_cnt.get_info(verbose=verbose)) return key_details - - diff --git a/hed/tools/remodeling/operations/summarize_hed_type_op.py b/hed/tools/remodeling/operations/summarize_hed_type_op.py index 85ea41d7d..7b3993357 100644 --- a/hed/tools/remodeling/operations/summarize_hed_type_op.py +++ b/hed/tools/remodeling/operations/summarize_hed_type_op.py @@ -88,6 +88,16 @@ def __init__(self, sum_op): self.type_tag = sum_op.type_tag def update_context(self, new_context): + """ Update the summary for a given tabular input file. + + Parameters: + new_context (dict): A dictionary with the parameters needed to update a summary. + + Notes: + - The summary needs a "name" str, a "schema", a "df", and a "Sidecar". + + """ + sidecar = new_context['sidecar'] if sidecar and not isinstance(sidecar, Sidecar): sidecar = Sidecar(sidecar) @@ -102,17 +112,14 @@ def update_context(self, new_context): counts.add_descriptions(type_values.definitions) self.summary_dict[new_context["name"]] = counts - def _get_summary_details(self, counts): + def _get_details_dict(self, counts): return counts.get_summary() def _merge_all(self): - """ Return merged information. + """ Create a HedTypeCounts containing the overall dataset HED type summary. Returns: - object: Consolidated summary of information. - - Notes: - Abstract method be implemented by each individual context summary.
+ HedTypeCounts - the overall dataset summary object for HED type summary. """ all_counts = HedTypeCounts('Dataset', self.type_tag) @@ -121,12 +128,37 @@ def _merge_all(self): return all_counts def _get_result_string(self, name, result, indent=BaseContext.DISPLAY_INDENT): + """ Return a formatted string with the summary for the indicated name. + + Parameters: + name (str): Identifier (usually the filename) of the individual file. + result (dict): The dictionary of the summary results indexed by name. + indent (str): A string containing spaces used for indentation (usually 3 spaces). + + Returns: + str - The results in a printable format ready to be saved to a text file. + + Notes: + This calls _get_dataset_string to get the overall summary string and + _get_individual_string to get an individual summary string. + + """ if name == "Dataset": return self._get_dataset_string(result, indent=indent) - return self._get_individual_string(name, result, indent=indent) + return self._get_individual_string(result, indent=indent) @staticmethod def _get_dataset_string(result, indent=BaseContext.DISPLAY_INDENT): + """ Return a string with the overall summary for all of the tabular files. + + Parameters: + result (dict): Dictionary of merged summary information. + indent (str): String of blanks used as the amount to indent for readability. + + Returns: + str: Formatted string suitable for saving in a file or printing. + + """ details = result.get('details', {}) sum_list = [f"Dataset: Type={result['type_tag']} Type values={len(details)} " f"Total events={result.get('total_events', 0)} Total files={len(result.get('files', []))}"] @@ -146,11 +178,21 @@ def _get_dataset_string(result, indent=BaseContext.DISPLAY_INDENT): return "\n".join(sum_list) @staticmethod - def _get_individual_string(name, result, indent=BaseContext.DISPLAY_INDENT): + def _get_individual_string(result, indent=BaseContext.DISPLAY_INDENT): + """ Return a string with the summary for an individual tabular file. 
+ + Parameters: + result (dict): Dictionary of summary information for a particular tabular file. + indent (str): String of blanks used as the amount to indent for readability. + + Returns: + str: Formatted string suitable for saving in a file or printing. + + """ details = result.get('details', {}) sum_list = [f"Type={result['type_tag']} Type values={len(details)} " f"Total events={result.get('total_events', 0)}"] - + for key, item in details.items(): sum_list.append(f"{indent*2}{key}: {item['levels']} levels in {item['events']} events") str1 = "" @@ -175,4 +217,4 @@ def _level_details(level_counts, offset="", indent=""): level_list.append(f"{offset}{indent*3}Tags: {str(details['tags'])}") if details['description']: level_list.append(f"{offset}{indent*3}Description: {details['description']}") - return level_list \ No newline at end of file + return level_list diff --git a/hed/tools/remodeling/operations/summarize_hed_validation_op.py b/hed/tools/remodeling/operations/summarize_hed_validation_op.py index d1bd8f53e..8120371ad 100644 --- a/hed/tools/remodeling/operations/summarize_hed_validation_op.py +++ b/hed/tools/remodeling/operations/summarize_hed_validation_op.py @@ -47,7 +47,7 @@ def __init__(self, parameters): TypeError - If a parameter has the wrong type. - + """ super().__init__(self.PARAMS, parameters) self.summary_name = parameters['summary_name'] @@ -86,6 +86,20 @@ def __init__(self, sum_op): self.check_for_warnings = sum_op.check_for_warnings def _get_result_string(self, name, result, indent=BaseContext.DISPLAY_INDENT): + """ Return a formatted string with the summary for the indicated name. + + Parameters: + name (str): Identifier (usually the filename) of the individual file. + result (dict): The dictionary of the summary results indexed by name. + indent (str): A string containing spaces used for indentation (usually 3 spaces). + + Returns: + str - The results in a printable format ready to be saved to a text file. 
+ + Notes: + This gets the error list from "sidecar_issues" and "event_issues". + + """ if result["is_merged"]: sum_list = [f"{name}: [{result['total_sidecar_files']} sidecar files, " @@ -102,6 +116,15 @@ def _get_result_string(self, name, result, indent=BaseContext.DISPLAY_INDENT): return "\n".join(sum_list) def update_context(self, new_context): + """ Update the summary for a given tabular input file. + + Parameters: + new_context (dict): A dictionary with the parameters needed to update a summary. + + Notes: + - The summary needs a "name" str, a schema, a "df", and a "Sidecar". + """ + results = self.get_empty_results() results["total_event_files"] = 1 results["event_issues"][new_context["name"]] = [] @@ -128,17 +151,23 @@ def update_context(self, new_context): results['event_issues'][new_context["name"]] = issues results['total_event_issues'] = len(issues) - def _get_summary_details(self, summary_info): + def _get_details_dict(self, summary_info): + """Return the summary details from the summary_info. + + Parameters: + summary_info (dict): Dictionary of issues + + Returns: + dict: Same summary_info as was passed in. + + """ return summary_info def _merge_all(self): - """ Return merged information. + """ Create a dictionary containing all of the errors in the dataset. Returns: - object: Consolidated summary of information. - - Notes: - Abstract method be implemented by each individual context summary. + dict - dictionary of issues organized into sidecar_issues and event_issues. """ diff --git a/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py b/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py index 95fc0eca6..0a403ac4b 100644 --- a/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py +++ b/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py @@ -43,7 +43,7 @@ def __init__(self, parameters): KeyError - If a required parameter is missing. - If an unexpected parameter is provided. 
- + TypeError - If a parameter has the wrong type. @@ -88,15 +88,24 @@ def __init__(self, sum_op): self.skip_cols = sum_op.skip_columns def update_context(self, new_context): + """ Update the summary for a given tabular input file. + + Parameters: + new_context (dict): A dictionary with the parameters needed to update a summary. + + Notes: + - The summary needs a "name" str and a "df". + """ + tab_sum = TabularSummary(value_cols=self.value_cols, skip_cols=self.skip_cols, name=new_context["name"]) tab_sum.update(new_context['df'], new_context['name']) self.summary_dict[new_context["name"]] = tab_sum - def _get_summary_details(self, summary_info): + def _get_details_dict(self, summary_info): """ Return the summary-specific information. Parameters: - summary_info (Object): Summary to return info from + summary_info (TabularSummary): Summary to return info from Notes: Abstract method be implemented by each individual context summary. @@ -111,16 +120,69 @@ def _merge_all(self): """ Merge summary information from all of the files Returns: - object: Consolidated summary of information. + TabularSummary: Consolidated summary of information. + + """ + + all_sum = TabularSummary(name='Dataset') + for key, tab_sum in self.summary_dict.items(): + all_sum.update_summary(tab_sum) + return all_sum + + def _get_result_string(self, name, result, indent=BaseContext.DISPLAY_INDENT): + """ Return a formatted string with the summary for the indicated name. + + Parameters: + name (str): Identifier (usually the filename) of the individual file. + result (dict): The dictionary of the summary results indexed by name. + indent (str): A string containing spaces used for indentation (usually 3 spaces). + + Returns: + str - The results in a printable format ready to be saved to a text file. Notes: - Abstract method be implemented by each individual context summary. 
+ This calls _get_dataset_string to get the overall summary string and + _get_individual_string to get an individual summary string. """ - return {} - def _get_result_string(self, name, result, indent=BaseContext.DISPLAY_INDENT): if name == "Dataset": - return "Dataset: Currently no overall sidecar extraction is available" - json_str = f"\nSidecar:\n{json.dumps(result['sidecar'], indent=4)}" - return f"{name}: Total events={result['total_events']} Skip columns: {str(result['skip_cols'])}{json_str}" + return self._get_dataset_string(result, indent=indent) + return self._get_individual_string(result, indent=indent) + + @staticmethod + def _get_dataset_string(result, indent=BaseContext.DISPLAY_INDENT): + """ Return a string with the overall summary for all of the tabular files. + + Parameters: + result (dict): Dictionary of merged summary information. + indent (str): String of blanks used as the amount to indent for readability. + + Returns: + str: Formatted string suitable for saving in a file or printing. + + """ + sum_list = [f"Dataset: Total events={result.get('total_events', 0)} " + f"Total files={result.get('total_files', 0)}", + f"Skip columns: {str(result.get('skip_cols', []))}", + f"Value columns: {str(result.get('value_cols', []))}", + f"Sidecar:\n{json.dumps(result['sidecar'], indent=indent)}"] + return "\n".join(sum_list) + + @staticmethod + def _get_individual_string(result, indent=BaseContext.DISPLAY_INDENT): + """ Return a string with the summary for an individual tabular file. + + Parameters: + result (dict): Dictionary of summary information for a particular tabular file. + indent (str): String of blanks used as the amount to indent for readability. + + Returns: + str: Formatted string suitable for saving in a file or printing. 
+ + """ + sum_list = [f"Total events={result.get('total_events', 0)}", + f"Skip columns: {str(result.get('skip_cols', []))}", + f"Value columns: {str(result.get('value_cols', []))}", + f"Sidecar:\n{json.dumps(result['sidecar'], indent=indent)}"] + return "\n".join(sum_list) diff --git a/tests/tools/analysis/test_tabular_summary.py b/tests/tools/analysis/test_tabular_summary.py index cefecebfd..1a35dabec 100644 --- a/tests/tools/analysis/test_tabular_summary.py +++ b/tests/tools/analysis/test_tabular_summary.py @@ -189,6 +189,23 @@ def test_make_combined_dicts(self): self.assertEqual(len(dicts_all2.categorical_info), 7, "make_combined_dicts should return right number of entries") + def test_update_summary(self): + files_bids = get_file_list(self.bids_base_dir, extensions=[".tsv"], name_suffix="_events") + tab_list = [] + skip_cols = ['onset', 'duration', 'sample', 'value'] + value_cols = ['stim_file', 'trial'] + tab_all = TabularSummary(skip_cols=skip_cols, value_cols=value_cols) + for name in files_bids: + tab = TabularSummary(skip_cols=skip_cols, value_cols=value_cols) + tab_list.append(tab) + df = get_new_dataframe(name) + tab.update(df, name=name) + self.assertEqual(tab.total_events, 200) + self.assertEqual(tab.total_files, 1) + tab_all.update_summary(tab) + self.assertEqual(len(files_bids), tab_all.total_files) + self.assertEqual(len(files_bids)*200, tab_all.total_events) + if __name__ == '__main__': unittest.main() diff --git a/tests/tools/remodeling/cli/test_run_remodel.py b/tests/tools/remodeling/cli/test_run_remodel.py index 099f80252..18ae48734 100644 --- a/tests/tools/remodeling/cli/test_run_remodel.py +++ b/tests/tools/remodeling/cli/test_run_remodel.py @@ -166,6 +166,12 @@ def test_run_bids_ops_verbose(self): main(arg_list) self.assertFalse(fp.getvalue()) + def test_temp(self): + data_root = "g:/ds002718OpenNeuro" + model_path = 'G:/wh_excerpt_rmdl.json' + arg_list = [data_root, model_path, '-x', 'derivatives', 'code', 'stimuli', '-b', '-n', ''] + 
main(arg_list) + if __name__ == '__main__': unittest.main() diff --git a/tests/tools/remodeling/operations/test_base_context.py b/tests/tools/remodeling/operations/test_base_context.py index 0ac8bb548..68e8ddc0f 100644 --- a/tests/tools/remodeling/operations/test_base_context.py +++ b/tests/tools/remodeling/operations/test_base_context.py @@ -11,7 +11,7 @@ def __init__(self): self.summary_dict["data1"] = "test data 1" self.summary_dict["data2"] = "test data 2" - def _get_summary_details(self, include_individual=True): + def _get_details_dict(self, include_individual=True): summary = {"name": self.context_name} if include_individual: summary["more"] = "more stuff" diff --git a/tests/tools/util/test_schema_util.py b/tests/tools/util/test_schema_util.py new file mode 100644 index 000000000..8ee8d1210 --- /dev/null +++ b/tests/tools/util/test_schema_util.py @@ -0,0 +1,29 @@ +import os +import pandas as pd +import unittest +from hed.errors.exceptions import HedFileError +from hed.schema.hed_schema_io import load_schema_version +from hed.tools.util.schema_util import flatten_schema + + +class Test(unittest.TestCase): + + @classmethod + def setUpClass(cls): + pass + + @classmethod + def tearDownClass(cls): + pass + + def test_flatten_schema(self): + hed_schema = load_schema_version('8.1.0') + df = flatten_schema(hed_schema, skip_non_tag=True) + # df.to_csv("h:/Version_3_column.tsv", sep='\t', index=None) + self.assertIsInstance(df, pd.DataFrame) + self.assertEqual(len(df.columns), 3) + self.assertEqual(len(df.index), 1037) + + +if __name__ == '__main__': + unittest.main()