diff --git a/hed/tools/__init__.py b/hed/tools/__init__.py index 6d7b49e77..4cfe71c4c 100644 --- a/hed/tools/__init__.py +++ b/hed/tools/__init__.py @@ -19,7 +19,7 @@ from .remodeling.dispatcher import Dispatcher from .remodeling.backup_manager import BackupManager -from .remodeling.operations.base_context import BaseContext +from .remodeling.operations.base_summary import BaseSummary from .remodeling.operations.base_op import BaseOp from .remodeling.operations.factor_column_op import FactorColumnOp from .remodeling.operations.factor_hed_tags_op import FactorHedTagsOp diff --git a/hed/tools/analysis/column_name_summary.py b/hed/tools/analysis/tabular_column_name_summary.py similarity index 95% rename from hed/tools/analysis/column_name_summary.py rename to hed/tools/analysis/tabular_column_name_summary.py index 5c7a710c9..cd42651ae 100644 --- a/hed/tools/analysis/column_name_summary.py +++ b/hed/tools/analysis/tabular_column_name_summary.py @@ -3,7 +3,7 @@ import json -class ColumnNameSummary: +class TabularColumnNameSummary: def __init__(self, name=''): self.name = name self.file_dict = {} diff --git a/hed/tools/analysis/tabular_summary.py b/hed/tools/analysis/tabular_summary.py index 85dc44cd5..d9fd79702 100644 --- a/hed/tools/analysis/tabular_summary.py +++ b/hed/tools/analysis/tabular_summary.py @@ -113,6 +113,7 @@ def update(self, data, name=None): Parameters: data (DataFrame, str, or list): DataFrame containing data to update. + name (str): Name of the summary """ diff --git a/hed/tools/remodeling/dispatcher.py b/hed/tools/remodeling/dispatcher.py index 06d664e91..8060fabb5 100644 --- a/hed/tools/remodeling/dispatcher.py +++ b/hed/tools/remodeling/dispatcher.py @@ -47,7 +47,7 @@ def __init__(self, operation_list, data_root=None, raise ValueError("InvalidOperationList", f"{these_errors}") self.parsed_ops = op_list self.hed_schema = get_schema(hed_versions) - self.context_dict = {} + self.summary_dicts = {} def get_summaries(self, file_formats=['.txt', '.json']): """ Return the summaries in a dictionary of strings suitable for saving or archiving. @@ -62,8 +62,8 @@ def get_summaries(self, file_formats=['.txt', '.json']): summary_list = [] time_stamp = '_' + get_timestamp() - for context_name, context_item in self.context_dict.items(): - file_base = context_item.context_filename + for context_name, context_item in self.summary_dicts.items(): + file_base = context_item.op.summary_filename if self.data_root: file_base = extract_suffix_path(self.data_root, file_base) file_base = clean_filename(file_base) @@ -171,7 +171,7 @@ def save_summaries(self, save_formats=['.json', '.txt'], individual_summaries="s if not summary_dir: summary_dir = self.get_summary_save_dir() os.makedirs(summary_dir, exist_ok=True) - for context_name, context_item in self.context_dict.items(): + for context_name, context_item in self.summary_dicts.items(): context_item.save(summary_dir, save_formats, individual_summaries=individual_summaries) @staticmethod diff --git a/hed/tools/remodeling/operations/base_context.py b/hed/tools/remodeling/operations/base_summary.py similarity index 77% rename from hed/tools/remodeling/operations/base_context.py rename to hed/tools/remodeling/operations/base_summary.py index a7a29d356..aedb0429d 100644 --- a/hed/tools/remodeling/operations/base_context.py +++ b/hed/tools/remodeling/operations/base_summary.py @@ -1,4 +1,4 @@ -""" Abstract base class for the context of summary operations. """ +""" Abstract base class for the contents of summary operations. """ import os from abc import ABC, abstractmethod @@ -6,23 +6,19 @@ from hed.tools.util.io_util import get_timestamp -class BaseContext(ABC): - """ Abstract base class for summary contexts. Should not be instantiated. +class BaseSummary(ABC): + """ Abstract base class for summary contents. Should not be instantiated. Parameters: - context_type (str) Type of summary. - context_name (str) Printable name -- should be unique. - context_filename (str) Base filename for saving the context. + sum_op (BaseOp): Operation corresponding to this summary. """ DISPLAY_INDENT = " " INDIVIDUAL_SUMMARIES_PATH = 'individual_summaries' - def __init__(self, context_type, context_name, context_filename): - self.context_type = context_type - self.context_name = context_name - self.context_filename = context_filename + def __init__(self, sum_op): + self.op = sum_op self.summary_dict = {} def get_summary_details(self, include_individual=True): @@ -39,19 +35,19 @@ def get_summary_details(self, include_individual=True): - The 'Individual files' value is dictionary whose keys are file names and values are their corresponding summaries. - Users are expected to provide _merge_all and _get_details_dict to support this. + Users are expected to provide merge_all_info and get_details_dict to support this. """ - merged_summary = self._merge_all() + merged_summary = self.merge_all_info() if merged_summary: - details = self._get_details_dict(merged_summary) + details = self.get_details_dict(merged_summary) else: details = "Overall summary unavailable" summary_details = {"Dataset": details, "Individual files": {}} if include_individual: for name, count in self.summary_dict.items(): - summary_details["Individual files"][name] = self._get_details_dict(count) + summary_details["Individual files"][name] = self.get_details_dict(count) return summary_details def get_summary(self, individual_summaries="separate"): @@ -71,8 +67,8 @@ def get_summary(self, individual_summaries="separate"): """ include_individual = individual_summaries == "separate" or individual_summaries == "consolidated" summary_details = self.get_summary_details(include_individual=include_individual) - dataset_summary = {"Context name": self.context_name, "Context type": self.context_type, - "Context filename": self.context_filename, "Overall summary": summary_details['Dataset']} + dataset_summary = {"Summary name": self.op.summary_name, "Summary type": self.op.SUMMARY_TYPE, + "Summary filename": self.op.summary_filename, "Overall summary": summary_details['Dataset']} summary = {"Dataset": dataset_summary, "Individual files": {}} if summary_details["Individual files"]: summary["Individual files"] = self.get_individual(summary_details["Individual files"], @@ -83,8 +79,8 @@ def get_individual(self, summary_details, separately=True): individual_dict = {} for name, name_summary in summary_details.items(): if separately: - individual_dict[name] = {"Context name": self.context_name, "Context type": self.context_type, - "Context filename": self.context_filename, "File summary": name_summary} + individual_dict[name] = {"Summary name": self.op.summary_name, "summary type": self.op.SUMMARY_TYPE, + "Summary filename": self.op.summary_filename, "File summary": name_summary} else: individual_dict[name] = name_summary return individual_dict @@ -101,14 +97,16 @@ def get_text_summary_details(self, include_individual=True): def get_text_summary(self, individual_summaries="separate"): include_individual = individual_summaries == "separate" or individual_summaries == "consolidated" summary_details = self.get_text_summary_details(include_individual=include_individual) - summary = {"Dataset": f"Context name: {self.context_name}\n" + f"Context type: {self.context_type}\n" + - f"Context filename: {self.context_filename}\n\n" + f"Overall summary:\n{summary_details['Dataset']}"} + summary = {"Dataset": f"Summary name: {self.op.summary_name}\n" + + f"Summary type: {self.op.SUMMARY_TYPE}\n" + + f"Summary filename: {self.op.summary_filename}\n\n" + + f"Overall summary:\n{summary_details['Dataset']}"} if individual_summaries == "separate": summary["Individual files"] = {} for name, name_summary in summary_details["Individual files"].items(): - summary["Individual files"][name] = f"Context name: {self.context_name}\n" + \ - f"Context type: {self.context_type}\n" + \ - f"Context filename: {self.context_filename}\n\n" + \ + summary["Individual files"][name] = f"Summary name: {self.op.summary_name}\n" + \ + f"Summary type: {self.op.SUMMARY_TYPE}\n" + \ + f"Summary filename: {self.op.summary_filename}\n\n" + \ f"Summary for {name}:\n{name_summary}" elif include_individual: ind_list = [] @@ -132,17 +130,17 @@ def save(self, save_dir, file_formats=['.txt'], individual_summaries="separate") def _save_summary_files(self, save_dir, file_format, summary, individual_summaries): """ Save the files in the appropriate format. - + Parameters: save_dir (str): Path to the directory in which the summaries will be saved. file_format (str): string representing the extension (including .), '.txt' or '.json'. summary (dictionary): Dictionary of summaries (has "Dataset" and "Individual files" keys. - + """ time_stamp = '_' + get_timestamp() - this_save = os.path.join(save_dir, self.context_name + '/') + this_save = os.path.join(save_dir, self.op.summary_name + '/') os.makedirs(os.path.realpath(this_save), exist_ok=True) - filename = os.path.realpath(os.path.join(this_save, self.context_filename + time_stamp + file_format)) + filename = os.path.realpath(os.path.join(this_save, self.op.summary_filename + time_stamp + file_format)) individual = summary.get("Individual files", {}) if individual_summaries == "none" or not individual: self.dump_summary(filename, summary["Dataset"]) @@ -159,7 +157,7 @@ def _save_summary_files(self, save_dir, file_format, summary, individual_summari def _get_summary_filepath(self, individual_dir, name, time_stamp, file_format): """ Return the filepath for the summary including the timestamp - + Parameters: individual_dir (str): path of the directory in which the summary should be stored. name (str): Path of the original file from which the summary was extracted. @@ -175,7 +173,7 @@ def _get_summary_filepath(self, individual_dir, name, time_stamp, file_format): match = True filename = None while match: - filename = f"{self.context_filename}_{this_name}_{count}{time_stamp}{file_format}" + filename = f"{self.op.summary_filename}_{this_name}_{count}{time_stamp}{file_format}" filename = os.path.realpath(os.path.join(individual_dir, filename)) if not os.path.isfile(filename): break @@ -207,7 +205,7 @@ def dump_summary(filename, summary): text_file.write(summary) @abstractmethod - def _get_details_dict(self, summary_info): + def get_details_dict(self, summary_info): """ Return the summary-specific information. Parameters: @@ -217,30 +215,30 @@ def _get_details_dict(self, summary_info): dict: dictionary with the results. Notes: - Abstract method be implemented by each individual context summary. + Abstract method be implemented by each individual summary. """ raise NotImplementedError @abstractmethod - def _merge_all(self): + def merge_all_info(self): """ Return merged information. Returns: object: Consolidated summary of information. Notes: - Abstract method be implemented by each individual context summary. + Abstract method be implemented by each individual summary. """ raise NotImplementedError @abstractmethod - def update_context(self, context_dict): + def update_summary(self, summary_dict): """ Method to update summary for a given tabular input. Parameters: - context_dict (dict) A context specific dictionary with the update information. + summary_dict (dict) A summary specific dictionary with the update information. """ raise NotImplementedError diff --git a/hed/tools/remodeling/operations/summarize_column_names_op.py b/hed/tools/remodeling/operations/summarize_column_names_op.py index be2699066..d454bcc16 100644 --- a/hed/tools/remodeling/operations/summarize_column_names_op.py +++ b/hed/tools/remodeling/operations/summarize_column_names_op.py @@ -1,8 +1,8 @@ """ Summarize the column names in a collection of tabular files. """ -from hed.tools.analysis.column_name_summary import ColumnNameSummary +from hed.tools.analysis.tabular_column_name_summary import TabularColumnNameSummary from hed.tools.remodeling.operations.base_op import BaseOp -from hed.tools.remodeling.operations.base_context import BaseContext +from hed.tools.remodeling.operations.base_summary import BaseSummary class SummarizeColumnNamesOp(BaseOp): @@ -60,44 +60,44 @@ def do_op(self, dispatcher, df, name, sidecar=None): DataFrame: A new DataFrame with the factor columns appended. Side-effect: - Updates the context. + Updates the relevant summary. """ - summary = dispatcher.context_dict.get(self.summary_name, None) + summary = dispatcher.summary_dicts.get(self.summary_name, None) if not summary: - summary = ColumnNameSummaryContext(self) - dispatcher.context_dict[self.summary_name] = summary - summary.update_context({"name": name, "column_names": list(df.columns)}) + summary = ColumnNameSummary(self) + dispatcher.summary_dicts[self.summary_name] = summary + summary.update_summary({"name": name, "column_names": list(df.columns)}) return df -class ColumnNameSummaryContext(BaseContext): +class ColumnNameSummary(BaseSummary): def __init__(self, sum_op): - super().__init__(sum_op.SUMMARY_TYPE, sum_op.summary_name, sum_op.summary_filename) + super().__init__(sum_op) - def update_context(self, new_context): + def update_summary(self, new_info): """ Update the summary for a given tabular input file. Parameters: - new_context (dict): A dictionary with the parameters needed to update a summary. + new_info (dict): A dictionary with the parameters needed to update a summary. Notes: - - The summary information is kept in separate ColumnNameSummary objects for each file. + - The summary information is kept in separate TabularColumnNameSummary objects for each file. - The summary needs a "name" str and a "column_names" list. - - The summary uses ColumnNameSummary as the summary object. + - The summary uses TabularColumnNameSummary as the summary object. """ - name = new_context['name'] + name = new_info['name'] if name not in self.summary_dict: - self.summary_dict[name] = ColumnNameSummary(name=name) - self.summary_dict[name].update(name, new_context["column_names"]) + self.summary_dict[name] = TabularColumnNameSummary(name=name) + self.summary_dict[name].update(name, new_info["column_names"]) - def _get_details_dict(self, column_summary): + def get_details_dict(self, column_summary): """ Return the summary dictionary extracted from a ColumnNameSummary. Parameters: - column_summary (ColumnNameSummary): A column name summary for the data file. + column_summary (TabularColumnNameSummary): A column name summary for the data file. Returns: dict - a dictionary with the summary information for column names. @@ -105,20 +105,20 @@ def _get_details_dict(self, column_summary): """ return column_summary.get_summary() - def _merge_all(self): - """ Create a ColumnNameSummary containing the overall dataset summary. + def merge_all_info(self): + """ Create a TabularColumnNameSummary containing the overall dataset summary. Returns: - ColumnNameSummary - the overall summary object for column names. + TabularColumnNameSummary - the overall summary object for column names. """ - all_sum = ColumnNameSummary(name='Dataset') + all_sum = TabularColumnNameSummary(name='Dataset') for key, counts in self.summary_dict.items(): for name, pos in counts.file_dict.items(): all_sum.update(name, counts.unique_headers[pos]) return all_sum - def _get_result_string(self, name, result, indent=BaseContext.DISPLAY_INDENT): + def _get_result_string(self, name, result, indent=BaseSummary.DISPLAY_INDENT): """ Return a formatted string with the summary for the indicated name. Parameters: @@ -139,7 +139,7 @@ def _get_result_string(self, name, result, indent=BaseContext.DISPLAY_INDENT): return f"{indent}{str(columns['Column names'])}" @staticmethod - def _get_dataset_string(result, indent=BaseContext.DISPLAY_INDENT): + def _get_dataset_string(result, indent=BaseSummary.DISPLAY_INDENT): """ Return a string with the overall summary for all of the tabular files. Parameters: diff --git a/hed/tools/remodeling/operations/summarize_column_values_op.py b/hed/tools/remodeling/operations/summarize_column_values_op.py index 000346565..a01dfc856 100644 --- a/hed/tools/remodeling/operations/summarize_column_values_op.py +++ b/hed/tools/remodeling/operations/summarize_column_values_op.py @@ -2,7 +2,7 @@ from hed.tools import TabularSummary from hed.tools.remodeling.operations.base_op import BaseOp -from hed.tools.remodeling.operations.base_context import BaseContext +from hed.tools.remodeling.operations.base_summary import BaseSummary class SummarizeColumnValuesOp(BaseOp): @@ -14,6 +14,9 @@ class SummarizeColumnValuesOp(BaseOp): - **skip_columns** (*list*): Names of columns to skip in the summary. - **value_columns** (*list*): Names of columns to treat as value columns rather than categorical columns. + Optional remodeling parameters: + - **max_categorical** (*int*): Maximum number of unique values to include in summary for a categorical column. + The purpose is to produce a summary of the values in a tabular file. """ @@ -27,10 +30,14 @@ class SummarizeColumnValuesOp(BaseOp): "value_columns": list }, "optional_parameters": { + "values_per_line": int, + "max_categorical": int } } SUMMARY_TYPE = 'column_values' + VALUES_PER_LINE = 5 + MAX_CATEGORICAL = 50 def __init__(self, parameters): """ Constructor for the summarize column values operation. @@ -54,6 +61,8 @@ def __init__(self, parameters): self.summary_filename = parameters['summary_filename'] self.skip_columns = parameters['skip_columns'] self.value_columns = parameters['value_columns'] + self.max_categorical = parameters.get('max_categorical', float('inf')) + self.values_per_line = parameters.get('values_per_line', self.VALUES_PER_LINE) def do_op(self, dispatcher, df, name, sidecar=None): """ Create factor columns corresponding to values in a specified column. @@ -68,43 +77,41 @@ def do_op(self, dispatcher, df, name, sidecar=None): DataFrame: A new DataFrame with the factor columns appended. Side-effect: - Updates the context. + Updates the relevant summary. """ - summary = dispatcher.context_dict.get(self.summary_name, None) + summary = dispatcher.summary_dicts.get(self.summary_name, None) if not summary: - summary = ColumnValueSummaryContext(self) - dispatcher.context_dict[self.summary_name] = summary - summary.update_context({'df': dispatcher.post_proc_data(df), 'name': name}) + summary = ColumnValueSummary(self) + dispatcher.summary_dicts[self.summary_name] = summary + summary.update_summary({'df': dispatcher.post_proc_data(df), 'name': name}) return df -class ColumnValueSummaryContext(BaseContext): +class ColumnValueSummary(BaseSummary): def __init__(self, sum_op): - super().__init__(sum_op.SUMMARY_TYPE, sum_op.summary_name, sum_op.summary_filename) - self.value_columns = sum_op.value_columns - self.skip_columns = sum_op.skip_columns + super().__init__(sum_op) - def update_context(self, new_context): + def update_summary(self, new_info): """ Update the summary for a given tabular input file. Parameters: - new_context (dict): A dictionary with the parameters needed to update a summary. + new_info (dict): A dictionary with the parameters needed to update a summary. Notes: - The summary information is kept in separate TabularSummary objects for each file. - The summary needs a "name" str and a "df" . """ - name = new_context['name'] + name = new_info['name'] if name not in self.summary_dict: self.summary_dict[name] = \ - TabularSummary(value_cols=self.value_columns, skip_cols=self.skip_columns, name=name) - self.summary_dict[name].update(new_context['df']) + TabularSummary(value_cols=self.op.value_columns, skip_cols=self.op.skip_columns, name=name) + self.summary_dict[name].update(new_info['df']) - def _get_details_dict(self, summary): + def get_details_dict(self, summary): """ Return a dictionary with the summary contained in a TabularSummary Parameters: @@ -114,21 +121,27 @@ def _get_details_dict(self, summary): dict: Dictionary with the information suitable for extracting printout. """ - return summary.get_summary(as_json=False) - - def _merge_all(self): + this_summary = summary.get_summary(as_json=False) + unique_counts = [(key, len(count_dict)) for key, count_dict in this_summary['Categorical columns'].items()] + this_summary['Categorical counts'] = dict(unique_counts) + for key, dict_entry in this_summary['Categorical columns'].items(): + num_disp, sorted_tuples = ColumnValueSummary.sort_dict(self, dict_entry, reverse=True) + this_summary['Categorical columns'][key] = dict(sorted_tuples[:min(num_disp, self.op.max_categorical)]) + return this_summary + + def merge_all_info(self): """ Create a TabularSummary containing the overall dataset summary. Returns: TabularSummary - the summary object for column values. """ - all_sum = TabularSummary(value_cols=self.value_columns, skip_cols=self.skip_columns, name='Dataset') + all_sum = TabularSummary(value_cols=self.op.value_columns, skip_cols=self.op.skip_columns, name='Dataset') for key, counts in self.summary_dict.items(): all_sum.update_summary(counts) return all_sum - def _get_result_string(self, name, result, indent=BaseContext.DISPLAY_INDENT): + def _get_result_string(self, name, result, indent=BaseSummary.DISPLAY_INDENT): """ Return a formatted string with the summary for the indicated name. Parameters: @@ -149,8 +162,29 @@ def _get_result_string(self, name, result, indent=BaseContext.DISPLAY_INDENT): return self._get_dataset_string(result, indent=indent) return self._get_individual_string(result, indent=indent) - @staticmethod - def _get_dataset_string(result, indent=BaseContext.DISPLAY_INDENT): + def _get_categorical_string(self, result, offset="", indent=" "): + """ Return a string with the summary for a particular categorical dictionary. + + Parameters: + cat_dict (dict): Dictionary of summary information for a particular tabular file. + offset (str): String of blanks used as offset for every item + indent (str): String of blanks used as the additional amount to indent an item's for readability. + + Returns: + str: Formatted string suitable for saving in a file or printing. + + """ + cat_dict = result.get('Categorical columns', {}) + if not cat_dict: + return "" + count_dict = result['Categorical counts'] + sum_list = [f"{offset}{indent}Categorical column values[Events, Files]:"] + sorted_tuples = sorted(cat_dict.items(), key=lambda x: x[0]) + for entry in sorted_tuples: + sum_list = sum_list + self._get_categorical_col(entry, count_dict, offset="", indent=" ") + return "\n".join(sum_list) + + def _get_dataset_string(self, result, indent=BaseSummary.DISPLAY_INDENT): """ Return a string with the overall summary for all of the tabular files. Parameters: @@ -163,16 +197,16 @@ def _get_dataset_string(result, indent=BaseContext.DISPLAY_INDENT): """ sum_list = [f"Dataset: Total events={result.get('Total events', 0)} " f"Total files={result.get('Total files', 0)}"] - cat_cols = result.get("Categorical columns", {}) - if cat_cols: - sum_list.append(ColumnValueSummaryContext._get_categorical_string(cat_cols, offset="", indent=indent)) + cat_string = self._get_categorical_string(result, offset="", indent=indent) + if cat_string: + sum_list.append(cat_string) val_cols = result.get("Value columns", {}) if val_cols: - sum_list.append(ColumnValueSummaryContext._get_value_string(val_cols, offset="", indent=indent)) + sum_list.append(ColumnValueSummary._get_value_string(val_cols, offset="", indent=indent)) return "\n".join(sum_list) - @staticmethod - def _get_individual_string(result, indent=BaseContext.DISPLAY_INDENT): + def _get_individual_string(self, result, indent=BaseSummary.DISPLAY_INDENT): + """ Return a string with the summary for an individual tabular file. Parameters: @@ -186,22 +220,50 @@ def _get_individual_string(result, indent=BaseContext.DISPLAY_INDENT): sum_list = [f"Total events={result.get('Total events', 0)}"] cat_cols = result.get("Categorical columns", {}) if cat_cols: - sum_list.append(ColumnValueSummaryContext._get_categorical_string(cat_cols, offset=indent, indent=indent)) + sum_list.append(self._get_categorical_string(cat_cols, offset=indent, indent=indent)) val_cols = result.get("Value columns", {}) if val_cols: - sum_list.append(ColumnValueSummaryContext._get_value_string(val_cols, offset=indent, indent=indent)) + sum_list.append(ColumnValueSummary._get_value_string(val_cols, offset=indent, indent=indent)) return "\n".join(sum_list) + def _get_categorical_col(self, entry, count_dict, offset="", indent=" "): + """ Return a string with the summary for a particular categorical column. + + Parameters: + dict_entry(tuple): (Name of the column, summary dict for that column) + offset(str): String of blanks used as offset for all items + indent (str): String of blanks used as the additional amount to indent for this item's readability. + + Returns: + list: Formatted strings, each corresponding to a line in the output. + """ + num_unique = count_dict[entry[0]] + num_disp = min(self.op.max_categorical, num_unique) + col_list = [f"{offset}{indent * 2}{entry[0]}: {num_unique} unique values " + f"(displaying top {num_disp} values)"] + # Create and partition the list of individual entries + value_list = [f"{item[0]}{str(item[1])}" for item in entry[1].items()] + value_list = value_list[:num_disp] + part_list = ColumnValueSummary.partition_list(value_list, self.op.values_per_line) + return col_list + [f"{offset}{indent * 3}{ColumnValueSummary.get_list_str(item)}" for item in part_list] + @staticmethod - def _get_categorical_string(cat_dict, offset="", indent=" "): - sum_list = [f"{offset}{indent}Categorical column values[Events, Files]:"] - for col_name, col_dict in cat_dict.items(): - sum_list.append(f"{offset}{indent*2}{col_name}:") - col_list = [] - for col_value, val_counts in col_dict.items(): - col_list.append(f"{col_value}{str(val_counts)}") - sum_list.append(f"{offset}{indent*3}{' '.join(col_list)}") - return "\n".join(sum_list) + def get_list_str(lst): + return f"{' '.join(str(item) for item in lst)}" + + @staticmethod + def partition_list(lst, n): + """ Partition a list into lists of n items. + + Parameters: + lst (list): List to be partitioned + n (int): Number of items in each sublist + + Returns: + list: list of lists of n elements, the last might have fewer. + + """ + return [lst[i:i + n] for i in range(0, len(lst), n)] @staticmethod def _get_value_string(val_dict, offset="", indent=""): @@ -209,3 +271,8 @@ def _get_value_string(val_dict, offset="", indent=""): for col_name, val_counts in val_dict.items(): sum_list.append(f"{offset}{indent*2}{col_name}{str(val_counts)}") return "\n".join(sum_list) + + @staticmethod + def sort_dict(self, count_dict, reverse=False): + sorted_tuples = sorted(count_dict.items(), key=lambda x: x[1][0], reverse=reverse) + return len(sorted_tuples), sorted_tuples diff --git a/hed/tools/remodeling/operations/summarize_definitions_op.py b/hed/tools/remodeling/operations/summarize_definitions_op.py index c6f6f7001..73c7d5957 100644 --- a/hed/tools/remodeling/operations/summarize_definitions_op.py +++ b/hed/tools/remodeling/operations/summarize_definitions_op.py @@ -2,7 +2,7 @@ from hed import TabularInput from hed.tools.remodeling.operations.base_op import BaseOp -from hed.tools.remodeling.operations.base_context import BaseContext +from hed.tools.remodeling.operations.base_summary import BaseSummary from hed.models.def_expand_gather import DefExpandGatherer @@ -63,36 +63,36 @@ def do_op(self, dispatcher, df, name, sidecar=None): DataFrame: A new DataFrame with the factor columns appended. Side-effect: - Updates the context. + Updates the relevant summary. """ - summary = dispatcher.context_dict.setdefault(self.summary_name, - DefinitionSummaryContext(self, dispatcher.hed_schema)) - summary.update_context({'df': dispatcher.post_proc_data(df), 'name': name, 'sidecar': sidecar, + summary = dispatcher.summary_dicts.setdefault(self.summary_name, + DefinitionSummary(self, dispatcher.hed_schema)) + summary.update_summary({'df': dispatcher.post_proc_data(df), 'name': name, 'sidecar': sidecar, 'schema': dispatcher.hed_schema}) return df -class DefinitionSummaryContext(BaseContext): +class DefinitionSummary(BaseSummary): def __init__(self, sum_op, hed_schema, known_defs=None): - super().__init__(sum_op.SUMMARY_TYPE, sum_op.summary_name, sum_op.summary_filename) + super().__init__(sum_op) self.def_gatherer = DefExpandGatherer(hed_schema, known_defs=known_defs) - def update_context(self, new_context): + def update_summary(self, new_info): """ Update the summary for a given tabular input file. Parameters: - new_context (dict): A dictionary with the parameters needed to update a summary. + new_info (dict): A dictionary with the parameters needed to update a summary. Notes: - The summary needs a "name" str, a "schema" and a "Sidecar". """ - data_input = TabularInput(new_context['df'], sidecar=new_context['sidecar'], name=new_context['name']) - series, def_dict = data_input.series_a, data_input.get_def_dict(new_context['schema']) + data_input = TabularInput(new_info['df'], sidecar=new_info['sidecar'], name=new_info['name']) + series, def_dict = data_input.series_a, data_input.get_def_dict(new_info['schema']) self.def_gatherer.process_def_expands(series, def_dict) - def _get_details_dict(self, def_gatherer): + def get_details_dict(self, def_gatherer): """ Return the summary-specific information in a dictionary. Parameters: @@ -111,7 +111,7 @@ def build_summary_dict(items_dict, title, process_func, display_description=Fals if "#" in str(value): key = key + "/#" if display_description: - description, value = DefinitionSummaryContext.remove_description(value) + description, value = DefinitionSummary.remove_description(value) items[key] = {"description": description, "contents": str(value)} else: if isinstance(value, list): @@ -131,7 +131,7 @@ def build_summary_dict(items_dict, title, process_func, display_description=Fals known_defs_summary.update(errors_summary) return known_defs_summary - def _merge_all(self): + def merge_all_info(self): """ Create an Object containing the definition summary. Returns: @@ -140,7 +140,7 @@ def _merge_all(self): """ return self.def_gatherer - def _get_result_string(self, name, result, indent=BaseContext.DISPLAY_INDENT): + def _get_result_string(self, name, result, indent=BaseSummary.DISPLAY_INDENT): """ Return a formatted string with the summary for the indicated name. Parameters: @@ -161,7 +161,7 @@ def _get_result_string(self, name, result, indent=BaseContext.DISPLAY_INDENT): return self._get_individual_string(result, indent=indent) @staticmethod - def _get_dataset_string(summary_dict, indent=BaseContext.DISPLAY_INDENT): + def _get_dataset_string(summary_dict, indent=BaseSummary.DISPLAY_INDENT): def nested_dict_to_string(data, level=1): result = [] for key, value in data.items(): @@ -190,7 +190,7 @@ def remove_description(def_entry): return description, def_group @staticmethod - def _get_individual_string(result, indent=BaseContext.DISPLAY_INDENT): + def _get_individual_string(result, indent=BaseSummary.DISPLAY_INDENT): """ Return a string with the summary for an individual tabular file. Parameters: diff --git a/hed/tools/remodeling/operations/summarize_hed_tags_op.py b/hed/tools/remodeling/operations/summarize_hed_tags_op.py index faa259e7f..f44c70f21 100644 --- a/hed/tools/remodeling/operations/summarize_hed_tags_op.py +++ b/hed/tools/remodeling/operations/summarize_hed_tags_op.py @@ -4,7 +4,7 @@ from hed.models.sidecar import Sidecar from hed.tools.analysis.hed_tag_counts import HedTagCounts from hed.tools.remodeling.operations.base_op import BaseOp -from hed.tools.remodeling.operations.base_context import BaseContext +from hed.tools.remodeling.operations.base_summary import BaseSummary from hed.models.df_util import get_assembled @@ -77,46 +77,46 @@ def do_op(self, dispatcher, df, name, sidecar=None): Updates the context. """ - summary = dispatcher.context_dict.get(self.summary_name, None) + summary = dispatcher.summary_dicts.get(self.summary_name, None) if not summary: - summary = HedTagSummaryContext(self) - dispatcher.context_dict[self.summary_name] = summary - summary.update_context({'df': dispatcher.post_proc_data(df), 'name': name, + summary = HedTagSummary(self) + dispatcher.summary_dicts[self.summary_name] = summary + summary.update_summary({'df': dispatcher.post_proc_data(df), 'name': name, 'schema': dispatcher.hed_schema, 'sidecar': sidecar}) return df -class HedTagSummaryContext(BaseContext): +class HedTagSummary(BaseSummary): def __init__(self, sum_op): - super().__init__(sum_op.SUMMARY_TYPE, sum_op.summary_name, sum_op.summary_filename) + super().__init__(sum_op) self.tags = sum_op.tags self.expand_context = sum_op.expand_context - def update_context(self, new_context): + def update_summary(self, new_info): """ Update the summary for a given tabular input file. Parameters: - new_context (dict): A dictionary with the parameters needed to update a summary. + new_info (dict): A dictionary with the parameters needed to update a summary. Notes: - The summary needs a "name" str, a "schema", a "df, and a "Sidecar". """ - counts = HedTagCounts(new_context['name'], total_events=len(new_context['df'])) - sidecar = new_context['sidecar'] + counts = HedTagCounts(new_info['name'], total_events=len(new_info['df'])) + sidecar = new_info['sidecar'] if sidecar and not isinstance(sidecar, Sidecar): sidecar = Sidecar(sidecar) - input_data = TabularInput(new_context['df'], sidecar=sidecar, name=new_context['name']) - hed_strings, definitions = get_assembled(input_data, sidecar, new_context['schema'], + input_data = TabularInput(new_info['df'], sidecar=sidecar, name=new_info['name']) + hed_strings, definitions = get_assembled(input_data, sidecar, new_info['schema'], extra_def_dicts=None, join_columns=True, shrink_defs=False, expand_defs=True) # definitions = input_data.get_definitions().gathered_defs for hed in hed_strings: - counts.update_event_counts(hed, new_context['name']) - self.summary_dict[new_context["name"]] = counts + counts.update_event_counts(hed, new_info['name']) + self.summary_dict[new_info["name"]] = counts - def _get_details_dict(self, merge_counts): + def get_details_dict(self, merge_counts): """ Return the summary-specific information in a dictionary. Parameters: @@ -135,7 +135,7 @@ def _get_details_dict(self, merge_counts): "files": [name for name in merge_counts.files.keys()], "Main tags": details, "Other tags": leftovers} - def _get_result_string(self, name, result, indent=BaseContext.DISPLAY_INDENT): + def _get_result_string(self, name, result, indent=BaseSummary.DISPLAY_INDENT): """ Return a formatted string with the summary for the indicated name. Parameters: @@ -155,7 +155,7 @@ def _get_result_string(self, name, result, indent=BaseContext.DISPLAY_INDENT): return self._get_dataset_string(result, indent=indent) return self._get_individual_string(result, indent=indent) - def _merge_all(self): + def merge_all_info(self): """ Create a HedTagCounts containing the overall dataset HED tag summary. Returns: @@ -172,7 +172,7 @@ def _merge_all(self): return all_counts @staticmethod - def _get_dataset_string(result, indent=BaseContext.DISPLAY_INDENT): + def _get_dataset_string(result, indent=BaseSummary.DISPLAY_INDENT): """ Return a string with the overall summary for all of the tabular files. Parameters: @@ -185,11 +185,11 @@ def _get_dataset_string(result, indent=BaseContext.DISPLAY_INDENT): """ sum_list = [f"Dataset: Total events={result.get('total_events', 0)} " f"Total files={len(result.get('files', []))}"] - sum_list = sum_list + HedTagSummaryContext._get_tag_list(result, indent=indent) + sum_list = sum_list + HedTagSummary._get_tag_list(result, indent=indent) return "\n".join(sum_list) @staticmethod - def _get_individual_string(result, indent=BaseContext.DISPLAY_INDENT): + def _get_individual_string(result, indent=BaseSummary.DISPLAY_INDENT): """ Return a string with the summary for an individual tabular file. Parameters: @@ -201,7 +201,7 @@ def _get_individual_string(result, indent=BaseContext.DISPLAY_INDENT): """ sum_list = [f"Total events={result.get('total_events', 0)}"] - sum_list = sum_list + HedTagSummaryContext._get_tag_list(result, indent=indent) + sum_list = sum_list + HedTagSummary._get_tag_list(result, indent=indent) return "\n".join(sum_list) @staticmethod @@ -212,15 +212,15 @@ def _tag_details(tags): return tag_list @staticmethod - def _get_tag_list(tag_info, indent=BaseContext.DISPLAY_INDENT): + def _get_tag_list(tag_info, indent=BaseSummary.DISPLAY_INDENT): sum_list = [f"\n{indent}Main tags[events,files]:"] for category, tags in tag_info['Main tags'].items(): sum_list.append(f"{indent}{indent}{category}:") if tags: - sum_list.append(f"{indent}{indent}{indent}{' '.join(HedTagSummaryContext._tag_details(tags))}") + sum_list.append(f"{indent}{indent}{indent}{' '.join(HedTagSummary._tag_details(tags))}") if tag_info['Other tags']: sum_list.append(f"{indent}Other tags[events,files]:") - sum_list.append(f"{indent}{indent}{' '.join(HedTagSummaryContext._tag_details(tag_info['Other tags']))}") + sum_list.append(f"{indent}{indent}{' '.join(HedTagSummary._tag_details(tag_info['Other tags']))}") return sum_list @staticmethod diff --git a/hed/tools/remodeling/operations/summarize_hed_type_op.py b/hed/tools/remodeling/operations/summarize_hed_type_op.py index fd500b4bc..4cbb96675 100644 --- a/hed/tools/remodeling/operations/summarize_hed_type_op.py +++ b/hed/tools/remodeling/operations/summarize_hed_type_op.py @@ -7,7 +7,7 @@ from hed.tools.analysis.hed_type_counts import HedTypeCounts from hed.tools.analysis.hed_context_manager import HedContextManager from hed.tools.remodeling.operations.base_op import BaseOp -from hed.tools.remodeling.operations.base_context import BaseContext +from hed.tools.remodeling.operations.base_summary import BaseSummary class SummarizeHedTypeOp(BaseOp): @@ -69,50 +69,50 @@ def do_op(self, dispatcher, df, name, sidecar=None): DataFrame: Input DataFrame, unchanged. Side-effect: - Updates the context. + Updates the relevant summary. """ - summary = dispatcher.context_dict.get(self.summary_name, None) + summary = dispatcher.summary_dicts.get(self.summary_name, None) if not summary: - summary = HedTypeSummaryContext(self) - dispatcher.context_dict[self.summary_name] = summary - summary.update_context({'df': dispatcher.post_proc_data(df), 'name': name, + summary = HedTypeSummary(self) + dispatcher.summary_dicts[self.summary_name] = summary + summary.update_summary({'df': dispatcher.post_proc_data(df), 'name': name, 'schema': dispatcher.hed_schema, 'sidecar': sidecar}) return df -class HedTypeSummaryContext(BaseContext): +class HedTypeSummary(BaseSummary): def __init__(self, sum_op): - super().__init__(sum_op.SUMMARY_TYPE, sum_op.summary_name, sum_op.summary_filename) + super().__init__(sum_op) self.type_tag = sum_op.type_tag - def update_context(self, new_context): + def update_summary(self, new_info): """ Update the summary for a given tabular input file. Parameters: - new_context (dict): A dictionary with the parameters needed to update a summary. + new_info (dict): A dictionary with the parameters needed to update a summary. Notes: - The summary needs a "name" str, a "schema", a "df, and a "Sidecar". """ - sidecar = new_context['sidecar'] + sidecar = new_info['sidecar'] if sidecar and not isinstance(sidecar, Sidecar): sidecar = Sidecar(sidecar) - input_data = TabularInput(new_context['df'], sidecar=sidecar, name=new_context['name']) - hed_strings, definitions = get_assembled(input_data, sidecar, new_context['schema'], + input_data = TabularInput(new_info['df'], sidecar=sidecar, name=new_info['name']) + hed_strings, definitions = get_assembled(input_data, sidecar, new_info['schema'], extra_def_dicts=None, join_columns=True, expand_defs=False) - context_manager = HedContextManager(hed_strings, new_context['schema']) - type_values = HedTypeValues(context_manager, definitions, new_context['name'], type_tag=self.type_tag) + context_manager = HedContextManager(hed_strings, new_info['schema']) + type_values = HedTypeValues(context_manager, definitions, new_info['name'], type_tag=self.type_tag) - counts = HedTypeCounts(new_context['name'], self.type_tag) - counts.update_summary(type_values.get_summary(), type_values.total_events, new_context['name']) + counts = HedTypeCounts(new_info['name'], self.type_tag) + counts.update_summary(type_values.get_summary(), type_values.total_events, new_info['name']) counts.add_descriptions(type_values.definitions) - self.summary_dict[new_context["name"]] = counts + self.summary_dict[new_info["name"]] = counts - def _get_details_dict(self, counts): + def get_details_dict(self, counts): """ Return the summary-specific information in a dictionary. Parameters: @@ -124,7 +124,7 @@ def _get_details_dict(self, counts): """ return counts.get_summary() - def _merge_all(self): + def merge_all_info(self): """ Create a HedTypeCounts containing the overall dataset HED type summary. Returns: @@ -136,7 +136,7 @@ def _merge_all(self): all_counts.update(counts) return all_counts - def _get_result_string(self, name, result, indent=BaseContext.DISPLAY_INDENT): + def _get_result_string(self, name, result, indent=BaseSummary.DISPLAY_INDENT): """ Return a formatted string with the summary for the indicated name. Parameters: @@ -157,7 +157,7 @@ def _get_result_string(self, name, result, indent=BaseContext.DISPLAY_INDENT): return self._get_individual_string(result, indent=indent) @staticmethod - def _get_dataset_string(result, indent=BaseContext.DISPLAY_INDENT): + def _get_dataset_string(result, indent=BaseSummary.DISPLAY_INDENT): """ Return a string with the overall summary for all of the tabular files. Parameters: @@ -183,11 +183,11 @@ def _get_dataset_string(result, indent=BaseContext.DISPLAY_INDENT): str1 = str1 + f" Multiple references:{item['events_with_multiple_refs']})" sum_list.append(f"{indent}{key}: {str1}") if item['level_counts']: - sum_list = sum_list + HedTypeSummaryContext._level_details(item['level_counts'], indent=indent) + sum_list = sum_list + HedTypeSummary._level_details(item['level_counts'], indent=indent) return "\n".join(sum_list) @staticmethod - def _get_individual_string(result, indent=BaseContext.DISPLAY_INDENT): + def _get_individual_string(result, indent=BaseSummary.DISPLAY_INDENT): """ Return a string with the summary for an individual tabular file. Parameters: @@ -212,8 +212,8 @@ def _get_individual_string(result, indent=BaseContext.DISPLAY_INDENT): if str1: sum_list.append(f"{indent*3}{str1}") if item['level_counts']: - sum_list = sum_list + HedTypeSummaryContext._level_details(item['level_counts'], - offset=indent, indent=indent) + sum_list = sum_list + HedTypeSummary._level_details(item['level_counts'], + offset=indent, indent=indent) return "\n".join(sum_list) @staticmethod diff --git a/hed/tools/remodeling/operations/summarize_hed_validation_op.py b/hed/tools/remodeling/operations/summarize_hed_validation_op.py index 8120371ad..f395e837e 100644 --- a/hed/tools/remodeling/operations/summarize_hed_validation_op.py +++ b/hed/tools/remodeling/operations/summarize_hed_validation_op.py @@ -5,8 +5,7 @@ from hed.models.sidecar import Sidecar from hed.models.tabular_input import TabularInput from hed.tools.remodeling.operations.base_op import BaseOp -from hed.tools.remodeling.operations.base_context import BaseContext -from hed.validator import HedValidator +from hed.tools.remodeling.operations.base_summary import BaseSummary class SummarizeHedValidationOp(BaseOp): @@ -67,25 +66,25 @@ def do_op(self, dispatcher, df, name, sidecar=None): DataFrame: Input DataFrame, unchanged. Side-effect: - Updates the context. + Updates the relevant summary. """ - summary = dispatcher.context_dict.get(self.summary_name, None) + summary = dispatcher.summary_dicts.get(self.summary_name, None) if not summary: - summary = HedValidationSummaryContext(self) - dispatcher.context_dict[self.summary_name] = summary - summary.update_context({'df': dispatcher.post_proc_data(df), 'name': name, + summary = HedValidationSummary(self) + dispatcher.summary_dicts[self.summary_name] = summary + summary.update_summary({'df': dispatcher.post_proc_data(df), 'name': name, 'schema': dispatcher.hed_schema, 'sidecar': sidecar}) return df -class HedValidationSummaryContext(BaseContext): +class HedValidationSummary(BaseSummary): def __init__(self, sum_op): - super().__init__(sum_op.SUMMARY_TYPE, sum_op.summary_name, sum_op.summary_filename) + super().__init__(sum_op) self.check_for_warnings = sum_op.check_for_warnings - def _get_result_string(self, name, result, indent=BaseContext.DISPLAY_INDENT): + def _get_result_string(self, name, result, indent=BaseSummary.DISPLAY_INDENT): """ Return a formatted string with the summary for the indicated name. Parameters: @@ -115,11 +114,11 @@ def _get_result_string(self, name, result, indent=BaseContext.DISPLAY_INDENT): sum_list = sum_list + [f"{indent*2}Event file validation was incomplete because of sidecar errors"] return "\n".join(sum_list) - def update_context(self, new_context): + def update_summary(self, new_info): """ Update the summary for a given tabular input file. Parameters: - new_context (dict): A dictionary with the parameters needed to update a summary. + new_info (dict): A dictionary with the parameters needed to update a summary. Notes: - The summary needs a "name" str, a schema, a "df", and a "Sidecar". @@ -127,15 +126,15 @@ def update_context(self, new_context): results = self.get_empty_results() results["total_event_files"] = 1 - results["event_issues"][new_context["name"]] = [] - self.summary_dict[new_context["name"]] = results - sidecar = new_context.get('sidecar', None) + results["event_issues"][new_info["name"]] = [] + self.summary_dict[new_info["name"]] = results + sidecar = new_info.get('sidecar', None) filtered_issues = [] if sidecar: if not isinstance(sidecar, Sidecar): - sidecar = Sidecar(files=new_context['sidecar'], name=os.path.basename(sidecar)) + sidecar = Sidecar(files=new_info['sidecar'], name=os.path.basename(sidecar)) results["sidecar_issues"][sidecar.name] = [] - sidecar_issues = sidecar.validate(new_context['schema']) + sidecar_issues = sidecar.validate(new_info['schema']) filtered_issues = ErrorHandler.filter_issues_by_severity(sidecar_issues, ErrorSeverity.ERROR) if not self.check_for_warnings: sidecar_issues = filtered_issues @@ -144,14 +143,14 @@ def update_context(self, new_context): results['total_sidecar_files'] = 1 if not filtered_issues: results['validation_completed'] = True - input_data = TabularInput(new_context['df'], sidecar=sidecar) - issues = input_data.validate(new_context['schema']) + input_data = TabularInput(new_info['df'], sidecar=sidecar) + issues = input_data.validate(new_info['schema']) if not self.check_for_warnings: issues = ErrorHandler.filter_issues_by_severity(issues, ErrorSeverity.ERROR) - results['event_issues'][new_context["name"]] = issues + results['event_issues'][new_info["name"]] = issues results['total_event_issues'] = len(issues) - def _get_details_dict(self, summary_info): + def get_details_dict(self, summary_info): """Return the summary details from the summary_info. Parameters: @@ -163,7 +162,7 @@ def _get_details_dict(self, summary_info): """ return summary_info - def _merge_all(self): + def merge_all_info(self): """ Create a dictionary containing all of the errors in the dataset. Returns: @@ -195,7 +194,7 @@ def get_empty_results(): "validation_completed": False} @staticmethod - def get_error_list(error_dict, count_only=False, indent=BaseContext.DISPLAY_INDENT): + def get_error_list(error_dict, count_only=False, indent=BaseSummary.DISPLAY_INDENT): error_list = [] for key, item in error_dict.items(): if count_only and isinstance(item, list): @@ -207,7 +206,7 @@ def get_error_list(error_dict, count_only=False, indent=BaseContext.DISPLAY_INDE else: error_list.append(f"{indent}{key} issues:") for this_item in item: - error_list.append(f"{indent*2}{HedValidationSummaryContext.format_error(this_item)}") + error_list.append(f"{indent*2}{HedValidationSummary.format_error(this_item)}") return error_list @staticmethod @@ -218,11 +217,11 @@ def format_errors(error_list): def format_error(error): error_str = error['code'] error_locations = [] - HedValidationSummaryContext.update_error_location(error_locations, "row", "ec_row", error) - HedValidationSummaryContext.update_error_location(error_locations, "column", "ec_column", error) - HedValidationSummaryContext.update_error_location(error_locations, "sidecar column", + HedValidationSummary.update_error_location(error_locations, "row", "ec_row", error) + HedValidationSummary.update_error_location(error_locations, "column", "ec_column", error) + HedValidationSummary.update_error_location(error_locations, "sidecar column", "ec_sidecarColumnName", error) - HedValidationSummaryContext.update_error_location(error_locations, "sidecar key", "ec_sidecarKeyName", error) + HedValidationSummary.update_error_location(error_locations, "sidecar key", "ec_sidecarKeyName", error) location_str = ",".join(error_locations) if location_str: error_str = error_str + f"[{location_str}]" diff --git a/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py b/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py index 0a403ac4b..dc68bb065 100644 --- a/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py +++ b/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py @@ -3,7 +3,7 @@ import json from hed.tools import TabularSummary from hed.tools.remodeling.operations.base_op import BaseOp -from hed.tools.remodeling.operations.base_context import BaseContext +from hed.tools.remodeling.operations.base_summary import BaseSummary class SummarizeSidecarFromEventsOp(BaseOp): @@ -68,40 +68,40 @@ def do_op(self, dispatcher, df, name, sidecar=None): DataFrame: A new DataFrame with the factor columns appended. Side-effect: - Updates the context. + Updates the associated summary if applicable. """ - summary = dispatcher.context_dict.get(self.summary_name, None) + summary = dispatcher.summary_dicts.get(self.summary_name, None) if not summary: - summary = EventsToSidecarSummaryContext(self) - dispatcher.context_dict[self.summary_name] = summary - summary.update_context({'df': dispatcher.post_proc_data(df), 'name': name}) + summary = EventsToSidecarSummary(self) + dispatcher.summary_dicts[self.summary_name] = summary + summary.update_summary({'df': dispatcher.post_proc_data(df), 'name': name}) return df -class EventsToSidecarSummaryContext(BaseContext): +class EventsToSidecarSummary(BaseSummary): def __init__(self, sum_op): - super().__init__(sum_op.SUMMARY_TYPE, sum_op.summary_name, sum_op.summary_filename) + super().__init__(sum_op) self.value_cols = sum_op.value_columns self.skip_cols = sum_op.skip_columns - def update_context(self, new_context): + def update_summary(self, new_info): """ Update the summary for a given tabular input file. Parameters: - new_context (dict): A dictionary with the parameters needed to update a summary. + new_info (dict): A dictionary with the parameters needed to update a summary. Notes: - The summary needs a "name" str and a "df". """ - tab_sum = TabularSummary(value_cols=self.value_cols, skip_cols=self.skip_cols, name=new_context["name"]) - tab_sum.update(new_context['df'], new_context['name']) - self.summary_dict[new_context["name"]] = tab_sum + tab_sum = TabularSummary(value_cols=self.value_cols, skip_cols=self.skip_cols, name=new_info["name"]) + tab_sum.update(new_info['df'], new_info['name']) + self.summary_dict[new_info["name"]] = tab_sum - def _get_details_dict(self, summary_info): + def get_details_dict(self, summary_info): """ Return the summary-specific information. Parameters: @@ -116,7 +116,7 @@ def _get_details_dict(self, summary_info): "total_events": summary_info.total_events, "skip_cols": summary_info.skip_cols, "sidecar": summary_info.extract_sidecar_template()} - def _merge_all(self): + def merge_all_info(self): """ Merge summary information from all of the files Returns: @@ -129,7 +129,7 @@ def _merge_all(self): all_sum.update_summary(tab_sum) return all_sum - def _get_result_string(self, name, result, indent=BaseContext.DISPLAY_INDENT): + def _get_result_string(self, name, result, indent=BaseSummary.DISPLAY_INDENT): """ Return a formatted string with the summary for the indicated name. Parameters: @@ -151,7 +151,7 @@ def _get_result_string(self, name, result, indent=BaseContext.DISPLAY_INDENT): return self._get_individual_string(result, indent=indent) @staticmethod - def _get_dataset_string(result, indent=BaseContext.DISPLAY_INDENT): + def _get_dataset_string(result, indent=BaseSummary.DISPLAY_INDENT): """ Return a string with the overall summary for all of the tabular files. Parameters: @@ -170,7 +170,7 @@ def _get_dataset_string(result, indent=BaseContext.DISPLAY_INDENT): return "\n".join(sum_list) @staticmethod - def _get_individual_string(result, indent=BaseContext.DISPLAY_INDENT): + def _get_individual_string(result, indent=BaseSummary.DISPLAY_INDENT): """ Return a string with the summary for an individual tabular file. Parameters: diff --git a/tests/tools/analysis/test_column_name_summary.py b/tests/tools/analysis/test_tabular_column_name_summary.py similarity index 78% rename from tests/tools/analysis/test_column_name_summary.py rename to tests/tools/analysis/test_tabular_column_name_summary.py index 31cb551c0..d2825fcb8 100644 --- a/tests/tools/analysis/test_column_name_summary.py +++ b/tests/tools/analysis/test_tabular_column_name_summary.py @@ -1,6 +1,6 @@ import json import unittest -from hed.tools.analysis.column_name_summary import ColumnNameSummary +from hed.tools.analysis.tabular_column_name_summary import TabularColumnNameSummary class Test(unittest.TestCase): @@ -17,16 +17,16 @@ def tearDownClass(cls): pass def test_constructor(self): - column_summary1 = ColumnNameSummary(name='Dataset') - self.assertIsInstance(column_summary1, ColumnNameSummary) + column_summary1 = TabularColumnNameSummary(name='Dataset') + self.assertIsInstance(column_summary1, TabularColumnNameSummary) self.assertEqual(column_summary1.name, 'Dataset') self.assertFalse(column_summary1.file_dict) self.assertFalse(column_summary1.unique_headers) - column_summary2 = ColumnNameSummary() - self.assertIsInstance(column_summary2, ColumnNameSummary) + column_summary2 = TabularColumnNameSummary() + self.assertIsInstance(column_summary2, TabularColumnNameSummary) def test_update(self): - column_summary = ColumnNameSummary() + column_summary = TabularColumnNameSummary() column_summary.update('run-01', self.columns1) column_summary.update('run-02', self.columns1) self.assertEqual(len(column_summary.unique_headers), 1) @@ -41,7 +41,7 @@ def test_update(self): self.assertEqual(context.exception.args[0], "FileHasChangedColumnNames") def test_update_headers(self): - column_summary = ColumnNameSummary() + column_summary = TabularColumnNameSummary() pos1 = column_summary.update_headers(self.columns1) self.assertEqual(pos1, 0) pos2 = column_summary.update_headers(self.columns1) @@ -50,7 +50,7 @@ def test_update_headers(self): self.assertEqual(pos3, 1) def test_get_summary(self): - column_summary = ColumnNameSummary('Dataset') + column_summary = TabularColumnNameSummary('Dataset') column_summary.update('run-01', self.columns1) column_summary.update('run-02', self.columns1) summary1 = column_summary.get_summary() diff --git a/tests/tools/analysis/test_temporal_event.py b/tests/tools/analysis/test_temporal_event.py index 8a057871e..ed3523a59 100644 --- a/tests/tools/analysis/test_temporal_event.py +++ b/tests/tools/analysis/test_temporal_event.py @@ -2,8 +2,7 @@ import unittest from hed import schema as hedschema -from hed.models import Sidecar, TabularInput, HedString, HedTag, HedGroup -from hed.tools import assemble_hed +from hed.models import HedString, HedGroup from hed.tools.analysis.temporal_event import TemporalEvent diff --git a/tests/tools/remodeling/operations/test_base_context.py b/tests/tools/remodeling/operations/test_base_summary.py similarity index 67% rename from tests/tools/remodeling/operations/test_base_context.py rename to tests/tools/remodeling/operations/test_base_summary.py index 68e8ddc0f..3dc2d10fc 100644 --- a/tests/tools/remodeling/operations/test_base_context.py +++ b/tests/tools/remodeling/operations/test_base_summary.py @@ -1,26 +1,46 @@ import os import shutil import unittest -from hed.tools.remodeling.operations.base_context import BaseContext +from hed.tools.remodeling.operations.base_summary import BaseSummary +from hed.tools.remodeling.operations.base_op import BaseOp -class TestContext(BaseContext): +class TestOp(BaseOp): + PARAMS = { + "operation": "test_summary_op", + "required_parameters": { + "summary_name": str, + "summary_filename": str + }, + "optional_parameters": {} + } - def __init__(self): - super().__init__("TestContext", "test", "test_context") + SUMMARY_TYPE = "test_sum" + + def __init__(self, parameters): + super().__init__(self.PARAMS, parameters) + self.summary_name = parameters['summary_name'] + self.summary_filename = parameters['summary_filename'] + + +class TestSummary(BaseSummary): + + def __init__(self, op): + + super().__init__(op) self.summary_dict["data1"] = "test data 1" self.summary_dict["data2"] = "test data 2" - def _get_details_dict(self, include_individual=True): - summary = {"name": self.context_name} + def get_details_dict(self, include_individual=True): + summary = {"name": self.op.summary_name} if include_individual: summary["more"] = "more stuff" return summary - def _merge_all(self): - return {"merged": self.context_name} + def merge_all_info(self): + return {"merged": self.op.summary_name} - def update_context(self, context_dict): + def update_summary(self, info_dict): pass @@ -29,19 +49,17 @@ class Test(unittest.TestCase): @classmethod def setUpClass(cls): summary_dir = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../../../data/remodel_tests/temp')) + '../../../data/remodel_tests/temp')) cls.summary_dir = summary_dir def test_constructor(self): - with self.assertRaises(TypeError) as context: - BaseContext('apple', 'banana', 'pear') - self.assertTrue(context.exception.args[0]) - - test = TestContext() - self.assertIsInstance(test, TestContext) + op2 = TestOp({"summary_name": "test", "summary_filename": "test_context"}) + test = TestSummary(op2) + self.assertIsInstance(test, TestSummary) def test_get_text_summary(self): - test = TestContext() + op = TestOp({"summary_name": "test", "summary_filename": "test_context"}) + test = TestSummary(op) out1 = test.get_text_summary(individual_summaries="none") self.assertIsInstance(out1, dict) self.assertTrue(out1["Dataset"]) @@ -62,11 +80,12 @@ def test_save_no_ind(self): if os.path.isdir(self.summary_dir): shutil.rmtree(self.summary_dir) os.makedirs(self.summary_dir) - test1 = TestContext() + op = TestOp({"summary_name": "test", "summary_filename": "test_context"}) + test1 = TestSummary(op) file_list1 = os.listdir(self.summary_dir) self.assertFalse(file_list1) test1.save(self.summary_dir, individual_summaries="none") - dir_full = os.path.realpath(os.path.join(self.summary_dir, test1.context_name + '/')) + dir_full = os.path.realpath(os.path.join(self.summary_dir, test1.op.summary_name + '/')) file_list2 = os.listdir(dir_full) self.assertEqual(len(file_list2), 1) basename = os.path.basename(file_list2[0]) @@ -78,32 +97,34 @@ def test_save_consolidated(self): if os.path.isdir(self.summary_dir): shutil.rmtree(self.summary_dir) os.makedirs(self.summary_dir) - test1 = TestContext() + op = TestOp({"summary_name": "test", "summary_filename": "test_context"}) + test1 = TestSummary(op) file_list1 = os.listdir(self.summary_dir) self.assertFalse(file_list1) - dir_ind = os.path.realpath(os.path.join(self.summary_dir, test1.context_name + '/', + dir_ind = os.path.realpath(os.path.join(self.summary_dir, test1.op.summary_name + '/', "individual_summaries/")) self.assertFalse(os.path.isdir(dir_ind)) test1.save(self.summary_dir, file_formats=['.json', '.tsv'], individual_summaries="consolidated") - dir_full = os.path.realpath(os.path.join(self.summary_dir, test1.context_name + '/')) + dir_full = os.path.realpath(os.path.join(self.summary_dir, test1.op.summary_name + '/')) file_list2 = os.listdir(dir_full) self.assertEqual(len(file_list2), 1) basename = os.path.basename(file_list2[0]) self.assertTrue(basename.startswith('test_context')) self.assertEqual(os.path.splitext(basename)[1], '.json') shutil.rmtree(self.summary_dir) - + def test_save_separate(self): if os.path.isdir(self.summary_dir): shutil.rmtree(self.summary_dir) os.makedirs(self.summary_dir) - test1 = TestContext() + op = TestOp({"summary_name": "test", "summary_filename": "test_context"}) + test1 = TestSummary(op) file_list1 = os.listdir(self.summary_dir) self.assertFalse(file_list1) test1.save(self.summary_dir, file_formats=['.json', '.tsv'], individual_summaries="separate") - dir_ind = os.path.realpath(os.path.join(self.summary_dir, test1.context_name + '/', + dir_ind = os.path.realpath(os.path.join(self.summary_dir, test1.op.summary_name + '/', "individual_summaries/")) - dir_full = os.path.realpath(os.path.join(self.summary_dir, test1.context_name + '/')) + dir_full = os.path.realpath(os.path.join(self.summary_dir, test1.op.summary_name + '/')) self.assertTrue(os.path.isdir(dir_ind)) file_list4 = os.listdir(dir_full) self.assertEqual(len(file_list4), 2) diff --git a/tests/tools/remodeling/operations/test_summarize_column_names_op.py b/tests/tools/remodeling/operations/test_summarize_column_names_op.py index 4593a8aed..ddd5a8658 100644 --- a/tests/tools/remodeling/operations/test_summarize_column_names_op.py +++ b/tests/tools/remodeling/operations/test_summarize_column_names_op.py @@ -2,9 +2,9 @@ import os import pandas as pd import unittest -from hed.tools.analysis.column_name_summary import ColumnNameSummary +from hed.tools.analysis.tabular_column_name_summary import TabularColumnNameSummary from hed.tools.remodeling.dispatcher import Dispatcher -from hed.tools.remodeling.operations.summarize_column_names_op import ColumnNameSummaryContext, SummarizeColumnNamesOp +from hed.tools.remodeling.operations.summarize_column_names_op import ColumnNameSummary, SummarizeColumnNamesOp class Test(unittest.TestCase): @@ -68,7 +68,7 @@ def test_summary_op(self): self.assertEqual(len(df), old_len) df1 = df.drop(labels='onset', axis=1) sum_op.do_op(dispatch, df1, 'run-03') - this_context = dispatch.context_dict[sum_op.summary_name] + this_context = dispatch.summary_dicts[sum_op.summary_name] for key, item in this_context.summary_dict.items(): summary = item.get_summary() self.assertIsInstance(summary, dict) @@ -76,8 +76,8 @@ def test_summary_op(self): self.assertIsInstance(json_value, str) new_summary = json.loads(json_value) self.assertIsInstance(new_summary, dict) - merged1 = this_context._merge_all() - self.assertIsInstance(merged1, ColumnNameSummary) + merged1 = this_context.merge_all_info() + self.assertIsInstance(merged1, TabularColumnNameSummary) self.assertEqual(len(merged1.file_dict), 3) self.assertEqual(len(merged1.unique_headers), 2) with self.assertRaises(ValueError) as except_context: @@ -90,10 +90,10 @@ def test_summary(self): op = SummarizeColumnNamesOp(parms) df, df_new = self.get_dfs(op, 'run-01', dispatch) self.assertEqual(len(df), len(df_new)) - context_dict = dispatch.context_dict + context_dict = dispatch.summary_dicts self.assertIsInstance(context_dict, dict) self.get_dfs(op, 'run-02', dispatch) - context = dispatch.context_dict['columns'] + context = dispatch.summary_dicts['columns'] summary = context.get_summary() dataset_sum = summary['Dataset'] json_str = json.dumps(dataset_sum) @@ -110,8 +110,8 @@ def test_text_summary(self): op = SummarizeColumnNamesOp(parms) self.get_dfs(op, 'run-01', dispatch) self.get_dfs(op, 'run-02', dispatch) - context = dispatch.context_dict['columns'] - self.assertIsInstance(context, ColumnNameSummaryContext) + context = dispatch.summary_dicts['columns'] + self.assertIsInstance(context, ColumnNameSummary) text_summary1 = context.get_text_summary() self.assertIsInstance(text_summary1, dict) @@ -126,7 +126,7 @@ def test_multiple(self): op.do_op(dispatch, dispatch.prep_data(df1), 'run-03') df2 = pd.DataFrame(self.data1, columns=self.sample_columns2) op.do_op(dispatch, dispatch.prep_data(df2), 'run-05') - context = dispatch.context_dict['columns'] + context = dispatch.summary_dicts['columns'] summary = context.get_summary() text_summary1 = context.get_text_summary() self.assertEqual(len(summary), 2) diff --git a/tests/tools/remodeling/operations/test_summarize_column_values_op.py b/tests/tools/remodeling/operations/test_summarize_column_values_op.py index b6b305a4c..2d69655c0 100644 --- a/tests/tools/remodeling/operations/test_summarize_column_values_op.py +++ b/tests/tools/remodeling/operations/test_summarize_column_values_op.py @@ -4,7 +4,7 @@ import unittest from hed.tools.remodeling.dispatcher import Dispatcher from hed.tools.remodeling.operations.summarize_column_values_op import \ - ColumnValueSummaryContext, SummarizeColumnValuesOp + ColumnValueSummary, SummarizeColumnValuesOp from hed.tools.util.io_util import get_file_list @@ -50,7 +50,7 @@ def test_do_ops(self): sum_op = SummarizeColumnValuesOp(parms) dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions='8.1.0') self.get_dfs(sum_op, 'name1', dispatch) - context1 = dispatch.context_dict.get(parms['summary_name'], None) + context1 = dispatch.summary_dicts.get(parms['summary_name'], None) summary1 = context1.summary_dict['name1'] cat_len = len(summary1.categorical_info) self.assertEqual(cat_len, len(self.sample_columns) - 2, @@ -58,7 +58,7 @@ def test_do_ops(self): self.get_dfs(sum_op, 'name2', dispatch) self.assertEqual(cat_len, len(self.sample_columns) - 2, "do_ops updating does not change number of categorical columns.") - context = dispatch.context_dict['test summary'] + context = dispatch.summary_dicts['test summary'] self.assertEqual(len(context.summary_dict), 2) def test_get_summary(self): @@ -67,9 +67,9 @@ def test_get_summary(self): dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions='8.1.0') self.get_dfs(sum_op, 'name1', dispatch) - cont = dispatch.context_dict + cont = dispatch.summary_dicts context1 = cont.get("test summary", None) - self.assertIsInstance(context1, ColumnValueSummaryContext, "get_summary testing ColumnValueSummary") + self.assertIsInstance(context1, ColumnValueSummary, "get_summary testing ColumnValueSummary") # summary1 = context1.get_summary() # self.assertIsInstance(summary1, dict, "get_summary returns a dictionary") # self.assertIsInstance(summary1["Dataset"], dict) @@ -81,7 +81,7 @@ def test_get_summary(self): self.assertIsInstance(text_summary["Dataset"], str) self.get_dfs(sum_op, 'name2', dispatch) self.get_dfs(sum_op, 'name3', dispatch) - context2 = dispatch.context_dict.get(parms['summary_name'], None) + context2 = dispatch.summary_dicts.get(parms['summary_name'], None) summary2 = context2.get_summary() self.assertIsInstance(summary2, dict) text_summary2 = context2.get_text_summary(individual_summaries="consolidated") @@ -101,7 +101,7 @@ def test_summary_op(self): sum_op = parsed_commands[1] df = sum_op.do_op(dispatch, dispatch.prep_data(df), os.path.basename(events)) self.assertEqual(len(df), old_len) - context_dict = dispatch.context_dict + context_dict = dispatch.summary_dicts for key, item in context_dict.items(): text_value = item.get_text_summary() self.assertTrue(text_value) diff --git a/tests/tools/remodeling/operations/test_summarize_definitions_op.py b/tests/tools/remodeling/operations/test_summarize_definitions_op.py index a55f61c6e..c01e949be 100644 --- a/tests/tools/remodeling/operations/test_summarize_definitions_op.py +++ b/tests/tools/remodeling/operations/test_summarize_definitions_op.py @@ -3,7 +3,7 @@ import unittest import pandas as pd from hed.tools.remodeling.dispatcher import Dispatcher -from hed.tools.remodeling.operations.summarize_definitions_op import SummarizeDefinitionsOp, DefinitionSummaryContext +from hed.tools.remodeling.operations.summarize_definitions_op import SummarizeDefinitionsOp, DefinitionSummary class Test(unittest.TestCase): @@ -44,8 +44,8 @@ def test_do_op(self): df_new = sum_op.do_op(dispatch, dispatch.prep_data(df), 'subj2_run1', sidecar=self.json_path) self.assertEqual(200, len(df_new), " dataframe length is correct") self.assertEqual(10, len(df_new.columns), " has correct number of columns") - self.assertIn(sum_op.summary_name, dispatch.context_dict) - self.assertIsInstance(dispatch.context_dict[sum_op.summary_name], DefinitionSummaryContext) + self.assertIn(sum_op.summary_name, dispatch.summary_dicts) + self.assertIsInstance(dispatch.summary_dicts[sum_op.summary_name], DefinitionSummary) def test_summary(self): dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions=['8.1.0']) @@ -55,9 +55,9 @@ def test_summary(self): df_new = sum_op.do_op(dispatch, dispatch.prep_data(df), 'subj2_run1', sidecar=self.json_path) self.assertEqual(200, len(df_new), " dataframe length is correct") self.assertEqual(10, len(df_new.columns), " has correct number of columns") - self.assertIn(sum_op.summary_name, dispatch.context_dict) - self.assertIsInstance(dispatch.context_dict[sum_op.summary_name], DefinitionSummaryContext) - # print(str(dispatch.context_dict[sum_op.summary_name].get_text_summary()['Dataset'])) + self.assertIn(sum_op.summary_name, dispatch.summary_dicts) + self.assertIsInstance(dispatch.summary_dicts[sum_op.summary_name], DefinitionSummary) + # print(str(dispatch.summary_dicts[sum_op.summary_name].get_text_summary()['Dataset'])) def test_summary_errors(self): dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions=['8.1.0']) @@ -76,9 +76,9 @@ def test_summary_errors(self): "(Def-expand/A1/4, (Action/4, Age/5, Item-count/2))", ]}) df_new = sum_op.do_op(dispatch, dispatch.prep_data(df), 'subj2_run1', sidecar=self.json_path) - self.assertIn(sum_op.summary_name, dispatch.context_dict) - self.assertIsInstance(dispatch.context_dict[sum_op.summary_name], DefinitionSummaryContext) - #print(str(dispatch.context_dict[sum_op.summary_name].get_text_summary()['Dataset'])) + self.assertIn(sum_op.summary_name, dispatch.summary_dicts) + self.assertIsInstance(dispatch.summary_dicts[sum_op.summary_name], DefinitionSummary) + #print(str(dispatch.summary_dicts[sum_op.summary_name].get_text_summary()['Dataset'])) if __name__ == '__main__': unittest.main() diff --git a/tests/tools/remodeling/operations/test_summarize_hed_tags_op.py b/tests/tools/remodeling/operations/test_summarize_hed_tags_op.py index d5a298202..f6f88fe5e 100644 --- a/tests/tools/remodeling/operations/test_summarize_hed_tags_op.py +++ b/tests/tools/remodeling/operations/test_summarize_hed_tags_op.py @@ -4,7 +4,7 @@ import pandas as pd from hed.models.df_util import get_assembled from hed.tools.remodeling.dispatcher import Dispatcher -from hed.tools.remodeling.operations.summarize_hed_tags_op import SummarizeHedTagsOp, HedTagSummaryContext +from hed.tools.remodeling.operations.summarize_hed_tags_op import SummarizeHedTagsOp, HedTagSummary class Test(unittest.TestCase): @@ -58,12 +58,12 @@ def test_do_op(self): df_new = sum_op.do_op(dispatch, dispatch.prep_data(df), 'subj2_run1', sidecar=self.json_path) self.assertEqual(200, len(df_new), "summarize_hed_type_op dataframe length is correct") self.assertEqual(10, len(df_new.columns), "summarize_hed_type_op has correct number of columns") - self.assertIn(sum_op.summary_name, dispatch.context_dict) - self.assertIsInstance(dispatch.context_dict[sum_op.summary_name], HedTagSummaryContext) - x = dispatch.context_dict[sum_op.summary_name].summary_dict['subj2_run1'] - self.assertEqual(len(dispatch.context_dict[sum_op.summary_name].summary_dict['subj2_run1'].tag_dict), 47) + self.assertIn(sum_op.summary_name, dispatch.summary_dicts) + self.assertIsInstance(dispatch.summary_dicts[sum_op.summary_name], HedTagSummary) + x = dispatch.summary_dicts[sum_op.summary_name].summary_dict['subj2_run1'] + self.assertEqual(len(dispatch.summary_dicts[sum_op.summary_name].summary_dict['subj2_run1'].tag_dict), 47) df_new = sum_op.do_op(dispatch, dispatch.prep_data(df), 'subj2_run2', sidecar=self.json_path) - self.assertEqual(len(dispatch.context_dict[sum_op.summary_name].summary_dict['subj2_run2'].tag_dict), 47) + self.assertEqual(len(dispatch.summary_dicts[sum_op.summary_name].summary_dict['subj2_run2'].tag_dict), 47) def test_quick3(self): from hed.models import TabularInput, Sidecar @@ -125,9 +125,9 @@ def test_get_summary_details(self): self.assertIsInstance(sum_op, SummarizeHedTagsOp, "constructor creates an object of the correct type") df = pd.read_csv(self.data_path, delimiter='\t', header=0, keep_default_na=False, na_values=",null") sum_op.do_op(dispatch, dispatch.prep_data(df), 'subj2_run1', sidecar=self.json_path) - self.assertIn(sum_op.summary_name, dispatch.context_dict) - sum_context = dispatch.context_dict[sum_op.summary_name] - self.assertIsInstance(sum_context, HedTagSummaryContext) + self.assertIn(sum_op.summary_name, dispatch.summary_dicts) + sum_context = dispatch.summary_dicts[sum_op.summary_name] + self.assertIsInstance(sum_context, HedTagSummary) sum_obj1 = sum_context.get_summary_details() self.assertIsInstance(sum_obj1, dict) json_str1 = json.dumps(sum_obj1, indent=4) @@ -135,7 +135,7 @@ def test_get_summary_details(self): json_obj1 = json.loads(json_str1) self.assertIsInstance(json_obj1, dict) sum_op.do_op(dispatch, dispatch.prep_data(df), 'subj2_run2', sidecar=self.json_path) - sum_context2 = dispatch.context_dict[sum_op.summary_name] + sum_context2 = dispatch.summary_dicts[sum_op.summary_name] sum_obj2 = sum_context2.get_summary_details() json_str2 = json.dumps(sum_obj2, indent=4) self.assertIsInstance(json_str2, str) @@ -150,7 +150,7 @@ def test_get_summary_text_summary(self): df = dispatch.prep_data(df) sum_op.do_op(dispatch, df, 'subj2_run1', sidecar=self.json_path) sum_op.do_op(dispatch, df, 'subj2_run2', sidecar=self.json_path) - sum_context1 = dispatch.context_dict[sum_op.summary_name] + sum_context1 = dispatch.summary_dicts[sum_op.summary_name] text_sum_none = sum_context1.get_text_summary(individual_summaries="none") self.assertIn('Dataset', text_sum_none) self.assertIsInstance(text_sum_none['Dataset'], str) @@ -203,7 +203,7 @@ def test_sample_example(self): df = dispatch.prep_data(df) for operation in dispatch.parsed_ops: df = operation.do_op(dispatch, df, "sample", sidecar=sidecar_path) - context_dict = dispatch.context_dict.get("summarize_hed_tags") + context_dict = dispatch.summary_dicts.get("summarize_hed_tags") text_summary = context_dict.get_text_summary() self.assertIsInstance(text_summary["Dataset"], str) diff --git a/tests/tools/remodeling/operations/test_summarize_hed_type_op.py b/tests/tools/remodeling/operations/test_summarize_hed_type_op.py index c7b18ad90..faa3b79bd 100644 --- a/tests/tools/remodeling/operations/test_summarize_hed_type_op.py +++ b/tests/tools/remodeling/operations/test_summarize_hed_type_op.py @@ -5,7 +5,7 @@ from hed.models import Sidecar from hed.schema import load_schema_version from hed.tools.remodeling.dispatcher import Dispatcher -from hed.tools.remodeling.operations.summarize_hed_type_op import SummarizeHedTypeOp, HedTypeSummaryContext +from hed.tools.remodeling.operations.summarize_hed_type_op import SummarizeHedTypeOp, HedTypeSummary class Test(unittest.TestCase): @@ -66,14 +66,14 @@ def test_summary(self): parsed_commands, errors = Dispatcher.parse_operations(parms) sum_op = parsed_commands[2] sum_op.do_op(dispatch, dispatch.prep_data(df), 'run-01', sidecar=self.sidecar_path) - context1 = dispatch.context_dict['AOMIC_condition_variables'] + context1 = dispatch.summary_dicts['AOMIC_condition_variables'] summary1 = context1.get_summary() self.assertIn('run-01', summary1['Individual files']) self.assertEqual(len(summary1['Individual files']), 1) summary1a = context1.get_summary() self.assertIsInstance(summary1a['Dataset'], dict) sum_op.do_op(dispatch, dispatch.prep_data(df), 'run-02', sidecar=self.sidecar_path) - context2 = dispatch.context_dict['AOMIC_condition_variables'] + context2 = dispatch.summary_dicts['AOMIC_condition_variables'] summary2 = context2.get_summary(individual_summaries="separate") self.assertEqual(summary2['Dataset']['Overall summary']['files'][0], 'run-01') self.assertEqual(len(summary2['Dataset']['Overall summary']['files']), 2) @@ -88,7 +88,7 @@ def test_text_summary_with_levels(self): parsed_commands, errors = Dispatcher.parse_operations(parms) sum_op = parsed_commands[2] sum_op.do_op(dispatch, dispatch.prep_data(df), 'run-01', sidecar=self.sidecar_path_wh) - context1 = dispatch.context_dict['AOMIC_condition_variables'] + context1 = dispatch.summary_dicts['AOMIC_condition_variables'] text_summary1 = context1.get_text_summary() self.assertIsInstance(text_summary1, dict) @@ -105,14 +105,14 @@ def test_text_summary(self): sum_op = parsed_commands[2] df = sum_op.do_op(dispatch, dispatch.prep_data(df), os.path.basename(self.events), sidecar=sidecar) self.assertEqual(len(df), old_len) - context_dict = dispatch.context_dict + context_dict = dispatch.summary_dicts self.assertIsInstance(context_dict, dict) - context1 = dispatch.context_dict['AOMIC_condition_variables'] - self.assertIsInstance(context1, HedTypeSummaryContext) + context1 = dispatch.summary_dicts['AOMIC_condition_variables'] + self.assertIsInstance(context1, HedTypeSummary) text_summary1 = context1.get_text_summary() self.assertIsInstance(text_summary1, dict) sum_op.do_op(dispatch, dispatch.prep_data(df), 'new_events', sidecar=sidecar) - context2 = dispatch.context_dict['AOMIC_condition_variables'] + context2 = dispatch.summary_dicts['AOMIC_condition_variables'] text_summary2 = context2.get_text_summary() self.assertIsInstance(text_summary2, dict) self.assertEqual(len(text_summary1["Individual files"]), 1) diff --git a/tests/tools/remodeling/operations/test_summarize_hed_validation_op.py b/tests/tools/remodeling/operations/test_summarize_hed_validation_op.py index 451528428..7b76e7c1c 100644 --- a/tests/tools/remodeling/operations/test_summarize_hed_validation_op.py +++ b/tests/tools/remodeling/operations/test_summarize_hed_validation_op.py @@ -4,7 +4,7 @@ import pandas as pd from hed.tools.remodeling.dispatcher import Dispatcher from hed.tools.remodeling.operations.summarize_hed_validation_op import SummarizeHedValidationOp, \ - HedValidationSummaryContext + HedValidationSummary class Test(unittest.TestCase): @@ -54,15 +54,15 @@ def test_do_op(self): self.assertIsInstance(sum_op, SummarizeHedValidationOp, "constructor creates an object of the correct type") df = pd.read_csv(self.data_path, delimiter='\t', header=0, keep_default_na=False, na_values=",null") sum_op.do_op(dispatch, dispatch.prep_data(df), 'subj2_run1', sidecar=self.json_path) - self.assertIn(sum_op.summary_name, dispatch.context_dict) - self.assertIsInstance(dispatch.context_dict[sum_op.summary_name], HedValidationSummaryContext) - sub1 = dispatch.context_dict[sum_op.summary_name].summary_dict['subj2_run1'] + self.assertIn(sum_op.summary_name, dispatch.summary_dicts) + self.assertIsInstance(dispatch.summary_dicts[sum_op.summary_name], HedValidationSummary) + sub1 = dispatch.summary_dicts[sum_op.summary_name].summary_dict['subj2_run1'] self.assertEqual(len(sub1['event_issues']), 1) sum_op.do_op(dispatch, dispatch.prep_data(df), 'subj2_run2', sidecar=self.json_path) self.assertEqual(len(sub1['event_issues']), 1) sum_op.do_op(dispatch, dispatch.prep_data(df), 'subj2_run3', sidecar=self.bad_json_path) - self.assertEqual(len(dispatch.context_dict[sum_op.summary_name].summary_dict), 3) - run3 = dispatch.context_dict[sum_op.summary_name].summary_dict['subj2_run3'] + self.assertEqual(len(dispatch.summary_dicts[sum_op.summary_name].summary_dict), 3) + run3 = dispatch.summary_dicts[sum_op.summary_name].summary_dict['subj2_run3'] self.assertEqual(run3["total_sidecar_issues"], 4) def test_get_summary_details(self): @@ -71,7 +71,7 @@ def test_get_summary_details(self): sum_op = SummarizeHedValidationOp(parms) df = pd.read_csv(self.data_path, delimiter='\t', header=0, keep_default_na=False, na_values=",null") sum_op.do_op(dispatch, dispatch.prep_data(df), 'subj2_run1', sidecar=self.json_path) - sum_context = dispatch.context_dict[sum_op.summary_name] + sum_context = dispatch.summary_dicts[sum_op.summary_name] sum_obj1 = sum_context.get_summary_details() self.assertIsInstance(sum_obj1, dict) json_str1 = json.dumps(sum_obj1, indent=4) @@ -79,7 +79,7 @@ def test_get_summary_details(self): json_obj1 = json.loads(json_str1) self.assertIsInstance(json_obj1, dict) sum_op.do_op(dispatch, dispatch.prep_data(df), 'subj2_run2', sidecar=self.json_path) - sum_context2 = dispatch.context_dict[sum_op.summary_name] + sum_context2 = dispatch.summary_dicts[sum_op.summary_name] sum_obj2 = sum_context2.get_summary_details() json_str2 = json.dumps(sum_obj2, indent=4) self.assertIsInstance(json_str2, str) @@ -97,7 +97,7 @@ def test_get_summary_text_summary(self): df = dispatch.prep_data(df) sum_op.do_op(dispatch, df, 'subj2_run1', sidecar=self.bad_json_path) - sum_context1 = dispatch.context_dict[sum_op.summary_name] + sum_context1 = dispatch.summary_dicts[sum_op.summary_name] text_sum1 = sum_context1.get_text_summary(individual_summaries="separate") sum_op.do_op(dispatch, df, 'subj2_run2', sidecar=self.json_path) sum_op.do_op(dispatch, df, 'subj2_run3', sidecar=self.bad_json_path) @@ -112,7 +112,7 @@ def test_with_sample_data(self): parms = json.loads(self.json_parms) sum_op = SummarizeHedValidationOp(parms) sum_op.do_op(dispatch, df, 'sub-0013_task-stopsignal_acq-seq_events.tsv', sidecar=self.sample_sidecar_path) - sum_context1 = dispatch.context_dict[sum_op.summary_name] + sum_context1 = dispatch.summary_dicts[sum_op.summary_name] if __name__ == '__main__': diff --git a/tests/tools/remodeling/operations/test_summarize_sidecar_from_events_op.py b/tests/tools/remodeling/operations/test_summarize_sidecar_from_events_op.py index cd02b4962..7f4fc6cdb 100644 --- a/tests/tools/remodeling/operations/test_summarize_sidecar_from_events_op.py +++ b/tests/tools/remodeling/operations/test_summarize_sidecar_from_events_op.py @@ -2,7 +2,7 @@ import pandas as pd import unittest from hed.tools.remodeling.dispatcher import Dispatcher -from hed.tools.remodeling.operations.summarize_sidecar_from_events_op import EventsToSidecarSummaryContext, \ +from hed.tools.remodeling.operations.summarize_sidecar_from_events_op import EventsToSidecarSummary, \ SummarizeSidecarFromEventsOp @@ -42,7 +42,7 @@ def test_do_ops(self): df1 = pd.DataFrame(self.sample_data, columns=self.sample_columns) df1a = pd.DataFrame(self.sample_data, columns=self.sample_columns) sum_op.do_op(dispatch, dispatch.prep_data(df1), 'name1') - context1 = dispatch.context_dict.get(self.base_parameters['summary_name'], None) + context1 = dispatch.summary_dicts.get(self.base_parameters['summary_name'], None) summary = context1.summary_dict["name1"] cat_len = len(summary.categorical_info) cat_base = len(self.sample_columns) - len(self.base_parameters["skip_columns"]) - \ @@ -57,8 +57,8 @@ def test_get_summary(self): dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions=['8.1.0']) df1 = pd.DataFrame(self.sample_data, columns=self.sample_columns) sum_op.do_op(dispatch, dispatch.prep_data(df1), 'name1') - context1 = dispatch.context_dict.get(self.base_parameters['summary_name'], None) - self.assertIsInstance(context1, EventsToSidecarSummaryContext, "get_summary testing EventsToSidecarSummary") + context1 = dispatch.summary_dicts.get(self.base_parameters['summary_name'], None) + self.assertIsInstance(context1, EventsToSidecarSummary, "get_summary testing EventsToSidecarSummary") summary1 = context1.get_summary() self.assertIsInstance(summary1, dict, "get_summary returns a dictionary by default") self.assertIsInstance(summary1["Dataset"], dict) @@ -76,8 +76,8 @@ def test_get_summary(self): self.assertIsInstance(summary_text5, dict) self.assertGreater(len(summary_text4['Dataset']), len(summary_text5['Dataset'])) sum_op.do_op(dispatch, dispatch.prep_data(df1), 'name2') - context2 = dispatch.context_dict.get(self.base_parameters['summary_name'], None) - self.assertIsInstance(context1, EventsToSidecarSummaryContext, "get_summary testing EventsToSidecarSummary") + context2 = dispatch.summary_dicts.get(self.base_parameters['summary_name'], None) + self.assertIsInstance(context1, EventsToSidecarSummary, "get_summary testing EventsToSidecarSummary") summary_text6 = context2.get_text_summary(individual_summaries="separate") self.assertIsInstance(summary_text6, dict)