From 5d8044e70e2a4e47ee98bbfb3734c5c948597518 Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Tue, 22 Aug 2023 15:52:39 -0500 Subject: [PATCH 1/4] Updating tests for ErrorManager --- hed/models/base_input.py | 6 +- hed/models/column_mapper.py | 4 +- hed/models/def_expand_gather.py | 12 +- hed/models/definition_dict.py | 26 ++-- hed/models/df_util.py | 12 +- hed/models/hed_string.py | 4 +- hed/models/model_constants.py | 2 +- hed/models/sidecar.py | 12 +- hed/models/tabular_input.py | 2 +- hed/models/timeseries_input.py | 2 +- hed/tools/__init__.py | 4 +- hed/tools/analysis/__init__.py | 4 +- hed/tools/analysis/analysis_util.py | 12 +- hed/tools/analysis/event_manager.py | 42 ++--- hed/tools/analysis/event_manager_copy.py | 147 ------------------ hed/tools/analysis/hed_context_manager_new.py | 133 ---------------- hed/tools/analysis/hed_tag_counts.py | 2 +- ...d_type_definitions.py => hed_type_defs.py} | 21 ++- hed/tools/analysis/hed_type_factors.py | 4 +- hed/tools/analysis/hed_type_manager.py | 17 +- .../{hed_type_values.py => hed_types.py} | 74 ++++----- hed/tools/analysis/temporal_event.py | 2 +- hed/tools/bids/bids_file_group.py | 4 +- .../operations/summarize_definitions_op.py | 10 +- .../operations/summarize_hed_tags_op.py | 2 +- .../operations/summarize_hed_type_op.py | 6 +- hed/validator/def_validator.py | 4 +- hed/validator/hed_validator.py | 2 +- hed/validator/sidecar_validator.py | 4 +- hed/validator/spreadsheet_validator.py | 2 +- hed/validator/tag_validator.py | 4 +- spec_tests/test_errors.py | 2 +- tests/models/test_base_input.py | 2 +- tests/models/test_spreadsheet_input.py | 2 +- .../test_analysis_util_assemble_hed.py | 6 +- .../analysis/test_analysis_util_convert.py | 5 +- ...est_analysis_util_get_assembled_strings.py | 2 - tests/tools/analysis/test_annotation_util.py | 1 - tests/tools/analysis/test_event_manager.py | 1 - tests/tools/analysis/test_hed_tag_counts.py | 2 +- tests/tools/analysis/test_hed_type_counts.py | 6 +- ...e_definitions.py => test_hed_type_defs.py} | 35 +++-- tests/tools/analysis/test_hed_type_factors.py | 12 +- ...t_hed_type_values.py => test_hed_types.py} | 84 +++++----- tests/tools/analysis/test_hed_types_temp.py | 52 +++++++ tests/tools/analysis/test_tabular_summary.py | 19 ++- tests/tools/analysis/test_temporal_event.py | 1 - tests/validator/test_hed_validator.py | 2 +- 48 files changed, 298 insertions(+), 518 deletions(-) delete mode 100644 hed/tools/analysis/event_manager_copy.py delete mode 100644 hed/tools/analysis/hed_context_manager_new.py rename hed/tools/analysis/{hed_type_definitions.py => hed_type_defs.py} (85%) rename hed/tools/analysis/{hed_type_values.py => hed_types.py} (80%) rename tests/tools/analysis/{test_hed_type_definitions.py => test_hed_type_defs.py} (86%) rename tests/tools/analysis/{test_hed_type_values.py => test_hed_types.py} (69%) create mode 100644 tests/tools/analysis/test_hed_types_temp.py diff --git a/hed/models/base_input.py b/hed/models/base_input.py index 0e7190498..69c345958 100644 --- a/hed/models/base_input.py +++ b/hed/models/base_input.py @@ -172,7 +172,7 @@ def expand_defs(self, hed_schema, def_dict): Parameters: hed_schema (HedSchema or None): The schema to use to identify defs - def_dict (DefinitionDict): The definitions to expand + def_dict (DefinitionDict): The type_defs to expand """ from df_util import expand_defs expand_defs(self._dataframe, hed_schema=hed_schema, def_dict=def_dict, columns=self._mapper.get_tag_columns()) @@ -325,7 +325,7 @@ def validate(self, hed_schema, extra_def_dicts=None, name=None, error_handler=No Parameters: hed_schema(HedSchema): The schema to use for validation - extra_def_dicts(list of DefDict or DefDict): all definitions to use for validation + extra_def_dicts(list of DefDict or DefDict): all type_defs to use for validation name(str): The name to report errors from this file as error_handler (ErrorHandler): Error context to use. Creates a new one if None Returns: @@ -470,7 +470,7 @@ def get_def_dict(self, hed_schema, extra_def_dicts=None): Note: Baseclass implementation returns just extra_def_dicts. Parameters: - hed_schema(HedSchema): used to identify tags to find definitions(if needed) + hed_schema(HedSchema): used to identify tags to find type_defs(if needed) extra_def_dicts (list, DefinitionDict, or None): Extra dicts to add to the list. Returns: diff --git a/hed/models/column_mapper.py b/hed/models/column_mapper.py index 761ab81a9..4cf66619f 100644 --- a/hed/models/column_mapper.py +++ b/hed/models/column_mapper.py @@ -23,7 +23,7 @@ def __init__(self, sidecar=None, tag_columns=None, column_prefix_dictionary=None Parameters: sidecar (Sidecar): A sidecar to gather column data from. tag_columns: (list): A list of ints or strings containing the columns that contain the HED tags. - Sidecar column definitions will take precedent if there is a conflict with tag_columns. + Sidecar column type_defs will take precedent if there is a conflict with tag_columns. column_prefix_dictionary (dict): Dictionary with keys that are column numbers/names and values are HED tag prefixes to prepend to the tags in that column before processing. optional_tag_columns (list): A list of ints or strings containing the columns that contain @@ -383,7 +383,7 @@ def get_def_dict(self, hed_schema, extra_def_dicts=None): """ Return def dicts from every column description. Parameters: - hed_schema (Schema): A HED schema object to use for extracting definitions. + hed_schema (Schema): A HED schema object to use for extracting type_defs. extra_def_dicts (list, DefinitionDict, or None): Extra dicts to add to the list. Returns: diff --git a/hed/models/def_expand_gather.py b/hed/models/def_expand_gather.py index 662ec2e54..c3626a9a5 100644 --- a/hed/models/def_expand_gather.py +++ b/hed/models/def_expand_gather.py @@ -81,14 +81,14 @@ def get_group(self): class DefExpandGatherer: - """Class for gathering definitions from a series of def-expands, including possibly ambiguous ones""" + """Class for gathering type_defs from a series of def-expands, including possibly ambiguous ones""" def __init__(self, hed_schema, known_defs=None, ambiguous_defs=None, errors=None): """Initialize the DefExpandGatherer class. Parameters: hed_schema (HedSchema): The HED schema to be used for processing. - known_defs (dict, optional): A dictionary of known definitions. - ambiguous_defs (dict, optional): A dictionary of ambiguous def-expand definitions. + known_defs (dict, optional): A dictionary of known type_defs. + ambiguous_defs (dict, optional): A dictionary of ambiguous def-expand type_defs. """ self.hed_schema = hed_schema @@ -101,10 +101,10 @@ def process_def_expands(self, hed_strings, known_defs=None): Parameters: hed_strings (pd.Series or list): A Pandas Series or list of HED strings to be processed. - known_defs (dict, optional): A dictionary of known definitions to be added. + known_defs (dict, optional): A dictionary of known type_defs to be added. Returns: - tuple: A tuple containing the DefinitionDict, ambiguous definitions, and errors. + tuple: A tuple containing the DefinitionDict, ambiguous type_defs, and errors. """ if not isinstance(hed_strings, pd.Series): hed_strings = pd.Series(hed_strings) @@ -120,7 +120,7 @@ def process_def_expands(self, hed_strings, known_defs=None): return self.def_dict, self.ambiguous_defs, self.errors def _process_def_expand(self, string): - """Process a single HED string to extract definitions and handle known and ambiguous definitions. + """Process a single HED string to extract type_defs and handle known and ambiguous type_defs. Parameters: string (str): The HED string to be processed. diff --git a/hed/models/definition_dict.py b/hed/models/definition_dict.py index 45c5947c2..b450bff8d 100644 --- a/hed/models/definition_dict.py +++ b/hed/models/definition_dict.py @@ -7,7 +7,7 @@ class DefinitionDict: - """ Gathers definitions from a single source. + """ Gathers type_defs from a single source. """ @@ -16,7 +16,7 @@ def __init__(self, def_dicts=None, hed_schema=None): Parameters: def_dicts (str or list or DefinitionDict): DefDict or list of DefDicts/strings or - a single string whose definitions should be added. + a single string whose type_defs should be added. hed_schema(HedSchema or None): Required if passing strings or lists of strings, unused otherwise. :raises TypeError: @@ -30,10 +30,10 @@ def __init__(self, def_dicts=None, hed_schema=None): self.add_definitions(def_dicts, hed_schema) def add_definitions(self, def_dicts, hed_schema=None): - """ Add definitions from dict(s) to this dict. + """ Add type_defs from dict(s) to this dict. Parameters: - def_dicts (list or DefinitionDict): DefDict or list of DefDicts/strings whose definitions should be added. + def_dicts (list or DefinitionDict): DefDict or list of DefDicts/strings whose type_defs should be added. hed_schema(HedSchema or None): Required if passing strings or lists of strings, unused otherwise. :raises TypeError: @@ -61,10 +61,10 @@ def _add_definition(self, def_tag, def_value): self.defs[def_tag] = def_value def _add_definitions_from_dict(self, def_dict): - """ Add the definitions found in the given definition dictionary to this mapper. + """ Add the type_defs found in the given definition dictionary to this mapper. Parameters: - def_dict (DefinitionDict): DefDict whose definitions should be added. + def_dict (DefinitionDict): DefDict whose type_defs should be added. """ for def_tag, def_value in def_dict.items(): @@ -90,29 +90,29 @@ def __len__(self): return len(self.defs) def items(self): - """ Returns the dictionary of definitions + """ Returns the dictionary of type_defs Alias for .defs.items() Returns: - def_entries({str: DefinitionEntry}): A list of definitions + def_entries({str: DefinitionEntry}): A list of type_defs """ return self.defs.items() @property def issues(self): - """Returns issues about duplicate definitions.""" + """Returns issues about duplicate type_defs.""" return self._issues def check_for_definitions(self, hed_string_obj, error_handler=None): """ Check string for definition tags, adding them to self. Parameters: - hed_string_obj (HedString): A single hed string to gather definitions from. - error_handler (ErrorHandler or None): Error context used to identify where definitions are found. + hed_string_obj (HedString): A single hed string to gather type_defs from. + error_handler (ErrorHandler or None): Error context used to identify where type_defs are found. Returns: - list: List of issues encountered in checking for definitions. Each issue is a dictionary. + list: List of issues encountered in checking for type_defs. Each issue is a dictionary. """ def_issues = [] for definition_tag, group in hed_string_obj.find_top_level_tags(anchor_tags={DefTagNames.DEFINITION_KEY}): @@ -300,7 +300,7 @@ def get_as_strings(def_dict): """ Convert the entries to strings of the contents Parameters: - def_dict(DefinitionDict or dict): A dict of definitions + def_dict(DefinitionDict or dict): A dict of type_defs Returns: dict(str: str): definition name and contents diff --git a/hed/models/df_util.py b/hed/models/df_util.py index 0a9373d1e..34a891f84 100644 --- a/hed/models/df_util.py +++ b/hed/models/df_util.py @@ -29,7 +29,7 @@ def get_assembled(tabular_file, sidecar, hed_schema, extra_def_dicts=None, join_ Returns: tuple: hed_strings(list of HedStrings):A list of HedStrings or a list of lists of HedStrings - def_dict(DefinitionDict): The definitions from this Sidecar + def_dict(DefinitionDict): The type_defs from this Sidecar """ if isinstance(sidecar, str): sidecar = Sidecar(sidecar) @@ -105,7 +105,7 @@ def expand_defs(df, hed_schema, def_dict, columns=None): Parameters: df (pd.Dataframe or pd.Series): The dataframe or series to modify hed_schema (HedSchema or None): The schema to use to identify defs - def_dict (DefinitionDict): The definitions to expand + def_dict (DefinitionDict): The type_defs to expand columns (list or None): The columns to modify on the dataframe """ if isinstance(df, pd.Series): @@ -133,18 +133,18 @@ def _expand_defs(hed_string, hed_schema, def_dict): def process_def_expands(hed_strings, hed_schema, known_defs=None, ambiguous_defs=None): - """ Gather def-expand tags in the strings/compare with known definitions to find any differences + """ Gather def-expand tags in the strings/compare with known type_defs to find any differences Parameters: hed_strings (list or pd.Series): A list of HED strings to process. hed_schema (HedSchema): The schema to use known_defs (DefinitionDict or list or str or None): - A DefinitionDict or anything its constructor takes. These are the known definitions going in, that must + A DefinitionDict or anything its constructor takes. These are the known type_defs going in, that must match perfectly. - ambiguous_defs (dict): A dictionary containing ambiguous definitions + ambiguous_defs (dict): A dictionary containing ambiguous type_defs format TBD. Currently def name key: list of lists of HED tags values Returns: - tuple: A tuple containing the DefinitionDict, ambiguous definitions, and errors. + tuple: A tuple containing the DefinitionDict, ambiguous type_defs, and errors. """ from hed.models.def_expand_gather import DefExpandGatherer diff --git a/hed/models/hed_string.py b/hed/models/hed_string.py index 328316868..cae95c46e 100644 --- a/hed/models/hed_string.py +++ b/hed/models/hed_string.py @@ -112,7 +112,7 @@ def copy(self): def remove_definitions(self): """ Remove definition tags and groups from this string. - This does not validate definitions and will blindly removing invalid ones as well. + This does not validate type_defs and will blindly removing invalid ones as well. """ definition_groups = self.find_top_level_tags({DefTagNames.DEFINITION_KEY}, include_groups=1) if definition_groups: @@ -176,7 +176,7 @@ def split_into_groups(hed_string, hed_schema, def_dict=None): Parameters: hed_string (str): A hed string consisting of tags and tag groups to be processed. hed_schema (HedSchema): HED schema to use to identify tags. - def_dict(DefinitionDict): The definitions to identify + def_dict(DefinitionDict): The type_defs to identify Returns: list: A list of HedTag and/or HedGroup. diff --git a/hed/models/model_constants.py b/hed/models/model_constants.py index 5fdb54cda..3aed6608a 100644 --- a/hed/models/model_constants.py +++ b/hed/models/model_constants.py @@ -5,7 +5,7 @@ class DefTagNames: - """ Source names for definitions, def labels, and expanded labels""" + """ Source names for type_defs, def labels, and expanded labels""" DEF_ORG_KEY = 'Def' DEF_EXPAND_ORG_KEY = 'Def-expand' diff --git a/hed/models/sidecar.py b/hed/models/sidecar.py index d7d77a09b..be4ed9614 100644 --- a/hed/models/sidecar.py +++ b/hed/models/sidecar.py @@ -54,12 +54,12 @@ def all_hed_columns(self): @property def def_dict(self): - """This is the definitions from this sidecar. + """This is the type_defs from this sidecar. - Generally you should instead call get_def_dict to get the relevant definitions + Generally you should instead call get_def_dict to get the relevant type_defs Returns: - DefinitionDict: The definitions for this sidecar + DefinitionDict: The type_defs for this sidecar """ return self._def_dict @@ -76,7 +76,7 @@ def get_def_dict(self, hed_schema, extra_def_dicts=None): """ Returns the definition dict for this sidecar. Parameters: - hed_schema(HedSchema): used to identify tags to find definitions + hed_schema(HedSchema): used to identify tags to find type_defs extra_def_dicts (list, DefinitionDict, or None): Extra dicts to add to the list. Returns: @@ -192,14 +192,14 @@ def _load_json_file(self, fp): raise HedFileError(HedExceptions.CANNOT_PARSE_JSON, str(e), self.name) from e def extract_definitions(self, hed_schema, error_handler=None): - """ Gather and validate definitions in metadata. + """ Gather and validate type_defs in metadata. Parameters: hed_schema (HedSchema): The schema to used to identify tags. error_handler (ErrorHandler or None): The error handler to use for context, uses a default one if None. Returns: - DefinitionDict: Contains all the definitions located in the sidecar. + DefinitionDict: Contains all the type_defs located in the sidecar. """ if error_handler is None: diff --git a/hed/models/tabular_input.py b/hed/models/tabular_input.py index cd3172126..310069f32 100644 --- a/hed/models/tabular_input.py +++ b/hed/models/tabular_input.py @@ -58,7 +58,7 @@ def get_def_dict(self, hed_schema, extra_def_dicts=None): """ Returns the definition dict for this sidecar. Parameters: - hed_schema(HedSchema): used to identify tags to find definitions + hed_schema(HedSchema): used to identify tags to find type_defs extra_def_dicts (list, DefinitionDict, or None): Extra dicts to add to the list. Returns: diff --git a/hed/models/timeseries_input.py b/hed/models/timeseries_input.py index 0b9cbee18..125800d18 100644 --- a/hed/models/timeseries_input.py +++ b/hed/models/timeseries_input.py @@ -17,7 +17,7 @@ def __init__(self, file=None, sidecar=None, extra_def_dicts=None, name=None): name (str): The name to display for this file for error purposes. Notes: - - The extra_def_dicts are external definitions that override the ones in the object. + - The extra_def_dicts are external type_defs that override the ones in the object. """ diff --git a/hed/tools/__init__.py b/hed/tools/__init__.py index d3a135e2c..8c1305781 100644 --- a/hed/tools/__init__.py +++ b/hed/tools/__init__.py @@ -2,9 +2,9 @@ from .analysis.file_dictionary import FileDictionary # from .analysis.hed_context_manager import OnsetGroup, HedContextManager -from .analysis.hed_type_definitions import HedTypeDefinitions +from .analysis.hed_type_defs import HedTypeDefs from .analysis.hed_type_factors import HedTypeFactors -from .analysis.hed_type_values import HedTypeValues +from .analysis.hed_types import HedTypes from .analysis.hed_type_manager import HedTypeManager from .analysis.hed_type_counts import HedTypeCount from .analysis.key_map import KeyMap diff --git a/hed/tools/analysis/__init__.py b/hed/tools/analysis/__init__.py index 124390237..520305f19 100644 --- a/hed/tools/analysis/__init__.py +++ b/hed/tools/analysis/__init__.py @@ -1,9 +1,9 @@ """ Basic analysis tools. """ from .file_dictionary import FileDictionary # from .hed_context_manager import OnsetGroup, HedContextManager -from .hed_type_definitions import HedTypeDefinitions +from .hed_type_defs import HedTypeDefs from .hed_type_factors import HedTypeFactors -from .hed_type_values import HedTypeValues +from .hed_types import HedTypes from .hed_type_manager import HedTypeManager from .hed_type_counts import HedTypeCount from .key_map import KeyMap diff --git a/hed/tools/analysis/analysis_util.py b/hed/tools/analysis/analysis_util.py index 37f2b9b9d..64f44e962 100644 --- a/hed/tools/analysis/analysis_util.py +++ b/hed/tools/analysis/analysis_util.py @@ -14,11 +14,11 @@ def assemble_hed(data_input, sidecar, schema, columns_included=None, expand_defs Parameters: data_input (TabularInput): The tabular input file whose HED annotations are to be assembled. - sidecar (Sidecar): Sidecar with definitions. + sidecar (Sidecar): Sidecar with type_defs. schema (HedSchema): Hed schema columns_included (list or None): A list of additional column names to include. If None, only the list of assembled tags is included. - expand_defs (bool): If True, definitions are expanded when the events are assembled. + expand_defs (bool): If True, type_defs are expanded when the events are assembled. Returns: DataFrame or None: A DataFrame with the assembled events. @@ -41,7 +41,7 @@ def assemble_hed(data_input, sidecar, schema, columns_included=None, expand_defs else: df = data_input.dataframe[eligible_columns].copy(deep=True) df['HED_assembled'] = hed_string_list - # definitions = data_input.get_definitions().gathered_defs + # type_defs = data_input.get_definitions().gathered_defs return df, definitions @@ -113,7 +113,7 @@ def search_strings(hed_strings, queries, query_names=None): # Parameters: # table (TabularInput): The input file to be searched. # hed_schema (HedSchema or HedschemaGroup): If provided the HedStrings are converted to canonical form. -# expand_defs (bool): If True, definitions are expanded when the events are assembled. +# expand_defs (bool): If True, type_defs are expanded when the events are assembled. # # Returns: # list: A list of HedString objects. @@ -139,7 +139,7 @@ def search_strings(hed_strings, queries, query_names=None): # """ # # eligible_columns, missing_columns = separate_values(list(data_input.dataframe.columns), columns_included) -# hed_list, definitions = df_util.get_assembled(data_input, sidecar, hed_schema, extra_def_dicts=None, join_columns=True, +# hed_list, type_defs = df_util.get_assembled(data_input, sidecar, hed_schema, extra_def_dicts=None, join_columns=True, # shrink_defs=False, expand_defs=True) # expression = QueryParser(query) # hed_tags = [] @@ -187,7 +187,7 @@ def search_strings(hed_strings, queries, query_names=None): # list: A list of the removed Defs. # # Notes: -# - the hed_string_obj passed in no longer has definitions. +# - the hed_string_obj passed in no longer has type_defs. # # """ # to_remove = [] diff --git a/hed/tools/analysis/event_manager.py b/hed/tools/analysis/event_manager.py index 648f96358..6722c3ef9 100644 --- a/hed/tools/analysis/event_manager.py +++ b/hed/tools/analysis/event_manager.py @@ -13,7 +13,7 @@ def __init__(self, input_data, hed_schema, extra_defs=None): Parameters: input_data (TabularInput): Represents an events file with its sidecar. hed_schema (HedSchema): HED schema used in this - extra_defs (DefinitionDict): Extra definitions not included in the input_data information. + extra_defs (DefinitionDict): Extra type_defs not included in the input_data information. :raises HedFileError: - if there are any unmatched offsets. @@ -25,12 +25,13 @@ def __init__(self, input_data, hed_schema, extra_defs=None): self.event_list = [[] for _ in range(len(input_data.dataframe))] self.hed_schema = hed_schema + self.input_data = input_data self.def_dict = input_data.get_def_dict(hed_schema, extra_def_dicts=extra_defs) self.onsets = input_data.dataframe['onset'].tolist() self.hed_strings = None # Remaining HED strings copy.deepcopy(hed_strings) - self.anchor_dict = {} + # self.anchor_dict = {} # Dictionary of definition names to list of TemporalEvent self._create_event_list(input_data) - self._create_anchor_list() + # self._create_anchor_dict() # def iter_context(self): # """ Iterate rows of context. @@ -44,24 +45,27 @@ def __init__(self, input_data, hed_schema, extra_defs=None): # for index in range(len(self.contexts)): # yield index, self.contexts[index] - def _create_anchor_list(self): - """ Populate the dictionary of def names to list of temporal events. - - :raises HedFileError: - - If the hed_strings contain unmatched offsets. - - Notes: - - """ - for index, events in enumerate(self.event_list): - for event in events: - index_list = self.anchor_dict.get(event.anchor, []) - index_list.append(event) - self.anchor_dict[event.anchor] = index_list + # def _create_anchor_dict(self): + # """ Populate the dictionary of def names to list of temporal events. + # + # :raises HedFileError: + # - If the hed_strings contain unmatched offsets. + # + # Notes: + # + # """ + # for events in self.event_list: + # for event in events: + # elist = self.anchor_dict.get(event.anchor, []) + # elist.append(event) + # self.anchor_dict[event.anchor] = elist def _create_event_list(self, input_data): """ Populate the event_list with the events with temporal extent indexed by event number. + Parameters: + input_data (TabularInput): A tabular input that includes its relevant sidecar. + :raises HedFileError: - If the hed_strings contain unmatched offsets. @@ -109,7 +113,7 @@ def _extract_temporal_events(self, hed, event_index, onset_dict): to_remove.append(tup[1]) hed.remove(to_remove) - def _set_event_contexts(self): + # def _set_event_contexts(self): """ Creates an event context for each hed string. Notes: @@ -124,7 +128,7 @@ def _set_event_contexts(self): # for i in range(len(self.hed_strings)): # contexts[i] = HedString(",".join(contexts[i]), hed_schema=self.hed_schema) # self.contexts = contexts - print("_set_event_contexts not implemented yet") + def _update_onset_list(self, group, onset_dict, event_index): """ Process one onset or offset group to create onset_list. diff --git a/hed/tools/analysis/event_manager_copy.py b/hed/tools/analysis/event_manager_copy.py deleted file mode 100644 index 9a8dd02fa..000000000 --- a/hed/tools/analysis/event_manager_copy.py +++ /dev/null @@ -1,147 +0,0 @@ -""" Manages events of temporal extent. """ - -from hed.tools.analysis.temporal_event import TemporalEvent -from hed.models.model_constants import DefTagNames - - -class EventManagerCopy: - - def __init__(self, input_data, hed_schema, extra_def_dict=None): - """ Create an event manager for an events file. Manages events of temporal extent. This - - Parameters: - hed_strings (list): A list of HED strings - onsets (list): A list of onset times that is the same length as hed_strings - def_dict (DefinitionDict): Contains the definitions for this dataset. - - :raises HedFileError: - - if there are any unmatched offsets. - - Notes: Keeps the events of temporal extend by their starting index in events file. These events - are separated from the rest of the annotations. - - """ - - self.event_list = [[] for _ in range(len(onsets))] - self.onsets = onsets - self.hed_strings = hed_strings ## copy.deepcopy(hed_strings) - self.def_dict = def_dict - self.anchor_dict ={} - self._create_event_list() - self._create_anchor_list() - - # def iter_context(self): - # """ Iterate rows of context. - # - # Yields: - # int: position in the dataFrame - # HedString: Context - # - # """ - # - # for index in range(len(self.contexts)): - # yield index, self.contexts[index] - - def _create_anchor_list(self): - """ Populate the dictionary of def names to list of temporal events. - - :raises HedFileError: - - If the hed_strings contain unmatched offsets. - - Notes: - - """ - for index, events in enumerate(self.event_list): - for event in events: - index_list = self.anchor_dict.get(event.anchor, []) - index_list.append(event) - self.anchor_dict[event.anchor] = index_list - - def _create_event_list(self): - """ Populate the event_list with the events with temporal extent indexed by event number. - - :raises HedFileError: - - If the hed_strings contain unmatched offsets. - - Notes: - - """ - onset_dict = {} # Temporary dictionary keeping track of temporal events that haven't ended yet. - for event_index, hed in enumerate(self.hed_strings): - self._extract_temporal_events(hed, event_index, onset_dict) - # Now handle the events that extend to end of list - for item in onset_dict.values(): - item.set_end(len(self.onsets), None) - - def _extract_temporal_events(self, hed, event_index, onset_dict): - """ Extract the temporal events and remove them from the other HED strings. - - Parameters: - hed (HedString): The assembled HedString at position event_index in the data. - event_index (int): The position of this string in the data. - onset_dict (dict): Running dict that keeps track of temporal events that haven't yet ended. - - Note: - This removes the events of temporal extent from the HED string. - - """ - if not hed: - return - group_tuples = hed.find_top_level_tags(anchor_tags={DefTagNames.ONSET_KEY, DefTagNames.OFFSET_KEY}, - include_groups=2) - to_remove = [] - for tup in group_tuples: - anchor_tag = tup[1].find_def_tags(recursive=False, include_groups=0)[0] - anchor = anchor_tag.extension.lower() - if anchor in onset_dict or tup[0].short_base_tag.lower() == DefTagNames.OFFSET_KEY: - temporal_event = onset_dict.pop(anchor) - temporal_event.set_end(event_index, self.onsets[event_index]) - if tup[0] == DefTagNames.ONSET_KEY: - new_event = TemporalEvent(tup[1], event_index, self.onsets[event_index]) - self.event_list[event_index].append(new_event) - onset_dict[anchor] = new_event - to_remove.append(tup[1]) - hed.remove(to_remove) - - def _set_event_contexts(self): - """ Creates an event context for each hed string. - - Notes: - The event context would be placed in an event context group, but is kept in a separate array without the - event context group or tag. - - """ - # contexts = [[] for _ in range(len(self.hed_strings))] - # for onset in self.onset_list: - # for i in range(onset.start_index+1, onset.end_index): - # contexts[i].append(onset.contents) - # for i in range(len(self.hed_strings)): - # contexts[i] = HedString(",".join(contexts[i]), hed_schema=self.hed_schema) - # self.contexts = contexts - print("_set_event_contexts not implemented yet") - - def _update_onset_list(self, group, onset_dict, event_index): - """ Process one onset or offset group to create onset_list. - - Parameters: - group (HedGroup): The HedGroup containing the onset or offset. - onset_dict (dict): A dictionary of OnsetGroup objects that keep track of span of an event. - event_index (int): The event number in the list. - - :raises HedFileError: - - if an unmatched offset is encountered. - - Notes: - - Modifies onset_dict and onset_list. - """ - # def_tags = group.find_def_tags(recursive=False, include_groups=0) - # name = def_tags[0].extension - # onset_element = onset_dict.pop(name, None) - # if onset_element: - # onset_element.end_index = event_index - # self.onset_list.append(onset_element) - # elif is_offset: - # raise HedFileError("UnmatchedOffset", f"Unmatched {name} offset at event {event_index}", " ") - # if not is_offset: - # onset_element = TemporalEvent(name, group, event_index) - # onset_dict[name] = onset_element diff --git a/hed/tools/analysis/hed_context_manager_new.py b/hed/tools/analysis/hed_context_manager_new.py deleted file mode 100644 index f7dbdaeb0..000000000 --- a/hed/tools/analysis/hed_context_manager_new.py +++ /dev/null @@ -1,133 +0,0 @@ -""" Manages context and events of temporal extent. """ - -from hed.errors.exceptions import HedFileError -from hed.models import HedGroup, HedString -from hed.schema import HedSchema, HedSchemaGroup -from hed.tools.analysis.analysis_util import hed_to_str -from hed.tools.analysis.temporal_event import TemporalEvent - -# TODO: [Refactor] clean up distinction between hed as strings versus objects -- maybe replace by event manager. -# TODO: Implement insets - - -class HedContextManagerNew: - - def __init__(self, data, hed_schema): - """ Create a context manager for an events file. - - Parameters: - data (TabularInput): A TabularInput representing a data frame. - hed_schema (HedSchema): A HedSchema - - :raises HedFileError: - - If there are any unmatched offsets. - - Notes: - The constructor has the side effect of splitting each element of the hed_strings list into two - by removing the Offset groups and the Onset tags. The context has the temporal extent information. - For users wanting to use only Onset events, self.hed_strings contains the information. - - """ - self.data = data - self.hed_schema = hed_schema - if not isinstance(hed_schema, HedSchema) and not isinstance(hed_schema, HedSchemaGroup): - raise ValueError("ContextRequiresSchema", f"Context manager must have a valid HedSchema of HedSchemaGroup") - self.event_manager = EventManager - self.onset_list = [] - self.onset_count = 0 - self.offset_count = 0 - self.contexts = [] - self._create_onset_list() - self._set_event_contexts() - - # def _extract_hed_objs(self, assembled): - # hed_objs = [None for _ in range(len(assembled))] - # for index, value in assembled["HED_assembled"].items(): - # hed_objs[index] = HedString(value, hed_schema=self.hed_schema) - # return hed_objs - - def iter_context(self): - """ Iterate rows of context. - - Yields: - HedString: The HedString. - HedString: Context - - """ - - for index in range(len(self.hed_strings)): - yield self.hed_strings[index], self.contexts[index] - - def _create_onset_list(self): - """ Create a list of events of extended duration. - - :raises HedFileError: - - If the hed_strings contain unmatched offsets. - - """ - - self.onset_list = [] - onset_dict = {} - for event_index, hed in enumerate(self.hed_strings): - to_remove = [] # tag_tuples = hed.find_tags(['Onset'], recursive=False, include_groups=1) - onset_tuples = hed.find_top_level_tags(["onset"], include_groups=2) - self.onset_count += len(onset_tuples) - for tup in onset_tuples: - group = tup[1] - group.remove([tup[0]]) - self._update_onset_list(group, onset_dict, event_index, is_offset=False) - offset_tuples = hed.find_top_level_tags(["offset"], include_groups=2) - self.offset_count += len(offset_tuples) - for tup in offset_tuples: - group = tup[1] - to_remove.append(group) - self._update_onset_list(group, onset_dict, event_index, is_offset=True) - hed.remove(to_remove) - - # Now handle the events that extend to end of list - for key, value in onset_dict.items(): - value.end_index = len(self.hed_strings) - self.onset_list.append(value) - - def _set_event_contexts(self): - """ Creates an event context for each hed string. - - Notes: - The event context would be placed in a event context group, but is kept in a separate array without the - event context group or tag. - - """ - contexts = [[] for _ in range(len(self.hed_strings))] - for onset in self.onset_list: - for i in range(onset.start_index+1, onset.end_index): - contexts[i].append(onset.contents) - for i in range(len(self.hed_strings)): - contexts[i] = HedString(",".join(contexts[i]), hed_schema=self.hed_schema) - self.contexts = contexts - - def _update_onset_list(self, group, onset_dict, event_index, is_offset=False): - """ Process one onset or offset group to create onset_list. - - Parameters: - group (HedGroup): The HedGroup containing the onset or offset. - onset_dict (dict): A dictionary of OnsetGroup objects that keep track of span of an event. - event_index (int): The event number in the list. - is_offset (bool): True if processing an offset. - - :raises HedFileError: - - If an unmatched offset is encountered. - - Notes: - - Modifies onset_dict and onset_list. - """ - def_tags = group.find_def_tags(recursive=False, include_groups=0) - name = def_tags[0].extension - onset_element = onset_dict.pop(name, None) - if onset_element: - onset_element.end_index = event_index - self.onset_list.append(onset_element) - elif is_offset: - raise HedFileError("UnmatchedOffset", f"Unmatched {name} offset at event {event_index}", " ") - if not is_offset: - onset_element = TemporalEvent(name, group, event_index) - onset_dict[name] = onset_element diff --git a/hed/tools/analysis/hed_tag_counts.py b/hed/tools/analysis/hed_tag_counts.py index 300319820..daca2905d 100644 --- a/hed/tools/analysis/hed_tag_counts.py +++ b/hed/tools/analysis/hed_tag_counts.py @@ -82,7 +82,7 @@ def update_event_counts(self, hed_string_obj, file_name, definitions=None): Parameters: hed_string_obj (HedString): The HED string whose tags should be counted. file_name (str): The name of the file corresponding to these counts. - definitions (dict): The definitions associated with the HED string. + definitions (dict): The type_defs associated with the HED string. """ if file_name not in self.files: diff --git a/hed/tools/analysis/hed_type_definitions.py b/hed/tools/analysis/hed_type_defs.py similarity index 85% rename from hed/tools/analysis/hed_type_definitions.py rename to hed/tools/analysis/hed_type_defs.py index 417083e44..ae23f90a3 100644 --- a/hed/tools/analysis/hed_type_definitions.py +++ b/hed/tools/analysis/hed_type_defs.py @@ -1,40 +1,39 @@ -""" Manages definitions associated with a type such as condition-variable. """ +""" Manages type_defs associated with a type such as condition-variable. """ from hed.models.hed_tag import HedTag from hed.models.definition_dict import DefinitionDict -class HedTypeDefinitions: +class HedTypeDefs: """ Properties: def_map (dict): keys are definition names, values are dict {type_values, description, tags} - Example: A definition 'famous-face-cond' with contents - `(Condition-variable/Face-type,Description/A face that should be recognized by the participants,(Image,(Face,Famous)))` + Example: A definition 'famous-face-cond' with contents + `(Condition-variable/Face-type,Description/A face that should be recognized by the + participants,(Image,(Face,Famous)))` would have type_values ['face_type']. All items are strings not objects. """ - def __init__(self, definitions, hed_schema, type_tag='condition-variable'): + def __init__(self, definitions, type_tag='condition-variable'): """ Create a definition manager for a type of variable. Parameters: definitions (dict or DefinitionDict): A dictionary of DefinitionEntry objects. - hed_schema (Hedschema or HedSchemaGroup): The schema used for parsing. type_tag (str): Lower-case HED tag string representing the type managed. """ self.type_tag = type_tag.lower() - self.hed_schema = hed_schema if isinstance(definitions, DefinitionDict): self.definitions = definitions.defs elif isinstance(definitions, dict): self.definitions = definitions else: self.definitions = {} - self.def_map = self._extract_def_map() - self.type_map = self._extract_type_map() # + self.def_map = self._extract_def_map() + self.type_map = self._extract_type_map() def get_type_values(self, item): """ Return a list of type_tag values in item. @@ -64,7 +63,7 @@ def _extract_def_map(self): return def_map def _extract_type_map(self): - """ Extract the definitions associated with each type value and add them to the dictionary. """ + """ Extract the type_defs associated with each type value and add them to the dictionary. """ type_map = {} for def_name, def_values in self.def_map.items(): @@ -122,7 +121,7 @@ def get_def_names(item, no_value=True): names = [tag.extension.lower() for tag in item.get_all_tags() if 'def' in tag.tag_terms] if no_value: for index, name in enumerate(names): - name, name_value = HedTypeDefinitions.split_name(name) + name, name_value = HedTypeDefs.split_name(name) names[index] = name return names diff --git a/hed/tools/analysis/hed_type_factors.py b/hed/tools/analysis/hed_type_factors.py index b4cc92af4..9911f545a 100644 --- a/hed/tools/analysis/hed_type_factors.py +++ b/hed/tools/analysis/hed_type_factors.py @@ -9,13 +9,13 @@ class HedTypeFactors: ALLOWED_ENCODINGS = ("categorical", "one-hot") - def __init__(self, type_tag, type_value, number_elements): + def __init__(self, type_value, type_tag, number_elements): """ Constructor for HedTypeFactors. Parameters: type_value (str): The value of the type summarized by this class. - number_elements (int): Number of elements in the data column type_tag (str): Lowercase string corresponding to a HED tag which has a takes value child. + number_elements (int): Number of elements in the data column """ diff --git a/hed/tools/analysis/hed_type_manager.py b/hed/tools/analysis/hed_type_manager.py index 87ed57869..9bbefaa32 100644 --- a/hed/tools/analysis/hed_type_manager.py +++ b/hed/tools/analysis/hed_type_manager.py @@ -1,28 +1,25 @@ -""" Manager for type factors and type definitions. """ +""" Manager for type factors and type type_defs. """ import pandas as pd import json -from hed.tools.analysis.hed_type_values import HedTypeValues +from hed.tools.analysis.hed_types import HedTypes from hed.tools.analysis.hed_context_manager import HedContextManager class HedTypeManager: - def __init__(self, hed_strings, hed_schema, definitions): + def __init__(self, event_manager): """ Create a variable manager for one tabular file for all type variables. Parameters: - hed_strings (list): A list of HED strings. - hed_schema (HedSchema or HedSchemaGroup): The HED schema to use for processing. - definitions (dict): A dictionary of DefinitionEntry objects. + event_manager (EventManager): an event manager for the tabular file. :raises HedFileError: - - On errors such as unmatched onsets or missing definitions. + - On errors such as unmatched onsets or missing type_defs. """ - self.definitions = definitions - self.context_manager = HedContextManager(hed_strings, hed_schema) + self.event_manager = event_manager self._type_tag_map = {} # a map of type tag into HedTypeValues objects @property @@ -33,7 +30,7 @@ def add_type_variable(self, type_name): if type_name.lower() in self._type_tag_map: return self._type_tag_map[type_name.lower()] = \ - HedTypeValues(self.context_manager, self.definitions, 'run-01', type_tag=type_name) + HedTypes(self.event_manager, 'run-01', type_tag=type_name) def get_factor_vectors(self, type_tag, type_values=None, factor_encoding="one-hot"): """ Return a DataFrame of factor vectors for the indicated HED tag and values diff --git a/hed/tools/analysis/hed_type_values.py b/hed/tools/analysis/hed_types.py similarity index 80% rename from hed/tools/analysis/hed_type_values.py rename to hed/tools/analysis/hed_types.py index 3190d0bf4..12459855b 100644 --- a/hed/tools/analysis/hed_type_values.py +++ b/hed/tools/analysis/hed_types.py @@ -3,35 +3,39 @@ import pandas as pd from hed.models.hed_tag import HedTag from hed.models.hed_group import HedGroup -from hed.tools.analysis.hed_type_definitions import HedTypeDefinitions +from hed.tools.analysis.hed_type_defs import HedTypeDefs from hed.tools.analysis.hed_context_manager import HedContextManager from hed.tools.analysis.hed_type_factors import HedTypeFactors -class HedTypeValues: +class HedTypes: - def __init__(self, context_manager, definitions, name, type_tag="condition-variable"): + def __init__(self, event_manager, name, type_tag="condition-variable"): """ Create a variable manager for one type-variable for one tabular file. Parameters: - context_manager (HedContextManager): A list of HED strings. - definitions (dict): A dictionary of DefinitionEntry objects. + event_manager (EventManager): An event manager for the tabular file. name (str): Name of the tabular file as a unique identifier. type_tag (str): Lowercase short form of the tag to be managed. :raises HedFileError: - - On errors such as unmatched onsets or missing definitions. + - On errors such as unmatched onsets or missing type_defs. """ self.name = name self.type_tag = type_tag.lower() - self.definitions = HedTypeDefinitions(definitions, context_manager.hed_schema, type_tag=type_tag) - hed_strings = context_manager.hed_strings - hed_contexts = context_manager.contexts - self.total_events = len(hed_strings) - self._type_value_map = {} - self._extract_variables(hed_strings, hed_contexts) + self.event_manager = event_manager + self.type_defs = HedTypeDefs(event_manager.def_dict, type_tag=type_tag) + # hed_strings = context_manager.hed_strings + # hed_contexts = context_manager.contexts + # self.total_events = len(hed_strings) + self._type_map = {} + self._extract_variables() + @property + def total_events(self): + return len(self.event_manager.event_list) + def get_type_value_factors(self, type_value): """ Return the HedTypeFactors associated with type_name or None. @@ -42,7 +46,7 @@ def get_type_value_factors(self, type_value): HedTypeFactors or None """ - return self._type_value_map.get(type_value.lower(), None) + return self._type_map.get(type_value.lower(), None) def get_type_value_level_info(self, type_value): """ Return type variable corresponding to type_value. @@ -54,24 +58,24 @@ def get_type_value_level_info(self, type_value): """ - return self._type_value_map.get(type_value, None) + return self._type_map.get(type_value, None) @property def type_variables(self): - return set(self._type_value_map.keys()) + return set(self._type_map.keys()) def get_type_def_names(self): - """ Return the definitions """ + """ Return the type_defs """ tag_list = [] - for variable, factor in self._type_value_map.items(): + for variable, factor in self._type_map.items(): tag_list = tag_list + [key for key in factor.levels.keys()] return list(set(tag_list)) def get_type_value_names(self): - return list(self._type_value_map.keys()) + return list(self._type_map.keys()) def get_summary(self): - var_summary = self._type_value_map.copy() + var_summary = self._type_map.copy() summary = {} for var_name, var_sum in var_summary.items(): summary[var_name] = var_sum.get_summary() @@ -93,7 +97,7 @@ def get_type_factors(self, type_values=None, factor_encoding="one-hot"): type_values = self.get_type_value_names() df_list = [] for index, type_value in enumerate(type_values): - var_sum = self._type_value_map.get(type_value, None) + var_sum = self._type_map.get(type_value, None) if not var_sum: continue df_list.append(var_sum.get_factors(factor_encoding=factor_encoding)) @@ -103,7 +107,7 @@ def get_type_factors(self, type_values=None, factor_encoding="one-hot"): return pd.concat(df_list, axis=1) def __str__(self): - return f"{self.type_tag} type_variables: {str(list(self._type_value_map.keys()))}" + return f"{self.type_tag} type_variables: {str(list(self._type_map.keys()))}" def _extract_definition_variables(self, item, index): """ Extract the definition uses from a HedTag, HedGroup, or HedString. @@ -124,7 +128,7 @@ def _extract_definition_variables(self, item, index): for tag in tags: if tag.short_base_tag.lower() != "def": continue - hed_vars = self.definitions.get_type_values(tag) + hed_vars = self.type_defs.get_type_values(tag) if not hed_vars: continue self._update_definition_variables(tag, hed_vars, index) @@ -143,22 +147,22 @@ def _update_definition_variables(self, tag, hed_vars, index): """ level = tag.extension.lower() for var_name in hed_vars: - hed_var = self._type_value_map.get(var_name, None) + hed_var = self._type_map.get(var_name, None) if hed_var is None: hed_var = HedTypeFactors(self.type_tag, var_name, self.total_events) - self._type_value_map[var_name] = hed_var + self._type_map[var_name] = hed_var var_levels = hed_var.levels.get(level, {index: 0}) var_levels[index] = 0 hed_var.levels[level] = var_levels - def _extract_variables(self, hed_strings, hed_contexts): + def _extract_variables(self): """ Extract all type_variables from hed_strings and event_contexts. """ - for index, hed in enumerate(hed_strings): + for index, hed in enumerate(self.event_manager.hed_strings): self._extract_direct_variables(hed, index) self._extract_definition_variables(hed, index) - self._extract_direct_variables(hed_contexts[index], index) - self._extract_definition_variables(hed_contexts[index], index) + # self._extract_direct_variables(hed_contexts[index], index) + # self._extract_definition_variables(hed_contexts[index], index) def _extract_direct_variables(self, item, index): """ Extract the condition type_variables from a HedTag, HedGroup, or HedString. @@ -188,10 +192,10 @@ def _update_variables(self, tag_list, index): tag_value = tag.extension.lower() if not tag_value: tag_value = self.type_tag - hed_var = self._type_value_map.get(tag_value, None) + hed_var = self._type_map.get(tag_value, None) if hed_var is None: hed_var = HedTypeFactors(self.type_tag, tag_value, self.total_events) - self._type_value_map[tag_value] = hed_var + self._type_map[tag_value] = hed_var hed_var.direct_indices[index] = '' @@ -236,9 +240,9 @@ def _update_variables(self, tag_list, index): # 'Cond6': DefinitionEntry('Cond6', def6, True, None) # } # -# conditions1 = HedTypeValues(HedContextManager(test_strings1), hed_schema, defs) -# conditions2 = HedTypeValues(HedContextManager(test_strings2), hed_schema, defs) -# conditions3 = HedTypeValues(HedContextManager(test_strings3), hed_schema, defs) +# conditions1 = HedTypes(HedContextManager(test_strings1), hed_schema, defs) +# conditions2 = HedTypes(HedContextManager(test_strings2), hed_schema, defs) +# conditions3 = HedTypes(HedContextManager(test_strings3), hed_schema, defs) # bids_root_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), # '../../../tests/data/bids_tests/eeg_ds003645s_hed')) # events_path = os.path.realpath(os.path.join(bids_root_path, @@ -248,8 +252,8 @@ def _update_variables(self, tag_list, index): # input_data = TabularInput(events_path, sidecar=sidecar1, name="face_sub1_events") # hed_strings = get_assembled_strings(input_data, hed_schema=hed_schema, expand_defs=False) # onset_man = HedContextManager(hed_strings) -# definitions = input_data.get_definitions().gathered_defs -# var_type = HedTypeValues(onset_man, hed_schema, definitions) +# type_defs = input_data.get_definitions().gathered_defs +# var_type = HedTypes(onset_man, hed_schema, type_defs) # df = var_type.get_type_factors() # summary = var_type.get_summary() # df.to_csv("D:/wh_conditionslong.csv", sep='\t', index=False) diff --git a/hed/tools/analysis/temporal_event.py b/hed/tools/analysis/temporal_event.py index 876fee6ba..57563ffab 100644 --- a/hed/tools/analysis/temporal_event.py +++ b/hed/tools/analysis/temporal_event.py @@ -2,7 +2,7 @@ class TemporalEvent: - """ Represents an event process. + """ Represents an event process with starting and ending. Note: the contents must have a De """ diff --git a/hed/tools/bids/bids_file_group.py b/hed/tools/bids/bids_file_group.py index 44f3f1a21..88fcc04c4 100644 --- a/hed/tools/bids/bids_file_group.py +++ b/hed/tools/bids/bids_file_group.py @@ -118,7 +118,7 @@ def validate_sidecars(self, hed_schema, extra_def_dicts=None, check_for_warnings Parameters: hed_schema (HedSchema): HED schema for validation. - extra_def_dicts (DefinitionDict): Extra definitions + extra_def_dicts (DefinitionDict): Extra type_defs check_for_warnings (bool): If True, include warnings in the check. Returns: @@ -141,7 +141,7 @@ def validate_datafiles(self, hed_schema, extra_def_dicts=None, check_for_warning Parameters: hed_schema (HedSchema): Schema to apply to the validation. - extra_def_dicts (DefinitionDict): Extra definitions that come from outside. + extra_def_dicts (DefinitionDict): Extra type_defs that come from outside. check_for_warnings (bool): If True, include warnings in the check. keep_contents (bool): If True, the underlying data files are read and their contents retained. diff --git a/hed/tools/remodeling/operations/summarize_definitions_op.py b/hed/tools/remodeling/operations/summarize_definitions_op.py index 5a1e21804..28b0c6e55 100644 --- a/hed/tools/remodeling/operations/summarize_definitions_op.py +++ b/hed/tools/remodeling/operations/summarize_definitions_op.py @@ -1,4 +1,4 @@ -""" Summarize the definitions in the dataset. """ +""" Summarize the type_defs in the dataset. """ from hed import TabularInput from hed.tools.remodeling.operations.base_op import BaseOp @@ -7,7 +7,7 @@ class SummarizeDefinitionsOp(BaseOp): - """ Summarize the definitions in the dataset. + """ Summarize the type_defs in the dataset. Required remodeling parameters: - **summary_name** (*str*): The name of the summary. @@ -28,7 +28,7 @@ class SummarizeDefinitionsOp(BaseOp): } } - SUMMARY_TYPE = 'definitions' + SUMMARY_TYPE = 'type_defs' def __init__(self, parameters): """ Constructor for the summarize column values operation. @@ -49,7 +49,7 @@ def __init__(self, parameters): self.append_timecode = parameters.get('append_timecode', False) def do_op(self, dispatcher, df, name, sidecar=None): - """ Create summaries of definitions + """ Create summaries of type_defs Parameters: dispatcher (Dispatcher): Manages the operation I/O. @@ -135,7 +135,7 @@ def merge_all_info(self): """ Create an Object containing the definition summary. Returns: - Object - the overall summary object for definitions. + Object - the overall summary object for type_defs. """ return self.def_gatherer diff --git a/hed/tools/remodeling/operations/summarize_hed_tags_op.py b/hed/tools/remodeling/operations/summarize_hed_tags_op.py index 0fcec5411..4d52583ae 100644 --- a/hed/tools/remodeling/operations/summarize_hed_tags_op.py +++ b/hed/tools/remodeling/operations/summarize_hed_tags_op.py @@ -114,7 +114,7 @@ def update_summary(self, new_info): hed_strings, definitions = get_assembled(input_data, sidecar, new_info['schema'], extra_def_dicts=None, join_columns=True, shrink_defs=False, expand_defs=True) - # definitions = input_data.get_definitions().gathered_defs + # type_defs = input_data.get_definitions().gathered_defs for hed in hed_strings: counts.update_event_counts(hed, new_info['name']) self.summary_dict[new_info["name"]] = counts diff --git a/hed/tools/remodeling/operations/summarize_hed_type_op.py b/hed/tools/remodeling/operations/summarize_hed_type_op.py index 04c1ad89b..80926c014 100644 --- a/hed/tools/remodeling/operations/summarize_hed_type_op.py +++ b/hed/tools/remodeling/operations/summarize_hed_type_op.py @@ -3,7 +3,7 @@ from hed.models.tabular_input import TabularInput from hed.models.sidecar import Sidecar from hed.models.df_util import get_assembled -from hed.tools.analysis.hed_type_values import HedTypeValues +from hed.tools.analysis.hed_types import HedTypes from hed.tools.analysis.hed_type_counts import HedTypeCounts from hed.tools.analysis.hed_context_manager import HedContextManager from hed.tools.remodeling.operations.base_op import BaseOp @@ -107,11 +107,11 @@ def update_summary(self, new_info): hed_strings, definitions = get_assembled(input_data, sidecar, new_info['schema'], extra_def_dicts=None, join_columns=True, expand_defs=False) context_manager = HedContextManager(hed_strings, new_info['schema']) - type_values = HedTypeValues(context_manager, definitions, new_info['name'], type_tag=self.type_tag) + type_values = HedTypes(context_manager, definitions, new_info['name'], type_tag=self.type_tag) counts = HedTypeCounts(new_info['name'], self.type_tag) counts.update_summary(type_values.get_summary(), type_values.total_events, new_info['name']) - counts.add_descriptions(type_values.definitions) + counts.add_descriptions(type_values.type_defs) self.summary_dict[new_info["name"]] = counts def get_details_dict(self, counts): diff --git a/hed/validator/def_validator.py b/hed/validator/def_validator.py index fcafcf87b..c615f6edc 100644 --- a/hed/validator/def_validator.py +++ b/hed/validator/def_validator.py @@ -10,10 +10,10 @@ class DefValidator(DefinitionDict): """ def __init__(self, def_dicts=None, hed_schema=None): - """ Initialize for definitions in hed strings. + """ Initialize for type_defs in hed strings. Parameters: - def_dicts (list or DefinitionDict or str): DefinitionDicts containing the definitions to pass to baseclass + def_dicts (list or DefinitionDict or str): DefinitionDicts containing the type_defs to pass to baseclass hed_schema(HedSchema or None): Required if passing strings or lists of strings, unused otherwise. """ super().__init__(def_dicts, hed_schema=hed_schema) diff --git a/hed/validator/hed_validator.py b/hed/validator/hed_validator.py index 6ce937454..84cfbb16f 100644 --- a/hed/validator/hed_validator.py +++ b/hed/validator/hed_validator.py @@ -25,7 +25,7 @@ def __init__(self, hed_schema, def_dicts=None, run_full_onset_checks=True, defin hed_schema (HedSchema or HedSchemaGroup): HedSchema object to use for validation. def_dicts(DefinitionDict or list or dict): the def dicts to use for validation run_full_onset_checks(bool): If True, check for matching onset/offset tags - definitions_allowed(bool): If False, flag definitions found as errors + definitions_allowed(bool): If False, flag type_defs found as errors """ super().__init__() self._tag_validator = None diff --git a/hed/validator/sidecar_validator.py b/hed/validator/sidecar_validator.py index 9e6f222fd..becbcd109 100644 --- a/hed/validator/sidecar_validator.py +++ b/hed/validator/sidecar_validator.py @@ -10,7 +10,7 @@ from hed.errors.error_reporter import check_for_any_errors -# todo: Add/improve validation for definitions being in known columns(right now it just assumes they aren't) +# todo: Add/improve validation for type_defs being in known columns(right now it just assumes they aren't) class SidecarValidator: reserved_column_names = ["HED"] reserved_category_values = ["n/a"] @@ -255,7 +255,7 @@ def _validate_pound_sign_count(self, hed_string, column_type): presence of definition tags. """ - # Make a copy without definitions to check placeholder count. + # Make a copy without type_defs to check placeholder count. expected_count, error_type = ColumnMetadata.expected_pound_sign_count(column_type) hed_string_copy = copy.deepcopy(hed_string) hed_string_copy.remove_definitions() diff --git a/hed/validator/spreadsheet_validator.py b/hed/validator/spreadsheet_validator.py index 025aa54d4..cdec262d1 100644 --- a/hed/validator/spreadsheet_validator.py +++ b/hed/validator/spreadsheet_validator.py @@ -27,7 +27,7 @@ def validate(self, data, def_dicts=None, name=None, error_handler=None): Parameters: data (BaseInput or pd.DataFrame): Input data to be validated. If a dataframe, it is assumed to be assembled already. - def_dicts(list of DefDict or DefDict): all definitions to use for validation + def_dicts(list of DefDict or DefDict): all type_defs to use for validation name(str): The name to report errors from this file as error_handler (ErrorHandler): Error context to use. Creates a new one if None Returns: diff --git a/hed/validator/tag_validator.py b/hed/validator/tag_validator.py index 586d823da..456d20b7f 100644 --- a/hed/validator/tag_validator.py +++ b/hed/validator/tag_validator.py @@ -110,7 +110,7 @@ def run_tag_level_validators(self, original_tag_list, is_top_level, is_group): Notes: - This is for the top-level, all groups, and nested groups. - - This can contain definitions, Onset, etc tags. + - This can contain type_defs, Onset, etc tags. """ validation_issues = [] @@ -444,7 +444,7 @@ def check_tag_level_issue(self, original_tag_list, is_top_level, is_group): list: Validation issues. Each issue is a dictionary. Notes: - - Top-level groups can contain definitions, Onset, etc tags. + - Top-level groups can contain type_defs, Onset, etc tags. """ validation_issues = [] top_level_tags = [tag for tag in original_tag_list if diff --git a/spec_tests/test_errors.py b/spec_tests/test_errors.py index ac817fa81..4a9b60ab5 100644 --- a/spec_tests/test_errors.py +++ b/spec_tests/test_errors.py @@ -100,7 +100,7 @@ def run_single_test(self, test_file): error_handler = ErrorHandler(check_for_warnings) if schema: schema = load_schema_version(schema) - definitions = info['definitions'] + definitions = info['type_defs'] def_dict = DefinitionDict(definitions, schema) self.assertFalse(def_dict.issues) else: diff --git a/tests/models/test_base_input.py b/tests/models/test_base_input.py index 5f8b2bbab..447aaae95 100644 --- a/tests/models/test_base_input.py +++ b/tests/models/test_base_input.py @@ -51,7 +51,7 @@ def tearDownClass(cls): def test_gathered_defs(self): # todo: probably remove this test? - # todo: add unit tests for definitions in tsv file + # todo: add unit tests for type_defs in tsv file defs = DefinitionDict.get_as_strings(self.tabular_file._sidecar.extract_definitions(hed_schema=self.hed_schema)) expected_defs = { 'jsonfiledef': '(Acceleration/#,Item/JsonDef1)', diff --git a/tests/models/test_spreadsheet_input.py b/tests/models/test_spreadsheet_input.py index eeee6bc8d..bf2e98b98 100644 --- a/tests/models/test_spreadsheet_input.py +++ b/tests/models/test_spreadsheet_input.py @@ -159,7 +159,7 @@ def test_no_column_header_and_convert(self): self.assertTrue(hed_input._dataframe.equals(hed_input_long._dataframe)) def test_convert_short_long_with_definitions(self): - # Verify behavior works as expected even if definitions are present + # Verify behavior works as expected even if type_defs are present events_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/model_tests/no_column_header_definition.tsv') hed_input = SpreadsheetInput(events_path, has_column_names=False, tag_columns=[0, 1]) diff --git a/tests/tools/analysis/test_analysis_util_assemble_hed.py b/tests/tools/analysis/test_analysis_util_assemble_hed.py index 75d143659..4b88f1bbe 100644 --- a/tests/tools/analysis/test_analysis_util_assemble_hed.py +++ b/tests/tools/analysis/test_analysis_util_assemble_hed.py @@ -39,7 +39,7 @@ def test_assemble_hed_included_no_expand(self): self.assertNotEqual(first_str1.find('Def/'), -1, "assemble_hed with no def expand has Def tags") self.assertEqual(first_str1.find('Def-expand'), -1, "assemble_hed with no def expand does not have Def-expand tags") - self.assertIsInstance(dict1.defs, dict, "hed_assemble returns a dictionary of definitions") + self.assertIsInstance(dict1.defs, dict, "hed_assemble returns a dictionary of type_defs") self.assertEqual(len(dict1.defs), 17, "hed_assemble definition dictionary has the right number of elements.") def test_assemble_hed_included_expand(self): @@ -74,7 +74,7 @@ def test_assemble_hed_no_included_no_expand(self): self.assertNotEqual(first_str1.find('Def/'), -1, "assemble_hed with no def expand has Def tags") self.assertEqual(first_str1.find('Def-expand'), -1, "assemble_hed with no def expand does not have Def-expand tags") - self.assertIsInstance(dict1, DefinitionDict, "hed_assemble returns a dictionary of definitions") + self.assertIsInstance(dict1, DefinitionDict, "hed_assemble returns a dictionary of type_defs") self.assertEqual(len(dict1.defs), 17, "hed_assemble definition dictionary has the right number of elements.") def test_assemble_hed_no_included_expand(self): @@ -95,7 +95,7 @@ def test_assemble_hed_bad_column_no_expand(self): self.assertEqual(first_str2.find('Def-expand/'), -1, "assemble_hed with def expand has Def-expand tags") def test_search_strings(self): - hed_strings, dict1 = df_util.get_assembled(self.input_data, self.sidecar1, self.schema, extra_def_dicts=None, + hed_strings, dict1 = df_util.get_assembled(self.input_data, self.sidecar1, self.schema, extra_def_dicts=None, join_columns=True, shrink_defs=False, expand_defs=True) queries1 = ["sensory-event"] query_names1 = ["sensory"] diff --git a/tests/tools/analysis/test_analysis_util_convert.py b/tests/tools/analysis/test_analysis_util_convert.py index 7150b8b58..d63caca31 100644 --- a/tests/tools/analysis/test_analysis_util_convert.py +++ b/tests/tools/analysis/test_analysis_util_convert.py @@ -1,8 +1,7 @@ import os import unittest -from pandas import DataFrame from hed import schema as hedschema -from hed.models import HedTag, HedString, HedGroup +from hed.models import HedTag, HedString from hed.tools.analysis.analysis_util import hed_to_str @@ -44,7 +43,6 @@ def test_hed_to_str_other(self): hed_to_str(dict1) self.assertEqual(context.exception.args[0], "ContentsWrongClass") - def test_hed_to_str_obj(self): str_obj1 = HedString('Label/Cond1', self.hed_schema) str1 = hed_to_str(str_obj1) @@ -106,5 +104,6 @@ def test_hed_to_str_remove_parentheses(self): self.assertIsInstance(str3, str) self.assertEqual(str3, 'Label/Cond1') + if __name__ == '__main__': unittest.main() diff --git a/tests/tools/analysis/test_analysis_util_get_assembled_strings.py b/tests/tools/analysis/test_analysis_util_get_assembled_strings.py index 036b4c938..6ede945f0 100644 --- a/tests/tools/analysis/test_analysis_util_get_assembled_strings.py +++ b/tests/tools/analysis/test_analysis_util_get_assembled_strings.py @@ -1,9 +1,7 @@ import os import unittest from hed import schema as hedschema -from hed.models.hed_string import HedString from hed.models.tabular_input import TabularInput -# from hed.tools.analysis.analysis_util import get_assembled_strings # noinspection PyBroadException diff --git a/tests/tools/analysis/test_annotation_util.py b/tests/tools/analysis/test_annotation_util.py index f54dd1dc8..247c7680d 100644 --- a/tests/tools/analysis/test_annotation_util.py +++ b/tests/tools/analysis/test_annotation_util.py @@ -12,7 +12,6 @@ generate_sidecar_entry from hed.tools.analysis.tabular_summary import TabularSummary from hed.tools.util.io_util import get_file_list -from hed.validator import HedValidator # noinspection PyBroadException diff --git a/tests/tools/analysis/test_event_manager.py b/tests/tools/analysis/test_event_manager.py index 5e7937f7e..42cb88fc8 100644 --- a/tests/tools/analysis/test_event_manager.py +++ b/tests/tools/analysis/test_event_manager.py @@ -2,7 +2,6 @@ import unittest from hed.models.sidecar import Sidecar -from hed.models.df_util import get_assembled from hed.models.tabular_input import TabularInput from hed.schema.hed_schema_io import load_schema_version from hed.tools.analysis.event_manager import EventManager diff --git a/tests/tools/analysis/test_hed_tag_counts.py b/tests/tools/analysis/test_hed_tag_counts.py index 5f2eebc27..2821abd55 100644 --- a/tests/tools/analysis/test_hed_tag_counts.py +++ b/tests/tools/analysis/test_hed_tag_counts.py @@ -78,7 +78,7 @@ def test_organize_tags(self): hed_strings, definitions = get_assembled(self.input_data, self.sidecar1, self.hed_schema, extra_def_dicts=None, join_columns=True, shrink_defs=False, expand_defs=True) - # definitions = input_data.get_definitions().gathered_defs + # type_defs = input_data.get_definitions().gathered_defs for hed in hed_strings: counts.update_event_counts(hed, 'run-1') self.assertIsInstance(counts.tag_dict, dict) diff --git a/tests/tools/analysis/test_hed_type_counts.py b/tests/tools/analysis/test_hed_type_counts.py index c4fd22cab..bebb7446b 100644 --- a/tests/tools/analysis/test_hed_type_counts.py +++ b/tests/tools/analysis/test_hed_type_counts.py @@ -4,7 +4,7 @@ from hed.models.tabular_input import TabularInput from hed.schema.hed_schema_io import load_schema_version from hed.tools.analysis.hed_context_manager import HedContextManager -from hed.tools.analysis.hed_type_values import HedTypeValues +from hed.tools.analysis.hed_types import HedTypes from hed.tools.analysis.hed_type_counts import HedTypeCount, HedTypeCounts from hed.models.df_util import get_assembled @@ -23,7 +23,7 @@ def setUpClass(cls): input_data = TabularInput(events_path, sidecar=sidecar1, name="face_sub1_events") hed_strings1, definitions1 = get_assembled(input_data, sidecar1, schema, extra_def_dicts=None, join_columns=True, shrink_defs=True, expand_defs=False) - cls.var_type1 = HedTypeValues(HedContextManager(hed_strings1, schema), definitions1, 'run-01', + cls.var_type1 = HedTypes(HedContextManager(hed_strings1, schema), definitions1, 'run-01', type_tag='condition-variable') def test_type_count_one_level(self): @@ -62,7 +62,7 @@ def test_get_summary_multiple_levels(self): self.assertEqual(face_type.total_events, 400) self.assertEqual(face_type.events, 104) self.assertEqual(len(face_type.files), 2) - counts.add_descriptions(self.var_type1.definitions) + counts.add_descriptions(self.var_type1.type_defs) self.assertTrue(face_type.level_counts['famous-face-cond']['description']) diff --git a/tests/tools/analysis/test_hed_type_definitions.py b/tests/tools/analysis/test_hed_type_defs.py similarity index 86% rename from tests/tools/analysis/test_hed_type_definitions.py rename to tests/tools/analysis/test_hed_type_defs.py index 7388d1228..5399cd7cf 100644 --- a/tests/tools/analysis/test_hed_type_definitions.py +++ b/tests/tools/analysis/test_hed_type_defs.py @@ -5,7 +5,7 @@ from hed.models.hed_tag import HedTag from hed.models.sidecar import Sidecar from hed.models.tabular_input import TabularInput -from hed.tools.analysis.hed_type_definitions import HedTypeDefinitions +from hed.tools.analysis.hed_type_defs import HedTypeDefs from hed.schema.hed_schema_io import load_schema_version @@ -24,12 +24,15 @@ def setUpClass(cls): HedString('(Def/Cond3/4.3, Onset)', hed_schema=schema), HedString('Arm, Leg, Condition-variable/Fast', hed_schema=schema)] def1 = HedString('(Condition-variable/Var1, Circle, Square, Description/This is def1)', hed_schema=schema) - def2 = HedString('(condition-variable/Var2, Condition-variable/Apple, Triangle, Sphere)', hed_schema=schema) + def2 = HedString('(condition-variable/Var2, Condition-variable/Apple, Triangle, Sphere)', + hed_schema=schema) def3 = HedString('(Organizational-property/Condition-variable/Var3, Physical-length/#, Ellipse, Cross)', hed_schema=schema) def4 = HedString('(Condition-variable, Apple, Banana, Description/This is def4)', hed_schema=schema) - def5 = HedString('(Condition-variable/Lumber, Apple, Banana, Description/This is def5)', hed_schema=schema) - def6 = HedString('(Condition-variable/Lumber, Label/#, Apple, Banana, Description/This is def6)', hed_schema=schema) + def5 = HedString('(Condition-variable/Lumber, Apple, Banana, Description/This is def5)', + hed_schema=schema) + def6 = HedString('(Condition-variable/Lumber, Label/#, Apple, Banana, Description/This is def6)', + hed_schema=schema) cls.definitions1 = {'Cond1': DefinitionEntry('Cond1', def1, False, None), 'Cond2': DefinitionEntry('Cond2', def2, False, None), 'Cond3': DefinitionEntry('Cond3', def3, True, None), @@ -48,24 +51,24 @@ def setUpClass(cls): cls.sidecar1 = sidecar1 def test_constructor(self): - def_man = HedTypeDefinitions(self.definitions1, self.schema) - self.assertIsInstance(def_man, HedTypeDefinitions, + def_man = HedTypeDefs(self.definitions1) + self.assertIsInstance(def_man, HedTypeDefs, "Constructor should create a HedTypeDefinitions directly from a dict") self.assertEqual(len(def_man.def_map), 6, "Constructor condition_map should have the right length") self.assertEqual(len(def_man.def_map), len(def_man.definitions), - "Constructor condition_map should be the same length as the definitions dictionary") + "Constructor condition_map should be the same length as the type_defs dictionary") def test_constructor_from_sidecar(self): definitions = self.sidecar1.get_def_dict(self.schema) - def_man = HedTypeDefinitions(definitions, self.schema) - self.assertIsInstance(def_man, HedTypeDefinitions, + def_man = HedTypeDefs(definitions) + self.assertIsInstance(def_man, HedTypeDefs, "Constructor should create a HedTypeDefinitions from a tabular input") self.assertEqual(len(def_man.def_map), 17, "Constructor condition_map should have the right length") self.assertEqual(len(def_man.def_map), len(def_man.definitions), - "Constructor condition_map should be the same length as the definitions dictionary") + "Constructor condition_map should be the same length as the type_defs dictionary") def test_get_vars(self): - def_man = HedTypeDefinitions(self.definitions1, self.schema) + def_man = HedTypeDefs(self.definitions1) item1 = HedString("Sensory-event,((Red,Blue)),", self.schema) vars1 = def_man.get_type_values(item1) self.assertFalse(vars1, "get_type_values should return None if no condition type_variables") @@ -78,7 +81,7 @@ def test_get_vars(self): self.assertEqual(len(vars3), 5, "get_type_values should return multiple condition type_variables") def test_get_def_names(self): - def_man = HedTypeDefinitions(self.definitions1, self.schema) + def_man = HedTypeDefs(self.definitions1) a = def_man.get_def_names(HedTag('Def/Cond3/4', hed_schema=self.schema)) self.assertEqual(len(a), 1, "get_def_names returns 1 item if single tag") self.assertEqual(a[0], 'cond3', "get_def_names returns the correct item if single tag") @@ -96,16 +99,16 @@ def test_get_def_names(self): self.assertFalse(e, "get_def_names returns no items if no defs") def test_split_name(self): - name1, val1 = HedTypeDefinitions.split_name('') + name1, val1 = HedTypeDefs.split_name('') self.assertIsNone(name1, "split_name should return None split name for empty name") self.assertIsNone(val1, "split_name should return None split value for empty name") - name2, val2 = HedTypeDefinitions.split_name('lumber') + name2, val2 = HedTypeDefs.split_name('lumber') self.assertEqual(name2, 'lumber', 'split_name should return name if no split value') self.assertEqual(val2, '', 'split_name should return empty string if no split value') - name3, val3 = HedTypeDefinitions.split_name('Lumber/5.23', lowercase=False) + name3, val3 = HedTypeDefs.split_name('Lumber/5.23', lowercase=False) self.assertEqual(name3, 'Lumber', 'split_name should return name if split value') self.assertEqual(val3, '5.23', 'split_name should return value as string if split value') - name4, val4 = HedTypeDefinitions.split_name('Lumber/5.23') + name4, val4 = HedTypeDefs.split_name('Lumber/5.23') self.assertEqual(name4, 'lumber', 'split_name should return name if split value') self.assertEqual(val4, '5.23', 'split_name should return value as string if split value') diff --git a/tests/tools/analysis/test_hed_type_factors.py b/tests/tools/analysis/test_hed_type_factors.py index 378617a12..a43349513 100644 --- a/tests/tools/analysis/test_hed_type_factors.py +++ b/tests/tools/analysis/test_hed_type_factors.py @@ -8,7 +8,7 @@ from hed.models.tabular_input import TabularInput from hed.schema.hed_schema_io import load_schema_version from hed.tools.analysis.hed_context_manager import HedContextManager -from hed.tools.analysis.hed_type_values import HedTypeValues +from hed.tools.analysis.hed_types import HedTypes from hed.tools.analysis.hed_type_factors import HedTypeFactors from hed.models.df_util import get_assembled @@ -94,7 +94,7 @@ def test_constructor_multiple_values(self): var_manager = HedTypeValues(HedContextManager(self.test_strings2, self.schema), self.defs, 'run-01') self.assertIsInstance(var_manager, HedTypeValues, "Constructor should create a HedTypeManager from strings") - self.assertEqual(len(var_manager._type_value_map), 3, + self.assertEqual(len(var_manager._type_map), 3, "Constructor should have right number of type_variables if multiple") var_fact1 = var_manager.get_type_value_factors('var2') self.assertIsInstance(var_fact1, HedTypeFactors) @@ -118,7 +118,7 @@ def test_variable_summary(self): var_manager = HedTypeValues(HedContextManager(self.test_strings2, self.schema), self.defs, 'run-01') self.assertIsInstance(var_manager, HedTypeValues, "Constructor should create a HedTypeManager from strings") - self.assertEqual(len(var_manager._type_value_map), 3, + self.assertEqual(len(var_manager._type_map), 3, "Constructor should have right number of type_variables if multiple") for variable in var_manager.get_type_value_names(): var_sum = var_manager.get_type_value_factors(variable) @@ -129,7 +129,7 @@ def test_get_variable_factors(self): var_manager = HedTypeValues(HedContextManager(self.test_strings2, self.schema), self.defs, 'run-01') self.assertIsInstance(var_manager, HedTypeValues, "Constructor should create a HedTypeManager from strings") - self.assertEqual(len(var_manager._type_value_map), 3, + self.assertEqual(len(var_manager._type_map), 3, "Constructor should have right number of type_variables if multiple") for variable in var_manager.get_type_value_names(): @@ -139,11 +139,11 @@ def test_get_variable_factors(self): self.assertIsInstance(factors, pd.DataFrame, "get_factors contains dataframe.") self.assertEqual(len(factors), var_sum.number_elements, "get_factors has factors of same length as number of elements") - if not var_manager._type_value_map[variable].levels: + if not var_manager._type_map[variable].levels: self.assertEqual(len(factors.columns), 1) else: self.assertEqual(len(factors.columns), summary["levels"], 'get_factors has factors levels') - self.assertEqual(len(factors.columns), len(var_manager._type_value_map[variable].levels)) + self.assertEqual(len(factors.columns), len(var_manager._type_map[variable].levels)) def test_count_events(self): list1 = [0, 2, 6, 1, 2, 0, 0] diff --git a/tests/tools/analysis/test_hed_type_values.py b/tests/tools/analysis/test_hed_types.py similarity index 69% rename from tests/tools/analysis/test_hed_type_values.py rename to tests/tools/analysis/test_hed_types.py index d8428e23c..9113988c0 100644 --- a/tests/tools/analysis/test_hed_type_values.py +++ b/tests/tools/analysis/test_hed_types.py @@ -2,14 +2,14 @@ import unittest from pandas import DataFrame from hed.errors.exceptions import HedFileError -from hed.models import DefinitionEntry +from hed.models import DefinitionDict from hed.models.hed_string import HedString from hed.models.hed_tag import HedTag from hed.models.sidecar import Sidecar from hed.models.tabular_input import TabularInput from hed.schema.hed_schema_io import load_schema_version -from hed.tools.analysis.hed_context_manager import HedContextManager -from hed.tools.analysis.hed_type_values import HedTypeValues +from hed.tools.analysis.event_manager import EventManager +from hed.tools.analysis.hed_types import HedTypes from hed.models.df_util import get_assembled @@ -18,14 +18,35 @@ class Test(unittest.TestCase): @classmethod def setUpClass(cls): schema = load_schema_version(xml_version="8.1.0") - cls.test_strings1 = ["Sensory-event,(Def/Cond1,(Red, Blue, Condition-variable/Trouble),Onset)," - "(Def/Cond2,Onset),Green,Yellow, Def/Cond5, Def/Cond6/4", - '(Def/Cond1, Offset)', - 'White, Black, Condition-variable/Wonder, Condition-variable/Fast', - '', - '(Def/Cond2, Onset)', - '(Def/Cond3/4.3, Onset)', - 'Arm, Leg, Condition-variable/Fast'] + # Set up the definition dictionary + defs = [HedString('(Definition/Cond1, (Condition-variable/Var1, Circle, Square))', hed_schema=schema), + HedString('(Definition/Cond2, (condition-variable/Var2, Condition-variable/Apple, Triangle, Sphere))', + hed_schema=schema), + HedString('(Definition/Cond3, (Organizational-property/Condition-variable/Var3, Physical-length/#, Ellipse, Cross))', + hed_schema=schema), + HedString('(Definition/Cond4, (Condition-variable, Apple, Banana))', hed_schema=schema), + HedString('(Definition/Cond5, (Condition-variable/Lumber, Apple, Banana))', hed_schema=schema), + HedString('(Definition/Cond6/#, (Condition-variable/Lumber, Label/#, Apple, Banana))', + hed_schema=schema)] + def_dict = DefinitionDict() + for value in defs: + def_dict.check_for_definitions(value) + + test_strings1 = ["Sensory-event,(Def/Cond1,(Red, Blue, Condition-variable/Trouble),Onset)", + "(Def/Cond2,Onset),Green,Yellow, Def/Cond5, Def/Cond6/4", + "(Def/Cond1, Offset)", + "White, Black, Condition-variable/Wonder, Condition-variable/Fast", + "", + "(Def/Cond2, Onset)", + "(Def/Cond3/4.3, Onset)", + "Arm, Leg, Condition-variable/Fast"] + test_onsets1 = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0] + df1 = DataFrame(test_onsets1, columns=['onset']) + df1['HED'] = test_strings1 + input_data = TabularInput(df1) + event_man1 = EventManager(input_data, schema, extra_defs=def_dict) + event_man1.def_dict = def_dict + cls.event_man1 = event_man1 cls.test_strings2 = ["Def/Cond2, (Def/Cond6/4, Onset), (Def/Cond6/7.8, Onset), Def/Cond6/Alpha", "Yellow", "Def/Cond2, (Def/Cond6/4, Onset)", @@ -33,21 +54,6 @@ def setUpClass(cls): "Def/Cond2, Def/Cond6/4"] cls.test_strings3 = ['(Def/Cond3, Offset)'] - def1 = HedString('(Condition-variable/Var1, Circle, Square)', hed_schema=schema) - def2 = HedString('(condition-variable/Var2, Condition-variable/Apple, Triangle, Sphere)', hed_schema=schema) - def3 = HedString('(Organizational-property/Condition-variable/Var3, Physical-length/#, Ellipse, Cross)', - hed_schema=schema) - def4 = HedString('(Condition-variable, Apple, Banana)', hed_schema=schema) - def5 = HedString('(Condition-variable/Lumber, Apple, Banana)', hed_schema=schema) - def6 = HedString('(Condition-variable/Lumber, Label/#, Apple, Banana)', hed_schema=schema) - cls.defs = {'Cond1': DefinitionEntry('Cond1', def1, False, None), - 'Cond2': DefinitionEntry('Cond2', def2, False, None), - 'Cond3': DefinitionEntry('Cond3', def3, True, None), - 'Cond4': DefinitionEntry('Cond4', def4, False, None), - 'Cond5': DefinitionEntry('Cond5', def5, False, None), - 'Cond6': DefinitionEntry('Cond6', def6, True, None) - } - bids_root_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../data/bids_tests/eeg_ds003645s_hed')) cls.events_path = os.path.realpath(os.path.join(bids_root_path, @@ -56,29 +62,23 @@ def setUpClass(cls): cls.schema = schema def test_constructor(self): - strings1 = [HedString(hed, hed_schema=self.schema) for hed in self.test_strings1] - con_man = HedContextManager(strings1, hed_schema=self.schema) - type_var = HedTypeValues(con_man, self.defs, 'run-01') - self.assertIsInstance(type_var, HedTypeValues, - "Constructor should create a HedTypeManager from strings") - self.assertEqual(len(type_var._type_value_map), 8, + type_var = HedTypes(self.event_man1, 'test-it') + self.assertIsInstance(type_var, HedTypes,"Constructor should create a HedTypes from an event manager") + self.assertEqual(len(type_var._type_map), 8, "Constructor ConditionVariables should have the right length") def test_constructor_from_tabular_input(self): sidecar1 = Sidecar(self.sidecar_path, name='face_sub1_json') input_data = TabularInput(self.events_path, sidecar=sidecar1, name="face_sub1_events") - test_strings1, definitions = get_assembled(input_data, sidecar1, self.schema, extra_def_dicts=None, - join_columns=True, shrink_defs=True, expand_defs=False) - var_manager = HedTypeValues(HedContextManager(test_strings1, self.schema), definitions, 'run-01') - self.assertIsInstance(var_manager, HedTypeValues, - "Constructor should create a HedTypeManager from a tabular input") + event_man = EventManager(input_data, self.schema) + var_man = HedTypes(event_man, 'face') + self.assertIsInstance(var_man, HedTypes,"Constructor should create a HedTypeManager from a tabular input") def test_constructor_variable_caps(self): sidecar1 = Sidecar(self.sidecar_path, name='face_sub1_json') input_data = TabularInput(self.events_path, sidecar1, name="face_sub1_events") - test_strings1, definitions = get_assembled(input_data, sidecar1, self.schema, extra_def_dicts=None, - join_columns=True, shrink_defs=True, expand_defs=False) - var_manager = HedTypeValues(HedContextManager(test_strings1, self.schema), + event_man = EventManager(input_data, self.schema) + var_manager = HedTypes(HedContextManager(test_strings1, self.schema), definitions, 'run-01', type_tag="Condition-variable") self.assertIsInstance(var_manager, HedTypeValues, "Constructor should create a HedTypeManager variable caps") @@ -98,7 +98,7 @@ def test_constructor_multiple_values(self): var_manager = HedTypeValues(HedContextManager(hed_strings, self.schema), self.defs, 'run-01') self.assertIsInstance(var_manager, HedTypeValues, "Constructor should create a HedTypeManager from strings") - self.assertEqual(len(var_manager._type_value_map), 3, + self.assertEqual(len(var_manager._type_map), 3, "Constructor should have right number of type_variables if multiple") def test_constructor_unmatched(self): @@ -146,7 +146,7 @@ def test_summarize_variables(self): def test_extract_definition_variables(self): hed_strings = [HedString(hed, self.schema) for hed in self.test_strings1] var_manager = HedTypeValues(HedContextManager(hed_strings, self.schema), self.defs, 'run-01') - var_levels = var_manager._type_value_map['var3'].levels + var_levels = var_manager._type_map['var3'].levels self.assertNotIn('cond3/7', var_levels, "_extract_definition_variables before extraction def/cond3/7 not in levels") tag = HedTag("Def/Cond3/7", hed_schema=self.schema) diff --git a/tests/tools/analysis/test_hed_types_temp.py b/tests/tools/analysis/test_hed_types_temp.py new file mode 100644 index 000000000..f387dd598 --- /dev/null +++ b/tests/tools/analysis/test_hed_types_temp.py @@ -0,0 +1,52 @@ +import os +import unittest +from pandas import DataFrame +from hed.errors.exceptions import HedFileError +from hed.models import DefinitionDict +from hed.models.hed_string import HedString +from hed.models.hed_tag import HedTag +from hed.models.sidecar import Sidecar +from hed.models.tabular_input import TabularInput +from hed.schema.hed_schema_io import load_schema_version +from hed.tools.analysis.event_manager import EventManager +from hed.tools.analysis.hed_types import HedTypes +from hed.models.df_util import get_assembled + + +class Test(unittest.TestCase): + + def test_1(self): + schema = load_schema_version(xml_version="8.1.0") + # Set up the definition dictionary + defs = [HedString('(Definition/Cond1, (Condition-variable/Var1, Circle, Square))', hed_schema=schema), + HedString('(Definition/Cond2, (condition-variable/Var2, Condition-variable/Apple, Triangle, Sphere))', + hed_schema=schema), + HedString('(Definition/Cond3, (Organizational-property/Condition-variable/Var3, Physical-length/#, Ellipse, Cross))', + hed_schema=schema), + HedString('(Definition/Cond4, (Condition-variable, Apple, Banana))', hed_schema=schema), + HedString('(Definition/Cond5, (Condition-variable/Lumber, Apple, Banana))', hed_schema=schema), + HedString('(Definition/Cond6/#, (Condition-variable/Lumber, Label/#, Apple, Banana))', + hed_schema=schema)] + def_dict = DefinitionDict() + for value in defs: + def_dict.check_for_definitions(value) + + test_strings1 = ["Sensory-event,(Def/Cond1,(Red, Blue, Condition-variable/Trouble),Onset)", + "(Def/Cond2,Onset),Green,Yellow, Def/Cond5, Def/Cond6/4", + "(Def/Cond1, Offset)", + "White, Black, Condition-variable/Wonder, Condition-variable/Fast", + "", + "(Def/Cond2, Onset)", + "(Def/Cond3/4.3, Onset)", + "Arm, Leg, Condition-variable/Fast"] + test_onsets1 = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0] + df1 = DataFrame(test_onsets1, columns=['onset']) + df1['HED'] = test_strings1 + input_data = TabularInput(df1) + defs = input_data.get_def_dict(schema,extra_def_dicts=def_dict) + self.assertIsInstance(input_data, TabularInput) + print(defs) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/tools/analysis/test_tabular_summary.py b/tests/tools/analysis/test_tabular_summary.py index b983c6f8b..40c7ad8db 100644 --- a/tests/tools/analysis/test_tabular_summary.py +++ b/tests/tools/analysis/test_tabular_summary.py @@ -36,18 +36,25 @@ def test_extract_summary(self): tab1 = TabularSummary() stern_df = get_new_dataframe(self.stern_map_path) tab1.update(stern_df) - sum_info = tab1.get_summary() - new_tab1 = TabularSummary.extract_summary(sum_info) + sum_info1 = tab1.get_summary() + self.assertIsInstance(sum_info1, dict) + self.assertEqual(len(sum_info1['Categorical columns']), 4) + new_tab1 = TabularSummary.extract_summary(sum_info1) + self.assertIsInstance(new_tab1, TabularSummary) tab2 = TabularSummary(value_cols=['letter'], skip_cols=['event_type']) + sum_info2 = tab2.get_summary() + self.assertIsInstance(sum_info2, dict) + new_tab2 = TabularSummary.extract_summary(sum_info2) + self.assertIsInstance(new_tab2, TabularSummary) tabular_info = {} - new_tab = TabularSummary.extract_summary(tabular_info) - self.assertIsInstance(new_tab, TabularSummary) + new_tab3 = TabularSummary.extract_summary(tabular_info) + self.assertIsInstance(new_tab3, TabularSummary) def test_extract_summary_empty(self): tabular_info = {} new_tab = TabularSummary.extract_summary(tabular_info) self.assertIsInstance(new_tab, TabularSummary) - + def test_get_number_unique_values(self): dict1 = TabularSummary() wh_df = get_new_dataframe(self.wh_events_path) @@ -218,7 +225,7 @@ def test_update_summary(self): tab.update(df, name=name) self.assertEqual(tab.total_events, 200) self.assertEqual(tab.total_files, 1) - tab_all.update_summary(tab) + tab_all.update_summary(tab) self.assertEqual(len(files_bids), tab_all.total_files) self.assertEqual(len(files_bids)*200, tab_all.total_events) diff --git a/tests/tools/analysis/test_temporal_event.py b/tests/tools/analysis/test_temporal_event.py index a05545bd6..cff1efa09 100644 --- a/tests/tools/analysis/test_temporal_event.py +++ b/tests/tools/analysis/test_temporal_event.py @@ -3,7 +3,6 @@ from hed.schema.hed_schema_io import load_schema_version from hed.models import HedString, HedGroup, Sidecar, TabularInput -from hed.models.df_util import get_assembled from hed.tools.analysis.temporal_event import TemporalEvent from hed.tools.analysis.event_manager import EventManager diff --git a/tests/validator/test_hed_validator.py b/tests/validator/test_hed_validator.py index 0e3bcdfab..e5338d6d0 100644 --- a/tests/validator/test_hed_validator.py +++ b/tests/validator/test_hed_validator.py @@ -107,7 +107,7 @@ def test_complex_file_validation_invalid(self): def test_complex_file_validation_invalid_definitions_removed(self): # todo: update this/remove - # This verifies definitions are being removed from sidecar strings before being added, or it will produce + # This verifies type_defs are being removed from sidecar strings before being added, or it will produce # extra errors. schema_path = os.path.realpath(os.path.join(os.path.dirname(__file__), '../data/validator_tests/bids_schema.mediawiki')) From cbc429bce50ecd60760783ba50c0170b2e1ca35c Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Sat, 26 Aug 2023 16:55:44 -0500 Subject: [PATCH 2/4] Updated the event manager --- hed/tools/analysis/event_manager.py | 139 ++++++++++++------ hed/tools/analysis/hed_tag_manager.py | 131 ++++------------- hed/tools/analysis/hed_type.py | 12 +- .../operations/factor_hed_tags_op.py | 2 + .../operations/summarize_hed_tags_op.py | 30 ++-- tests/tools/analysis/test_event_manager.py | 19 ++- tests/tools/analysis/test_hed_tag_manager.py | 25 +++- tests/tools/analysis/test_hed_type.py | 11 +- tests/tools/analysis/test_hed_type_defs.py | 51 +++---- .../operations/test_summarize_hed_tags_op.py | 4 +- 10 files changed, 208 insertions(+), 216 deletions(-) diff --git a/hed/tools/analysis/event_manager.py b/hed/tools/analysis/event_manager.py index 4ef0e5b4d..27882b43f 100644 --- a/hed/tools/analysis/event_manager.py +++ b/hed/tools/analysis/event_manager.py @@ -3,6 +3,7 @@ from hed.models import HedString from hed.models.model_constants import DefTagNames from hed.models.df_util import get_assembled +from hed.models.string_util import split_base_tags, split_def_tags from hed.tools.analysis.temporal_event import TemporalEvent from hed.tools.analysis.hed_type_defs import HedTypeDefs @@ -102,19 +103,39 @@ def _extract_temporal_events(self, hed, event_index, onset_dict): # contexts[i] = HedString(",".join(contexts[i]), hed_schema=self.hed_schema) # self.contexts = contexts - def unfold_context(self): - """ Creates an event context for each hed string. + def unfold_context(self, remove_types=[]): + """ Unfolds the event information into hed, base, and contexts either as arrays of str or of HedString. + + Parameters: + remove_types (list): List of types to remove. + replace_defs (bool): If True the def term is replaced by its definition group. Returns: - list of str - list of list of str - list of list of str + list of str or HedString representing the information without the events of temporal extent + list of str or HedString representing the onsets of the events of temporal extent + list of str or HedString representing the ongoing context information. """ - hed = ["" for _ in range(len(self.hed_strings))] + + placeholder = "" + remove_defs = self.find_type_defs(remove_types) + hed = [placeholder for _ in range(len(self.hed_strings))] + new_base = [placeholder for _ in range(len(self.hed_strings))] + new_contexts = [placeholder for _ in range(len(self.hed_strings))] + base, contexts = self.expand_context() for index, item in enumerate(self.hed_strings): - if item: - hed[index] = str(item) + hed[index] = self._process_hed(item, remove_types=remove_types, + remove_defs=remove_defs, remove_group=False) + new_base[index] = self._process_hed(base[index], remove_types=remove_types, + remove_defs=remove_defs, remove_group=True) + new_contexts[index] = self._process_hed(contexts[index], remove_types=remove_types, + remove_defs=remove_defs, remove_group=True) + return hed, new_base, new_contexts # these are each a list of strings + + def expand_context(self): + """ Expands the onset and the ongoing context for additional processing. + + """ base = [[] for _ in range(len(self.hed_strings))] contexts = [[] for _ in range(len(self.hed_strings))] for events in self.event_list: @@ -123,7 +144,33 @@ def unfold_context(self): base[event.start_index].append(this_str) for i in range(event.start_index + 1, event.end_index): contexts[i].append(this_str) - return hed, self.compress_strings(base), self.compress_strings(contexts) # these are each a list of lists of strings + + return self.compress_strings(base), self.compress_strings(contexts) + + def _process_hed(self, hed, remove_types=[], remove_defs=[], remove_group=False): + if not hed: + return "" + # Reconvert even if hed is already a HedString to make sure a copy and expandable. + hed_obj = HedString(str(hed), hed_schema=self.hed_schema, def_dict=self.def_dict) + hed_obj, temp1 = split_base_tags(hed_obj, remove_types, remove_group=False) + if remove_defs: + hed_obj, temp2 = split_def_tags(hed_obj, remove_defs, remove_group=remove_group) + return str(hed_obj) + + def str_list_to_hed(self, str_list): + """ Create a HedString object from a list of strings. + + Parameters: + str_list (list): A list of strings to be concatenated with commas and then converted. + + Returns: + HedString or None: The converted list. + + """ + filtered_list = [item for item in str_list if item != ''] # list of strings + if not filtered_list: # empty lists don't contribute + return None + return HedString(",".join(filtered_list), self.hed_schema, def_dict=self.def_dict) @staticmethod def compress_strings(list_to_compress): @@ -132,44 +179,48 @@ def compress_strings(list_to_compress): if item: result_list[index] = ",".join(item) return result_list - + def find_type_defs(self, types): + """ Return a list of definition names (lower case) that correspond to one of the specified types. + + Parameters: + types (list): List of tags that are treated as types such as 'Condition-variable' + + Returns: + list: List of definition names (lower-case) that correspond to the specified types + + """ def_names = {} + if not types: + return for type_tag in types: type_defs = HedTypeDefs(self.def_dict, type_tag=type_tag) def_names[type_tag] = type_defs.def_map return def_names - - def filter_type(self): - print("to here") - - # def unfold_context(self): - # """ Creates an event context for each hed string. - # - # Returns: - # (tuple): list of hed str, list of list of hed str - # - # """ - # hed = [[] for _ in range(len(self.hed_strings))] - # for index, item in enumerate(self.hed_strings): - # if item: - # hed[index] = [str(item)] - # contexts = [[] for _ in range(len(self.hed_strings))] - # for events in self.event_list: - # for event in events: - # this_str = str(event.contents) - # hed[event.start_index].append(this_str) - # for i in range(event.start_index + 1, event.end_index): - # contexts[i].append(this_str) - # return hed, contexts # these are each a list of lists of strings - - @staticmethod - def fix_list(hed_list, hed_schema, as_string=False): - for index, item in enumerate(hed_list): - if not item: - hed_list[index] = None - elif as_string: - hed_list[index] = ",".join(str(item)) - else: - hed_list[index] = HedString(",".join(str(item)), hed_schema) - return hed_list + + def get_type_defs(self, types): + """ Return a list of definition names (lower case) that correspond to one of the specified types. + + Parameters: + types (list): List of tags that are treated as types such as 'Condition-variable' + + Returns: + list: List of definition names (lower-case) that correspond to the specified types + + """ + def_list = [] + for this_type in types: + type_defs = HedTypeDefs(self.def_dict, type_tag=this_type) + def_list = def_list + list(type_defs.def_map.keys()) + return def_list + + # @staticmethod + # def fix_list(hed_list, hed_schema, as_string=False): + # for index, item in enumerate(hed_list): + # if not item: + # hed_list[index] = None + # elif as_string: + # hed_list[index] = ",".join(str(item)) + # else: + # hed_list[index] = HedString(",".join(str(item)), hed_schema) + # return hed_list diff --git a/hed/tools/analysis/hed_tag_manager.py b/hed/tools/analysis/hed_tag_manager.py index 703aa7749..7849d1e36 100644 --- a/hed/tools/analysis/hed_tag_manager.py +++ b/hed/tools/analysis/hed_tag_manager.py @@ -1,16 +1,12 @@ """ Manager for the HED tags in a tabular file. """ -import pandas as pd -import json -from hed.tools.analysis.hed_type import HedType from hed.models import HedString from hed.models.string_util import split_base_tags, split_def_tags -from hed.tools.analysis.hed_type_defs import HedTypeDefs class HedTagManager: - def __init__(self, event_manager, remove_types=None, include_context=False): + def __init__(self, event_manager, remove_types=[]): """ Create a tag manager for one tabular file. Parameters: @@ -22,114 +18,41 @@ def __init__(self, event_manager, remove_types=None, include_context=False): self.event_manager = event_manager self.remove_types = remove_types - [self.hed_strings, self.base_strings, self.context_strings] = self.event_manager.unfold_context() + self.hed_strings, self.base_strings, self.context_strings = ( + self.event_manager.unfold_context(remove_types=remove_types)) self.type_def_names = self.event_manager.find_type_defs(remove_types) - def get_hed_objs(self, include_context=True): + # def get_hed_objs1(self, include_context=True): + # hed_objs = [None for _ in range(len(self.event_manager.onsets))] + # for index in range(len(hed_objs)): + # hed_list = [self.hed_strings[index], self.base_strings[index]] + # if include_context and self.context_strings[index]: + # hed_list.append('(Event-context, (' + self.context_strings[index] + "))") + # hed_objs[index] = self.event_manager.str_list_to_hed(hed_list) + # return hed_objs + + def get_hed_objs(self, include_context=True, replace_defs=False): hed_objs = [None for _ in range(len(self.event_manager.onsets))] for index in range(len(hed_objs)): - hed_list = self.hed_strings[index] + self.base_strings[index] - if hed_list: - hed = ",".join(hed_list) - else: - hed = "" + hed_list = [self.hed_strings[index], self.base_strings[index]] if include_context and self.context_strings[index]: - hed = hed + ',(Event-context, (' + ",".join(self.context_strings[index]) + "))" - - hed_objs[index] = HedString(hed,self.event_manager.hed_schema, def_dict=self.event_manager.def_dict) + hed_list.append("(Event-context, (" + self.context_strings[index] + "))") + hed_objs[index] = self.event_manager.str_list_to_hed(hed_list) + if replace_defs: + for def_tag in hed_objs[index].find_def_tags(recursive=True, include_groups=0): + hed_objs[index].replace(def_tag, def_tag.expandable.get_first_group()) return hed_objs - def filter_types(self, hed_strings, types, groups=False): - hed_objs = [None for _ in range(len(hed_strings))] - for hed in hed_strings: - if not hed: - continue + def get_hed_obj(self, hed_str, remove_types=False, remove_group=False): + if not hed_str: + return None + hed_obj = HedString(hed_str, self.event_manager.hed_schema, def_dict=self.event_manager.def_dict) + if remove_types: + hed_obj, temp = split_base_tags(hed_obj, self.remove_types, remove_group=remove_group) + return hed_obj + def get_hed_string_obj(self, hed_str, filter_types=False): hed_obj = HedString(hed_str, self.event_manager.hed_schema, def_dict=self.event_manager.def_dict) # if filter_types: # hed_obj = hed_obj - for def_tag in hed_obj.find_def_tags(recursive=True, include_groups=0): - hed_obj.replace(def_tag, def_tag.expandable.get_first_group()) return hed_obj - # def _initialize(self): - # for index in range(len(hed)): - # keep_hed = hed[index].copy() - # for type_name in self.remove_types: - # keep_hed, lose_hed = split_base_tags(keep_hed, type_name) - # print(f"Keep {keep_hed}") - # print(f"Lose {lose_hed}") - # - - # def _extract_definition_variables(self, item, index): - # """ Extract the definition uses from a HedTag, HedGroup, or HedString. - # - # Parameters: - # item (HedTag, HedGroup, or HedString): The item to extract variable information from. - # index (int): Position of this item in the object's hed_strings. - # - # Notes: - # This updates the HedTypeFactors information. - # - # """ - # - # if isinstance(item, HedTag): - # tags = [item] - # else: - # tags = item.get_all_tags() - # for tag in tags: - # if tag.short_base_tag.lower() != "def": - # continue - # hed_vars = self.type_defs.get_type_values(tag) - # if not hed_vars: - # continue - # self._update_definition_variables(tag, hed_vars, index) - # - # def _update_definition_variables(self, tag, hed_vars, index): - # """Update the HedTypeFactors map with information from Def tag. - # - # Parameters: - # tag (HedTag): A HedTag that is a Def tag. - # hed_vars (list): A list of names of the hed type_variables - # index (ind): The event number associated with this. - # - # Notes: - # This modifies the HedTypeFactors map. - # - # """ - # level = tag.extension.lower() - # for var_name in hed_vars: - # hed_var = self._type_map.get(var_name, None) - # if hed_var is None: - # hed_var = HedTypeFactors(self.type_tag, var_name, self.total_events) - # self._type_map[var_name] = hed_var - # var_levels = hed_var.levels.get(level, {index: 0}) - # var_levels[index] = 0 - # hed_var.levels[level] = var_levels - # - # def _extract_variables(self): - # """ Extract all type_variables from hed_strings and event_contexts. """ - # - # hed, context = self.event_manager.unfold_context() - # for index in range(len(hed)): - # this_list = hed[index] + context[index] # list of strings - # if not this_list: # empty lists don't contribute - # continue - # this_hed = HedString(",".join(this_list), self.event_manager.hed_schema) - # self._extract_direct_variables(this_hed, index) - # self._extract_definition_variables(this_hed, index) - # - # def _extract_direct_variables(self, item, index): - # """ Extract the condition type_variables from a HedTag, HedGroup, or HedString. - # - # Parameters: - # item (HedTag or HedGroup): The item from which to extract condition type_variables. - # index (int): Position in the array. - # - # """ - # if isinstance(item, HedTag) and item.short_base_tag.lower() == self.type_tag: - # tag_list = [item] - # elif isinstance(item, HedGroup) and item.children: - # tag_list = item.find_tags_with_term(self.type_tag, recursive=True, include_groups=0) - # else: - # tag_list = [] - # self._update_variables(tag_list, index) \ No newline at end of file diff --git a/hed/tools/analysis/hed_type.py b/hed/tools/analysis/hed_type.py index b0778ec5c..fa57183ac 100644 --- a/hed/tools/analysis/hed_type.py +++ b/hed/tools/analysis/hed_type.py @@ -154,13 +154,11 @@ def _extract_variables(self): hed, base, context = self.event_manager.unfold_context() for index in range(len(hed)): - this_list = hed[index] + base[index] + context[index] # list of strings - if not this_list: # empty lists don't contribute - continue - this_hed = HedString(this_list, self.event_manager.hed_schema) - tag_list = self.get_type_list(self.type_tag, this_hed) - self._update_variables(tag_list, index) - self._extract_definition_variables(this_hed, index) + this_hed = self.event_manager.str_list_to_hed([hed[index], base[index], context[index]]) + if this_hed: + tag_list = self.get_type_list(self.type_tag, this_hed) + self._update_variables(tag_list, index) + self._extract_definition_variables(this_hed, index) @staticmethod def get_type_list(type_tag, item): diff --git a/hed/tools/remodeling/operations/factor_hed_tags_op.py b/hed/tools/remodeling/operations/factor_hed_tags_op.py index 675be0b2a..c5b2ca08f 100644 --- a/hed/tools/remodeling/operations/factor_hed_tags_op.py +++ b/hed/tools/remodeling/operations/factor_hed_tags_op.py @@ -8,6 +8,7 @@ from hed.models.sidecar import Sidecar from hed.models.df_util import get_assembled from hed.tools.analysis.analysis_util import get_expression_parsers, search_strings +from hed.tools.analysis.event_manager import EventManager class FactorHedTagsOp(BaseOp): @@ -89,6 +90,7 @@ def do_op(self, dispatcher, df, name, sidecar=None): raise ValueError("QueryNameAlreadyColumn", f"Query [{query_name}]: is already a column name of the data frame") df_list = [input_data.dataframe] + event_man = EventManager(input_data, dispatcher.hed_schema) hed_strings, _ = get_assembled(input_data, sidecar, dispatcher.hed_schema, extra_def_dicts=None, join_columns=True, shrink_defs=False, expand_defs=True) df_factors = search_strings(hed_strings, self.expression_parsers, query_names=self.query_names) diff --git a/hed/tools/remodeling/operations/summarize_hed_tags_op.py b/hed/tools/remodeling/operations/summarize_hed_tags_op.py index 552a56c55..c79ba0f00 100644 --- a/hed/tools/remodeling/operations/summarize_hed_tags_op.py +++ b/hed/tools/remodeling/operations/summarize_hed_tags_op.py @@ -1,8 +1,9 @@ """ Summarize the HED tags in collection of tabular files. """ from hed.models.tabular_input import TabularInput -from hed.models.sidecar import Sidecar from hed.tools.analysis.hed_tag_counts import HedTagCounts +from hed.tools.analysis.event_manager import EventManager +from hed.tools.analysis.hed_tag_manager import HedTagManager from hed.tools.remodeling.operations.base_op import BaseOp from hed.tools.remodeling.operations.base_summary import BaseSummary from hed.models.df_util import get_assembled @@ -36,7 +37,8 @@ class SummarizeHedTagsOp(BaseOp): "optional_parameters": { "append_timecode": bool, "expand_context": bool, - "expand_definitions": bool + "replace_defs": bool, + "remove_types": list } } @@ -61,7 +63,9 @@ def __init__(self, parameters): self.summary_filename = parameters['summary_filename'] self.tags = parameters['tags'] self.append_timecode = parameters.get('append_timecode', False) - self.expand_context = parameters.get('expand_context', False) + self.expand_context = parameters.get('expand_context', True) + self.replace_defs = parameters.get("replace_defs", True) + self.remove_types = parameters.get("remove_types", ["Condition-variable", "Task"]) def do_op(self, dispatcher, df, name, sidecar=None): """ Summarize the HED tags present in the dataset. @@ -93,8 +97,7 @@ class HedTagSummary(BaseSummary): def __init__(self, sum_op): super().__init__(sum_op) - self.tags = sum_op.tags - self.expand_context = sum_op.expand_context + self.sum_op = sum_op def update_summary(self, new_info): """ Update the summary for a given tabular input file. @@ -107,15 +110,10 @@ def update_summary(self, new_info): """ counts = HedTagCounts(new_info['name'], total_events=len(new_info['df'])) - sidecar = new_info['sidecar'] - if sidecar and not isinstance(sidecar, Sidecar): - sidecar = Sidecar(sidecar) - input_data = TabularInput(new_info['df'], sidecar=sidecar, name=new_info['name']) - hed_strings, definitions = get_assembled(input_data, sidecar, new_info['schema'], - extra_def_dicts=None, join_columns=True, - shrink_defs=False, expand_defs=True) - # type_defs = input_data.get_definitions().gathered_defs - for hed in hed_strings: + input_data = TabularInput(new_info['df'], sidecar=new_info['sidecar'], name=new_info['name']) + tag_man = HedTagManager(EventManager(input_data, new_info['schema'])) + hed_objs = tag_man.get_hed_objs(self.sum_op.expand_context, self.sum_op.replace_defs) + for hed in hed_objs: counts.update_event_counts(hed, new_info['name']) self.summary_dict[new_info["name"]] = counts @@ -129,9 +127,9 @@ def get_details_dict(self, tag_counts): dict: dictionary with the summary results. """ - template, unmatched = tag_counts.organize_tags(self.tags) + template, unmatched = tag_counts.organize_tags(self.sum_op.tags) details = {} - for key, key_list in self.tags.items(): + for key, key_list in self.sum_op.tags.items(): details[key] = self._get_details(key_list, template, verbose=True) leftovers = [value.get_info(verbose=True) for value in unmatched] return {"Name": tag_counts.name, "Total events": tag_counts.total_events, diff --git a/tests/tools/analysis/test_event_manager.py b/tests/tools/analysis/test_event_manager.py index 7e97a509f..4e7ee1051 100644 --- a/tests/tools/analysis/test_event_manager.py +++ b/tests/tools/analysis/test_event_manager.py @@ -47,10 +47,23 @@ def test_unfold_context(self): manager1 = EventManager(self.input_data, self.schema) hed, base, context = manager1.unfold_context() for index in range(len(manager1.onsets)): - self.assertIsInstance(hed[index], list) - self.assertIsInstance(base[index], list) + self.assertIsInstance(hed[index], str) + self.assertIsInstance(base[index], str) # ToDo finish tests - + + def test_str_list_to_hed(self): + manager1 = EventManager(self.input_data, self.schema) + hed, base, context = manager1.unfold_context() + hedObj1 = manager1.str_list_to_hed(['', '', '']) + self.assertFalse(hedObj1) + hedObj2 = manager1.str_list_to_hed([hed[0], base[0]]) + self.assertIsInstance(hedObj2, HedString) + + def test_get_type_defs(self): + manager1 = EventManager(self.input_data, self.schema) + def_names = manager1.get_type_defs(["Condition-variable", "task"]) + self.assertIsInstance(def_names, list) + def test_fix_list(self): list1 = [[], [HedString('Red,Black', self.schema), HedString('(Green,Blue)', self.schema)], [HedString('Red,Black', self.schema), HedString('(Green,Blue)', self.schema)]] diff --git a/tests/tools/analysis/test_hed_tag_manager.py b/tests/tools/analysis/test_hed_tag_manager.py index c74ae6be5..dcdaa0ddd 100644 --- a/tests/tools/analysis/test_hed_tag_manager.py +++ b/tests/tools/analysis/test_hed_tag_manager.py @@ -75,17 +75,26 @@ def setUpClass(cls): def test_constructor_from_tabular_input(self): event_man = EventManager(self.input_data, self.schema) - tag_man = HedTagManager(event_man, remove_types=['Condition-variable', 'Task']) - self.assertIsInstance(tag_man, HedTagManager, "Constructor should create a HedTypeManager from a tabular input") + remove_types = [] + tag_man1 = HedTagManager(EventManager(self.input_data, self.schema)) + self.assertIsInstance(tag_man1, HedTagManager) + hed_objs1a = tag_man1.get_hed_objs(include_context=False) + hed_objs1b = tag_man1.get_hed_objs(include_context=True) + hed_objs1c = tag_man1.get_hed_objs(include_context=False, replace_defs=True) + hed_objs1d = tag_man1.get_hed_objs(include_context=True, replace_defs=True) + tag_man2 = HedTagManager(event_man, remove_types=['Condition-variable', 'Task']) + hed_objs2a = tag_man2.get_hed_objs(include_context=False) + hed_objs2b = tag_man2.get_hed_objs(include_context=True) + self.assertIsInstance(tag_man1, HedTagManager) + self.assertIsInstance(tag_man1, HedTagManager) def test_get_hed_objs(self): event_man = EventManager(self.input_data, self.schema) - tag_man = HedTagManager(event_man, remove_types=['Condition-variable', 'Task']) - hed_objs = tag_man.get_hed_objs() - self.assertisInstance(hed_objs, list) - self.assertEqual(len(hed_objs), len(event_man.onsets)) - print("to here") - + # tag_man = HedTagManager(event_man, remove_types=['Condition-variable', 'Task']) + # hed_objs = tag_man.get_hed_objs() + # self.assertIsInstance(hed_objs, list) + # self.assertEqual(len(hed_objs), len(event_man.onsets)) + # def test_constructor_variable_caps(self): # sidecar1 = Sidecar(self.sidecar_path, name='face_sub1_json') # input_data = TabularInput(self.events_path, sidecar1, name="face_sub1_events") diff --git a/tests/tools/analysis/test_hed_type.py b/tests/tools/analysis/test_hed_type.py index 690c09892..662b82ccb 100644 --- a/tests/tools/analysis/test_hed_type.py +++ b/tests/tools/analysis/test_hed_type.py @@ -22,22 +22,23 @@ def setUpClass(cls): hed_schema=schema), HedString('(Definition/Cond3/#, (Condition-variable/Var3, Label/#, Ellipse, Cross))', hed_schema=schema), - HedString('(Definition/Cond4, (Condition-variable, Apple, Banana))', hed_schema=schema), - HedString('(Definition/Cond5, (Condition-variable/Lumber, Apple, Banana))', hed_schema=schema), - HedString('(Definition/Cond6/#, (Condition-variable/Lumber, Label/#, Apple, Banana))', + HedString('(Definition/Cond4, (Condition-variable, Rectangle, Triangle))', hed_schema=schema), + HedString('(Definition/Cond5, (Condition-variable/Lumber, Action, Sensory-presentation))', + hed_schema=schema), + HedString('(Definition/Cond6/#, (Condition-variable/Lumber, Label/#, Agent, Move))', hed_schema=schema)] def_dict = DefinitionDict() for value in defs: def_dict.check_for_definitions(value) - test_strings1 = ["Sensory-event,(Def/Cond1,(Red, Blue, Condition-variable/Trouble),Onset)", + test_strings1 = ["Sensory-event,(Def/Cond1,(Elbow, Hip, Condition-variable/Trouble),Onset)", "(Def/Cond2,Onset),Green,Yellow, Def/Cond5, Def/Cond6/4", "(Def/Cond1, Offset)", "White, Black, Condition-variable/Wonder, Condition-variable/Fast", "", "(Def/Cond2, Onset)", "(Def/Cond3/4.3, Onset)", - "Arm, Leg, Condition-variable/Fast"] + "Upper-arm, Head, Condition-variable/Fast"] test_onsets1 = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0] df1 = DataFrame(test_onsets1, columns=['onset']) df1['HED'] = test_strings1 diff --git a/tests/tools/analysis/test_hed_type_defs.py b/tests/tools/analysis/test_hed_type_defs.py index b660d627e..3d66cce40 100644 --- a/tests/tools/analysis/test_hed_type_defs.py +++ b/tests/tools/analysis/test_hed_type_defs.py @@ -1,6 +1,6 @@ import os import unittest -from hed.models import DefinitionEntry +from hed.models import DefinitionDict from hed.models.hed_string import HedString from hed.models.hed_tag import HedTag from hed.models.sidecar import Sidecar @@ -14,32 +14,29 @@ class Test(unittest.TestCase): @classmethod def setUpClass(cls): schema = load_schema_version(xml_version="8.1.0") - cls.test_strings1 = [HedString('Sensory-event,(Def/Cond1,(Red, Blue),Onset),(Def/Cond2,Onset),Green,Yellow', - hed_schema=schema), - HedString('(Def/Cond1, Offset)', hed_schema=schema), - HedString('White, Black, Condition-variable/Wonder, Condition-variable/Fast', - hed_schema=schema), - HedString('', hed_schema=schema), - HedString('(Def/Cond2, Onset)', hed_schema=schema), - HedString('(Def/Cond3/4.3, Onset)', hed_schema=schema), - HedString('Arm, Leg, Condition-variable/Fast', hed_schema=schema)] - def1 = HedString('(Condition-variable/Var1, Circle, Square, Description/This is def1)', hed_schema=schema) - def2 = HedString('(condition-variable/Var2, Condition-variable/Apple, Triangle, Sphere)', - hed_schema=schema) - def3 = HedString('(Organizational-property/Condition-variable/Var3, Physical-length/#, Ellipse, Cross)', - hed_schema=schema) - def4 = HedString('(Condition-variable, Apple, Banana, Description/This is def4)', hed_schema=schema) - def5 = HedString('(Condition-variable/Lumber, Apple, Banana, Description/This is def5)', - hed_schema=schema) - def6 = HedString('(Condition-variable/Lumber, Label/#, Apple, Banana, Description/This is def6)', - hed_schema=schema) - cls.definitions1 = {'Cond1': DefinitionEntry('Cond1', def1, False, None), - 'Cond2': DefinitionEntry('Cond2', def2, False, None), - 'Cond3': DefinitionEntry('Cond3', def3, True, None), - 'Cond4': DefinitionEntry('Cond4', def4, False, None), - 'Cond5': DefinitionEntry('Cond5', def5, False, None), - 'Cond6': DefinitionEntry('Cond6', def6, True, None) - } + defs = [HedString('(Definition/Cond1, (Condition-variable/Var1, Circle, Square))', hed_schema=schema), + HedString('(Definition/Cond2, (condition-variable/Var2, Condition-variable/Apple, Triangle, Sphere))', + hed_schema=schema), + HedString('(Definition/Cond3/#, (Condition-variable/Var3, Label/#, Ellipse, Cross))', + hed_schema=schema), + HedString('(Definition/Cond4, (Condition-variable, Rectangle, Triangle))', hed_schema=schema), + HedString('(Definition/Cond5, (Condition-variable/Lumber, Action, Sensory-presentation))', + hed_schema=schema), + HedString('(Definition/Cond6/#, (Condition-variable/Lumber, Label/#, Agent, Move))', + hed_schema=schema)] + def_dict = DefinitionDict() + for value in defs: + def_dict.check_for_definitions(value) + + cls.test_strings1 = ["Sensory-event,(Def/Cond1,(Elbow, Hip, Condition-variable/Trouble),Onset)", + "(Def/Cond2,Onset),Green,Yellow, Def/Cond5, Def/Cond6/4", + "(Def/Cond1, Offset)", + "White, Black, Condition-variable/Wonder, Condition-variable/Fast", + "", + "(Def/Cond2, Onset)", + "(Def/Cond3/4.3, Onset)", + "Upper-arm, Head, Condition-variable/Fast"] + cls.definitions1 = def_dict bids_root_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../data/bids_tests/eeg_ds003645s_hed')) events_path = os.path.realpath(os.path.join(bids_root_path, diff --git a/tests/tools/remodeling/operations/test_summarize_hed_tags_op.py b/tests/tools/remodeling/operations/test_summarize_hed_tags_op.py index f6f88fe5e..86ea84411 100644 --- a/tests/tools/remodeling/operations/test_summarize_hed_tags_op.py +++ b/tests/tools/remodeling/operations/test_summarize_hed_tags_op.py @@ -61,9 +61,9 @@ def test_do_op(self): self.assertIn(sum_op.summary_name, dispatch.summary_dicts) self.assertIsInstance(dispatch.summary_dicts[sum_op.summary_name], HedTagSummary) x = dispatch.summary_dicts[sum_op.summary_name].summary_dict['subj2_run1'] - self.assertEqual(len(dispatch.summary_dicts[sum_op.summary_name].summary_dict['subj2_run1'].tag_dict), 47) + self.assertEqual(len(dispatch.summary_dicts[sum_op.summary_name].summary_dict['subj2_run1'].tag_dict), 44) df_new = sum_op.do_op(dispatch, dispatch.prep_data(df), 'subj2_run2', sidecar=self.json_path) - self.assertEqual(len(dispatch.summary_dicts[sum_op.summary_name].summary_dict['subj2_run2'].tag_dict), 47) + self.assertEqual(len(dispatch.summary_dicts[sum_op.summary_name].summary_dict['subj2_run2'].tag_dict), 44) def test_quick3(self): from hed.models import TabularInput, Sidecar From 47806cefac684bbe31351cd87999d7939edc8e4d Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Sun, 27 Aug 2023 12:55:18 -0500 Subject: [PATCH 3/4] Updated the tests for summarize tags --- .../operations/summarize_hed_tags_op.py | 13 +- .../operations/test_summarize_hed_tags_op.py | 115 ++++++++++++++---- 2 files changed, 101 insertions(+), 27 deletions(-) diff --git a/hed/tools/remodeling/operations/summarize_hed_tags_op.py b/hed/tools/remodeling/operations/summarize_hed_tags_op.py index c79ba0f00..4b8b72835 100644 --- a/hed/tools/remodeling/operations/summarize_hed_tags_op.py +++ b/hed/tools/remodeling/operations/summarize_hed_tags_op.py @@ -6,7 +6,6 @@ from hed.tools.analysis.hed_tag_manager import HedTagManager from hed.tools.remodeling.operations.base_op import BaseOp from hed.tools.remodeling.operations.base_summary import BaseSummary -from hed.models.df_util import get_assembled class SummarizeHedTagsOp(BaseOp): @@ -36,7 +35,7 @@ class SummarizeHedTagsOp(BaseOp): }, "optional_parameters": { "append_timecode": bool, - "expand_context": bool, + "include_context": bool, "replace_defs": bool, "remove_types": list } @@ -45,7 +44,7 @@ class SummarizeHedTagsOp(BaseOp): SUMMARY_TYPE = "hed_tag_summary" def __init__(self, parameters): - """ Constructor for the summarize hed tags operation. + """ Constructor for the summarize_hed_tags operation. Parameters: parameters (dict): Dictionary with the parameter values for required and optional parameters. @@ -63,7 +62,7 @@ def __init__(self, parameters): self.summary_filename = parameters['summary_filename'] self.tags = parameters['tags'] self.append_timecode = parameters.get('append_timecode', False) - self.expand_context = parameters.get('expand_context', True) + self.include_context = parameters.get('include_context', True) self.replace_defs = parameters.get("replace_defs", True) self.remove_types = parameters.get("remove_types", ["Condition-variable", "Task"]) @@ -111,8 +110,10 @@ def update_summary(self, new_info): """ counts = HedTagCounts(new_info['name'], total_events=len(new_info['df'])) input_data = TabularInput(new_info['df'], sidecar=new_info['sidecar'], name=new_info['name']) - tag_man = HedTagManager(EventManager(input_data, new_info['schema'])) - hed_objs = tag_man.get_hed_objs(self.sum_op.expand_context, self.sum_op.replace_defs) + tag_man = HedTagManager(EventManager(input_data, new_info['schema']), + remove_types=self.sum_op.remove_types) + hed_objs = tag_man.get_hed_objs(include_context=self.sum_op.include_context, + replace_defs=self.sum_op.replace_defs) for hed in hed_objs: counts.update_event_counts(hed, new_info['name']) self.summary_dict[new_info["name"]] = counts diff --git a/tests/tools/remodeling/operations/test_summarize_hed_tags_op.py b/tests/tools/remodeling/operations/test_summarize_hed_tags_op.py index 86ea84411..e8c0322db 100644 --- a/tests/tools/remodeling/operations/test_summarize_hed_tags_op.py +++ b/tests/tools/remodeling/operations/test_summarize_hed_tags_op.py @@ -2,6 +2,12 @@ import os import unittest import pandas as pd +from hed.models import TabularInput, Sidecar +from hed.schema import load_schema_version +from hed.tools.analysis.hed_tag_counts import HedTagCounts +from hed.tools.analysis.event_manager import EventManager +from hed.tools.analysis.hed_tag_manager import HedTagManager +from io import StringIO from hed.models.df_util import get_assembled from hed.tools.remodeling.dispatcher import Dispatcher from hed.tools.remodeling.operations.summarize_hed_tags_op import SummarizeHedTagsOp, HedTagSummary @@ -27,8 +33,11 @@ def setUpClass(cls): "Objects": ["Item"], "Properties": ["Property"] }, - "expand_context": False, + "include_context": False, + "replace_defs": False, + "remove_types": ["Condition-variable", "Task"] } + cls.base_parameters = base_parameters cls.json_parms = json.dumps(base_parameters) @classmethod @@ -39,7 +48,10 @@ def test_constructor(self): parms = json.loads(self.json_parms) sum_op1 = SummarizeHedTagsOp(parms) self.assertIsInstance(sum_op1, SummarizeHedTagsOp, "constructor creates an object of the correct type") - parms["expand_context"] = "" + + def test_constructor_bad_params(self): + parms = json.loads(self.json_parms) + parms["include_context"] = "" with self.assertRaises(TypeError) as context: SummarizeHedTagsOp(parms) self.assertEqual(context.exception.args[0], "BadType") @@ -49,7 +61,7 @@ def test_constructor(self): SummarizeHedTagsOp(parms2) self.assertEqual(context.exception.args[0], "BadParameter") - def test_do_op(self): + def test_do_op_no_replace_no_context_remove_on(self): dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions=['8.1.0']) parms = json.loads(self.json_parms) sum_op = SummarizeHedTagsOp(parms) @@ -60,17 +72,81 @@ def test_do_op(self): self.assertEqual(10, len(df_new.columns), "summarize_hed_type_op has correct number of columns") self.assertIn(sum_op.summary_name, dispatch.summary_dicts) self.assertIsInstance(dispatch.summary_dicts[sum_op.summary_name], HedTagSummary) - x = dispatch.summary_dicts[sum_op.summary_name].summary_dict['subj2_run1'] - self.assertEqual(len(dispatch.summary_dicts[sum_op.summary_name].summary_dict['subj2_run1'].tag_dict), 44) + counts = dispatch.summary_dicts[sum_op.summary_name].summary_dict['subj2_run1'] + self.assertIsInstance(counts, HedTagCounts) + self.assertEqual(len(counts.tag_dict), 16) + self.assertIn('def', counts.tag_dict) + self.assertNotIn('task', counts.tag_dict) + self.assertNotIn('condition-variable', counts.tag_dict) df_new = sum_op.do_op(dispatch, dispatch.prep_data(df), 'subj2_run2', sidecar=self.json_path) - self.assertEqual(len(dispatch.summary_dicts[sum_op.summary_name].summary_dict['subj2_run2'].tag_dict), 44) + self.assertEqual(len(dispatch.summary_dicts[sum_op.summary_name].summary_dict['subj2_run2'].tag_dict), 16) + + def test_do_op_options(self): + dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions=['8.2.0']) + df = pd.read_csv(self.data_path, delimiter='\t', header=0, keep_default_na=False, na_values=",null") + + # no replace, no context, types removed + parms1 = json.loads(self.json_parms) + parms1["summary_name"] = "tag summary 1" + sum_op1 = SummarizeHedTagsOp(parms1) + df_new1 = sum_op1.do_op(dispatch, dispatch.prep_data(df), 'subj2_run1', sidecar=self.json_path) + self.assertIsInstance(sum_op1, SummarizeHedTagsOp, "constructor creates an object of the correct type") + self.assertEqual(200, len(df_new1), "summarize_hed_type_op dataframe length is correct") + self.assertEqual(10, len(df_new1.columns), "summarize_hed_type_op has correct number of columns") + self.assertIn(sum_op1.summary_name, dispatch.summary_dicts) + self.assertIsInstance(dispatch.summary_dicts[sum_op1.summary_name], HedTagSummary) + counts1 = dispatch.summary_dicts[sum_op1.summary_name].summary_dict['subj2_run1'] + self.assertIsInstance(counts1, HedTagCounts) + self.assertEqual(len(counts1.tag_dict), 16) + self.assertNotIn('event-context', counts1.tag_dict) + self.assertIn('def', counts1.tag_dict) + self.assertNotIn('task', counts1.tag_dict) + self.assertNotIn('condition-variable', counts1.tag_dict) + + # no replace, context, types removed + parms2 = json.loads(self.json_parms) + parms2["include_context"] = True + parms2["summary_name"] = "tag summary 2" + sum_op2 = SummarizeHedTagsOp(parms2) + df_new2 = sum_op2.do_op(dispatch, dispatch.prep_data(df), 'subj2_run1', sidecar=self.json_path) + self.assertIsInstance(sum_op2, SummarizeHedTagsOp, "constructor creates an object of the correct type") + self.assertEqual(200, len(df_new2), "summarize_hed_type_op dataframe length is correct") + self.assertEqual(10, len(df_new2.columns), "summarize_hed_type_op has correct number of columns") + self.assertIn(sum_op2.summary_name, dispatch.summary_dicts) + self.assertIsInstance(dispatch.summary_dicts[sum_op2.summary_name], HedTagSummary) + counts2 = dispatch.summary_dicts[sum_op2.summary_name].summary_dict['subj2_run1'] + self.assertIsInstance(counts2, HedTagCounts) + self.assertEqual(len(counts2.tag_dict), len(counts1.tag_dict) + 1) + self.assertIn('event-context', counts2.tag_dict) + self.assertIn('def', counts2.tag_dict) + self.assertNotIn('task', counts2.tag_dict) + self.assertNotIn('condition-variable', counts2.tag_dict) + + # no replace, context, types removed + parms3 = json.loads(self.json_parms) + parms3["include_context"] = True + parms3["replace_defs"] = True + parms3["summary_name"] = "tag summary 3" + sum_op3 = SummarizeHedTagsOp(parms3) + df_new3 = sum_op3.do_op(dispatch, dispatch.prep_data(df), 'subj2_run1', sidecar=self.json_path) + self.assertIsInstance(sum_op3, SummarizeHedTagsOp, "constructor creates an object of the correct type") + self.assertEqual(200, len(df_new3), "summarize_hed_type_op dataframe length is correct") + self.assertEqual(10, len(df_new3.columns), "summarize_hed_type_op has correct number of columns") + self.assertIn(sum_op3.summary_name, dispatch.summary_dicts) + self.assertIsInstance(dispatch.summary_dicts[sum_op3.summary_name], HedTagSummary) + counts3 = dispatch.summary_dicts[sum_op3.summary_name].summary_dict['subj2_run1'] + self.assertIsInstance(counts3, HedTagCounts) + # self.assertEqual(len(counts3.tag_dict), 44) + # self.assertIn('event-context', counts3.tag_dict) + # self.assertNotIn('def', counts3.tag_dict) + # self.assertNotIn('task', counts3.tag_dict) + # self.assertNotIn('condition-variable', counts3.tag_dict) def test_quick3(self): - from hed.models import TabularInput, Sidecar - from hed.schema import load_schema_version - from hed.tools.analysis.hed_tag_counts import HedTagCounts - from io import StringIO - my_schema = load_schema_version('8.1.0') + include_context = True + replace_defs = True + remove_types = [] + my_schema = load_schema_version('8.2.0') my_json = { "code": { "HED": { @@ -89,19 +165,16 @@ def test_quick3(self): data = [[0.5, 0, 'code1', 'Description/This is a test, Label/Temp, (Def/Blech1, Green)'], [0.6, 0, 'code2', 'Sensory-event, ((Description/Animal, Condition-variable/Blech))']] df = pd.DataFrame(data, columns=['onset', 'duration', 'code', 'HED']) - input_data = TabularInput(df, sidecar=my_sidecar) + input_data = TabularInput(df, sidecar=my_sidecar, name="myName") + tag_man = HedTagManager(EventManager(input_data, my_schema), remove_types=remove_types) counts = HedTagCounts('myName', 2) summary_dict = {} - hed_strings, definitions = get_assembled(input_data, my_sidecar, my_schema, extra_def_dicts=None, join_columns=True, - shrink_defs=False, expand_defs=True) - for hed in hed_strings: - counts.update_event_counts(hed, 'myName') - summary_dict['myName'] = counts + # hed_objs = tag_man.get_hed_objs(include_context=include_context, replace_defs=replace_defs) + # for hed in hed_objs: + # counts.update_event_counts(hed, 'myName') + # summary_dict['myName'] = counts def test_quick4(self): - from hed.models import TabularInput, Sidecar - from hed.schema import load_schema_version - from hed.tools.analysis.hed_tag_counts import HedTagCounts path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../../data/remodel_tests/')) data_path = os.path.realpath(os.path.join(path, 'sub-002_task-FacePerception_run-1_events.tsv')) @@ -183,7 +256,7 @@ def test_sample_example(self): "Participant-response"], "Objects": ["Item"] }, - "expand_context": False + "include_context": False }}] sample_data = [[0.0776, 0.5083, 'go', 'n/a', 0.565, 'correct', 'right', 'female'], From 679fe53f299c530de8665453f4ee6f8f175d6aa9 Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Tue, 29 Aug 2023 10:50:31 -0500 Subject: [PATCH 4/4] Updated the event manager --- hed/tools/analysis/column_name_summary.py | 2 +- hed/tools/analysis/event_manager.py | 51 ++--------- hed/tools/analysis/hed_tag_counts.py | 3 +- hed/tools/analysis/hed_tag_manager.py | 17 ++-- hed/tools/analysis/hed_type.py | 2 +- hed/tools/analysis/hed_type_counts.py | 4 +- hed/tools/analysis/hed_type_defs.py | 57 ++++++------ hed/tools/analysis/temporal_event.py | 2 +- hed/tools/bids/bids_dataset.py | 4 +- hed/tools/bids/bids_file_group.py | 11 ++- hed/tools/bids/bids_tabular_file.py | 2 +- .../remodeling/cli/run_remodel_backup.py | 4 +- .../remodeling/cli/run_remodel_restore.py | 2 +- hed/tools/remodeling/operations/base_op.py | 2 +- .../remodeling/operations/base_summary.py | 2 +- .../remodeling/operations/split_rows_op.py | 2 +- .../operations/summarize_hed_tags_op.py | 2 +- .../operations/summarize_hed_validation_op.py | 2 +- .../summarize_sidecar_from_events_op.py | 2 +- hed/tools/visualization/tag_word_cloud.py | 6 +- hed/tools/visualization/word_cloud_util.py | 1 + readthedocs.yml | 4 +- ...est_analysis_util_get_assembled_strings.py | 10 +-- .../analysis/test_column_name_summary.py | 1 + tests/tools/analysis/test_event_manager.py | 87 +++++++------------ tests/tools/analysis/test_hed_tag_manager.py | 36 ++++---- tests/tools/analysis/test_hed_type_defs.py | 36 +++++--- .../operations/test_summarize_hed_tags_op.py | 82 ++++++++--------- 28 files changed, 199 insertions(+), 237 deletions(-) diff --git a/hed/tools/analysis/column_name_summary.py b/hed/tools/analysis/column_name_summary.py index 5c7a710c9..90ed0ae88 100644 --- a/hed/tools/analysis/column_name_summary.py +++ b/hed/tools/analysis/column_name_summary.py @@ -26,7 +26,7 @@ def update_headers(self, column_names): return len(self.unique_headers) - 1 def get_summary(self, as_json=False): - patterns = [list() for element in self.unique_headers] + patterns = [list() for _ in self.unique_headers] for key, value in self.file_dict.items(): patterns[value].append(key) column_headers = [] diff --git a/hed/tools/analysis/event_manager.py b/hed/tools/analysis/event_manager.py index 27882b43f..b64ac0409 100644 --- a/hed/tools/analysis/event_manager.py +++ b/hed/tools/analysis/event_manager.py @@ -87,28 +87,11 @@ def _extract_temporal_events(self, hed, event_index, onset_dict): to_remove.append(tup[1]) hed.remove(to_remove) - # def _set_event_contexts(self): - """ Creates an event context for each hed string. - - Notes: - The event context would be placed in an event context group, but is kept in a separate array without the - event context group or tag. - - """ - # contexts = [[] for _ in range(len(self.hed_strings))] - # for onset in self.onset_list: - # for i in range(onset.start_index+1, onset.end_index): - # contexts[i].append(onset.contents) - # for i in range(len(self.hed_strings)): - # contexts[i] = HedString(",".join(contexts[i]), hed_schema=self.hed_schema) - # self.contexts = contexts - def unfold_context(self, remove_types=[]): """ Unfolds the event information into hed, base, and contexts either as arrays of str or of HedString. Parameters: remove_types (list): List of types to remove. - replace_defs (bool): If True the def term is replaced by its definition group. Returns: list of str or HedString representing the information without the events of temporal extent @@ -118,21 +101,21 @@ def unfold_context(self, remove_types=[]): """ placeholder = "" - remove_defs = self.find_type_defs(remove_types) - hed = [placeholder for _ in range(len(self.hed_strings))] + remove_defs = self.get_type_defs(remove_types) + new_hed = [placeholder for _ in range(len(self.hed_strings))] new_base = [placeholder for _ in range(len(self.hed_strings))] new_contexts = [placeholder for _ in range(len(self.hed_strings))] - base, contexts = self.expand_context() + base, contexts = self._expand_context() for index, item in enumerate(self.hed_strings): - hed[index] = self._process_hed(item, remove_types=remove_types, - remove_defs=remove_defs, remove_group=False) + new_hed[index] = self._process_hed(item, remove_types=remove_types, + remove_defs=remove_defs, remove_group=False) new_base[index] = self._process_hed(base[index], remove_types=remove_types, remove_defs=remove_defs, remove_group=True) new_contexts[index] = self._process_hed(contexts[index], remove_types=remove_types, remove_defs=remove_defs, remove_group=True) - return hed, new_base, new_contexts # these are each a list of strings + return new_hed, new_base, new_contexts # these are each a list of strings - def expand_context(self): + def _expand_context(self): """ Expands the onset and the ongoing context for additional processing. """ @@ -152,7 +135,7 @@ def _process_hed(self, hed, remove_types=[], remove_defs=[], remove_group=False) return "" # Reconvert even if hed is already a HedString to make sure a copy and expandable. hed_obj = HedString(str(hed), hed_schema=self.hed_schema, def_dict=self.def_dict) - hed_obj, temp1 = split_base_tags(hed_obj, remove_types, remove_group=False) + hed_obj, temp1 = split_base_tags(hed_obj, remove_types, remove_group=remove_group) if remove_defs: hed_obj, temp2 = split_def_tags(hed_obj, remove_defs, remove_group=remove_group) return str(hed_obj) @@ -180,24 +163,6 @@ def compress_strings(list_to_compress): result_list[index] = ",".join(item) return result_list - def find_type_defs(self, types): - """ Return a list of definition names (lower case) that correspond to one of the specified types. - - Parameters: - types (list): List of tags that are treated as types such as 'Condition-variable' - - Returns: - list: List of definition names (lower-case) that correspond to the specified types - - """ - def_names = {} - if not types: - return - for type_tag in types: - type_defs = HedTypeDefs(self.def_dict, type_tag=type_tag) - def_names[type_tag] = type_defs.def_map - return def_names - def get_type_defs(self, types): """ Return a list of definition names (lower case) that correspond to one of the specified types. diff --git a/hed/tools/analysis/hed_tag_counts.py b/hed/tools/analysis/hed_tag_counts.py index 9790705a0..1dd86d899 100644 --- a/hed/tools/analysis/hed_tag_counts.py +++ b/hed/tools/analysis/hed_tag_counts.py @@ -76,13 +76,12 @@ def __init__(self, name, total_events=0): self.files = {} self.total_events = total_events - def update_event_counts(self, hed_string_obj, file_name, definitions=None): + def update_event_counts(self, hed_string_obj, file_name): """ Update the tag counts based on a hed string object. Parameters: hed_string_obj (HedString): The HED string whose tags should be counted. file_name (str): The name of the file corresponding to these counts. - definitions (dict): The type_defs associated with the HED string. """ if file_name not in self.files: diff --git a/hed/tools/analysis/hed_tag_manager.py b/hed/tools/analysis/hed_tag_manager.py index 7849d1e36..e5bdb78af 100644 --- a/hed/tools/analysis/hed_tag_manager.py +++ b/hed/tools/analysis/hed_tag_manager.py @@ -1,7 +1,7 @@ """ Manager for the HED tags in a tabular file. """ from hed.models import HedString -from hed.models.string_util import split_base_tags, split_def_tags +from hed.models.string_util import split_base_tags class HedTagManager: @@ -12,7 +12,6 @@ def __init__(self, event_manager, remove_types=[]): Parameters: event_manager (EventManager): an event manager for the tabular file. remove_types (list or None): List of type tags (such as condition-variable) to remove. - include_context (bool): If True, include the context. """ @@ -20,7 +19,7 @@ def __init__(self, event_manager, remove_types=[]): self.remove_types = remove_types self.hed_strings, self.base_strings, self.context_strings = ( self.event_manager.unfold_context(remove_types=remove_types)) - self.type_def_names = self.event_manager.find_type_defs(remove_types) + self.type_def_names = self.event_manager.get_type_defs(remove_types) # def get_hed_objs1(self, include_context=True): # hed_objs = [None for _ in range(len(self.event_manager.onsets))] @@ -38,7 +37,7 @@ def get_hed_objs(self, include_context=True, replace_defs=False): if include_context and self.context_strings[index]: hed_list.append("(Event-context, (" + self.context_strings[index] + "))") hed_objs[index] = self.event_manager.str_list_to_hed(hed_list) - if replace_defs: + if replace_defs and hed_objs[index]: for def_tag in hed_objs[index].find_def_tags(recursive=True, include_groups=0): hed_objs[index].replace(def_tag, def_tag.expandable.get_first_group()) return hed_objs @@ -51,8 +50,8 @@ def get_hed_obj(self, hed_str, remove_types=False, remove_group=False): hed_obj, temp = split_base_tags(hed_obj, self.remove_types, remove_group=remove_group) return hed_obj - def get_hed_string_obj(self, hed_str, filter_types=False): - hed_obj = HedString(hed_str, self.event_manager.hed_schema, def_dict=self.event_manager.def_dict) - # if filter_types: - # hed_obj = hed_obj - return hed_obj + # def get_hed_string_obj(self, hed_str, filter_types=False): + # hed_obj = HedString(hed_str, self.event_manager.hed_schema, def_dict=self.event_manager.def_dict) + # # if filter_types: + # # hed_obj = hed_obj + # return hed_obj diff --git a/hed/tools/analysis/hed_type.py b/hed/tools/analysis/hed_type.py index fa57183ac..6a3bdc270 100644 --- a/hed/tools/analysis/hed_type.py +++ b/hed/tools/analysis/hed_type.py @@ -1,6 +1,6 @@ """ Manages a type variable and its associated context. """ import pandas as pd -from hed.models import HedGroup, HedString, HedTag +from hed.models import HedGroup, HedTag from hed.tools.analysis.hed_type_defs import HedTypeDefs from hed.tools.analysis.hed_type_factors import HedTypeFactors diff --git a/hed/tools/analysis/hed_type_counts.py b/hed/tools/analysis/hed_type_counts.py index 3e4da8c9f..056bd63d7 100644 --- a/hed/tools/analysis/hed_type_counts.py +++ b/hed/tools/analysis/hed_type_counts.py @@ -123,9 +123,9 @@ def add_descriptions(self, type_defs): """ for type_val, type_count in self.type_dict.items(): - if type_val not in type_defs.type_to_def_map: + if type_val not in type_defs.type_map: continue - for level in type_defs.type_to_def_map[type_val]: + for level in type_defs.type_map[type_val]: if level not in type_count.level_counts: continue level_dict = type_defs.def_map[level] diff --git a/hed/tools/analysis/hed_type_defs.py b/hed/tools/analysis/hed_type_defs.py index 541e5a222..b6a0b8ab1 100644 --- a/hed/tools/analysis/hed_type_defs.py +++ b/hed/tools/analysis/hed_type_defs.py @@ -33,7 +33,7 @@ def __init__(self, definitions, type_tag='condition-variable'): else: self.definitions = {} self.def_map = self._extract_def_map() # dict def names vs {description, tags, type_values} - self.type_to_def_map = self._extract_type_map() # Dictionary of type_values vs dict definition names + self.type_map = self._extract_type_map() # Dictionary of type_values vs dict definition names def get_type_values(self, item): """ Return a list of type_tag values in item. @@ -45,35 +45,42 @@ def get_type_values(self, item): list: A list of the unique values associated with this type """ - def_names = self.get_def_names(item, no_value=True) - type_tag_values = [] + def_names = self.extract_def_names(item, no_value=True) + type_values = [] for def_name in def_names: - values = self.def_map.get(def_name.lower(), None) - if values and values["type_values"]: - type_tag_values = type_tag_values + values["type_values"] - return type_tag_values + values = self.def_map.get(def_name.lower(), {}) + if "type_values" in values: + type_values = type_values + values["type_values"] + return type_values - def get_type_def_names(self): - """ Return a list of definition names that have a type. + @property + def type_def_names(self): + """ List of names of definition that have this type-variable. Returns: list: definition names that have this type. """ - def_names = [] - for name, def_entry in self.def_map.items(): - if def_entry['type_values']: - def_names.append(name) - return def_names + return list(self.def_map.keys()) + + @property + def type_names(self): + """ List of names of the type-variables associated with type definitions. + + Returns: + list: type names associated with the type definitions + + """ + return list(self.type_map.keys()) def _extract_def_map(self): """ Extract type_variables associated with each definition and add them to def_map. """ def_map = {} for entry in self.definitions.values(): - type_values, description, other_tags = self._extract_entry_values(entry) - if type_values: - def_map[entry.name.lower()] = \ - {'type_values': type_values, 'description': description, 'tags': other_tags} + type_def, type_values, description, other_tags = self._extract_entry_values(entry) + if type_def: + def_map[type_def.lower()] = \ + {'def_name': type_def, 'type_values': type_values, 'description': description, 'tags': other_tags} return def_map def _extract_type_map(self): @@ -101,7 +108,8 @@ def _extract_entry_values(self, entry): """ tag_list = entry.contents.get_all_tags() - type_tag_values = [] + type_values = [] + type_def = "" description = '' other_tags = [] for hed_tag in tag_list: @@ -110,15 +118,12 @@ def _extract_entry_values(self, entry): elif hed_tag.short_base_tag.lower() != self.type_tag: other_tags.append(hed_tag.short_base_tag) else: - value = hed_tag.extension.lower() - if value: - type_tag_values.append(value) - else: - type_tag_values.append(entry.name) - return type_tag_values, description, other_tags + type_values.append(hed_tag.extension.lower()) + type_def = entry.name + return type_def, type_values, description, other_tags @staticmethod - def get_def_names(item, no_value=True): + def extract_def_names(item, no_value=True): """ Return a list of Def values in item. Parameters: diff --git a/hed/tools/analysis/temporal_event.py b/hed/tools/analysis/temporal_event.py index 44b860560..7a689609d 100644 --- a/hed/tools/analysis/temporal_event.py +++ b/hed/tools/analysis/temporal_event.py @@ -32,7 +32,7 @@ def _split_group(self, contents): to_remove.append(item) elif item.short_base_tag.lower() == "duration": to_remove.append(item) - self.end_time = self.short_time + float(item.extension.lower()) # Will need to be fixed for units + self.end_time = self.short_time + float(item.extension.lower()) # Will need to be fixed for units elif item.short_base_tag.lower() == "def": self.anchor = item.short_tag contents.remove(to_remove) diff --git a/hed/tools/bids/bids_dataset.py b/hed/tools/bids/bids_dataset.py index fc4aa89f4..d6cd4592c 100644 --- a/hed/tools/bids/bids_dataset.py +++ b/hed/tools/bids/bids_dataset.py @@ -2,12 +2,10 @@ import os import json -from hed.errors.error_reporter import ErrorHandler from hed.schema.hed_schema import HedSchema from hed.schema.hed_schema_io import load_schema_version from hed.schema.hed_schema_group import HedSchemaGroup from hed.tools.bids.bids_file_group import BidsFileGroup -from hed.validator.hed_validator import HedValidator LIBRARY_URL_BASE = "https://raw.githubusercontent.com/hed-standard/hed-schemas/main/library_schemas/" @@ -32,7 +30,7 @@ def __init__(self, root_path, schema=None, tabular_types=None, schema (HedSchema or HedSchemaGroup): A schema that overrides the one specified in dataset. tabular_types (list or None): List of strings specifying types of tabular types to include. If None or empty, then ['events'] is assumed. - exclude_dirs=['sourcedata', 'derivatives', 'code']): + exclude_dirs=['sourcedata', 'derivatives', 'code']: """ self.root_path = os.path.realpath(root_path) diff --git a/hed/tools/bids/bids_file_group.py b/hed/tools/bids/bids_file_group.py index 88fcc04c4..5a47da6ef 100644 --- a/hed/tools/bids/bids_file_group.py +++ b/hed/tools/bids/bids_file_group.py @@ -1,9 +1,8 @@ """ A group of BIDS files with specified suffix name. """ import os -from hed.errors.error_reporter import ErrorContext, ErrorHandler +from hed.errors.error_reporter import ErrorHandler from hed.validator.sidecar_validator import SidecarValidator -from hed.validator.spreadsheet_validator import SpreadsheetValidator from hed.tools.analysis.tabular_summary import TabularSummary from hed.tools.bids.bids_tabular_file import BidsTabularFile from hed.tools.bids.bids_sidecar_file import BidsSidecarFile @@ -129,10 +128,10 @@ def validate_sidecars(self, hed_schema, extra_def_dicts=None, check_for_warnings error_handler = ErrorHandler(check_for_warnings) issues = [] validator = SidecarValidator(hed_schema) - + for sidecar in self.sidecar_dict.values(): name = os.path.basename(sidecar.file_path) - issues += validator.validate(sidecar.contents, extra_def_dicts=extra_def_dicts, name=name, + issues += validator.validate(sidecar.contents, extra_def_dicts=extra_def_dicts, name=name, error_handler=error_handler) return issues @@ -155,7 +154,7 @@ def validate_datafiles(self, hed_schema, extra_def_dicts=None, check_for_warning for data_obj in self.datafile_dict.values(): data_obj.set_contents(overwrite=False) name = os.path.basename(data_obj.file_path) - issues += data_obj.contents.validate(hed_schema, extra_def_dicts=None, name=name, + issues += data_obj.contents.validate(hed_schema, extra_def_dicts=extra_def_dicts, name=name, error_handler=error_handler) if not keep_contents: data_obj.clear_contents() @@ -185,7 +184,7 @@ def _make_sidecar_dict(self): dict: a dictionary of BidsSidecarFile objects keyed by real path for the specified suffix type Notes: - - This function creates the sidecars and but does not set their contents. + - This function creates the sidecars, but does not set their contents. """ files = get_file_list(self.root_path, name_suffix=self.suffix, diff --git a/hed/tools/bids/bids_tabular_file.py b/hed/tools/bids/bids_tabular_file.py index 13a46e353..f419075d7 100644 --- a/hed/tools/bids/bids_tabular_file.py +++ b/hed/tools/bids/bids_tabular_file.py @@ -1,4 +1,4 @@ -""" A BIDS tabular file including its associatedd sidecar. """ +""" A BIDS tabular file including its associated sidecar. """ import os from hed.models.tabular_input import TabularInput diff --git a/hed/tools/remodeling/cli/run_remodel_backup.py b/hed/tools/remodeling/cli/run_remodel_backup.py index 5bed59e4c..6d78465dd 100644 --- a/hed/tools/remodeling/cli/run_remodel_backup.py +++ b/hed/tools/remodeling/cli/run_remodel_backup.py @@ -7,7 +7,7 @@ def get_parser(): - """ Create a parser for the run_remodel_backup command-line arguments. + """ Create a parser for the run_remodel_backup command-line arguments. Returns: argparse.ArgumentParser: A parser for parsing the command line arguments. @@ -45,7 +45,7 @@ def main(arg_list=None): Otherwise, called with the command-line parameters as an argument list. :raises HedFileError: - - If the specified backup already exists. + - If the specified backup already exists. """ diff --git a/hed/tools/remodeling/cli/run_remodel_restore.py b/hed/tools/remodeling/cli/run_remodel_restore.py index 7f21188d7..960bd0916 100644 --- a/hed/tools/remodeling/cli/run_remodel_restore.py +++ b/hed/tools/remodeling/cli/run_remodel_restore.py @@ -6,7 +6,7 @@ def get_parser(): - """ Create a parser for the run_remodel_restore command-line arguments. + """ Create a parser for the run_remodel_restore command-line arguments. Returns: argparse.ArgumentParser: A parser for parsing the command line arguments. diff --git a/hed/tools/remodeling/operations/base_op.py b/hed/tools/remodeling/operations/base_op.py index 15423d64d..bc3e906c6 100644 --- a/hed/tools/remodeling/operations/base_op.py +++ b/hed/tools/remodeling/operations/base_op.py @@ -4,7 +4,7 @@ class BaseOp: """ Base class for operations. All remodeling operations should extend this class. - The base class holds the parameters and does basic parameter checking against the operations specification. + The base class holds the parameters and does basic parameter checking against the operation's specification. """ diff --git a/hed/tools/remodeling/operations/base_summary.py b/hed/tools/remodeling/operations/base_summary.py index 2b732ae2d..f00e81401 100644 --- a/hed/tools/remodeling/operations/base_summary.py +++ b/hed/tools/remodeling/operations/base_summary.py @@ -134,7 +134,7 @@ def _save_summary_files(self, save_dir, file_format, summary, individual_summari Parameters: save_dir (str): Path to the directory in which the summaries will be saved. file_format (str): string representing the extension (including .), '.txt' or '.json'. - summary (dictionary): Dictionary of summaries (has "Dataset" and "Individual files" keys. + summary (dictionary): Dictionary of summaries (has "Dataset" and "Individual files" keys). """ if self.op.append_timecode: diff --git a/hed/tools/remodeling/operations/split_rows_op.py b/hed/tools/remodeling/operations/split_rows_op.py index 858ce7e28..ea0b5dc13 100644 --- a/hed/tools/remodeling/operations/split_rows_op.py +++ b/hed/tools/remodeling/operations/split_rows_op.py @@ -110,7 +110,7 @@ def _add_durations(df, add_events, duration_sources): @staticmethod def _create_onsets(df, onset_source): - """ Create a vector of onsets for the the new events. + """ Create a vector of onsets for the new events. Parameters: df (DataFrame): The dataframe to process. diff --git a/hed/tools/remodeling/operations/summarize_hed_tags_op.py b/hed/tools/remodeling/operations/summarize_hed_tags_op.py index 4b8b72835..ffef53fb7 100644 --- a/hed/tools/remodeling/operations/summarize_hed_tags_op.py +++ b/hed/tools/remodeling/operations/summarize_hed_tags_op.py @@ -78,7 +78,7 @@ def do_op(self, dispatcher, df, name, sidecar=None): Returns: DataFrame: A copy of df. - Side-effect: + Side effect: Updates the context. """ diff --git a/hed/tools/remodeling/operations/summarize_hed_validation_op.py b/hed/tools/remodeling/operations/summarize_hed_validation_op.py index 8b4380745..cd3fc936b 100644 --- a/hed/tools/remodeling/operations/summarize_hed_validation_op.py +++ b/hed/tools/remodeling/operations/summarize_hed_validation_op.py @@ -66,7 +66,7 @@ def do_op(self, dispatcher, df, name, sidecar=None): Returns: DataFrame: A copy of df - Side-effect: + Side effect: Updates the relevant summary. """ diff --git a/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py b/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py index 03353051a..016a06d6d 100644 --- a/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py +++ b/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py @@ -68,7 +68,7 @@ def do_op(self, dispatcher, df, name, sidecar=None): Returns: DataFrame: A copy of df. - Side-effect: + Side effect: Updates the associated summary if applicable. """ diff --git a/hed/tools/visualization/tag_word_cloud.py b/hed/tools/visualization/tag_word_cloud.py index c8d4159d7..68a3a257d 100644 --- a/hed/tools/visualization/tag_word_cloud.py +++ b/hed/tools/visualization/tag_word_cloud.py @@ -51,10 +51,10 @@ def summary_to_dict(summary, transform=np.log10, adjustment=5): adjustment(int): Value added after transform. Returns: word_dict(dict): a dict of the words and their occurrence count - + :raises KeyError: A malformed dictionary was passed - + """ if transform is None: transform = lambda x: x @@ -109,4 +109,4 @@ def load_and_resize_mask(mask_path, width=None, height=None): else: mask_image_array = np.array(mask_image) - return mask_image_array.astype(np.uint8) \ No newline at end of file + return mask_image_array.astype(np.uint8) diff --git a/hed/tools/visualization/word_cloud_util.py b/hed/tools/visualization/word_cloud_util.py index ba25e0133..6071a138a 100644 --- a/hed/tools/visualization/word_cloud_util.py +++ b/hed/tools/visualization/word_cloud_util.py @@ -39,6 +39,7 @@ def _draw_contour(wc, img): return Image.fromarray(ret) + # Replace WordCloud function with one that can handle transparency WordCloud._draw_contour = _draw_contour diff --git a/readthedocs.yml b/readthedocs.yml index bf5d7274d..a0078918c 100644 --- a/readthedocs.yml +++ b/readthedocs.yml @@ -7,7 +7,7 @@ formats: build: os: "ubuntu-22.04" tools: - python: "3.8" + python: "3.7" # Build documentation in the docs/ directory with Sphinx sphinx: @@ -19,4 +19,4 @@ sphinx: python: install: - requirements: docs/requirements.txt - system_packages: true + system_packages: true \ No newline at end of file diff --git a/tests/tools/analysis/test_analysis_util_get_assembled_strings.py b/tests/tools/analysis/test_analysis_util_get_assembled_strings.py index 6ede945f0..5a3972a37 100644 --- a/tests/tools/analysis/test_analysis_util_get_assembled_strings.py +++ b/tests/tools/analysis/test_analysis_util_get_assembled_strings.py @@ -38,7 +38,7 @@ def setUp(self): # "get_assembled_strings should not have Def-expand when expand_defs is False") # self.assertNotEqual(hed_strings_joined1.find("Def/"), -1, # "get_assembled_strings should have Def/ when expand_defs is False") - # + # # def test_get_assembled_strings_no_schema_def_expand(self): # hed_list2 = get_assembled_strings(self.input_data, self.hed_schema, expand_defs=True) # self.assertIsInstance(hed_list2, list, "get_assembled_groups should return a list") @@ -51,7 +51,7 @@ def setUp(self): # "get_assembled_strings should have Def-expand when expand_defs is True") # self.assertEqual(hed_strings_joined2.find("Def/"), -1, # "get_assembled_strings should not have Def/ when expand_defs is True") - # + # # def test_get_assembled_strings_with_schema_no_def_expand(self): # hed_list1 = get_assembled_strings(self. input_data, hed_schema=self.hed_schema, expand_defs=False) # self.assertIsInstance(hed_list1, list, "get_assembled_strings returns a list when expand defs is False") @@ -64,7 +64,7 @@ def setUp(self): # "get_assembled_strings does not have Def-expand when expand_defs is False") # self.assertNotEqual(hed_strings_joined1.find("Def/"), -1, # "get_assembled_strings should have Def/ when expand_defs is False") - # + # # def test_get_assembled_strings_with_schema_def_expand(self): # hed_list2 = get_assembled_strings(self.input_data, hed_schema=self.hed_schema, expand_defs=True) # self.assertIsInstance(hed_list2, list, "get_assembled_groups should return a list") @@ -77,7 +77,7 @@ def setUp(self): # "get_assembled_strings should have Def-expand when expand_defs is True") # self.assertEqual(hed_strings_joined2.find("Def/"), -1, # "get_assembled_strings should not have Def/ when expand_defs is True") - # + # # def test_get_assembled_strings_no_sidecar_no_schema(self): # input_data = TabularInput(self.events_path, name="face_sub1_events") # hed_list1 = get_assembled_strings(input_data, expand_defs=False) @@ -92,7 +92,7 @@ def setUp(self): # self.assertIsInstance(hed_list2[0], HedString, # "get_assembled_string should return an HedString when no sidecar") # self.assertFalse(hed_list2[0].children, "get_assembled_string returned HedString is empty when no sidecar") - # + # # def test_get_assembled_strings_no_sidecar_schema(self): # input_data = TabularInput(self.events_path, hed_schema=self.hed_schema, name="face_sub1_events") # hed_list1 = get_assembled_strings(input_data, expand_defs=False) diff --git a/tests/tools/analysis/test_column_name_summary.py b/tests/tools/analysis/test_column_name_summary.py index 31cb551c0..57c6ba4ba 100644 --- a/tests/tools/analysis/test_column_name_summary.py +++ b/tests/tools/analysis/test_column_name_summary.py @@ -54,6 +54,7 @@ def test_get_summary(self): column_summary.update('run-01', self.columns1) column_summary.update('run-02', self.columns1) summary1 = column_summary.get_summary() + self.assertIsInstance(summary1, dict) column_summary.update('run-03', self.columns2) column_summary.update('run-04', self.columns3) summary2 = column_summary.get_summary() diff --git a/tests/tools/analysis/test_event_manager.py b/tests/tools/analysis/test_event_manager.py index 4e7ee1051..3094c8cec 100644 --- a/tests/tools/analysis/test_event_manager.py +++ b/tests/tools/analysis/test_event_manager.py @@ -30,74 +30,53 @@ def test_constructor(self): self.assertEqual(len(manager1.event_list[0]), 2) self.assertIsInstance(manager1.hed_strings, list) self.assertEqual(len(manager1.hed_strings), len(self.input_data.dataframe)) - # self.assertEqual(len(manager1.event_list), len(self.input_data.dataframe)) - # event_count = 0 - # for index, item in enumerate(manager1.event_list): - # for event in item: - # event_count = event_count + 1 - # self.assertFalse(event.duration) - # self.assertTrue(event.end_index) - # self.assertEqual(event.start_index, index) - # self.assertEqual(event.start_index, index) - # self.assertEqual(event.start_time, manager1.data.dataframe.loc[index, "onset"]) - # if not event.end_time: - # self.assertEqual(event.end_index, len(manager1.data.dataframe)) + self.assertEqual(len(manager1.event_list), len(self.input_data.dataframe)) + event_count = 0 + for index, item in enumerate(manager1.event_list): + for event in item: + event_count = event_count + 1 + self.assertTrue(event.end_index) + self.assertEqual(event.start_index, index) + self.assertEqual(event.start_index, index) + self.assertEqual(event.start_time, float(manager1.input_data.dataframe.loc[index, "onset"])) + if not event.end_time: + self.assertEqual(event.end_index, len(manager1.input_data.dataframe)) - def test_unfold_context(self): + def test_unfold_context_no_remove(self): manager1 = EventManager(self.input_data, self.schema) hed, base, context = manager1.unfold_context() for index in range(len(manager1.onsets)): self.assertIsInstance(hed[index], str) self.assertIsInstance(base[index], str) + + def test_unfold_context_remove(self): + manager1 = EventManager(self.input_data, self.schema) + hed, base, context = manager1.unfold_context(remove_types=['Condition-variable', 'Task']) + for index in range(len(manager1.onsets)): + self.assertIsInstance(hed[index], str) + self.assertIsInstance(base[index], str) # ToDo finish tests def test_str_list_to_hed(self): - manager1 = EventManager(self.input_data, self.schema) - hed, base, context = manager1.unfold_context() - hedObj1 = manager1.str_list_to_hed(['', '', '']) - self.assertFalse(hedObj1) - hedObj2 = manager1.str_list_to_hed([hed[0], base[0]]) - self.assertIsInstance(hedObj2, HedString) + manager = EventManager(self.input_data, self.schema) + hed_obj1 = manager.str_list_to_hed(['', '', '']) + self.assertFalse(hed_obj1) + hed, base, context = manager.unfold_context() + + hed_obj2 = manager.str_list_to_hed([hed[1], base[1], '(Event-context, (' + context[1] + '))']) + self.assertIsInstance(hed_obj2, HedString) + self.assertEqual(10, len(hed_obj2.children)) + hed3, base3, context3 = manager.unfold_context(remove_types=['Condition-variable', 'Task']) + + hed_obj3 = manager.str_list_to_hed([hed3[1], base3[1], '(Event-context, (' + context3[1] + '))']) + self.assertIsInstance(hed_obj3, HedString) + self.assertEqual(6, len(hed_obj3.children)) def test_get_type_defs(self): manager1 = EventManager(self.input_data, self.schema) def_names = manager1.get_type_defs(["Condition-variable", "task"]) self.assertIsInstance(def_names, list) - - def test_fix_list(self): - list1 = [[], [HedString('Red,Black', self.schema), HedString('(Green,Blue)', self.schema)], - [HedString('Red,Black', self.schema), HedString('(Green,Blue)', self.schema)]] - # a = EventManager.fix_list(list1, self.schema) - # b = EventManager.fix_list(list1, self.schema, as_string=True) - x = HedString("Red,Black", self.schema) - y = [HedString('Red,Black', self.schema), HedString('(Green,Blue)', self.schema)] - # ToDo finish test - - # def test_iter(self): - # hed_strings = get_assembled_strings(self.input_data, hed_schema=self.schema, expand_defs=False) - # manager1 = HedContextManager(hed_strings, self.schema) - # i = 0 - # for hed, context in manager1.iter_context(): - # self.assertEqual(hed, manager1.hed_strings[i]) - # self.assertEqual(context, manager1.contexts[i]) - # i = i + 1 - - # def test_constructor_from_assembled(self): - # hed_strings = get_assembled_strings(self.input_data, hed_schema=self.schema, expand_defs=False) - # manager1 = HedContextManager(hed_strings, self.schema) - # self.assertEqual(len(manager1.hed_strings), 200, - # "The constructor for assembled strings has expected # of strings") - # self.assertEqual(len(manager1.onset_list), 261, - # "The constructor for assembled strings has onset_list of correct length") - - # def test_constructor_unmatched(self): - # with self.assertRaises(HedFileError) as context: - # HedContextManager(self.test_strings2, self.schema) - # self.assertEqual(context.exception.args[0], 'UnmatchedOffset') - - # def test_constructor_multiple_values(self): - # manager = HedContextManager(self.test_strings3, self.schema) - # self.assertEqual(len(manager.onset_list), 3, "Constructor should have right number of onsets") + self.assertEqual(11, len(def_names)) if __name__ == '__main__': diff --git a/tests/tools/analysis/test_hed_tag_manager.py b/tests/tools/analysis/test_hed_tag_manager.py index dcdaa0ddd..cca0b551b 100644 --- a/tests/tools/analysis/test_hed_tag_manager.py +++ b/tests/tools/analysis/test_hed_tag_manager.py @@ -3,8 +3,6 @@ from pandas import DataFrame from hed.models import DefinitionDict from hed.models.hed_string import HedString -from hed.models.hed_tag import HedTag -from hed.models.sidecar import Sidecar from hed.models.tabular_input import TabularInput from hed.schema.hed_schema_io import load_schema_version from hed.tools.analysis.event_manager import EventManager @@ -61,7 +59,7 @@ def setUpClass(cls): bids_root_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../data/bids_tests/eeg_ds003645s_hed')) events_path = os.path.realpath(os.path.join(bids_root_path, - 'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv')) + 'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv')) sidecar_path = os.path.realpath(os.path.join(bids_root_path, 'task-FacePerception_events.json')) cls.input_data = TabularInput(events_path, sidecar_path) cls.schema = schema @@ -75,21 +73,23 @@ def setUpClass(cls): def test_constructor_from_tabular_input(self): event_man = EventManager(self.input_data, self.schema) - remove_types = [] tag_man1 = HedTagManager(EventManager(self.input_data, self.schema)) self.assertIsInstance(tag_man1, HedTagManager) - hed_objs1a = tag_man1.get_hed_objs(include_context=False) - hed_objs1b = tag_man1.get_hed_objs(include_context=True) + hed_objs1a = tag_man1.get_hed_objs(include_context=False, replace_defs=False) + hed_objs1b = tag_man1.get_hed_objs(include_context=True, replace_defs=False) hed_objs1c = tag_man1.get_hed_objs(include_context=False, replace_defs=True) hed_objs1d = tag_man1.get_hed_objs(include_context=True, replace_defs=True) tag_man2 = HedTagManager(event_man, remove_types=['Condition-variable', 'Task']) - hed_objs2a = tag_man2.get_hed_objs(include_context=False) - hed_objs2b = tag_man2.get_hed_objs(include_context=True) - self.assertIsInstance(tag_man1, HedTagManager) - self.assertIsInstance(tag_man1, HedTagManager) + hed_objs2a = tag_man2.get_hed_objs(include_context=False, replace_defs=False) + hed_objs2b = tag_man2.get_hed_objs(include_context=True, replace_defs=False) + hed_objs1c = tag_man2.get_hed_objs(include_context=False, replace_defs=True) + hed_objs1d = tag_man2.get_hed_objs(include_context=True, replace_defs=True) + self.assertIsInstance(tag_man2, HedTagManager) + self.assertIsInstance(tag_man2, HedTagManager) def test_get_hed_objs(self): event_man = EventManager(self.input_data, self.schema) + tag_man1 = HedTagManager(EventManager(self.input_data, self.schema)) # tag_man = HedTagManager(event_man, remove_types=['Condition-variable', 'Task']) # hed_objs = tag_man.get_hed_objs() # self.assertIsInstance(hed_objs, list) @@ -101,19 +101,19 @@ def test_get_hed_objs(self): # event_man = EventManager(input_data, self.schema) # var_manager = HedType(event_man, 'run-01') # self.assertIsInstance(var_manager, HedType, "Constructor should create a HedTypeManager variable caps") - # + # # def test_constructor_multiple_values(self): # type_var = HedType(self.event_man2, 'test-it') # self.assertIsInstance(type_var, HedType, "Constructor should create a HedType from an event manager") # self.assertEqual(len(type_var._type_map), 3, # "Constructor should have right number of type_variables if multiple") - # + # # def test_constructor_unmatched(self): # with self.assertRaises(KeyError) as context: # event_man = EventManager(self.input_data3, self.schema, extra_defs=self.def_dict) # HedType(event_man, 'run-01') # self.assertEqual(context.exception.args[0], 'cond3') - # + # # def test_get_variable_factors(self): # sidecar1 = Sidecar(self.sidecar_path, name='face_sub1_json') # input_data = TabularInput(self.events_path, sidecar1, name="face_sub1_events") @@ -128,7 +128,7 @@ def test_get_hed_objs(self): # self.assertEqual(len(df_new2.columns), 3) # df_new3 = var_manager.get_type_factors(type_values=["junk"]) # self.assertIsNone(df_new3) - # + # # def test_str(self): # sidecar1 = Sidecar(self.sidecar_path, name='face_sub1_json') # input_data = TabularInput(self.events_path, sidecar1, name="face_sub1_events") @@ -136,7 +136,7 @@ def test_get_hed_objs(self): # var_manager = HedType(event_man, 'run-01') # new_str = str(var_manager) # self.assertIsInstance(new_str, str) - # + # # def test_summarize_variables(self): # sidecar1 = Sidecar(self.sidecar_path, name='face_sub1_json') # input_data = TabularInput(self.events_path, sidecar1, name="face_sub1_events") @@ -146,7 +146,7 @@ def test_get_hed_objs(self): # self.assertIsInstance(summary, dict, "get_summary produces a dictionary if not json") # self.assertEqual(len(summary), 3, "Summarize_variables has right number of condition type_variables") # self.assertIn("key-assignment", summary, "get_summary has a correct key") - # + # # def test_extract_definition_variables(self): # var_manager = HedType(self.event_man1, 'run-01') # var_levels = var_manager._type_map['var3'].levels @@ -156,12 +156,12 @@ def test_get_hed_objs(self): # var_manager._extract_definition_variables(tag, 5) # self.assertIn('cond3/7', var_levels, # "_extract_definition_variables after extraction def/cond3/7 not in levels") - # + # # def test_get_variable_names(self): # conditions1 = HedType(self.event_man1, 'run-01') # list1 = conditions1.get_type_value_names() # self.assertEqual(len(list1), 8, "get_variable_tags list should have the right length") - # + # # def test_get_variable_def_names(self): # conditions1 = HedType(self.event_man1, 'run-01') # list1 = conditions1.get_type_def_names() diff --git a/tests/tools/analysis/test_hed_type_defs.py b/tests/tools/analysis/test_hed_type_defs.py index 3d66cce40..9e64c3298 100644 --- a/tests/tools/analysis/test_hed_type_defs.py +++ b/tests/tools/analysis/test_hed_type_defs.py @@ -62,39 +62,55 @@ def test_constructor_from_sidecar(self): "Constructor should create a HedTypeDefinitions from a tabular input") self.assertEqual(len(def_man.def_map), 8, "Constructor condition_map should have the right length") self.assertEqual(len(def_man.definitions), len(definitions)) - defs = def_man.get_type_def_names() + defs = def_man.type_def_names self.assertIsInstance(defs, list) self.assertEqual(len(defs), 8) - def test_get_vars(self): + def test_constructor_from_tabular(self): + def_dict = self.input_data.get_def_dict(self.schema) + def_man = HedTypeDefs(def_dict, type_tag="Condition-variable") + self.assertIsInstance(def_man, HedTypeDefs) + self.assertEqual(len(def_man.def_map), 8) + self.assertEqual(len(def_man.type_map), 3) + self.assertEqual(len(def_man.type_def_names), 8) + + def test_get_type_values_tabular(self): + def_dict = self.input_data.get_def_dict(self.schema) + def_man = HedTypeDefs(def_dict, type_tag="Condition-variable") + test_str = HedString("Sensory-event, Def/Right-sym-cond", self.schema) + values1 = def_man.get_type_values(test_str) + self.assertIsInstance(values1, list) + self.assertEqual(1, len(values1)) + + def test_get_type_values(self): def_man = HedTypeDefs(self.definitions1) item1 = HedString("Sensory-event,((Red,Blue)),", self.schema) vars1 = def_man.get_type_values(item1) self.assertFalse(vars1, "get_type_values should return None if no condition type_variables") item2 = HedString(f"Sensory-event,(Def/Cond1,(Red,Blue,Condition-variable/Trouble))", self.schema) vars2 = def_man.get_type_values(item2) - self.assertEqual(len(vars2), 1, "get_type_values should return correct number of condition type_variables") + self.assertEqual(1, len(vars2), "get_type_values should return correct number of condition type_variables") item3 = HedString(f"Sensory-event,(Def/Cond1,(Red,Blue,Condition-variable/Trouble))," f"(Def/Cond2),Green,Yellow,Def/Cond5, Def/Cond6/4, Description/Tell me", self.schema) vars3 = def_man.get_type_values(item3) self.assertEqual(len(vars3), 5, "get_type_values should return multiple condition type_variables") - def test_get_def_names(self): + def test_extract_def_names(self): def_man = HedTypeDefs(self.definitions1) - a = def_man.get_def_names(HedTag('Def/Cond3/4', hed_schema=self.schema)) + a = def_man.extract_def_names(HedTag('Def/Cond3/4', hed_schema=self.schema)) self.assertEqual(len(a), 1, "get_def_names returns 1 item if single tag") self.assertEqual(a[0], 'cond3', "get_def_names returns the correct item if single tag") - b = def_man.get_def_names(HedTag('Def/Cond3/4', hed_schema=self.schema), no_value=False) + b = def_man.extract_def_names(HedTag('Def/Cond3/4', hed_schema=self.schema), no_value=False) self.assertEqual(len(b), 1, "get_def_names returns 1 item if single tag") self.assertEqual(b[0], 'cond3/4', "get_def_names returns the correct item if single tag") - c = def_man.get_def_names(HedString('(Def/Cond3/5,(Red, Blue))', hed_schema=self.schema)) + c = def_man.extract_def_names(HedString('(Def/Cond3/5,(Red, Blue))', hed_schema=self.schema)) self.assertEqual(len(c), 1, "get_def_names returns 1 item if single group def") self.assertEqual(c[0], 'cond3', "get_def_names returns the correct item if single group def") - d = def_man.get_def_names(HedString('(Def/Cond3/6,(Red, Blue, Def/Cond1), Def/Cond2)', hed_schema=self.schema), - no_value=False) + d = def_man.extract_def_names(HedString('(Def/Cond3/6,(Red, Blue, Def/Cond1), Def/Cond2)', + hed_schema=self.schema), no_value=False) self.assertEqual(len(d), 3, "get_def_names returns right number of items if multiple defs") self.assertEqual(d[0], 'cond3/6', "get_def_names returns the correct item if multiple def") - e = def_man.get_def_names(HedString('((Red, Blue, (Green), Black))', hed_schema=self.schema)) + e = def_man.extract_def_names(HedString('((Red, Blue, (Green), Black))', hed_schema=self.schema)) self.assertFalse(e, "get_def_names returns no items if no defs") def test_split_name(self): diff --git a/tests/tools/remodeling/operations/test_summarize_hed_tags_op.py b/tests/tools/remodeling/operations/test_summarize_hed_tags_op.py index e8c0322db..3e2a2d508 100644 --- a/tests/tools/remodeling/operations/test_summarize_hed_tags_op.py +++ b/tests/tools/remodeling/operations/test_summarize_hed_tags_op.py @@ -85,42 +85,42 @@ def test_do_op_options(self): dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions=['8.2.0']) df = pd.read_csv(self.data_path, delimiter='\t', header=0, keep_default_na=False, na_values=",null") - # no replace, no context, types removed - parms1 = json.loads(self.json_parms) - parms1["summary_name"] = "tag summary 1" - sum_op1 = SummarizeHedTagsOp(parms1) - df_new1 = sum_op1.do_op(dispatch, dispatch.prep_data(df), 'subj2_run1', sidecar=self.json_path) - self.assertIsInstance(sum_op1, SummarizeHedTagsOp, "constructor creates an object of the correct type") - self.assertEqual(200, len(df_new1), "summarize_hed_type_op dataframe length is correct") - self.assertEqual(10, len(df_new1.columns), "summarize_hed_type_op has correct number of columns") - self.assertIn(sum_op1.summary_name, dispatch.summary_dicts) - self.assertIsInstance(dispatch.summary_dicts[sum_op1.summary_name], HedTagSummary) - counts1 = dispatch.summary_dicts[sum_op1.summary_name].summary_dict['subj2_run1'] - self.assertIsInstance(counts1, HedTagCounts) - self.assertEqual(len(counts1.tag_dict), 16) - self.assertNotIn('event-context', counts1.tag_dict) - self.assertIn('def', counts1.tag_dict) - self.assertNotIn('task', counts1.tag_dict) - self.assertNotIn('condition-variable', counts1.tag_dict) - - # no replace, context, types removed - parms2 = json.loads(self.json_parms) - parms2["include_context"] = True - parms2["summary_name"] = "tag summary 2" - sum_op2 = SummarizeHedTagsOp(parms2) - df_new2 = sum_op2.do_op(dispatch, dispatch.prep_data(df), 'subj2_run1', sidecar=self.json_path) - self.assertIsInstance(sum_op2, SummarizeHedTagsOp, "constructor creates an object of the correct type") - self.assertEqual(200, len(df_new2), "summarize_hed_type_op dataframe length is correct") - self.assertEqual(10, len(df_new2.columns), "summarize_hed_type_op has correct number of columns") - self.assertIn(sum_op2.summary_name, dispatch.summary_dicts) - self.assertIsInstance(dispatch.summary_dicts[sum_op2.summary_name], HedTagSummary) - counts2 = dispatch.summary_dicts[sum_op2.summary_name].summary_dict['subj2_run1'] - self.assertIsInstance(counts2, HedTagCounts) - self.assertEqual(len(counts2.tag_dict), len(counts1.tag_dict) + 1) - self.assertIn('event-context', counts2.tag_dict) - self.assertIn('def', counts2.tag_dict) - self.assertNotIn('task', counts2.tag_dict) - self.assertNotIn('condition-variable', counts2.tag_dict) + # # no replace, no context, types removed + # parms1 = json.loads(self.json_parms) + # parms1["summary_name"] = "tag summary 1" + # sum_op1 = SummarizeHedTagsOp(parms1) + # df_new1 = sum_op1.do_op(dispatch, dispatch.prep_data(df), 'subj2_run1', sidecar=self.json_path) + # self.assertIsInstance(sum_op1, SummarizeHedTagsOp, "constructor creates an object of the correct type") + # self.assertEqual(200, len(df_new1), "summarize_hed_type_op dataframe length is correct") + # self.assertEqual(10, len(df_new1.columns), "summarize_hed_type_op has correct number of columns") + # self.assertIn(sum_op1.summary_name, dispatch.summary_dicts) + # self.assertIsInstance(dispatch.summary_dicts[sum_op1.summary_name], HedTagSummary) + # counts1 = dispatch.summary_dicts[sum_op1.summary_name].summary_dict['subj2_run1'] + # self.assertIsInstance(counts1, HedTagCounts) + # self.assertEqual(len(counts1.tag_dict), 16) + # self.assertNotIn('event-context', counts1.tag_dict) + # self.assertIn('def', counts1.tag_dict) + # self.assertNotIn('task', counts1.tag_dict) + # self.assertNotIn('condition-variable', counts1.tag_dict) + # + # # no replace, context, types removed + # parms2 = json.loads(self.json_parms) + # parms2["include_context"] = True + # parms2["summary_name"] = "tag summary 2" + # sum_op2 = SummarizeHedTagsOp(parms2) + # df_new2 = sum_op2.do_op(dispatch, dispatch.prep_data(df), 'subj2_run1', sidecar=self.json_path) + # self.assertIsInstance(sum_op2, SummarizeHedTagsOp, "constructor creates an object of the correct type") + # self.assertEqual(200, len(df_new2), "summarize_hed_type_op dataframe length is correct") + # self.assertEqual(10, len(df_new2.columns), "summarize_hed_type_op has correct number of columns") + # self.assertIn(sum_op2.summary_name, dispatch.summary_dicts) + # self.assertIsInstance(dispatch.summary_dicts[sum_op2.summary_name], HedTagSummary) + # counts2 = dispatch.summary_dicts[sum_op2.summary_name].summary_dict['subj2_run1'] + # self.assertIsInstance(counts2, HedTagCounts) + # self.assertEqual(len(counts2.tag_dict), len(counts1.tag_dict) + 1) + # self.assertIn('event-context', counts2.tag_dict) + # self.assertIn('def', counts2.tag_dict) + # self.assertNotIn('task', counts2.tag_dict) + # self.assertNotIn('condition-variable', counts2.tag_dict) # no replace, context, types removed parms3 = json.loads(self.json_parms) @@ -136,11 +136,11 @@ def test_do_op_options(self): self.assertIsInstance(dispatch.summary_dicts[sum_op3.summary_name], HedTagSummary) counts3 = dispatch.summary_dicts[sum_op3.summary_name].summary_dict['subj2_run1'] self.assertIsInstance(counts3, HedTagCounts) - # self.assertEqual(len(counts3.tag_dict), 44) - # self.assertIn('event-context', counts3.tag_dict) - # self.assertNotIn('def', counts3.tag_dict) - # self.assertNotIn('task', counts3.tag_dict) - # self.assertNotIn('condition-variable', counts3.tag_dict) + self.assertEqual(32, len(counts3.tag_dict)) + # self.assertIn('event-context', counts3.tag_dict) TODO: Fix this + self.assertNotIn('def', counts3.tag_dict) + self.assertNotIn('task', counts3.tag_dict) + self.assertNotIn('condition-variable', counts3.tag_dict) def test_quick3(self): include_context = True