diff --git a/hed/models/base_input.py b/hed/models/base_input.py index 0e7190498..69c345958 100644 --- a/hed/models/base_input.py +++ b/hed/models/base_input.py @@ -172,7 +172,7 @@ def expand_defs(self, hed_schema, def_dict): Parameters: hed_schema (HedSchema or None): The schema to use to identify defs - def_dict (DefinitionDict): The definitions to expand + def_dict (DefinitionDict): The type_defs to expand """ from df_util import expand_defs expand_defs(self._dataframe, hed_schema=hed_schema, def_dict=def_dict, columns=self._mapper.get_tag_columns()) @@ -325,7 +325,7 @@ def validate(self, hed_schema, extra_def_dicts=None, name=None, error_handler=No Parameters: hed_schema(HedSchema): The schema to use for validation - extra_def_dicts(list of DefDict or DefDict): all definitions to use for validation + extra_def_dicts(list of DefDict or DefDict): all type_defs to use for validation name(str): The name to report errors from this file as error_handler (ErrorHandler): Error context to use. Creates a new one if None Returns: @@ -470,7 +470,7 @@ def get_def_dict(self, hed_schema, extra_def_dicts=None): Note: Baseclass implementation returns just extra_def_dicts. Parameters: - hed_schema(HedSchema): used to identify tags to find definitions(if needed) + hed_schema(HedSchema): used to identify tags to find type_defs(if needed) extra_def_dicts (list, DefinitionDict, or None): Extra dicts to add to the list. Returns: diff --git a/hed/models/column_mapper.py b/hed/models/column_mapper.py index 761ab81a9..4cf66619f 100644 --- a/hed/models/column_mapper.py +++ b/hed/models/column_mapper.py @@ -23,7 +23,7 @@ def __init__(self, sidecar=None, tag_columns=None, column_prefix_dictionary=None Parameters: sidecar (Sidecar): A sidecar to gather column data from. tag_columns: (list): A list of ints or strings containing the columns that contain the HED tags. - Sidecar column definitions will take precedent if there is a conflict with tag_columns. 
+ Sidecar column type_defs will take precedence if there is a conflict with tag_columns. column_prefix_dictionary (dict): Dictionary with keys that are column numbers/names and values are HED tag prefixes to prepend to the tags in that column before processing. optional_tag_columns (list): A list of ints or strings containing the columns that contain @@ -383,7 +383,7 @@ def get_def_dict(self, hed_schema, extra_def_dicts=None): """ Return def dicts from every column description. Parameters: - hed_schema (Schema): A HED schema object to use for extracting definitions. + hed_schema (Schema): A HED schema object to use for extracting type_defs. extra_def_dicts (list, DefinitionDict, or None): Extra dicts to add to the list. Returns: diff --git a/hed/models/def_expand_gather.py b/hed/models/def_expand_gather.py index 662ec2e54..c3626a9a5 100644 --- a/hed/models/def_expand_gather.py +++ b/hed/models/def_expand_gather.py @@ -81,14 +81,14 @@ def get_group(self): class DefExpandGatherer: - """Class for gathering definitions from a series of def-expands, including possibly ambiguous ones""" + """Class for gathering type_defs from a series of def-expands, including possibly ambiguous ones""" def __init__(self, hed_schema, known_defs=None, ambiguous_defs=None, errors=None): """Initialize the DefExpandGatherer class. Parameters: hed_schema (HedSchema): The HED schema to be used for processing. - known_defs (dict, optional): A dictionary of known definitions. - ambiguous_defs (dict, optional): A dictionary of ambiguous def-expand definitions. + known_defs (dict, optional): A dictionary of known type_defs. + ambiguous_defs (dict, optional): A dictionary of ambiguous def-expand type_defs. """ self.hed_schema = hed_schema @@ -101,10 +101,10 @@ def process_def_expands(self, hed_strings, known_defs=None): Parameters: hed_strings (pd.Series or list): A Pandas Series or list of HED strings to be processed. - known_defs (dict, optional): A dictionary of known definitions to be added. 
+ known_defs (dict, optional): A dictionary of known type_defs to be added. Returns: - tuple: A tuple containing the DefinitionDict, ambiguous definitions, and errors. + tuple: A tuple containing the DefinitionDict, ambiguous type_defs, and errors. """ if not isinstance(hed_strings, pd.Series): hed_strings = pd.Series(hed_strings) @@ -120,7 +120,7 @@ def process_def_expands(self, hed_strings, known_defs=None): return self.def_dict, self.ambiguous_defs, self.errors def _process_def_expand(self, string): - """Process a single HED string to extract definitions and handle known and ambiguous definitions. + """Process a single HED string to extract type_defs and handle known and ambiguous type_defs. Parameters: string (str): The HED string to be processed. diff --git a/hed/models/definition_dict.py b/hed/models/definition_dict.py index 5a7c5e5a2..fae257659 100644 --- a/hed/models/definition_dict.py +++ b/hed/models/definition_dict.py @@ -7,16 +7,16 @@ class DefinitionDict: - """ Gathers definitions from a single source. + """ Gathers type_defs from a single source. """ def __init__(self, def_dicts=None, hed_schema=None): - """ Definitions to be considered a single source. - + """ Definitions to be considered a single source. + Parameters: - def_dicts (str or list or DefinitionDict): DefDict or list of DefDicts/strings or - a single string whose definitions should be added. + def_dicts (str or list or DefinitionDict): DefDict or list of DefDicts/strings or + a single string whose type_defs should be added. hed_schema(HedSchema or None): Required if passing strings or lists of strings, unused otherwise. :raises TypeError: @@ -30,14 +30,14 @@ def __init__(self, def_dicts=None, hed_schema=None): self.add_definitions(def_dicts, hed_schema) def add_definitions(self, def_dicts, hed_schema=None): - """ Add definitions from dict(s) to this dict. + """ Add type_defs from dict(s) to this dict. 
Parameters: - def_dicts (list or DefinitionDict or dict): DefinitionDict or list of DefinitionDicts/strings/dicts whose + def_dicts (list, DefinitionDict, or dict): DefinitionDict or list of DefinitionDicts/strings/dicts whose definitions should be added. Note dict form expects DefinitionEntries in the same form as a DefinitionDict hed_schema(HedSchema or None): Required if passing strings or lists of strings, unused otherwise. - + :raises TypeError: - Bad type passed as def_dicts """ @@ -63,7 +63,7 @@ def _add_definition(self, def_tag, def_value): self.defs[def_tag] = def_value def _add_definitions_from_dict(self, def_dict): - """ Add the definitions found in the given definition dictionary to this mapper. + """ Add the type_defs found in the given definition dictionary to this mapper. Parameters: def_dict (DefinitionDict or dict): DefDict whose definitions should be added. @@ -92,29 +92,29 @@ def __len__(self): return len(self.defs) def items(self): - """ Returns the dictionary of definitions + """ Returns the dictionary of type_defs Alias for .defs.items() Returns: - def_entries({str: DefinitionEntry}): A list of definitions + def_entries({str: DefinitionEntry}): A list of type_defs """ return self.defs.items() @property def issues(self): - """Returns issues about duplicate definitions.""" + """Returns issues about duplicate type_defs.""" return self._issues def check_for_definitions(self, hed_string_obj, error_handler=None): """ Check string for definition tags, adding them to self. Parameters: - hed_string_obj (HedString): A single hed string to gather definitions from. - error_handler (ErrorHandler or None): Error context used to identify where definitions are found. + hed_string_obj (HedString): A single hed string to gather type_defs from. + error_handler (ErrorHandler or None): Error context used to identify where type_defs are found. Returns: - list: List of issues encountered in checking for definitions. Each issue is a dictionary. 
+ list: List of issues encountered in checking for type_defs. Each issue is a dictionary. """ def_issues = [] for definition_tag, group in hed_string_obj.find_top_level_tags(anchor_tags={DefTagNames.DEFINITION_KEY}): @@ -302,7 +302,7 @@ def get_as_strings(def_dict): """ Convert the entries to strings of the contents Parameters: - def_dict(DefinitionDict or dict): A dict of definitions + def_dict(DefinitionDict or dict): A dict of type_defs Returns: dict(str: str): definition name and contents diff --git a/hed/models/df_util.py b/hed/models/df_util.py index 0a9373d1e..34a891f84 100644 --- a/hed/models/df_util.py +++ b/hed/models/df_util.py @@ -29,7 +29,7 @@ def get_assembled(tabular_file, sidecar, hed_schema, extra_def_dicts=None, join_ Returns: tuple: hed_strings(list of HedStrings):A list of HedStrings or a list of lists of HedStrings - def_dict(DefinitionDict): The definitions from this Sidecar + def_dict(DefinitionDict): The type_defs from this Sidecar """ if isinstance(sidecar, str): sidecar = Sidecar(sidecar) @@ -105,7 +105,7 @@ def expand_defs(df, hed_schema, def_dict, columns=None): Parameters: df (pd.Dataframe or pd.Series): The dataframe or series to modify hed_schema (HedSchema or None): The schema to use to identify defs - def_dict (DefinitionDict): The definitions to expand + def_dict (DefinitionDict): The type_defs to expand columns (list or None): The columns to modify on the dataframe """ if isinstance(df, pd.Series): @@ -133,18 +133,18 @@ def _expand_defs(hed_string, hed_schema, def_dict): def process_def_expands(hed_strings, hed_schema, known_defs=None, ambiguous_defs=None): - """ Gather def-expand tags in the strings/compare with known definitions to find any differences + """ Gather def-expand tags in the strings/compare with known type_defs to find any differences Parameters: hed_strings (list or pd.Series): A list of HED strings to process. 
hed_schema (HedSchema): The schema to use known_defs (DefinitionDict or list or str or None): - A DefinitionDict or anything its constructor takes. These are the known definitions going in, that must + A DefinitionDict or anything its constructor takes. These are the known type_defs going in, that must match perfectly. - ambiguous_defs (dict): A dictionary containing ambiguous definitions + ambiguous_defs (dict): A dictionary containing ambiguous type_defs format TBD. Currently def name key: list of lists of HED tags values Returns: - tuple: A tuple containing the DefinitionDict, ambiguous definitions, and errors. + tuple: A tuple containing the DefinitionDict, ambiguous type_defs, and errors. """ from hed.models.def_expand_gather import DefExpandGatherer diff --git a/hed/models/hed_string.py b/hed/models/hed_string.py index eaeb48371..09b57019e 100644 --- a/hed/models/hed_string.py +++ b/hed/models/hed_string.py @@ -114,7 +114,7 @@ def copy(self): def remove_definitions(self): """ Remove definition tags and groups from this string. - This does not validate definitions and will blindly removing invalid ones as well. + This does not validate type_defs and will blindly remove invalid ones as well. """ definition_groups = self.find_top_level_tags({DefTagNames.DEFINITION_KEY}, include_groups=1) if definition_groups: @@ -178,7 +178,7 @@ def split_into_groups(hed_string, hed_schema, def_dict=None): Parameters: hed_string (str): A hed string consisting of tags and tag groups to be processed. hed_schema (HedSchema): HED schema to use to identify tags. - def_dict(DefinitionDict): The definitions to identify + def_dict(DefinitionDict): The type_defs to identify Returns: list: A list of HedTag and/or HedGroup. 
diff --git a/hed/models/model_constants.py b/hed/models/model_constants.py index 5fdb54cda..3aed6608a 100644 --- a/hed/models/model_constants.py +++ b/hed/models/model_constants.py @@ -5,7 +5,7 @@ class DefTagNames: - """ Source names for definitions, def labels, and expanded labels""" + """ Source names for type_defs, def labels, and expanded labels""" DEF_ORG_KEY = 'Def' DEF_EXPAND_ORG_KEY = 'Def-expand' diff --git a/hed/models/sidecar.py b/hed/models/sidecar.py index d7d77a09b..be4ed9614 100644 --- a/hed/models/sidecar.py +++ b/hed/models/sidecar.py @@ -54,12 +54,12 @@ def all_hed_columns(self): @property def def_dict(self): - """This is the definitions from this sidecar. + """This is the type_defs from this sidecar. - Generally you should instead call get_def_dict to get the relevant definitions + Generally you should instead call get_def_dict to get the relevant type_defs Returns: - DefinitionDict: The definitions for this sidecar + DefinitionDict: The type_defs for this sidecar """ return self._def_dict @@ -76,7 +76,7 @@ def get_def_dict(self, hed_schema, extra_def_dicts=None): """ Returns the definition dict for this sidecar. Parameters: - hed_schema(HedSchema): used to identify tags to find definitions + hed_schema(HedSchema): used to identify tags to find type_defs extra_def_dicts (list, DefinitionDict, or None): Extra dicts to add to the list. Returns: @@ -192,14 +192,14 @@ def _load_json_file(self, fp): raise HedFileError(HedExceptions.CANNOT_PARSE_JSON, str(e), self.name) from e def extract_definitions(self, hed_schema, error_handler=None): - """ Gather and validate definitions in metadata. + """ Gather and validate type_defs in metadata. Parameters: hed_schema (HedSchema): The schema to used to identify tags. error_handler (ErrorHandler or None): The error handler to use for context, uses a default one if None. Returns: - DefinitionDict: Contains all the definitions located in the sidecar. 
+ DefinitionDict: Contains all the type_defs located in the sidecar. """ if error_handler is None: diff --git a/hed/models/tabular_input.py b/hed/models/tabular_input.py index 92e63cdd5..4511f074f 100644 --- a/hed/models/tabular_input.py +++ b/hed/models/tabular_input.py @@ -58,7 +58,7 @@ def get_def_dict(self, hed_schema, extra_def_dicts=None): """ Returns the definition dict for this sidecar. Parameters: - hed_schema(HedSchema): used to identify tags to find definitions + hed_schema(HedSchema): used to identify tags to find type_defs extra_def_dicts (list, DefinitionDict, or None): Extra dicts to add to the list. Returns: diff --git a/hed/models/timeseries_input.py b/hed/models/timeseries_input.py index 0b9cbee18..125800d18 100644 --- a/hed/models/timeseries_input.py +++ b/hed/models/timeseries_input.py @@ -17,7 +17,7 @@ def __init__(self, file=None, sidecar=None, extra_def_dicts=None, name=None): name (str): The name to display for this file for error purposes. Notes: - - The extra_def_dicts are external definitions that override the ones in the object. + - The extra_def_dicts are external type_defs that override the ones in the object. 
""" diff --git a/hed/tools/__init__.py b/hed/tools/__init__.py index d3a135e2c..435af03eb 100644 --- a/hed/tools/__init__.py +++ b/hed/tools/__init__.py @@ -2,9 +2,9 @@ from .analysis.file_dictionary import FileDictionary # from .analysis.hed_context_manager import OnsetGroup, HedContextManager -from .analysis.hed_type_definitions import HedTypeDefinitions +from .analysis.hed_type_defs import HedTypeDefs from .analysis.hed_type_factors import HedTypeFactors -from .analysis.hed_type_values import HedTypeValues +from .analysis.hed_type import HedType from .analysis.hed_type_manager import HedTypeManager from .analysis.hed_type_counts import HedTypeCount from .analysis.key_map import KeyMap diff --git a/hed/tools/analysis/__init__.py b/hed/tools/analysis/__init__.py index 124390237..82bf112d7 100644 --- a/hed/tools/analysis/__init__.py +++ b/hed/tools/analysis/__init__.py @@ -1,9 +1,9 @@ """ Basic analysis tools. """ from .file_dictionary import FileDictionary # from .hed_context_manager import OnsetGroup, HedContextManager -from .hed_type_definitions import HedTypeDefinitions +from .hed_type_defs import HedTypeDefs from .hed_type_factors import HedTypeFactors -from .hed_type_values import HedTypeValues +from .hed_type import HedType from .hed_type_manager import HedTypeManager from .hed_type_counts import HedTypeCount from .key_map import KeyMap diff --git a/hed/tools/analysis/analysis_util.py b/hed/tools/analysis/analysis_util.py index 37f2b9b9d..475ca209c 100644 --- a/hed/tools/analysis/analysis_util.py +++ b/hed/tools/analysis/analysis_util.py @@ -14,11 +14,11 @@ def assemble_hed(data_input, sidecar, schema, columns_included=None, expand_defs Parameters: data_input (TabularInput): The tabular input file whose HED annotations are to be assembled. - sidecar (Sidecar): Sidecar with definitions. + sidecar (Sidecar): Sidecar with type_defs. schema (HedSchema): Hed schema columns_included (list or None): A list of additional column names to include. 
If None, only the list of assembled tags is included. - expand_defs (bool): If True, definitions are expanded when the events are assembled. + expand_defs (bool): If True, type_defs are expanded when the events are assembled. Returns: DataFrame or None: A DataFrame with the assembled events. @@ -41,7 +41,7 @@ def assemble_hed(data_input, sidecar, schema, columns_included=None, expand_defs else: df = data_input.dataframe[eligible_columns].copy(deep=True) df['HED_assembled'] = hed_string_list - # definitions = data_input.get_definitions().gathered_defs + # type_defs = data_input.get_definitions().gathered_defs return df, definitions @@ -95,7 +95,7 @@ def search_strings(hed_strings, queries, query_names=None): :raises ValueError: - If query names are invalid or duplicated. - + """ expression_parsers, query_names = get_expression_parsers(queries, query_names=query_names) @@ -113,7 +113,7 @@ def search_strings(hed_strings, queries, query_names=None): # Parameters: # table (TabularInput): The input file to be searched. # hed_schema (HedSchema or HedschemaGroup): If provided the HedStrings are converted to canonical form. -# expand_defs (bool): If True, definitions are expanded when the events are assembled. +# expand_defs (bool): If True, type_defs are expanded when the events are assembled. # # Returns: # list: A list of HedString objects. 
@@ -139,7 +139,7 @@ def search_strings(hed_strings, queries, query_names=None): # """ # # eligible_columns, missing_columns = separate_values(list(data_input.dataframe.columns), columns_included) -# hed_list, definitions = df_util.get_assembled(data_input, sidecar, hed_schema, extra_def_dicts=None, join_columns=True, +# hed_list, type_defs = df_util.get_assembled(data_input, sidecar, hed_schema, extra_def_dicts=None, join_columns=True, # shrink_defs=False, expand_defs=True) # expression = QueryParser(query) # hed_tags = [] @@ -187,7 +187,7 @@ def search_strings(hed_strings, queries, query_names=None): # list: A list of the removed Defs. # # Notes: -# - the hed_string_obj passed in no longer has definitions. +# - the hed_string_obj passed in no longer has type_defs. # # """ # to_remove = [] diff --git a/hed/tools/analysis/annotation_util.py b/hed/tools/analysis/annotation_util.py index 61704d625..a0942068e 100644 --- a/hed/tools/analysis/annotation_util.py +++ b/hed/tools/analysis/annotation_util.py @@ -319,7 +319,7 @@ def _flatten_val_col(col_key, col_dict): # # Returns: # str: A HED string extracted from the row. -# str: A string representing the description (without the Description tag. +# str: A string representing the description (without the Description tag). # # Notes: # If description_tag is True the entire tag string is included with description. 
diff --git a/hed/tools/analysis/column_name_summary.py b/hed/tools/analysis/column_name_summary.py index 5c7a710c9..90ed0ae88 100644 --- a/hed/tools/analysis/column_name_summary.py +++ b/hed/tools/analysis/column_name_summary.py @@ -26,7 +26,7 @@ def update_headers(self, column_names): return len(self.unique_headers) - 1 def get_summary(self, as_json=False): - patterns = [list() for element in self.unique_headers] + patterns = [list() for _ in self.unique_headers] for key, value in self.file_dict.items(): patterns[value].append(key) column_headers = [] diff --git a/hed/tools/analysis/event_manager.py b/hed/tools/analysis/event_manager.py index 648f96358..b64ac0409 100644 --- a/hed/tools/analysis/event_manager.py +++ b/hed/tools/analysis/event_manager.py @@ -1,8 +1,11 @@ """ Manages events of temporal extent. """ +from hed.models import HedString from hed.models.model_constants import DefTagNames from hed.models.df_util import get_assembled +from hed.models.string_util import split_base_tags, split_def_tags from hed.tools.analysis.temporal_event import TemporalEvent +from hed.tools.analysis.hed_type_defs import HedTypeDefs class EventManager: @@ -13,7 +16,7 @@ def __init__(self, input_data, hed_schema, extra_defs=None): Parameters: input_data (TabularInput): Represents an events file with its sidecar. hed_schema (HedSchema): HED schema used in this - extra_defs (DefinitionDict): Extra definitions not included in the input_data information. + extra_defs (DefinitionDict): Extra type_defs not included in the input_data information. :raises HedFileError: - if there are any unmatched offsets. 
@@ -25,43 +28,18 @@ def __init__(self, input_data, hed_schema, extra_defs=None): self.event_list = [[] for _ in range(len(input_data.dataframe))] self.hed_schema = hed_schema + self.input_data = input_data self.def_dict = input_data.get_def_dict(hed_schema, extra_def_dicts=extra_defs) self.onsets = input_data.dataframe['onset'].tolist() self.hed_strings = None # Remaining HED strings copy.deepcopy(hed_strings) - self.anchor_dict = {} self._create_event_list(input_data) - self._create_anchor_list() - - # def iter_context(self): - # """ Iterate rows of context. - # - # Yields: - # int: position in the dataFrame - # HedString: Context - # - # """ - # - # for index in range(len(self.contexts)): - # yield index, self.contexts[index] - - def _create_anchor_list(self): - """ Populate the dictionary of def names to list of temporal events. - - :raises HedFileError: - - If the hed_strings contain unmatched offsets. - - Notes: - - """ - for index, events in enumerate(self.event_list): - for event in events: - index_list = self.anchor_dict.get(event.anchor, []) - index_list.append(event) - self.anchor_dict[event.anchor] = index_list def _create_event_list(self, input_data): """ Populate the event_list with the events with temporal extent indexed by event number. + Parameters: + input_data (TabularInput): A tabular input that includes its relevant sidecar. + :raises HedFileError: - If the hed_strings contain unmatched offsets. @@ -88,7 +66,7 @@ def _extract_temporal_events(self, hed, event_index, onset_dict): onset_dict (dict): Running dict that keeps track of temporal events that haven't yet ended. Note: - This removes the events of temporal extent from the HED string. + This removes the events of temporal extent from hed. """ if not hed: @@ -109,45 +87,105 @@ def _extract_temporal_events(self, hed, event_index, onset_dict): to_remove.append(tup[1]) hed.remove(to_remove) - def _set_event_contexts(self): - """ Creates an event context for each hed string. 
+ def unfold_context(self, remove_types=[]): + """ Unfolds the event information into hed, base, and contexts either as arrays of str or of HedString. - Notes: - The event context would be placed in an event context group, but is kept in a separate array without the - event context group or tag. + Parameters: + remove_types (list): List of types to remove. + + Returns: + list of str or HedString representing the information without the events of temporal extent + list of str or HedString representing the onsets of the events of temporal extent + list of str or HedString representing the ongoing context information. + + """ + + placeholder = "" + remove_defs = self.get_type_defs(remove_types) + new_hed = [placeholder for _ in range(len(self.hed_strings))] + new_base = [placeholder for _ in range(len(self.hed_strings))] + new_contexts = [placeholder for _ in range(len(self.hed_strings))] + base, contexts = self._expand_context() + for index, item in enumerate(self.hed_strings): + new_hed[index] = self._process_hed(item, remove_types=remove_types, + remove_defs=remove_defs, remove_group=False) + new_base[index] = self._process_hed(base[index], remove_types=remove_types, + remove_defs=remove_defs, remove_group=True) + new_contexts[index] = self._process_hed(contexts[index], remove_types=remove_types, + remove_defs=remove_defs, remove_group=True) + return new_hed, new_base, new_contexts # these are each a list of strings + + def _expand_context(self): + """ Expands the onset and the ongoing context for additional processing. 
+ + """ + base = [[] for _ in range(len(self.hed_strings))] + contexts = [[] for _ in range(len(self.hed_strings))] + for events in self.event_list: + for event in events: + this_str = str(event.contents) + base[event.start_index].append(this_str) + for i in range(event.start_index + 1, event.end_index): + contexts[i].append(this_str) + + return self.compress_strings(base), self.compress_strings(contexts) + + def _process_hed(self, hed, remove_types=[], remove_defs=[], remove_group=False): + if not hed: + return "" + # Reconvert even if hed is already a HedString to make sure a copy and expandable. + hed_obj = HedString(str(hed), hed_schema=self.hed_schema, def_dict=self.def_dict) + hed_obj, temp1 = split_base_tags(hed_obj, remove_types, remove_group=remove_group) + if remove_defs: + hed_obj, temp2 = split_def_tags(hed_obj, remove_defs, remove_group=remove_group) + return str(hed_obj) + + def str_list_to_hed(self, str_list): + """ Create a HedString object from a list of strings. + + Parameters: + str_list (list): A list of strings to be concatenated with commas and then converted. + + Returns: + HedString or None: The converted list. """ - # contexts = [[] for _ in range(len(self.hed_strings))] - # for onset in self.onset_list: - # for i in range(onset.start_index+1, onset.end_index): - # contexts[i].append(onset.contents) - # for i in range(len(self.hed_strings)): - # contexts[i] = HedString(",".join(contexts[i]), hed_schema=self.hed_schema) - # self.contexts = contexts - print("_set_event_contexts not implemented yet") - - def _update_onset_list(self, group, onset_dict, event_index): - """ Process one onset or offset group to create onset_list. 
+ filtered_list = [item for item in str_list if item != ''] # list of strings + if not filtered_list: # empty lists don't contribute + return None + return HedString(",".join(filtered_list), self.hed_schema, def_dict=self.def_dict) + + @staticmethod + def compress_strings(list_to_compress): + result_list = ["" for _ in range(len(list_to_compress))] + for index, item in enumerate(list_to_compress): + if item: + result_list[index] = ",".join(item) + return result_list + + def get_type_defs(self, types): + """ Return a list of definition names (lower case) that correspond to one of the specified types. Parameters: - group (HedGroup): The HedGroup containing the onset or offset. - onset_dict (dict): A dictionary of OnsetGroup objects that keep track of span of an event. - event_index (int): The event number in the list. + types (list): List of tags that are treated as types such as 'Condition-variable' - :raises HedFileError: - - if an unmatched offset is encountered. + Returns: + list: List of definition names (lower-case) that correspond to the specified types - Notes: - - Modifies onset_dict and onset_list. 
""" - # def_tags = group.find_def_tags(recursive=False, include_groups=0) - # name = def_tags[0].extension - # onset_element = onset_dict.pop(name, None) - # if onset_element: - # onset_element.end_index = event_index - # self.onset_list.append(onset_element) - # elif is_offset: - # raise HedFileError("UnmatchedOffset", f"Unmatched {name} offset at event {event_index}", " ") - # if not is_offset: - # onset_element = TemporalEvent(name, group, event_index) - # onset_dict[name] = onset_element + def_list = [] + for this_type in types: + type_defs = HedTypeDefs(self.def_dict, type_tag=this_type) + def_list = def_list + list(type_defs.def_map.keys()) + return def_list + + # @staticmethod + # def fix_list(hed_list, hed_schema, as_string=False): + # for index, item in enumerate(hed_list): + # if not item: + # hed_list[index] = None + # elif as_string: + # hed_list[index] = ",".join(str(item)) + # else: + # hed_list[index] = HedString(",".join(str(item)), hed_schema) + # return hed_list diff --git a/hed/tools/analysis/event_manager_copy.py b/hed/tools/analysis/event_manager_copy.py deleted file mode 100644 index 9a8dd02fa..000000000 --- a/hed/tools/analysis/event_manager_copy.py +++ /dev/null @@ -1,147 +0,0 @@ -""" Manages events of temporal extent. """ - -from hed.tools.analysis.temporal_event import TemporalEvent -from hed.models.model_constants import DefTagNames - - -class EventManagerCopy: - - def __init__(self, input_data, hed_schema, extra_def_dict=None): - """ Create an event manager for an events file. Manages events of temporal extent. This - - Parameters: - hed_strings (list): A list of HED strings - onsets (list): A list of onset times that is the same length as hed_strings - def_dict (DefinitionDict): Contains the definitions for this dataset. - - :raises HedFileError: - - if there are any unmatched offsets. - - Notes: Keeps the events of temporal extend by their starting index in events file. These events - are separated from the rest of the annotations. 
- - """ - - self.event_list = [[] for _ in range(len(onsets))] - self.onsets = onsets - self.hed_strings = hed_strings ## copy.deepcopy(hed_strings) - self.def_dict = def_dict - self.anchor_dict ={} - self._create_event_list() - self._create_anchor_list() - - # def iter_context(self): - # """ Iterate rows of context. - # - # Yields: - # int: position in the dataFrame - # HedString: Context - # - # """ - # - # for index in range(len(self.contexts)): - # yield index, self.contexts[index] - - def _create_anchor_list(self): - """ Populate the dictionary of def names to list of temporal events. - - :raises HedFileError: - - If the hed_strings contain unmatched offsets. - - Notes: - - """ - for index, events in enumerate(self.event_list): - for event in events: - index_list = self.anchor_dict.get(event.anchor, []) - index_list.append(event) - self.anchor_dict[event.anchor] = index_list - - def _create_event_list(self): - """ Populate the event_list with the events with temporal extent indexed by event number. - - :raises HedFileError: - - If the hed_strings contain unmatched offsets. - - Notes: - - """ - onset_dict = {} # Temporary dictionary keeping track of temporal events that haven't ended yet. - for event_index, hed in enumerate(self.hed_strings): - self._extract_temporal_events(hed, event_index, onset_dict) - # Now handle the events that extend to end of list - for item in onset_dict.values(): - item.set_end(len(self.onsets), None) - - def _extract_temporal_events(self, hed, event_index, onset_dict): - """ Extract the temporal events and remove them from the other HED strings. - - Parameters: - hed (HedString): The assembled HedString at position event_index in the data. - event_index (int): The position of this string in the data. - onset_dict (dict): Running dict that keeps track of temporal events that haven't yet ended. - - Note: - This removes the events of temporal extent from the HED string. 
- - """ - if not hed: - return - group_tuples = hed.find_top_level_tags(anchor_tags={DefTagNames.ONSET_KEY, DefTagNames.OFFSET_KEY}, - include_groups=2) - to_remove = [] - for tup in group_tuples: - anchor_tag = tup[1].find_def_tags(recursive=False, include_groups=0)[0] - anchor = anchor_tag.extension.lower() - if anchor in onset_dict or tup[0].short_base_tag.lower() == DefTagNames.OFFSET_KEY: - temporal_event = onset_dict.pop(anchor) - temporal_event.set_end(event_index, self.onsets[event_index]) - if tup[0] == DefTagNames.ONSET_KEY: - new_event = TemporalEvent(tup[1], event_index, self.onsets[event_index]) - self.event_list[event_index].append(new_event) - onset_dict[anchor] = new_event - to_remove.append(tup[1]) - hed.remove(to_remove) - - def _set_event_contexts(self): - """ Creates an event context for each hed string. - - Notes: - The event context would be placed in an event context group, but is kept in a separate array without the - event context group or tag. - - """ - # contexts = [[] for _ in range(len(self.hed_strings))] - # for onset in self.onset_list: - # for i in range(onset.start_index+1, onset.end_index): - # contexts[i].append(onset.contents) - # for i in range(len(self.hed_strings)): - # contexts[i] = HedString(",".join(contexts[i]), hed_schema=self.hed_schema) - # self.contexts = contexts - print("_set_event_contexts not implemented yet") - - def _update_onset_list(self, group, onset_dict, event_index): - """ Process one onset or offset group to create onset_list. - - Parameters: - group (HedGroup): The HedGroup containing the onset or offset. - onset_dict (dict): A dictionary of OnsetGroup objects that keep track of span of an event. - event_index (int): The event number in the list. - - :raises HedFileError: - - if an unmatched offset is encountered. - - Notes: - - Modifies onset_dict and onset_list. 
- """ - # def_tags = group.find_def_tags(recursive=False, include_groups=0) - # name = def_tags[0].extension - # onset_element = onset_dict.pop(name, None) - # if onset_element: - # onset_element.end_index = event_index - # self.onset_list.append(onset_element) - # elif is_offset: - # raise HedFileError("UnmatchedOffset", f"Unmatched {name} offset at event {event_index}", " ") - # if not is_offset: - # onset_element = TemporalEvent(name, group, event_index) - # onset_dict[name] = onset_element diff --git a/hed/tools/analysis/hed_context_manager.py b/hed/tools/analysis/hed_context_manager.py deleted file mode 100644 index 79ebb428b..000000000 --- a/hed/tools/analysis/hed_context_manager.py +++ /dev/null @@ -1,141 +0,0 @@ -""" Manages context and events of temporal extent. """ - -from hed.errors.exceptions import HedFileError -from hed.models import HedGroup, HedString -from hed.schema import HedSchema, HedSchemaGroup -from hed.tools.analysis.analysis_util import hed_to_str - -# TODO: [Refactor] clean up distinction between hed as strings versus objects -- maybe replace by event manager. -# TODO: Implement insets - -class OnsetGroup: - def __init__(self, name, contents, start_index, end_index=None): - self.name = name - self.start_index = start_index - self.end_index = end_index - self.contents = hed_to_str(contents, remove_parentheses=True) - - def __str__(self): - return f"{self.name}:[events {self.start_index}:{self.end_index} contents:{self.contents}]" - - -class HedContextManager: - - def __init__(self, hed_strings, hed_schema): - """ Create a context manager for an events file. - - Parameters: - hed_strings (list): A list of HedString objects to be managed. - hed_schema (HedSchema): A HedSchema - - :raises HedFileError: - - If there are any unmatched offsets. - - Notes: - The constructor has the side effect of splitting each element of the hed_strings list into two - by removing the Offset groups and the Onset tags. 
The context has the temporal extent information. - For users wanting to use only Onset events, self.hed_strings contains the information. - - """ - - self.hed_strings = hed_strings - if not isinstance(hed_schema, HedSchema) and not isinstance(hed_schema, HedSchemaGroup): - raise ValueError("ContextRequiresSchema", f"Context manager must have a valid HedSchema of HedSchemaGroup") - self.hed_schema = hed_schema - self.onset_list = [] - self.onset_count = 0 - self.offset_count = 0 - self._create_onset_list() - self._set_event_contexts() - - # def _extract_hed_objs(self, assembled): - # hed_objs = [None for _ in range(len(assembled))] - # for index, value in assembled["HED_assembled"].items(): - # hed_objs[index] = HedString(value, hed_schema=self.hed_schema) - # return hed_objs - - def iter_context(self): - """ Iterate rows of context. - - Yields: - HedString: The HedString. - HedString: Context - - """ - - for index in range(len(self.hed_strings)): - yield self.hed_strings[index], self.contexts[index] - - def _create_onset_list(self): - """ Create a list of events of extended duration. - - :raises HedFileError: - - If the hed_strings contain unmatched offsets. 
- - """ - - self.onset_list = [] - onset_dict = {} - for event_index, hed in enumerate(self.hed_strings): - to_remove = [] # tag_tuples = hed.find_tags(['Onset'], recursive=False, include_groups=1) - onset_tuples = hed.find_top_level_tags(["onset"], include_groups=2) - self.onset_count += len(onset_tuples) - for tup in onset_tuples: - group = tup[1] - group.remove([tup[0]]) - self._update_onset_list(group, onset_dict, event_index, is_offset=False) - offset_tuples = hed.find_top_level_tags(["offset"], include_groups=2) - self.offset_count += len(offset_tuples) - for tup in offset_tuples: - group = tup[1] - to_remove.append(group) - self._update_onset_list(group, onset_dict, event_index, is_offset=True) - hed.remove(to_remove) - - # Now handle the events that extend to end of list - for key, value in onset_dict.items(): - value.end_index = len(self.hed_strings) - self.onset_list.append(value) - - def _set_event_contexts(self): - """ Creates an event context for each hed string. - - Notes: - The event context would be placed in a event context group, but is kept in a separate array without the - event context group or tag. - - """ - contexts = [[] for _ in range(len(self.hed_strings))] - for onset in self.onset_list: - for i in range(onset.start_index+1, onset.end_index): - contexts[i].append(onset.contents) - for i in range(len(self.hed_strings)): - contexts[i] = HedString(",".join(contexts[i]), hed_schema=self.hed_schema) - self.contexts = contexts - - def _update_onset_list(self, group, onset_dict, event_index, is_offset=False): - """ Process one onset or offset group to create onset_list. - - Parameters: - group (HedGroup): The HedGroup containing the onset or offset. - onset_dict (dict): A dictionary of OnsetGroup objects that keep track of span of an event. - event_index (int): The event number in the list. - is_offset (bool): True if processing an offset. - - :raises HedFileError: - - If an unmatched offset is encountered. 
- - Notes: - - Modifies onset_dict and onset_list. - """ - def_tags = group.find_def_tags(recursive=False, include_groups=0) - name = def_tags[0].extension - onset_element = onset_dict.pop(name, None) - if onset_element: - onset_element.end_index = event_index - self.onset_list.append(onset_element) - elif is_offset: - raise HedFileError("UnmatchedOffset", f"Unmatched {name} offset at event {event_index}", " ") - if not is_offset: - onset_element = OnsetGroup(name, group, event_index) - onset_dict[name] = onset_element diff --git a/hed/tools/analysis/hed_context_manager_new.py b/hed/tools/analysis/hed_context_manager_new.py deleted file mode 100644 index f7dbdaeb0..000000000 --- a/hed/tools/analysis/hed_context_manager_new.py +++ /dev/null @@ -1,133 +0,0 @@ -""" Manages context and events of temporal extent. """ - -from hed.errors.exceptions import HedFileError -from hed.models import HedGroup, HedString -from hed.schema import HedSchema, HedSchemaGroup -from hed.tools.analysis.analysis_util import hed_to_str -from hed.tools.analysis.temporal_event import TemporalEvent - -# TODO: [Refactor] clean up distinction between hed as strings versus objects -- maybe replace by event manager. -# TODO: Implement insets - - -class HedContextManagerNew: - - def __init__(self, data, hed_schema): - """ Create a context manager for an events file. - - Parameters: - data (TabularInput): A TabularInput representing a data frame. - hed_schema (HedSchema): A HedSchema - - :raises HedFileError: - - If there are any unmatched offsets. - - Notes: - The constructor has the side effect of splitting each element of the hed_strings list into two - by removing the Offset groups and the Onset tags. The context has the temporal extent information. - For users wanting to use only Onset events, self.hed_strings contains the information. 
- - """ - self.data = data - self.hed_schema = hed_schema - if not isinstance(hed_schema, HedSchema) and not isinstance(hed_schema, HedSchemaGroup): - raise ValueError("ContextRequiresSchema", f"Context manager must have a valid HedSchema of HedSchemaGroup") - self.event_manager = EventManager - self.onset_list = [] - self.onset_count = 0 - self.offset_count = 0 - self.contexts = [] - self._create_onset_list() - self._set_event_contexts() - - # def _extract_hed_objs(self, assembled): - # hed_objs = [None for _ in range(len(assembled))] - # for index, value in assembled["HED_assembled"].items(): - # hed_objs[index] = HedString(value, hed_schema=self.hed_schema) - # return hed_objs - - def iter_context(self): - """ Iterate rows of context. - - Yields: - HedString: The HedString. - HedString: Context - - """ - - for index in range(len(self.hed_strings)): - yield self.hed_strings[index], self.contexts[index] - - def _create_onset_list(self): - """ Create a list of events of extended duration. - - :raises HedFileError: - - If the hed_strings contain unmatched offsets. 
- - """ - - self.onset_list = [] - onset_dict = {} - for event_index, hed in enumerate(self.hed_strings): - to_remove = [] # tag_tuples = hed.find_tags(['Onset'], recursive=False, include_groups=1) - onset_tuples = hed.find_top_level_tags(["onset"], include_groups=2) - self.onset_count += len(onset_tuples) - for tup in onset_tuples: - group = tup[1] - group.remove([tup[0]]) - self._update_onset_list(group, onset_dict, event_index, is_offset=False) - offset_tuples = hed.find_top_level_tags(["offset"], include_groups=2) - self.offset_count += len(offset_tuples) - for tup in offset_tuples: - group = tup[1] - to_remove.append(group) - self._update_onset_list(group, onset_dict, event_index, is_offset=True) - hed.remove(to_remove) - - # Now handle the events that extend to end of list - for key, value in onset_dict.items(): - value.end_index = len(self.hed_strings) - self.onset_list.append(value) - - def _set_event_contexts(self): - """ Creates an event context for each hed string. - - Notes: - The event context would be placed in a event context group, but is kept in a separate array without the - event context group or tag. - - """ - contexts = [[] for _ in range(len(self.hed_strings))] - for onset in self.onset_list: - for i in range(onset.start_index+1, onset.end_index): - contexts[i].append(onset.contents) - for i in range(len(self.hed_strings)): - contexts[i] = HedString(",".join(contexts[i]), hed_schema=self.hed_schema) - self.contexts = contexts - - def _update_onset_list(self, group, onset_dict, event_index, is_offset=False): - """ Process one onset or offset group to create onset_list. - - Parameters: - group (HedGroup): The HedGroup containing the onset or offset. - onset_dict (dict): A dictionary of OnsetGroup objects that keep track of span of an event. - event_index (int): The event number in the list. - is_offset (bool): True if processing an offset. - - :raises HedFileError: - - If an unmatched offset is encountered. 
- - Notes: - - Modifies onset_dict and onset_list. - """ - def_tags = group.find_def_tags(recursive=False, include_groups=0) - name = def_tags[0].extension - onset_element = onset_dict.pop(name, None) - if onset_element: - onset_element.end_index = event_index - self.onset_list.append(onset_element) - elif is_offset: - raise HedFileError("UnmatchedOffset", f"Unmatched {name} offset at event {event_index}", " ") - if not is_offset: - onset_element = TemporalEvent(name, group, event_index) - onset_dict[name] = onset_element diff --git a/hed/tools/analysis/hed_tag_counts.py b/hed/tools/analysis/hed_tag_counts.py index 300319820..1dd86d899 100644 --- a/hed/tools/analysis/hed_tag_counts.py +++ b/hed/tools/analysis/hed_tag_counts.py @@ -76,13 +76,12 @@ def __init__(self, name, total_events=0): self.files = {} self.total_events = total_events - def update_event_counts(self, hed_string_obj, file_name, definitions=None): + def update_event_counts(self, hed_string_obj, file_name): """ Update the tag counts based on a hed string object. Parameters: hed_string_obj (HedString): The HED string whose tags should be counted. file_name (str): The name of the file corresponding to these counts. - definitions (dict): The definitions associated with the HED string. """ if file_name not in self.files: @@ -151,7 +150,8 @@ def _update_template(tag_count, template, unmatched): Parameters: tag_count (HedTagCount): Information for a particular tag. - template (dict): The + template (dict): The dictionary to match. + unmatched (list): List of tag counts not matched so far. """ tag_list = reversed(list(tag_count.tag_terms)) diff --git a/hed/tools/analysis/hed_tag_manager.py b/hed/tools/analysis/hed_tag_manager.py new file mode 100644 index 000000000..e5bdb78af --- /dev/null +++ b/hed/tools/analysis/hed_tag_manager.py @@ -0,0 +1,57 @@ +""" Manager for the HED tags in a tabular file. 
""" + +from hed.models import HedString +from hed.models.string_util import split_base_tags + + +class HedTagManager: + + def __init__(self, event_manager, remove_types=[]): + """ Create a tag manager for one tabular file. + + Parameters: + event_manager (EventManager): an event manager for the tabular file. + remove_types (list or None): List of type tags (such as condition-variable) to remove. + + """ + + self.event_manager = event_manager + self.remove_types = remove_types + self.hed_strings, self.base_strings, self.context_strings = ( + self.event_manager.unfold_context(remove_types=remove_types)) + self.type_def_names = self.event_manager.get_type_defs(remove_types) + + # def get_hed_objs1(self, include_context=True): + # hed_objs = [None for _ in range(len(self.event_manager.onsets))] + # for index in range(len(hed_objs)): + # hed_list = [self.hed_strings[index], self.base_strings[index]] + # if include_context and self.context_strings[index]: + # hed_list.append('(Event-context, (' + self.context_strings[index] + "))") + # hed_objs[index] = self.event_manager.str_list_to_hed(hed_list) + # return hed_objs + + def get_hed_objs(self, include_context=True, replace_defs=False): + hed_objs = [None for _ in range(len(self.event_manager.onsets))] + for index in range(len(hed_objs)): + hed_list = [self.hed_strings[index], self.base_strings[index]] + if include_context and self.context_strings[index]: + hed_list.append("(Event-context, (" + self.context_strings[index] + "))") + hed_objs[index] = self.event_manager.str_list_to_hed(hed_list) + if replace_defs and hed_objs[index]: + for def_tag in hed_objs[index].find_def_tags(recursive=True, include_groups=0): + hed_objs[index].replace(def_tag, def_tag.expandable.get_first_group()) + return hed_objs + + def get_hed_obj(self, hed_str, remove_types=False, remove_group=False): + if not hed_str: + return None + hed_obj = HedString(hed_str, self.event_manager.hed_schema, def_dict=self.event_manager.def_dict) + if 
remove_types: + hed_obj, temp = split_base_tags(hed_obj, self.remove_types, remove_group=remove_group) + return hed_obj + + # def get_hed_string_obj(self, hed_str, filter_types=False): + # hed_obj = HedString(hed_str, self.event_manager.hed_schema, def_dict=self.event_manager.def_dict) + # # if filter_types: + # # hed_obj = hed_obj + # return hed_obj diff --git a/hed/tools/analysis/hed_type.py b/hed/tools/analysis/hed_type.py new file mode 100644 index 000000000..6a3bdc270 --- /dev/null +++ b/hed/tools/analysis/hed_type.py @@ -0,0 +1,199 @@ +""" Manages a type variable and its associated context. """ +import pandas as pd +from hed.models import HedGroup, HedTag +from hed.tools.analysis.hed_type_defs import HedTypeDefs +from hed.tools.analysis.hed_type_factors import HedTypeFactors + + +class HedType: + + def __init__(self, event_manager, name, type_tag="condition-variable"): + """ Create a variable manager for one type-variable for one tabular file. + + Parameters: + event_manager (EventManager): An event manager for the tabular file. + name (str): Name of the tabular file as a unique identifier. + type_tag (str): Lowercase short form of the tag to be managed. + + :raises HedFileError: + - On errors such as unmatched onsets or missing type_defs. + + """ + self.name = name + self.type_tag = type_tag.lower() + self.event_manager = event_manager + self.type_defs = HedTypeDefs(event_manager.def_dict, type_tag=type_tag) + self._type_map = {} # Dictionary of type tags versus dictionary with keys being definition names. + self._extract_variables() + + @property + def total_events(self): + return len(self.event_manager.event_list) + + def get_type_value_factors(self, type_value): + """ Return the HedTypeFactors associated with type_name or None. + + Parameters: + type_value (str): The tag corresponding to the type's value (such as the name of the condition variable). 
+ + Returns: + HedTypeFactors or None + + """ + return self._type_map.get(type_value.lower(), None) + + def get_type_value_level_info(self, type_value): + """ Return type variable corresponding to type_value. + + Parameters: + type_value (str) - name of the type variable + + Returns: + + + """ + return self._type_map.get(type_value, None) + + @property + def type_variables(self): + return set(self._type_map.keys()) + + def get_type_def_names(self): + """ Return the type_defs """ + tag_list = [] + for variable, factor in self._type_map.items(): + tag_list = tag_list + [key for key in factor.levels.keys()] + return list(set(tag_list)) + + def get_type_value_names(self): + return list(self._type_map.keys()) + + def get_summary(self): + var_summary = self._type_map.copy() + summary = {} + for var_name, var_sum in var_summary.items(): + summary[var_name] = var_sum.get_summary() + return summary + + def get_type_factors(self, type_values=None, factor_encoding="one-hot"): + """ Create a dataframe with the indicated type tag values as factors. + + Parameters: + type_values (list or None): A list of values of type tags for which to generate factors. + factor_encoding (str): Type of factor encoding (one-hot or categorical). + + Returns: + DataFrame: Contains the specified factors associated with this type tag. + + + """ + if type_values is None: + type_values = self.get_type_value_names() + df_list = [] + for index, type_value in enumerate(type_values): + var_sum = self._type_map.get(type_value, None) + if not var_sum: + continue + df_list.append(var_sum.get_factors(factor_encoding=factor_encoding)) + if not df_list: + return None + else: + return pd.concat(df_list, axis=1) + + def __str__(self): + return f"{self.type_tag} type_variables: {str(list(self._type_map.keys()))}" + + def _extract_definition_variables(self, item, index): + """ Extract the definition uses from a HedTag, HedGroup, or HedString. 
+ + Parameters: + item (HedTag, HedGroup, or HedString): The item to extract variable information from. + index (int): Position of this item in the object's hed_strings. + + Notes: + This updates the HedTypeFactors information. + + """ + + if isinstance(item, HedTag): + tags = [item] + else: + tags = item.get_all_tags() + for tag in tags: + if tag.short_base_tag.lower() != "def": + continue + hed_vars = self.type_defs.get_type_values(tag) + if not hed_vars: + continue + self._update_definition_variables(tag, hed_vars, index) + + def _update_definition_variables(self, tag, hed_vars, index): + """Update the HedTypeFactors map with information from Def tag. + + Parameters: + tag (HedTag): A HedTag that is a Def tag. + hed_vars (list): A list of names of the hed type_variables + index (ind): The event number associated with this. + + Notes: + This modifies the HedTypeFactors map. + + """ + level = tag.extension.lower() + for var_name in hed_vars: + hed_var = self._type_map.get(var_name, None) + if hed_var is None: + hed_var = HedTypeFactors(self.type_tag, var_name, self.total_events) + self._type_map[var_name] = hed_var + var_levels = hed_var.levels.get(level, {index: 0}) + var_levels[index] = 0 + hed_var.levels[level] = var_levels + + def _extract_variables(self): + """ Extract all type_variables from hed_strings and event_contexts. """ + + hed, base, context = self.event_manager.unfold_context() + for index in range(len(hed)): + this_hed = self.event_manager.str_list_to_hed([hed[index], base[index], context[index]]) + if this_hed: + tag_list = self.get_type_list(self.type_tag, this_hed) + self._update_variables(tag_list, index) + self._extract_definition_variables(this_hed, index) + + @staticmethod + def get_type_list(type_tag, item): + """ Find a list of the given type tag from a HedTag, HedGroup, or HedString. 
+ + Parameters: + type_tag (str): a tag whose direct items you wish to remove + item (HedTag or HedGroup): The item from which to extract condition type_variables. + + Returns: + list: List of the items with this type_tag + + """ + if isinstance(item, HedTag) and item.short_base_tag.lower() == type_tag: + tag_list = [item] + elif isinstance(item, HedGroup) and item.children: + tag_list = item.find_tags_with_term(type_tag, recursive=True, include_groups=0) + else: + tag_list = [] + return tag_list + + def _update_variables(self, tag_list, index): + """ Update the HedTypeFactors based on tags in the list. + + Parameters: + tag_list (list): A list of Condition-variable HedTags. + index (int): An integer representing the position in an array + + """ + for tag in tag_list: + tag_value = tag.extension.lower() + if not tag_value: + tag_value = self.type_tag + hed_var = self._type_map.get(tag_value, None) + if hed_var is None: + hed_var = HedTypeFactors(self.type_tag, tag_value, self.total_events) + self._type_map[tag_value] = hed_var + hed_var.direct_indices[index] = '' diff --git a/hed/tools/analysis/hed_type_counts.py b/hed/tools/analysis/hed_type_counts.py index e68f2064e..056bd63d7 100644 --- a/hed/tools/analysis/hed_type_counts.py +++ b/hed/tools/analysis/hed_type_counts.py @@ -147,4 +147,4 @@ def get_summary(self): for type_value, count in self.type_dict.items(): details[type_value] = count.get_summary() return {'name': str(self.name), 'type_tag': self.type_tag, 'files': list(self.files.keys()), - 'total_events': self.total_events, 'details': details} \ No newline at end of file + 'total_events': self.total_events, 'details': details} diff --git a/hed/tools/analysis/hed_type_definitions.py b/hed/tools/analysis/hed_type_defs.py similarity index 65% rename from hed/tools/analysis/hed_type_definitions.py rename to hed/tools/analysis/hed_type_defs.py index 417083e44..b6a0b8ab1 100644 --- a/hed/tools/analysis/hed_type_definitions.py +++ 
b/hed/tools/analysis/hed_type_defs.py @@ -1,40 +1,39 @@ -""" Manages definitions associated with a type such as condition-variable. """ +""" Manages type_defs associated with a type such as condition-variable. """ from hed.models.hed_tag import HedTag from hed.models.definition_dict import DefinitionDict -class HedTypeDefinitions: +class HedTypeDefs: """ Properties: def_map (dict): keys are definition names, values are dict {type_values, description, tags} - Example: A definition 'famous-face-cond' with contents - `(Condition-variable/Face-type,Description/A face that should be recognized by the participants,(Image,(Face,Famous)))` + Example: A definition 'famous-face-cond' with contents + `(Condition-variable/Face-type,Description/A face that should be recognized by the + participants,(Image,(Face,Famous)))` would have type_values ['face_type']. All items are strings not objects. """ - def __init__(self, definitions, hed_schema, type_tag='condition-variable'): + def __init__(self, definitions, type_tag='condition-variable'): """ Create a definition manager for a type of variable. Parameters: definitions (dict or DefinitionDict): A dictionary of DefinitionEntry objects. - hed_schema (Hedschema or HedSchemaGroup): The schema used for parsing. type_tag (str): Lower-case HED tag string representing the type managed. """ self.type_tag = type_tag.lower() - self.hed_schema = hed_schema if isinstance(definitions, DefinitionDict): self.definitions = definitions.defs elif isinstance(definitions, dict): self.definitions = definitions else: self.definitions = {} - self.def_map = self._extract_def_map() - self.type_map = self._extract_type_map() # + self.def_map = self._extract_def_map() # dict def names vs {description, tags, type_values} + self.type_map = self._extract_type_map() # Dictionary of type_values vs dict definition names def get_type_values(self, item): """ Return a list of type_tag values in item. 
@@ -46,25 +45,46 @@ def get_type_values(self, item): list: A list of the unique values associated with this type """ - def_names = self.get_def_names(item, no_value=True) - type_tag_values = [] + def_names = self.extract_def_names(item, no_value=True) + type_values = [] for def_name in def_names: - values = self.def_map.get(def_name.lower(), None) - if values and values["type_values"]: - type_tag_values = type_tag_values + values["type_values"] - return type_tag_values + values = self.def_map.get(def_name.lower(), {}) + if "type_values" in values: + type_values = type_values + values["type_values"] + return type_values + + @property + def type_def_names(self): + """ List of names of definition that have this type-variable. + + Returns: + list: definition names that have this type. + + """ + return list(self.def_map.keys()) + + @property + def type_names(self): + """ List of names of the type-variables associated with type definitions. + + Returns: + list: type names associated with the type definitions + + """ + return list(self.type_map.keys()) def _extract_def_map(self): """ Extract type_variables associated with each definition and add them to def_map. """ def_map = {} for entry in self.definitions.values(): - type_values, description, other_tags = self._extract_entry_values(entry) - def_map[entry.name.lower()] = \ - {'type_values': type_values, 'description': description, 'tags': other_tags} + type_def, type_values, description, other_tags = self._extract_entry_values(entry) + if type_def: + def_map[type_def.lower()] = \ + {'def_name': type_def, 'type_values': type_values, 'description': description, 'tags': other_tags} return def_map def _extract_type_map(self): - """ Extract the definitions associated with each type value and add them to the dictionary. """ + """ Extract the type_defs associated with each type value and add them to the dictionary. 
""" type_map = {} for def_name, def_values in self.def_map.items(): @@ -88,7 +108,8 @@ def _extract_entry_values(self, entry): """ tag_list = entry.contents.get_all_tags() - type_tag_values = [] + type_values = [] + type_def = "" description = '' other_tags = [] for hed_tag in tag_list: @@ -97,15 +118,12 @@ def _extract_entry_values(self, entry): elif hed_tag.short_base_tag.lower() != self.type_tag: other_tags.append(hed_tag.short_base_tag) else: - value = hed_tag.extension.lower() - if value: - type_tag_values.append(value) - else: - type_tag_values.append(entry.name) - return type_tag_values, description, other_tags + type_values.append(hed_tag.extension.lower()) + type_def = entry.name + return type_def, type_values, description, other_tags @staticmethod - def get_def_names(item, no_value=True): + def extract_def_names(item, no_value=True): """ Return a list of Def values in item. Parameters: @@ -122,7 +140,7 @@ def get_def_names(item, no_value=True): names = [tag.extension.lower() for tag in item.get_all_tags() if 'def' in tag.tag_terms] if no_value: for index, name in enumerate(names): - name, name_value = HedTypeDefinitions.split_name(name) + name, name_value = HedTypeDefs.split_name(name) names[index] = name return names diff --git a/hed/tools/analysis/hed_type_factors.py b/hed/tools/analysis/hed_type_factors.py index b4cc92af4..5af03c9b3 100644 --- a/hed/tools/analysis/hed_type_factors.py +++ b/hed/tools/analysis/hed_type_factors.py @@ -13,9 +13,9 @@ def __init__(self, type_tag, type_value, number_elements): """ Constructor for HedTypeFactors. Parameters: + type_tag (str): Lowercase string corresponding to a HED tag which has a takes value child. type_value (str): The value of the type summarized by this class. number_elements (int): Number of elements in the data column - type_tag (str): Lowercase string corresponding to a HED tag which has a takes value child. 
""" diff --git a/hed/tools/analysis/hed_type_manager.py b/hed/tools/analysis/hed_type_manager.py index 87ed57869..9960d31ec 100644 --- a/hed/tools/analysis/hed_type_manager.py +++ b/hed/tools/analysis/hed_type_manager.py @@ -1,39 +1,35 @@ -""" Manager for type factors and type definitions. """ +""" Manager for type factors and type type_defs. """ import pandas as pd import json -from hed.tools.analysis.hed_type_values import HedTypeValues -from hed.tools.analysis.hed_context_manager import HedContextManager +from hed.tools.analysis.hed_type import HedType class HedTypeManager: - def __init__(self, hed_strings, hed_schema, definitions): + def __init__(self, event_manager): """ Create a variable manager for one tabular file for all type variables. Parameters: - hed_strings (list): A list of HED strings. - hed_schema (HedSchema or HedSchemaGroup): The HED schema to use for processing. - definitions (dict): A dictionary of DefinitionEntry objects. + event_manager (EventManager): an event manager for the tabular file. :raises HedFileError: - - On errors such as unmatched onsets or missing definitions. + - On errors such as unmatched onsets or missing type_defs. 
""" - self.definitions = definitions - self.context_manager = HedContextManager(hed_strings, hed_schema) - self._type_tag_map = {} # a map of type tag into HedTypeValues objects + self.event_manager = event_manager + self._type_map = {} # a map of type tag into HedType objects @property - def type_variables(self): - return list(self._type_tag_map.keys()) + def types(self): + return list(self._type_map.keys()) - def add_type_variable(self, type_name): - if type_name.lower() in self._type_tag_map: + def add_type(self, type_name): + if type_name.lower() in self._type_map: return - self._type_tag_map[type_name.lower()] = \ - HedTypeValues(self.context_manager, self.definitions, 'run-01', type_tag=type_name) + self._type_map[type_name.lower()] = \ + HedType(self.event_manager, 'run-01', type_tag=type_name) def get_factor_vectors(self, type_tag, type_values=None, factor_encoding="one-hot"): """ Return a DataFrame of factor vectors for the indicated HED tag and values @@ -47,7 +43,7 @@ def get_factor_vectors(self, type_tag, type_values=None, factor_encoding="one-ho DataFrame or None: DataFrame containing the factor vectors as the columns. 
""" - this_var = self.get_type_variable(type_tag.lower()) + this_var = self.get_type(type_tag.lower()) if this_var is None: return None variables = this_var.get_type_value_names() @@ -55,23 +51,23 @@ def get_factor_vectors(self, type_tag, type_values=None, factor_encoding="one-ho type_values = variables df_list = [0]*len(type_values) for index, variable in enumerate(type_values): - var_sum = this_var._type_value_map[variable] + var_sum = this_var._type_map[variable] df_list[index] = var_sum.get_factors(factor_encoding=factor_encoding) if not df_list: return None return pd.concat(df_list, axis=1) - def get_type_variable(self, type_tag): + def get_type(self, type_tag): """ Parameters: type_tag (str): HED tag to retrieve the type for Returns: - HedTypeValues or None: the values associated with this type tag + HedType or None: the values associated with this type tag """ - return self._type_tag_map.get(type_tag.lower(), None) + return self._type_map.get(type_tag.lower(), None) def get_type_tag_factor(self, type_tag, type_value): """ Return the HedTypeFactors a specified value and extension. @@ -81,20 +77,20 @@ def get_type_tag_factor(self, type_tag, type_value): type_value (str or None): Value of this tag to return the factors for. 
""" - this_map = self._type_tag_map.get(type_tag.lower(), None) + this_map = self._type_map.get(type_tag.lower(), None) if this_map: - return this_map._type_value_map.get(type_value.lower(), None) + return this_map._type_map.get(type_value.lower(), None) return None - def get_type_tag_def_names(self, type_var): - this_map = self._type_tag_map.get(type_var, None) + def get_type_def_names(self, type_var): + this_map = self._type_map.get(type_var, None) if not this_map: return [] return this_map.get_type_def_names() def summarize_all(self, as_json=False): summary = {} - for type_tag, type_tag_var in self._type_tag_map.items(): + for type_tag, type_tag_var in self._type_map.items(): summary[type_tag] = type_tag_var.get_summary() if as_json: return json.dumps(summary, indent=4) @@ -102,4 +98,4 @@ def summarize_all(self, as_json=False): return summary def __str__(self): - return f"Type_variables: {str(list(self._type_tag_map.keys()))}" + return f"Type_variables: {str(list(self._type_map.keys()))}" diff --git a/hed/tools/analysis/hed_type_values.py b/hed/tools/analysis/hed_type_values.py deleted file mode 100644 index 3190d0bf4..000000000 --- a/hed/tools/analysis/hed_type_values.py +++ /dev/null @@ -1,262 +0,0 @@ -""" Manages a type variable and its associated context. """ - -import pandas as pd -from hed.models.hed_tag import HedTag -from hed.models.hed_group import HedGroup -from hed.tools.analysis.hed_type_definitions import HedTypeDefinitions -from hed.tools.analysis.hed_context_manager import HedContextManager -from hed.tools.analysis.hed_type_factors import HedTypeFactors - - -class HedTypeValues: - - def __init__(self, context_manager, definitions, name, type_tag="condition-variable"): - """ Create a variable manager for one type-variable for one tabular file. - - Parameters: - context_manager (HedContextManager): A list of HED strings. - definitions (dict): A dictionary of DefinitionEntry objects. - name (str): Name of the tabular file as a unique identifier. 
- type_tag (str): Lowercase short form of the tag to be managed. - - :raises HedFileError: - - On errors such as unmatched onsets or missing definitions. - - """ - self.name = name - self.type_tag = type_tag.lower() - self.definitions = HedTypeDefinitions(definitions, context_manager.hed_schema, type_tag=type_tag) - hed_strings = context_manager.hed_strings - hed_contexts = context_manager.contexts - self.total_events = len(hed_strings) - self._type_value_map = {} - self._extract_variables(hed_strings, hed_contexts) - - def get_type_value_factors(self, type_value): - """ Return the HedTypeFactors associated with type_name or None. - - Parameters: - type_value (str): The tag corresponding to the type's value (such as the name of the condition variable). - - Returns: - HedTypeFactors or None - - """ - return self._type_value_map.get(type_value.lower(), None) - - def get_type_value_level_info(self, type_value): - """ Return type variable corresponding to type_value. - - Parameters: - type_value (str) - name of the type variable - - Returns: - - - """ - return self._type_value_map.get(type_value, None) - - @property - def type_variables(self): - return set(self._type_value_map.keys()) - - def get_type_def_names(self): - """ Return the definitions """ - tag_list = [] - for variable, factor in self._type_value_map.items(): - tag_list = tag_list + [key for key in factor.levels.keys()] - return list(set(tag_list)) - - def get_type_value_names(self): - return list(self._type_value_map.keys()) - - def get_summary(self): - var_summary = self._type_value_map.copy() - summary = {} - for var_name, var_sum in var_summary.items(): - summary[var_name] = var_sum.get_summary() - return summary - - def get_type_factors(self, type_values=None, factor_encoding="one-hot"): - """ Create a dataframe with the indicated type tag values as factors. - - Parameters: - type_values (list or None): A list of values of type tags for which to generate factors. 
- factor_encoding (str): Type of factor encoding (one-hot or categorical). - - Returns: - DataFrame: Contains the specified factors associated with this type tag. - - - """ - if type_values is None: - type_values = self.get_type_value_names() - df_list = [] - for index, type_value in enumerate(type_values): - var_sum = self._type_value_map.get(type_value, None) - if not var_sum: - continue - df_list.append(var_sum.get_factors(factor_encoding=factor_encoding)) - if not df_list: - return None - else: - return pd.concat(df_list, axis=1) - - def __str__(self): - return f"{self.type_tag} type_variables: {str(list(self._type_value_map.keys()))}" - - def _extract_definition_variables(self, item, index): - """ Extract the definition uses from a HedTag, HedGroup, or HedString. - - Parameters: - item (HedTag, HedGroup, or HedString): The item to extract variable information from. - index (int): Position of this item in the object's hed_strings. - - Notes: - This updates the HedTypeFactors information. - - """ - - if isinstance(item, HedTag): - tags = [item] - else: - tags = item.get_all_tags() - for tag in tags: - if tag.short_base_tag.lower() != "def": - continue - hed_vars = self.definitions.get_type_values(tag) - if not hed_vars: - continue - self._update_definition_variables(tag, hed_vars, index) - - def _update_definition_variables(self, tag, hed_vars, index): - """Update the HedTypeFactors map with information from Def tag. - - Parameters: - tag (HedTag): A HedTag that is a Def tag. - hed_vars (list): A list of names of the hed type_variables - index (ind): The event number associated with this. - - Notes: - This modifies the HedTypeFactors map. 
- - """ - level = tag.extension.lower() - for var_name in hed_vars: - hed_var = self._type_value_map.get(var_name, None) - if hed_var is None: - hed_var = HedTypeFactors(self.type_tag, var_name, self.total_events) - self._type_value_map[var_name] = hed_var - var_levels = hed_var.levels.get(level, {index: 0}) - var_levels[index] = 0 - hed_var.levels[level] = var_levels - - def _extract_variables(self, hed_strings, hed_contexts): - """ Extract all type_variables from hed_strings and event_contexts. """ - for index, hed in enumerate(hed_strings): - self._extract_direct_variables(hed, index) - self._extract_definition_variables(hed, index) - - self._extract_direct_variables(hed_contexts[index], index) - self._extract_definition_variables(hed_contexts[index], index) - - def _extract_direct_variables(self, item, index): - """ Extract the condition type_variables from a HedTag, HedGroup, or HedString. - - Parameters: - item (HedTag or HedGroup): The item from which to extract condition type_variables. - index (int): Position in the array. - - """ - if isinstance(item, HedTag) and item.short_base_tag.lower() == self.type_tag: - tag_list = [item] - elif isinstance(item, HedGroup) and item.children: - tag_list = item.find_tags_with_term(self.type_tag, recursive=True, include_groups=0) - else: - tag_list = [] - self._update_variables(tag_list, index) - - def _update_variables(self, tag_list, index): - """ Update the HedTypeFactors based on tags in the list. - - Parameters: - tag_list (list): A list of Condition-variable HedTags. 
- index (int): An integer representing the position in an array - - """ - for tag in tag_list: - tag_value = tag.extension.lower() - if not tag_value: - tag_value = self.type_tag - hed_var = self._type_value_map.get(tag_value, None) - if hed_var is None: - hed_var = HedTypeFactors(self.type_tag, tag_value, self.total_events) - self._type_value_map[tag_value] = hed_var - hed_var.direct_indices[index] = '' - - -# if __name__ == '__main__': -# import os -# from hed import Sidecar, TabularInput, HedString -# from hed.models import DefinitionEntry -# from hed.tools.analysis.analysis_util import get_assembled_strings -# hed_schema = load_schema_version(xml_version="8.1.0") -# test_strings1 = [HedString(f"Sensory-event,(Def/Cond1,(Red, Blue, Condition-variable/Trouble),Onset)," -# f"(Def/Cond2,Onset),Green,Yellow, Def/Cond5, Def/Cond6/4", hed_schema=hed_schema), -# HedString('(Def/Cond1, Offset)', hed_schema=hed_schema), -# HedString('White, Black, Condition-variable/Wonder, Condition-variable/Fast', -# hed_schema=hed_schema), -# HedString('', hed_schema=hed_schema), -# HedString('(Def/Cond2, Onset)', hed_schema=hed_schema), -# HedString('(Def/Cond3/4.3, Onset)', hed_schema=hed_schema), -# HedString('Arm, Leg, Condition-variable/Fast, Def/Cond6/7.2', hed_schema=hed_schema)] -# -# test_strings2 = [HedString(f"Def/Cond2, Def/Cond6/4, Def/Cond6/7.8, Def/Cond6/Alpha", hed_schema=hed_schema), -# HedString("Yellow", hed_schema=hed_schema), -# HedString("Def/Cond2", hed_schema=hed_schema), -# HedString("Def/Cond2, Def/Cond6/5.2", hed_schema=hed_schema)] -# test_strings3 = [HedString(f"Def/Cond2, (Def/Cond6/4, Onset), (Def/Cond6/7.8, Onset), Def/Cond6/Alpha", -# hed_schema=hed_schema), -# HedString("Yellow", hed_schema=hed_schema), -# HedString("Def/Cond2, (Def/Cond6/4, Onset)", hed_schema=hed_schema), -# HedString("Def/Cond2, Def/Cond6/5.2 (Def/Cond6/7.8, Offset)", hed_schema=hed_schema), -# HedString("Def/Cond2, Def/Cond6/4", hed_schema=hed_schema)] -# def1 = 
HedString('(Condition-variable/Var1, Circle, Square)', hed_schema=hed_schema) -# def2 = HedString('(condition-variable/Var2, Condition-variable/Apple, Triangle, Sphere)', hed_schema=hed_schema) -# def3 = HedString('(Organizational-property/Condition-variable/Var3, Physical-length/#, Ellipse, Cross)', -# hed_schema=hed_schema) -# def4 = HedString('(Condition-variable, Apple, Banana)', hed_schema=hed_schema) -# def5 = HedString('(Condition-variable/Lumber, Apple, Banana)', hed_schema=hed_schema) -# def6 = HedString('(Condition-variable/Lumber, Label/#, Apple, Banana)', hed_schema=hed_schema) -# defs = {'Cond1': DefinitionEntry('Cond1', def1, False, None), -# 'Cond2': DefinitionEntry('Cond2', def2, False, None), -# 'Cond3': DefinitionEntry('Cond3', def3, True, None), -# 'Cond4': DefinitionEntry('Cond4', def4, False, None), -# 'Cond5': DefinitionEntry('Cond5', def5, False, None), -# 'Cond6': DefinitionEntry('Cond6', def6, True, None) -# } -# -# conditions1 = HedTypeValues(HedContextManager(test_strings1), hed_schema, defs) -# conditions2 = HedTypeValues(HedContextManager(test_strings2), hed_schema, defs) -# conditions3 = HedTypeValues(HedContextManager(test_strings3), hed_schema, defs) -# bids_root_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), -# '../../../tests/data/bids_tests/eeg_ds003645s_hed')) -# events_path = os.path.realpath(os.path.join(bids_root_path, -# 'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv')) -# sidecar_path = os.path.realpath(os.path.join(bids_root_path, 'task-FacePerception_events.json')) -# sidecar1 = Sidecar(sidecar_path, name='face_sub1_json') -# input_data = TabularInput(events_path, sidecar=sidecar1, name="face_sub1_events") -# hed_strings = get_assembled_strings(input_data, hed_schema=hed_schema, expand_defs=False) -# onset_man = HedContextManager(hed_strings) -# definitions = input_data.get_definitions().gathered_defs -# var_type = HedTypeValues(onset_man, hed_schema, definitions) -# df = 
var_type.get_type_factors() -# summary = var_type.get_summary() -# df.to_csv("D:/wh_conditionslong.csv", sep='\t', index=False) -# with open('d:/wh_summary.json', 'w') as f: -# json.dump(summary, f, indent=4) -# -# df_no_hot = var_type.get_type_factors(factor_encoding="categorical") -# df_no_hot.to_csv("D:/wh_conditions_no_hot.csv", sep='\t', index=False) -# with open('d:/wh_summarylong.json', 'w') as f: -# json.dump(summary, f, indent=4) diff --git a/hed/tools/analysis/tabular_summary.py b/hed/tools/analysis/tabular_summary.py index 1262f368b..860487db1 100644 --- a/hed/tools/analysis/tabular_summary.py +++ b/hed/tools/analysis/tabular_summary.py @@ -1,4 +1,4 @@ -""" Summarizes the contents of tabular files. """ +""" Summarize the contents of tabular files. """ import json @@ -134,7 +134,7 @@ def update_summary(self, tab_sum): Notes: - The value_cols and skip_cols are updated as long as they are not contradictory. - - A new skip column cannot used. + - A new skip column cannot be used. """ self.total_files = self.total_files + tab_sum.total_files @@ -222,7 +222,7 @@ def extract_summary(summary_info): Parameters: summary_info (dict or str): A JSON string or a dictionary containing contents of a TabularSummary. - + Returns: TabularSummary: contains the information in summary_info as a TabularSummary object. """ diff --git a/hed/tools/analysis/temporal_event.py b/hed/tools/analysis/temporal_event.py index 876fee6ba..7a689609d 100644 --- a/hed/tools/analysis/temporal_event.py +++ b/hed/tools/analysis/temporal_event.py @@ -1,13 +1,15 @@ -from hed.models import HedTag, HedGroup +from hed.models import HedGroup class TemporalEvent: - """ Represents an event process. - - Note: the contents must have a De + """ Represents an event process with starting and ending. + + Note: the contents have the Onset and duration removed. """ def __init__(self, contents, start_index, start_time): - self.contents = contents # Must not have definition expanded if there is a definition. 
+ if not contents: + raise(ValueError, "A temporal event must have contents") + self.contents = None # Must not have definition expanded if there is a definition. self.start_index = start_index self.start_time = float(start_time) self.end_index = None @@ -15,18 +17,29 @@ def __init__(self, contents, start_index, start_time): self.anchor = None # Lowercase def name with value self.internal_group = None self.insets = [] - self._split_group() + self._split_group(contents) def set_end(self, end_index, end_time): self.end_index = end_index self.end_time = end_time - def _split_group(self): - for item in self.contents.children: + def _split_group(self, contents): + to_remove = [] + for item in contents.children: if isinstance(item, HedGroup): self.internal_group = item + elif item.short_base_tag.lower() == "onset": + to_remove.append(item) + elif item.short_base_tag.lower() == "duration": + to_remove.append(item) + self.end_time = self.short_time + float(item.extension.lower()) # Will need to be fixed for units elif item.short_base_tag.lower() == "def": - self.anchor = item.extension.lower() + self.anchor = item.short_tag + contents.remove(to_remove) + if self.internal_group: + self.contents = contents + else: + self.contents = self.anchor def __str__(self): return f"[{self.start_index}:{self.end_index}] anchor:{self.anchor} contents:{self.contents}" diff --git a/hed/tools/bids/bids_dataset.py b/hed/tools/bids/bids_dataset.py index fc4aa89f4..d6cd4592c 100644 --- a/hed/tools/bids/bids_dataset.py +++ b/hed/tools/bids/bids_dataset.py @@ -2,12 +2,10 @@ import os import json -from hed.errors.error_reporter import ErrorHandler from hed.schema.hed_schema import HedSchema from hed.schema.hed_schema_io import load_schema_version from hed.schema.hed_schema_group import HedSchemaGroup from hed.tools.bids.bids_file_group import BidsFileGroup -from hed.validator.hed_validator import HedValidator LIBRARY_URL_BASE = 
"https://raw.githubusercontent.com/hed-standard/hed-schemas/main/library_schemas/" @@ -32,7 +30,7 @@ def __init__(self, root_path, schema=None, tabular_types=None, schema (HedSchema or HedSchemaGroup): A schema that overrides the one specified in dataset. tabular_types (list or None): List of strings specifying types of tabular types to include. If None or empty, then ['events'] is assumed. - exclude_dirs=['sourcedata', 'derivatives', 'code']): + exclude_dirs=['sourcedata', 'derivatives', 'code']: """ self.root_path = os.path.realpath(root_path) diff --git a/hed/tools/bids/bids_file_group.py b/hed/tools/bids/bids_file_group.py index 44f3f1a21..5a47da6ef 100644 --- a/hed/tools/bids/bids_file_group.py +++ b/hed/tools/bids/bids_file_group.py @@ -1,9 +1,8 @@ """ A group of BIDS files with specified suffix name. """ import os -from hed.errors.error_reporter import ErrorContext, ErrorHandler +from hed.errors.error_reporter import ErrorHandler from hed.validator.sidecar_validator import SidecarValidator -from hed.validator.spreadsheet_validator import SpreadsheetValidator from hed.tools.analysis.tabular_summary import TabularSummary from hed.tools.bids.bids_tabular_file import BidsTabularFile from hed.tools.bids.bids_sidecar_file import BidsSidecarFile @@ -118,7 +117,7 @@ def validate_sidecars(self, hed_schema, extra_def_dicts=None, check_for_warnings Parameters: hed_schema (HedSchema): HED schema for validation. - extra_def_dicts (DefinitionDict): Extra definitions + extra_def_dicts (DefinitionDict): Extra type_defs check_for_warnings (bool): If True, include warnings in the check. 
Returns: @@ -129,10 +128,10 @@ def validate_sidecars(self, hed_schema, extra_def_dicts=None, check_for_warnings error_handler = ErrorHandler(check_for_warnings) issues = [] validator = SidecarValidator(hed_schema) - + for sidecar in self.sidecar_dict.values(): name = os.path.basename(sidecar.file_path) - issues += validator.validate(sidecar.contents, extra_def_dicts=extra_def_dicts, name=name, + issues += validator.validate(sidecar.contents, extra_def_dicts=extra_def_dicts, name=name, error_handler=error_handler) return issues @@ -141,7 +140,7 @@ def validate_datafiles(self, hed_schema, extra_def_dicts=None, check_for_warning Parameters: hed_schema (HedSchema): Schema to apply to the validation. - extra_def_dicts (DefinitionDict): Extra definitions that come from outside. + extra_def_dicts (DefinitionDict): Extra type_defs that come from outside. check_for_warnings (bool): If True, include warnings in the check. keep_contents (bool): If True, the underlying data files are read and their contents retained. @@ -155,7 +154,7 @@ def validate_datafiles(self, hed_schema, extra_def_dicts=None, check_for_warning for data_obj in self.datafile_dict.values(): data_obj.set_contents(overwrite=False) name = os.path.basename(data_obj.file_path) - issues += data_obj.contents.validate(hed_schema, extra_def_dicts=None, name=name, + issues += data_obj.contents.validate(hed_schema, extra_def_dicts=extra_def_dicts, name=name, error_handler=error_handler) if not keep_contents: data_obj.clear_contents() @@ -185,7 +184,7 @@ def _make_sidecar_dict(self): dict: a dictionary of BidsSidecarFile objects keyed by real path for the specified suffix type Notes: - - This function creates the sidecars and but does not set their contents. + - This function creates the sidecars, but does not set their contents. 
""" files = get_file_list(self.root_path, name_suffix=self.suffix, diff --git a/hed/tools/bids/bids_tabular_file.py b/hed/tools/bids/bids_tabular_file.py index 13a46e353..f419075d7 100644 --- a/hed/tools/bids/bids_tabular_file.py +++ b/hed/tools/bids/bids_tabular_file.py @@ -1,4 +1,4 @@ -""" A BIDS tabular file including its associatedd sidecar. """ +""" A BIDS tabular file including its associated sidecar. """ import os from hed.models.tabular_input import TabularInput diff --git a/hed/tools/remodeling/backup_manager.py b/hed/tools/remodeling/backup_manager.py index e06922a32..ac18f2f0b 100644 --- a/hed/tools/remodeling/backup_manager.py +++ b/hed/tools/remodeling/backup_manager.py @@ -164,7 +164,7 @@ def _get_backups(self): :raises HedFileError: - If a backup is inconsistent for any reason. - + """ backups = {} for backup in os.listdir(self.backups_path): diff --git a/hed/tools/remodeling/cli/run_remodel_backup.py b/hed/tools/remodeling/cli/run_remodel_backup.py index 5bed59e4c..6d78465dd 100644 --- a/hed/tools/remodeling/cli/run_remodel_backup.py +++ b/hed/tools/remodeling/cli/run_remodel_backup.py @@ -7,7 +7,7 @@ def get_parser(): - """ Create a parser for the run_remodel_backup command-line arguments. + """ Create a parser for the run_remodel_backup command-line arguments. Returns: argparse.ArgumentParser: A parser for parsing the command line arguments. @@ -45,7 +45,7 @@ def main(arg_list=None): Otherwise, called with the command-line parameters as an argument list. :raises HedFileError: - - If the specified backup already exists. + - If the specified backup already exists. """ diff --git a/hed/tools/remodeling/cli/run_remodel_restore.py b/hed/tools/remodeling/cli/run_remodel_restore.py index 7f21188d7..960bd0916 100644 --- a/hed/tools/remodeling/cli/run_remodel_restore.py +++ b/hed/tools/remodeling/cli/run_remodel_restore.py @@ -6,7 +6,7 @@ def get_parser(): - """ Create a parser for the run_remodel_restore command-line arguments. 
+ """ Create a parser for the run_remodel_restore command-line arguments. Returns: argparse.ArgumentParser: A parser for parsing the command line arguments. diff --git a/hed/tools/remodeling/dispatcher.py b/hed/tools/remodeling/dispatcher.py index 19e319918..b5985423b 100644 --- a/hed/tools/remodeling/dispatcher.py +++ b/hed/tools/remodeling/dispatcher.py @@ -99,7 +99,7 @@ def get_data_file(self, file_designator): In this case, the corresponding backup file is read and returned. - If a string is passed and there is no backup manager, the data file corresponding to the file_designator is read and returned. - - If a Pandas DataFrame is passed, a copy is returned. + - If a Pandas DataFrame is passed, return a copy. """ if isinstance(file_designator, pd.DataFrame): diff --git a/hed/tools/remodeling/operations/base_op.py b/hed/tools/remodeling/operations/base_op.py index 15423d64d..bc3e906c6 100644 --- a/hed/tools/remodeling/operations/base_op.py +++ b/hed/tools/remodeling/operations/base_op.py @@ -4,7 +4,7 @@ class BaseOp: """ Base class for operations. All remodeling operations should extend this class. - The base class holds the parameters and does basic parameter checking against the operations specification. + The base class holds the parameters and does basic parameter checking against the operation's specification. """ diff --git a/hed/tools/remodeling/operations/base_summary.py b/hed/tools/remodeling/operations/base_summary.py index 2b732ae2d..f00e81401 100644 --- a/hed/tools/remodeling/operations/base_summary.py +++ b/hed/tools/remodeling/operations/base_summary.py @@ -134,7 +134,7 @@ def _save_summary_files(self, save_dir, file_format, summary, individual_summari Parameters: save_dir (str): Path to the directory in which the summaries will be saved. file_format (str): string representing the extension (including .), '.txt' or '.json'. - summary (dictionary): Dictionary of summaries (has "Dataset" and "Individual files" keys. 
+ summary (dictionary): Dictionary of summaries (has "Dataset" and "Individual files" keys). """ if self.op.append_timecode: diff --git a/hed/tools/remodeling/operations/factor_hed_tags_op.py b/hed/tools/remodeling/operations/factor_hed_tags_op.py index 675be0b2a..c5b2ca08f 100644 --- a/hed/tools/remodeling/operations/factor_hed_tags_op.py +++ b/hed/tools/remodeling/operations/factor_hed_tags_op.py @@ -8,6 +8,7 @@ from hed.models.sidecar import Sidecar from hed.models.df_util import get_assembled from hed.tools.analysis.analysis_util import get_expression_parsers, search_strings +from hed.tools.analysis.event_manager import EventManager class FactorHedTagsOp(BaseOp): @@ -89,6 +90,7 @@ def do_op(self, dispatcher, df, name, sidecar=None): raise ValueError("QueryNameAlreadyColumn", f"Query [{query_name}]: is already a column name of the data frame") df_list = [input_data.dataframe] + event_man = EventManager(input_data, dispatcher.hed_schema) hed_strings, _ = get_assembled(input_data, sidecar, dispatcher.hed_schema, extra_def_dicts=None, join_columns=True, shrink_defs=False, expand_defs=True) df_factors = search_strings(hed_strings, self.expression_parsers, query_names=self.query_names) diff --git a/hed/tools/remodeling/operations/factor_hed_type_op.py b/hed/tools/remodeling/operations/factor_hed_type_op.py index 21057b798..df13e3631 100644 --- a/hed/tools/remodeling/operations/factor_hed_type_op.py +++ b/hed/tools/remodeling/operations/factor_hed_type_op.py @@ -4,8 +4,7 @@ import numpy as np from hed.tools.remodeling.operations.base_op import BaseOp from hed.models.tabular_input import TabularInput -from hed.models.sidecar import Sidecar -from hed.models.df_util import get_assembled +from hed.tools.analysis.event_manager import EventManager from hed.tools.analysis.hed_type_manager import HedTypeManager # TODO: restricted factor values are not implemented yet. 
@@ -67,16 +66,10 @@ def do_op(self, dispatcher, df, name, sidecar=None): """ - if sidecar and not isinstance(sidecar, Sidecar): - sidecar = Sidecar(sidecar) input_data = TabularInput(df, sidecar=sidecar, name=name) df_list = [input_data.dataframe.copy()] - hed_strings, definitions = get_assembled(input_data, sidecar, dispatcher.hed_schema, - extra_def_dicts=None, join_columns=True, - shrink_defs=True, expand_defs=False) - - var_manager = HedTypeManager(hed_strings, dispatcher.hed_schema, definitions) - var_manager.add_type_variable(self.type_tag.lower()) + var_manager = HedTypeManager(EventManager(input_data, dispatcher.hed_schema)) + var_manager.add_type(self.type_tag.lower()) df_factors = var_manager.get_factor_vectors(self.type_tag, self.type_values, factor_encoding="one-hot") if len(df_factors.columns) > 0: diff --git a/hed/tools/remodeling/operations/remove_columns_op.py b/hed/tools/remodeling/operations/remove_columns_op.py index b0833cd1d..267a7039a 100644 --- a/hed/tools/remodeling/operations/remove_columns_op.py +++ b/hed/tools/remodeling/operations/remove_columns_op.py @@ -65,4 +65,3 @@ def do_op(self, dispatcher, df, name, sidecar=None): raise KeyError("MissingColumnCannotBeRemoved", f"{name}: Ignore missing is False but a column in {str(self.column_names)} is " f"not in the data columns [{str(df_new.columns)}]") - return df_new diff --git a/hed/tools/remodeling/operations/reorder_columns_op.py b/hed/tools/remodeling/operations/reorder_columns_op.py index 9607bb295..91fcfcc30 100644 --- a/hed/tools/remodeling/operations/reorder_columns_op.py +++ b/hed/tools/remodeling/operations/reorder_columns_op.py @@ -6,9 +6,9 @@ class ReorderColumnsOp(BaseOp): """ Reorder columns in a tabular file. Required parameters: - column_order (*list*): The names of the columns to be reordered. - ignore_missing (*bool*): If false and a column in column_order is not in df, skip the column - keep_others (*bool*): If true, columns not in column_order are placed at end. 
+ - column_order (*list*): The names of the columns to be reordered. + - ignore_missing (*bool*): If false and a column in column_order is not in df, skip the column + - keep_others (*bool*): If true, columns not in column_order are placed at end. """ diff --git a/hed/tools/remodeling/operations/split_rows_op.py b/hed/tools/remodeling/operations/split_rows_op.py index 858ce7e28..ea0b5dc13 100644 --- a/hed/tools/remodeling/operations/split_rows_op.py +++ b/hed/tools/remodeling/operations/split_rows_op.py @@ -110,7 +110,7 @@ def _add_durations(df, add_events, duration_sources): @staticmethod def _create_onsets(df, onset_source): - """ Create a vector of onsets for the the new events. + """ Create a vector of onsets for the new events. Parameters: df (DataFrame): The dataframe to process. diff --git a/hed/tools/remodeling/operations/summarize_column_names_op.py b/hed/tools/remodeling/operations/summarize_column_names_op.py index fd70e3f8d..5770a6185 100644 --- a/hed/tools/remodeling/operations/summarize_column_names_op.py +++ b/hed/tools/remodeling/operations/summarize_column_names_op.py @@ -12,7 +12,7 @@ class SummarizeColumnNamesOp(BaseOp): - **summary_name** (*str*) The name of the summary. - **summary_filename** (*str*) Base filename of the summary. - The purpose is to check that all of the tabular files have the same columns in same order. + The purpose is to check that all the tabular files have the same columns in same order. """ @@ -148,7 +148,7 @@ def _get_result_string(self, name, result, indent=BaseSummary.DISPLAY_INDENT): @staticmethod def _get_dataset_string(result, indent=BaseSummary.DISPLAY_INDENT): - """ Return a string with the overall summary for all of the tabular files. + """ Return a string with the overall summary for all the tabular files. Parameters: result (dict): Dictionary of merged summary information. 
diff --git a/hed/tools/remodeling/operations/summarize_column_values_op.py b/hed/tools/remodeling/operations/summarize_column_values_op.py index 94573a137..b2ecd0704 100644 --- a/hed/tools/remodeling/operations/summarize_column_values_op.py +++ b/hed/tools/remodeling/operations/summarize_column_values_op.py @@ -193,7 +193,7 @@ def _get_categorical_string(self, result, offset="", indent=" "): return "\n".join(sum_list) def _get_dataset_string(self, result, indent=BaseSummary.DISPLAY_INDENT): - """ Return a string with the overall summary for all of the tabular files. + """ Return a string with the overall summary for all the tabular files. Parameters: result (dict): Dictionary of merged summary information. diff --git a/hed/tools/remodeling/operations/summarize_definitions_op.py b/hed/tools/remodeling/operations/summarize_definitions_op.py index 5a1e21804..28b0c6e55 100644 --- a/hed/tools/remodeling/operations/summarize_definitions_op.py +++ b/hed/tools/remodeling/operations/summarize_definitions_op.py @@ -1,4 +1,4 @@ -""" Summarize the definitions in the dataset. """ +""" Summarize the type_defs in the dataset. """ from hed import TabularInput from hed.tools.remodeling.operations.base_op import BaseOp @@ -7,7 +7,7 @@ class SummarizeDefinitionsOp(BaseOp): - """ Summarize the definitions in the dataset. + """ Summarize the type_defs in the dataset. Required remodeling parameters: - **summary_name** (*str*): The name of the summary. @@ -28,7 +28,7 @@ class SummarizeDefinitionsOp(BaseOp): } } - SUMMARY_TYPE = 'definitions' + SUMMARY_TYPE = 'type_defs' def __init__(self, parameters): """ Constructor for the summarize column values operation. @@ -49,7 +49,7 @@ def __init__(self, parameters): self.append_timecode = parameters.get('append_timecode', False) def do_op(self, dispatcher, df, name, sidecar=None): - """ Create summaries of definitions + """ Create summaries of type_defs Parameters: dispatcher (Dispatcher): Manages the operation I/O. 
@@ -135,7 +135,7 @@ def merge_all_info(self): """ Create an Object containing the definition summary. Returns: - Object - the overall summary object for definitions. + Object - the overall summary object for type_defs. """ return self.def_gatherer diff --git a/hed/tools/remodeling/operations/summarize_hed_tags_op.py b/hed/tools/remodeling/operations/summarize_hed_tags_op.py index 0fcec5411..ffef53fb7 100644 --- a/hed/tools/remodeling/operations/summarize_hed_tags_op.py +++ b/hed/tools/remodeling/operations/summarize_hed_tags_op.py @@ -1,11 +1,11 @@ """ Summarize the HED tags in collection of tabular files. """ from hed.models.tabular_input import TabularInput -from hed.models.sidecar import Sidecar from hed.tools.analysis.hed_tag_counts import HedTagCounts +from hed.tools.analysis.event_manager import EventManager +from hed.tools.analysis.hed_tag_manager import HedTagManager from hed.tools.remodeling.operations.base_op import BaseOp from hed.tools.remodeling.operations.base_summary import BaseSummary -from hed.models.df_util import get_assembled class SummarizeHedTagsOp(BaseOp): @@ -15,13 +15,13 @@ class SummarizeHedTagsOp(BaseOp): Required remodeling parameters: - **summary_name** (*str*): The name of the summary. - **summary_filename** (*str*): Base filename of the summary. - - **tags** (*dict*): Type tag to get_summary separately (e.g. 'condition-variable' or 'task'). + - **tags** (*dict*): Specifies how to organize the tag output. Optional remodeling parameters: - **expand_context** (*bool*): If True, include counts from expanded context (not supported). - The purpose of this op is to produce a summary of the occurrences of specified tag. This summary - is often used with 'condition-variable' to produce a summary of the experimental design. + The purpose of this op is to produce a summary of the occurrences of hed tags organized in a specified manner. 
+ The """ @@ -35,15 +35,16 @@ class SummarizeHedTagsOp(BaseOp): }, "optional_parameters": { "append_timecode": bool, - "expand_context": bool, - "expand_definitions": bool + "include_context": bool, + "replace_defs": bool, + "remove_types": list } } SUMMARY_TYPE = "hed_tag_summary" def __init__(self, parameters): - """ Constructor for the summarize hed tags operation. + """ Constructor for the summarize_hed_tags operation. Parameters: parameters (dict): Dictionary with the parameter values for required and optional parameters. @@ -61,7 +62,9 @@ def __init__(self, parameters): self.summary_filename = parameters['summary_filename'] self.tags = parameters['tags'] self.append_timecode = parameters.get('append_timecode', False) - self.expand_context = parameters.get('expand_context', False) + self.include_context = parameters.get('include_context', True) + self.replace_defs = parameters.get("replace_defs", True) + self.remove_types = parameters.get("remove_types", ["Condition-variable", "Task"]) def do_op(self, dispatcher, df, name, sidecar=None): """ Summarize the HED tags present in the dataset. @@ -75,7 +78,7 @@ def do_op(self, dispatcher, df, name, sidecar=None): Returns: DataFrame: A copy of df. - Side-effect: + Side effect: Updates the context. """ @@ -93,8 +96,7 @@ class HedTagSummary(BaseSummary): def __init__(self, sum_op): super().__init__(sum_op) - self.tags = sum_op.tags - self.expand_context = sum_op.expand_context + self.sum_op = sum_op def update_summary(self, new_info): """ Update the summary for a given tabular input file. 
@@ -107,15 +109,12 @@ def update_summary(self, new_info): """ counts = HedTagCounts(new_info['name'], total_events=len(new_info['df'])) - sidecar = new_info['sidecar'] - if sidecar and not isinstance(sidecar, Sidecar): - sidecar = Sidecar(sidecar) - input_data = TabularInput(new_info['df'], sidecar=sidecar, name=new_info['name']) - hed_strings, definitions = get_assembled(input_data, sidecar, new_info['schema'], - extra_def_dicts=None, join_columns=True, - shrink_defs=False, expand_defs=True) - # definitions = input_data.get_definitions().gathered_defs - for hed in hed_strings: + input_data = TabularInput(new_info['df'], sidecar=new_info['sidecar'], name=new_info['name']) + tag_man = HedTagManager(EventManager(input_data, new_info['schema']), + remove_types=self.sum_op.remove_types) + hed_objs = tag_man.get_hed_objs(include_context=self.sum_op.include_context, + replace_defs=self.sum_op.replace_defs) + for hed in hed_objs: counts.update_event_counts(hed, new_info['name']) self.summary_dict[new_info["name"]] = counts @@ -129,9 +128,9 @@ def get_details_dict(self, tag_counts): dict: dictionary with the summary results. """ - template, unmatched = tag_counts.organize_tags(self.tags) + template, unmatched = tag_counts.organize_tags(self.sum_op.tags) details = {} - for key, key_list in self.tags.items(): + for key, key_list in self.sum_op.tags.items(): details[key] = self._get_details(key_list, template, verbose=True) leftovers = [value.get_info(verbose=True) for value in unmatched] return {"Name": tag_counts.name, "Total events": tag_counts.total_events, @@ -177,7 +176,7 @@ def merge_all_info(self): @staticmethod def _get_dataset_string(result, indent=BaseSummary.DISPLAY_INDENT): - """ Return a string with the overall summary for all of the tabular files. + """ Return a string with the overall summary for all the tabular files. Parameters: result (dict): Dictionary of merged summary information. 
diff --git a/hed/tools/remodeling/operations/summarize_hed_type_op.py b/hed/tools/remodeling/operations/summarize_hed_type_op.py index 04c1ad89b..6aaa4c7ea 100644 --- a/hed/tools/remodeling/operations/summarize_hed_type_op.py +++ b/hed/tools/remodeling/operations/summarize_hed_type_op.py @@ -2,10 +2,9 @@ from hed.models.tabular_input import TabularInput from hed.models.sidecar import Sidecar -from hed.models.df_util import get_assembled -from hed.tools.analysis.hed_type_values import HedTypeValues +from hed.tools.analysis.hed_type import HedType from hed.tools.analysis.hed_type_counts import HedTypeCounts -from hed.tools.analysis.hed_context_manager import HedContextManager +from hed.tools.analysis.event_manager import EventManager from hed.tools.remodeling.operations.base_op import BaseOp from hed.tools.remodeling.operations.base_summary import BaseSummary @@ -69,7 +68,7 @@ def do_op(self, dispatcher, df, name, sidecar=None): Returns: DataFrame: A copy of df - Side-effect: + Side effect: Updates the relevant summary. 
""" @@ -104,14 +103,10 @@ def update_summary(self, new_info): if sidecar and not isinstance(sidecar, Sidecar): sidecar = Sidecar(sidecar) input_data = TabularInput(new_info['df'], sidecar=sidecar, name=new_info['name']) - hed_strings, definitions = get_assembled(input_data, sidecar, new_info['schema'], - extra_def_dicts=None, join_columns=True, expand_defs=False) - context_manager = HedContextManager(hed_strings, new_info['schema']) - type_values = HedTypeValues(context_manager, definitions, new_info['name'], type_tag=self.type_tag) - + type_values = HedType(EventManager(input_data, new_info['schema']), new_info['name'], type_tag=self.type_tag) counts = HedTypeCounts(new_info['name'], self.type_tag) counts.update_summary(type_values.get_summary(), type_values.total_events, new_info['name']) - counts.add_descriptions(type_values.definitions) + counts.add_descriptions(type_values.type_defs) self.summary_dict[new_info["name"]] = counts def get_details_dict(self, counts): @@ -165,7 +160,7 @@ def _get_result_string(self, name, result, indent=BaseSummary.DISPLAY_INDENT): @staticmethod def _get_dataset_string(result, indent=BaseSummary.DISPLAY_INDENT): - """ Return a string with the overall summary for all of the tabular files. + """ Return a string with the overall summary for all the tabular files. Parameters: result (dict): Dictionary of merged summary information. diff --git a/hed/tools/remodeling/operations/summarize_hed_validation_op.py b/hed/tools/remodeling/operations/summarize_hed_validation_op.py index ce7595d55..cd3fc936b 100644 --- a/hed/tools/remodeling/operations/summarize_hed_validation_op.py +++ b/hed/tools/remodeling/operations/summarize_hed_validation_op.py @@ -66,7 +66,7 @@ def do_op(self, dispatcher, df, name, sidecar=None): Returns: DataFrame: A copy of df - Side-effect: + Side effect: Updates the relevant summary. 
""" @@ -155,7 +155,7 @@ def get_details_dict(self, summary_info): "Specifics": summary_info} def merge_all_info(self): - """ Create a dictionary containing all of the errors in the dataset. + """ Create a dictionary containing all the errors in the dataset. Returns: dict - dictionary of issues organized into sidecar_issues and event_issues. diff --git a/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py b/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py index 28a0b9389..016a06d6d 100644 --- a/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py +++ b/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py @@ -68,7 +68,7 @@ def do_op(self, dispatcher, df, name, sidecar=None): Returns: DataFrame: A copy of df. - Side-effect: + Side effect: Updates the associated summary if applicable. """ @@ -124,7 +124,7 @@ def get_details_dict(self, summary_info): "Sidecar": summary_info.extract_sidecar_template()}} def merge_all_info(self): - """ Merge summary information from all of the files + """ Merge summary information from all the files. Returns: TabularSummary: Consolidated summary of information. @@ -159,7 +159,7 @@ def _get_result_string(self, name, result, indent=BaseSummary.DISPLAY_INDENT): @staticmethod def _get_dataset_string(result, indent=BaseSummary.DISPLAY_INDENT): - """ Return a string with the overall summary for all of the tabular files. + """ Return a string with the overall summary for all the tabular files. Parameters: result (dict): Dictionary of merged summary information. diff --git a/hed/tools/util/data_util.py b/hed/tools/util/data_util.py index 37562e189..1c787305d 100644 --- a/hed/tools/util/data_util.py +++ b/hed/tools/util/data_util.py @@ -132,7 +132,7 @@ def get_new_dataframe(data): a DataFrame to start with, a new copy of the DataFrame. :raises HedFileError: - - If a filename is given and it cannot be read into a Dataframe. 
+ - A filename is given, and it cannot be read into a Dataframe. """ @@ -156,7 +156,7 @@ def get_row_hash(row, key_list): str: Hash key constructed from the entries of row in the columns specified by key_list. :raises HedFileError: - - If row doesn't have all of the columns in key_list HedFileError is raised. + - If row doesn't have all the columns in key_list HedFileError is raised. """ columns_present, columns_missing = separate_values(list(row.index.values), key_list) @@ -216,7 +216,7 @@ def replace_values(df, values=None, replace_value='n/a', column_list=None): """ Replace string values in specified columns. Parameters: - df (DataFrame): Dataframe whose values will replaced. + df (DataFrame): Dataframe whose values will be replaced. values (list, None): List of strings to replace. If None, only empty strings are replaced. replace_value (str): String replacement value. column_list (list, None): List of columns in which to do replacement. If None all columns are processed. diff --git a/hed/tools/util/hed_logger.py b/hed/tools/util/hed_logger.py index 74fa7262f..1d23aee71 100644 --- a/hed/tools/util/hed_logger.py +++ b/hed/tools/util/hed_logger.py @@ -1,5 +1,6 @@ """ Logger class with messages organized by key """ + class HedLogger: """ Log status messages organized by key. """ def __init__(self, name=None): diff --git a/hed/tools/util/io_util.py b/hed/tools/util/io_util.py index a120c9040..1cc9f8b7a 100644 --- a/hed/tools/util/io_util.py +++ b/hed/tools/util/io_util.py @@ -21,7 +21,7 @@ def check_filename(test_file, name_prefix=None, name_suffix=None, extensions=Non bool: True if file has the appropriate format. Notes: - - Everything is converted to lower case prior to testing so this test should be case insensitive. + - Everything is converted to lower case prior to testing so this test should be case-insensitive. - None indicates that all are accepted. 
@@ -158,13 +158,13 @@ def get_filtered_by_element(file_list, elements): def get_filtered_list(file_list, name_prefix=None, name_suffix=None, extensions=None): """ Get list of filenames satisfying the criteria. - Everything is converted to lower case prior to testing so this test should be case insensitive. + Everything is converted to lower case prior to testing so this test should be case-insensitive. Parameters: file_list (list): List of files to test. name_prefix (str): Optional name_prefix for the base filename. name_suffix (str): Optional name_suffix for the base filename. - extensions (list): Optional list of file extensions (allows two periods (.tsv.gz) + extensions (list): Optional list of file extensions (allows two periods (.tsv.gz)) Returns: list: The filtered file names. @@ -299,7 +299,7 @@ def parse_bids_filename(file_path): def _split_entity(piece): - """Splits an piece into an entity or suffix. + """Splits a piece into an entity or suffix. Parameters: piece (str): A string to be parsed. diff --git a/hed/tools/visualization/tag_word_cloud.py b/hed/tools/visualization/tag_word_cloud.py index c8d4159d7..68a3a257d 100644 --- a/hed/tools/visualization/tag_word_cloud.py +++ b/hed/tools/visualization/tag_word_cloud.py @@ -51,10 +51,10 @@ def summary_to_dict(summary, transform=np.log10, adjustment=5): adjustment(int): Value added after transform. 
Returns: word_dict(dict): a dict of the words and their occurrence count - + :raises KeyError: A malformed dictionary was passed - + """ if transform is None: transform = lambda x: x @@ -109,4 +109,4 @@ def load_and_resize_mask(mask_path, width=None, height=None): else: mask_image_array = np.array(mask_image) - return mask_image_array.astype(np.uint8) \ No newline at end of file + return mask_image_array.astype(np.uint8) diff --git a/hed/tools/visualization/word_cloud_util.py b/hed/tools/visualization/word_cloud_util.py index ba25e0133..6071a138a 100644 --- a/hed/tools/visualization/word_cloud_util.py +++ b/hed/tools/visualization/word_cloud_util.py @@ -39,6 +39,7 @@ def _draw_contour(wc, img): return Image.fromarray(ret) + # Replace WordCloud function with one that can handle transparency WordCloud._draw_contour = _draw_contour diff --git a/hed/validator/def_validator.py b/hed/validator/def_validator.py index fcafcf87b..c615f6edc 100644 --- a/hed/validator/def_validator.py +++ b/hed/validator/def_validator.py @@ -10,10 +10,10 @@ class DefValidator(DefinitionDict): """ def __init__(self, def_dicts=None, hed_schema=None): - """ Initialize for definitions in hed strings. + """ Initialize for type_defs in hed strings. Parameters: - def_dicts (list or DefinitionDict or str): DefinitionDicts containing the definitions to pass to baseclass + def_dicts (list or DefinitionDict or str): DefinitionDicts containing the type_defs to pass to baseclass hed_schema(HedSchema or None): Required if passing strings or lists of strings, unused otherwise. """ super().__init__(def_dicts, hed_schema=hed_schema) diff --git a/hed/validator/hed_validator.py b/hed/validator/hed_validator.py index a3293dc4f..0f4c4bd56 100644 --- a/hed/validator/hed_validator.py +++ b/hed/validator/hed_validator.py @@ -25,7 +25,7 @@ def __init__(self, hed_schema, def_dicts=None, run_full_onset_checks=True, defin hed_schema (HedSchema or HedSchemaGroup): HedSchema object to use for validation. 
def_dicts(DefinitionDict or list or dict): the def dicts to use for validation run_full_onset_checks(bool): If True, check for matching onset/offset tags - definitions_allowed(bool): If False, flag definitions found as errors + definitions_allowed(bool): If False, flag type_defs found as errors """ super().__init__() self._tag_validator = None diff --git a/hed/validator/sidecar_validator.py b/hed/validator/sidecar_validator.py index 9e6f222fd..becbcd109 100644 --- a/hed/validator/sidecar_validator.py +++ b/hed/validator/sidecar_validator.py @@ -10,7 +10,7 @@ from hed.errors.error_reporter import check_for_any_errors -# todo: Add/improve validation for definitions being in known columns(right now it just assumes they aren't) +# todo: Add/improve validation for type_defs being in known columns(right now it just assumes they aren't) class SidecarValidator: reserved_column_names = ["HED"] reserved_category_values = ["n/a"] @@ -255,7 +255,7 @@ def _validate_pound_sign_count(self, hed_string, column_type): presence of definition tags. """ - # Make a copy without definitions to check placeholder count. + # Make a copy without type_defs to check placeholder count. expected_count, error_type = ColumnMetadata.expected_pound_sign_count(column_type) hed_string_copy = copy.deepcopy(hed_string) hed_string_copy.remove_definitions() diff --git a/hed/validator/spreadsheet_validator.py b/hed/validator/spreadsheet_validator.py index 025aa54d4..cdec262d1 100644 --- a/hed/validator/spreadsheet_validator.py +++ b/hed/validator/spreadsheet_validator.py @@ -27,7 +27,7 @@ def validate(self, data, def_dicts=None, name=None, error_handler=None): Parameters: data (BaseInput or pd.DataFrame): Input data to be validated. If a dataframe, it is assumed to be assembled already. 
- def_dicts(list of DefDict or DefDict): all definitions to use for validation + def_dicts(list of DefDict or DefDict): all type_defs to use for validation name(str): The name to report errors from this file as error_handler (ErrorHandler): Error context to use. Creates a new one if None Returns: diff --git a/hed/validator/tag_validator.py b/hed/validator/tag_validator.py index f0d585a70..e81304382 100644 --- a/hed/validator/tag_validator.py +++ b/hed/validator/tag_validator.py @@ -110,7 +110,7 @@ def run_tag_level_validators(self, original_tag_list, is_top_level, is_group): Notes: - This is for the top-level, all groups, and nested groups. - - This can contain definitions, Onset, etc tags. + - This can contain type_defs, Onset, etc tags. """ validation_issues = [] @@ -424,7 +424,7 @@ def check_tag_level_issue(self, original_tag_list, is_top_level, is_group): list: Validation issues. Each issue is a dictionary. Notes: - - Top-level groups can contain definitions, Onset, etc tags. + - Top-level groups can contain type_defs, Onset, etc tags. 
""" validation_issues = [] top_level_tags = [tag for tag in original_tag_list if diff --git a/readthedocs.yml b/readthedocs.yml index bf5d7274d..a0078918c 100644 --- a/readthedocs.yml +++ b/readthedocs.yml @@ -7,7 +7,7 @@ formats: build: os: "ubuntu-22.04" tools: - python: "3.8" + python: "3.7" # Build documentation in the docs/ directory with Sphinx sphinx: @@ -19,4 +19,4 @@ sphinx: python: install: - requirements: docs/requirements.txt - system_packages: true + system_packages: true \ No newline at end of file diff --git a/spec_tests/test_errors.py b/spec_tests/test_errors.py index ac817fa81..4a9b60ab5 100644 --- a/spec_tests/test_errors.py +++ b/spec_tests/test_errors.py @@ -100,7 +100,7 @@ def run_single_test(self, test_file): error_handler = ErrorHandler(check_for_warnings) if schema: schema = load_schema_version(schema) - definitions = info['definitions'] + definitions = info['type_defs'] def_dict = DefinitionDict(definitions, schema) self.assertFalse(def_dict.issues) else: diff --git a/tests/models/test_base_input.py b/tests/models/test_base_input.py index 5f8b2bbab..447aaae95 100644 --- a/tests/models/test_base_input.py +++ b/tests/models/test_base_input.py @@ -51,7 +51,7 @@ def tearDownClass(cls): def test_gathered_defs(self): # todo: probably remove this test? 
- # todo: add unit tests for definitions in tsv file + # todo: add unit tests for type_defs in tsv file defs = DefinitionDict.get_as_strings(self.tabular_file._sidecar.extract_definitions(hed_schema=self.hed_schema)) expected_defs = { 'jsonfiledef': '(Acceleration/#,Item/JsonDef1)', diff --git a/tests/models/test_spreadsheet_input.py b/tests/models/test_spreadsheet_input.py index eeee6bc8d..bf2e98b98 100644 --- a/tests/models/test_spreadsheet_input.py +++ b/tests/models/test_spreadsheet_input.py @@ -159,7 +159,7 @@ def test_no_column_header_and_convert(self): self.assertTrue(hed_input._dataframe.equals(hed_input_long._dataframe)) def test_convert_short_long_with_definitions(self): - # Verify behavior works as expected even if definitions are present + # Verify behavior works as expected even if type_defs are present events_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/model_tests/no_column_header_definition.tsv') hed_input = SpreadsheetInput(events_path, has_column_names=False, tag_columns=[0, 1]) diff --git a/tests/tools/analysis/test_analysis_util_assemble_hed.py b/tests/tools/analysis/test_analysis_util_assemble_hed.py index 75d143659..4b88f1bbe 100644 --- a/tests/tools/analysis/test_analysis_util_assemble_hed.py +++ b/tests/tools/analysis/test_analysis_util_assemble_hed.py @@ -39,7 +39,7 @@ def test_assemble_hed_included_no_expand(self): self.assertNotEqual(first_str1.find('Def/'), -1, "assemble_hed with no def expand has Def tags") self.assertEqual(first_str1.find('Def-expand'), -1, "assemble_hed with no def expand does not have Def-expand tags") - self.assertIsInstance(dict1.defs, dict, "hed_assemble returns a dictionary of definitions") + self.assertIsInstance(dict1.defs, dict, "hed_assemble returns a dictionary of type_defs") self.assertEqual(len(dict1.defs), 17, "hed_assemble definition dictionary has the right number of elements.") def test_assemble_hed_included_expand(self): @@ -74,7 +74,7 @@ def 
test_assemble_hed_no_included_no_expand(self): self.assertNotEqual(first_str1.find('Def/'), -1, "assemble_hed with no def expand has Def tags") self.assertEqual(first_str1.find('Def-expand'), -1, "assemble_hed with no def expand does not have Def-expand tags") - self.assertIsInstance(dict1, DefinitionDict, "hed_assemble returns a dictionary of definitions") + self.assertIsInstance(dict1, DefinitionDict, "hed_assemble returns a dictionary of type_defs") self.assertEqual(len(dict1.defs), 17, "hed_assemble definition dictionary has the right number of elements.") def test_assemble_hed_no_included_expand(self): @@ -95,7 +95,7 @@ def test_assemble_hed_bad_column_no_expand(self): self.assertEqual(first_str2.find('Def-expand/'), -1, "assemble_hed with def expand has Def-expand tags") def test_search_strings(self): - hed_strings, dict1 = df_util.get_assembled(self.input_data, self.sidecar1, self.schema, extra_def_dicts=None, + hed_strings, dict1 = df_util.get_assembled(self.input_data, self.sidecar1, self.schema, extra_def_dicts=None, join_columns=True, shrink_defs=False, expand_defs=True) queries1 = ["sensory-event"] query_names1 = ["sensory"] diff --git a/tests/tools/analysis/test_analysis_util_convert.py b/tests/tools/analysis/test_analysis_util_convert.py index 562cccb85..5c4724216 100644 --- a/tests/tools/analysis/test_analysis_util_convert.py +++ b/tests/tools/analysis/test_analysis_util_convert.py @@ -1,8 +1,7 @@ import os import unittest -from pandas import DataFrame from hed import schema as hedschema -from hed.models import HedTag, HedString, HedGroup +from hed.models import HedTag, HedString from hed.tools.analysis.analysis_util import hed_to_str @@ -44,7 +43,6 @@ def test_hed_to_str_other(self): hed_to_str(dict1) self.assertEqual(context.exception.args[0], "ContentsWrongClass") - def test_hed_to_str_obj(self): str_obj1 = HedString('Label/Cond1', self.hed_schema) str1 = hed_to_str(str_obj1) @@ -106,5 +104,6 @@ def test_hed_to_str_remove_parentheses(self): 
self.assertIsInstance(str3, str) self.assertEqual(str3, 'Label/Cond1') + if __name__ == '__main__': unittest.main() diff --git a/tests/tools/analysis/test_analysis_util_get_assembled_strings.py b/tests/tools/analysis/test_analysis_util_get_assembled_strings.py index 036b4c938..5a3972a37 100644 --- a/tests/tools/analysis/test_analysis_util_get_assembled_strings.py +++ b/tests/tools/analysis/test_analysis_util_get_assembled_strings.py @@ -1,9 +1,7 @@ import os import unittest from hed import schema as hedschema -from hed.models.hed_string import HedString from hed.models.tabular_input import TabularInput -# from hed.tools.analysis.analysis_util import get_assembled_strings # noinspection PyBroadException @@ -40,7 +38,7 @@ def setUp(self): # "get_assembled_strings should not have Def-expand when expand_defs is False") # self.assertNotEqual(hed_strings_joined1.find("Def/"), -1, # "get_assembled_strings should have Def/ when expand_defs is False") - # + # # def test_get_assembled_strings_no_schema_def_expand(self): # hed_list2 = get_assembled_strings(self.input_data, self.hed_schema, expand_defs=True) # self.assertIsInstance(hed_list2, list, "get_assembled_groups should return a list") @@ -53,7 +51,7 @@ def setUp(self): # "get_assembled_strings should have Def-expand when expand_defs is True") # self.assertEqual(hed_strings_joined2.find("Def/"), -1, # "get_assembled_strings should not have Def/ when expand_defs is True") - # + # # def test_get_assembled_strings_with_schema_no_def_expand(self): # hed_list1 = get_assembled_strings(self. 
input_data, hed_schema=self.hed_schema, expand_defs=False) # self.assertIsInstance(hed_list1, list, "get_assembled_strings returns a list when expand defs is False") @@ -66,7 +64,7 @@ def setUp(self): # "get_assembled_strings does not have Def-expand when expand_defs is False") # self.assertNotEqual(hed_strings_joined1.find("Def/"), -1, # "get_assembled_strings should have Def/ when expand_defs is False") - # + # # def test_get_assembled_strings_with_schema_def_expand(self): # hed_list2 = get_assembled_strings(self.input_data, hed_schema=self.hed_schema, expand_defs=True) # self.assertIsInstance(hed_list2, list, "get_assembled_groups should return a list") @@ -79,7 +77,7 @@ def setUp(self): # "get_assembled_strings should have Def-expand when expand_defs is True") # self.assertEqual(hed_strings_joined2.find("Def/"), -1, # "get_assembled_strings should not have Def/ when expand_defs is True") - # + # # def test_get_assembled_strings_no_sidecar_no_schema(self): # input_data = TabularInput(self.events_path, name="face_sub1_events") # hed_list1 = get_assembled_strings(input_data, expand_defs=False) @@ -94,7 +92,7 @@ def setUp(self): # self.assertIsInstance(hed_list2[0], HedString, # "get_assembled_string should return an HedString when no sidecar") # self.assertFalse(hed_list2[0].children, "get_assembled_string returned HedString is empty when no sidecar") - # + # # def test_get_assembled_strings_no_sidecar_schema(self): # input_data = TabularInput(self.events_path, hed_schema=self.hed_schema, name="face_sub1_events") # hed_list1 = get_assembled_strings(input_data, expand_defs=False) diff --git a/tests/tools/analysis/test_annotation_util.py b/tests/tools/analysis/test_annotation_util.py index f54dd1dc8..c0e9b2b6f 100644 --- a/tests/tools/analysis/test_annotation_util.py +++ b/tests/tools/analysis/test_annotation_util.py @@ -12,7 +12,6 @@ generate_sidecar_entry from hed.tools.analysis.tabular_summary import TabularSummary from hed.tools.util.io_util import get_file_list 
-from hed.validator import HedValidator # noinspection PyBroadException @@ -359,7 +358,8 @@ def test_flatten_cat_col(self): "_flatten_cat_col should use the Description tag if available") def test_flatten_cat_col_only_description(self): - keys, values, descriptions, tags = _flatten_cat_col("event_type", {"HED": {"code1": "Description/Code 1 here."}}) + keys, values, descriptions, tags = _flatten_cat_col("event_type", + {"HED": {"code1": "Description/Code 1 here."}}) self.assertIsInstance(tags, list) self.assertEqual(tags[0], 'n/a') diff --git a/tests/tools/analysis/test_column_name_summary.py b/tests/tools/analysis/test_column_name_summary.py index 31cb551c0..57c6ba4ba 100644 --- a/tests/tools/analysis/test_column_name_summary.py +++ b/tests/tools/analysis/test_column_name_summary.py @@ -54,6 +54,7 @@ def test_get_summary(self): column_summary.update('run-01', self.columns1) column_summary.update('run-02', self.columns1) summary1 = column_summary.get_summary() + self.assertIsInstance(summary1, dict) column_summary.update('run-03', self.columns2) column_summary.update('run-04', self.columns3) summary2 = column_summary.get_summary() diff --git a/tests/tools/analysis/test_event_manager.py b/tests/tools/analysis/test_event_manager.py index 5e7937f7e..3094c8cec 100644 --- a/tests/tools/analysis/test_event_manager.py +++ b/tests/tools/analysis/test_event_manager.py @@ -1,8 +1,7 @@ import os import unittest -from hed.models.sidecar import Sidecar -from hed.models.df_util import get_assembled +from hed.models.sidecar import Sidecar, HedString from hed.models.tabular_input import TabularInput from hed.schema.hed_schema_io import load_schema_version from hed.tools.analysis.event_manager import EventManager @@ -31,49 +30,53 @@ def test_constructor(self): self.assertEqual(len(manager1.event_list[0]), 2) self.assertIsInstance(manager1.hed_strings, list) self.assertEqual(len(manager1.hed_strings), len(self.input_data.dataframe)) - # self.assertEqual(len(manager1.event_list), 
len(self.input_data.dataframe)) - # event_count = 0 - # for index, item in enumerate(manager1.event_list): - # for event in item: - # event_count = event_count + 1 - # self.assertFalse(event.duration) - # self.assertTrue(event.end_index) - # self.assertEqual(event.start_index, index) - # self.assertEqual(event.start_index, index) - # self.assertEqual(event.start_time, manager1.data.dataframe.loc[index, "onset"]) - # if not event.end_time: - # self.assertEqual(event.end_index, len(manager1.data.dataframe)) + self.assertEqual(len(manager1.event_list), len(self.input_data.dataframe)) + event_count = 0 + for index, item in enumerate(manager1.event_list): + for event in item: + event_count = event_count + 1 + self.assertTrue(event.end_index) + self.assertEqual(event.start_index, index) + self.assertEqual(event.start_index, index) + self.assertEqual(event.start_time, float(manager1.input_data.dataframe.loc[index, "onset"])) + if not event.end_time: + self.assertEqual(event.end_index, len(manager1.input_data.dataframe)) - # def test_constructor(self): - # with self.assertRaises(ValueError) as cont: - # HedContextManager(self.test_strings1, None) - # self.assertEqual(cont.exception.args[0], "ContextRequiresSchema") + def test_unfold_context_no_remove(self): + manager1 = EventManager(self.input_data, self.schema) + hed, base, context = manager1.unfold_context() + for index in range(len(manager1.onsets)): + self.assertIsInstance(hed[index], str) + self.assertIsInstance(base[index], str) - # def test_iter(self): - # hed_strings = get_assembled_strings(self.input_data, hed_schema=self.schema, expand_defs=False) - # manager1 = HedContextManager(hed_strings, self.schema) - # i = 0 - # for hed, context in manager1.iter_context(): - # self.assertEqual(hed, manager1.hed_strings[i]) - # self.assertEqual(context, manager1.contexts[i]) - # i = i + 1 + def test_unfold_context_remove(self): + manager1 = EventManager(self.input_data, self.schema) + hed, base, context = 
manager1.unfold_context(remove_types=['Condition-variable', 'Task']) + for index in range(len(manager1.onsets)): + self.assertIsInstance(hed[index], str) + self.assertIsInstance(base[index], str) + # ToDo finish tests - # def test_constructor_from_assembled(self): - # hed_strings = get_assembled_strings(self.input_data, hed_schema=self.schema, expand_defs=False) - # manager1 = HedContextManager(hed_strings, self.schema) - # self.assertEqual(len(manager1.hed_strings), 200, - # "The constructor for assembled strings has expected # of strings") - # self.assertEqual(len(manager1.onset_list), 261, - # "The constructor for assembled strings has onset_list of correct length") + def test_str_list_to_hed(self): + manager = EventManager(self.input_data, self.schema) + hed_obj1 = manager.str_list_to_hed(['', '', '']) + self.assertFalse(hed_obj1) + hed, base, context = manager.unfold_context() - # def test_constructor_unmatched(self): - # with self.assertRaises(HedFileError) as context: - # HedContextManager(self.test_strings2, self.schema) - # self.assertEqual(context.exception.args[0], 'UnmatchedOffset') + hed_obj2 = manager.str_list_to_hed([hed[1], base[1], '(Event-context, (' + context[1] + '))']) + self.assertIsInstance(hed_obj2, HedString) + self.assertEqual(10, len(hed_obj2.children)) + hed3, base3, context3 = manager.unfold_context(remove_types=['Condition-variable', 'Task']) - # def test_constructor_multiple_values(self): - # manager = HedContextManager(self.test_strings3, self.schema) - # self.assertEqual(len(manager.onset_list), 3, "Constructor should have right number of onsets") + hed_obj3 = manager.str_list_to_hed([hed3[1], base3[1], '(Event-context, (' + context3[1] + '))']) + self.assertIsInstance(hed_obj3, HedString) + self.assertEqual(6, len(hed_obj3.children)) + + def test_get_type_defs(self): + manager1 = EventManager(self.input_data, self.schema) + def_names = manager1.get_type_defs(["Condition-variable", "task"]) + self.assertIsInstance(def_names, list) + 
self.assertEqual(11, len(def_names)) if __name__ == '__main__': diff --git a/tests/tools/analysis/test_hed_context_manager.py b/tests/tools/analysis/test_hed_context_manager.py deleted file mode 100644 index 2ac042453..000000000 --- a/tests/tools/analysis/test_hed_context_manager.py +++ /dev/null @@ -1,109 +0,0 @@ -import os -import unittest -from hed.errors.exceptions import HedFileError -from hed.models.hed_string import HedString -from hed.models.sidecar import Sidecar -from hed.models.tabular_input import TabularInput -from hed.schema.hed_schema_io import load_schema_version -from hed.tools.analysis.hed_context_manager import HedContextManager -from hed.models.df_util import get_assembled - - -class Test(unittest.TestCase): - - @classmethod - def setUpClass(cls): - schema = load_schema_version(xml_version="8.1.0") - cls.test_strings1 = [HedString('Sensory-event,(Def/Cond1,(Red, Blue),Onset),(Def/Cond2,Onset),Green,Yellow', - hed_schema=schema), - HedString('(Def/Cond1, Offset)', hed_schema=schema), - HedString('White, Black', hed_schema=schema), - HedString('', hed_schema=schema), - HedString('(Def/Cond2, Onset)', hed_schema=schema), - HedString('(Def/Cond3/1.3, Onset)', hed_schema=schema), - HedString('Arm, Leg', hed_schema=schema)] - cls.test_strings2 = [HedString('(Def/Cond3/2, Offset)', hed_schema=schema)] - cls.test_strings3 = [HedString("Def/Cond2, (Def/Cond6/4, Onset), (Def/Cond6/7.8, Onset), Def/Cond6/Alpha", - hed_schema=schema), - HedString("Yellow", hed_schema=schema), - HedString("Def/Cond2, (Def/Cond6/4, Onset)", hed_schema=schema), - HedString("Def/Cond2, Def/Cond6/5.2 (Def/Cond6/7.8, Offset)", hed_schema=schema), - HedString("Def/Cond2, Def/Cond6/4", hed_schema=schema)] - - bids_root_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../../data/bids_tests/eeg_ds003645s_hed')) - events_path = os.path.realpath(os.path.join(bids_root_path, - 'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv')) - sidecar_path 
= os.path.realpath(os.path.join(bids_root_path, 'task-FacePerception_events.json')) - sidecar1 = Sidecar(sidecar_path, name='face_sub1_json') - cls.input_data = TabularInput(events_path, sidecar=sidecar1, name="face_sub1_events") - cls.sidecar1 = sidecar1 - cls.schema = schema - - # def test_onset_group(self): - # str1 = '(Def/Temper, (Label/help))' - # str1_obj = HedString(str1) - # grp1 = str1_obj.chilren()[0] - # on_grp1 = OnsetGroup('this_group', x, 1) - # self.assertIsInstance(grp1.contents, str) - # self.assertEqual(grp1.contents, '(Def/Temper,(Label/help))') - # str1_obj = HedString(str1) - # grp2 = - # self.assertIsInstance(grp2.contents, str) - # self.assertEqual(grp2.contents, str1) - # - # y = HedGroup(str1) - # grp3 = OnsetGroup('this_group', y, 0) - # self.assertIsInstance(grp3.contents, str) - # self.assertEqual(grp3.contents, str1) - # grp4 = OnsetGroup('this_group', x, 0, 10) - # self.assertIsInstance(grp4.contents, str) - # self.assertEqual(grp4.contents, str1) - - def test_constructor(self): - manager1 = HedContextManager(self.test_strings1, self.schema) - self.assertIsInstance(manager1, HedContextManager, "The constructor should create an HedContextManager") - self.assertEqual(len(manager1.hed_strings), 7, "The constructor should have the right number of strings") - self.assertEqual(len(manager1.onset_list), 4, "The constructor should have right length onset list") - self.assertIsInstance(manager1.hed_strings[1], HedString, "Constructor hed string should be a hedstring") - self.assertFalse(manager1.hed_strings[1].children, "When no tags list HedString is empty") - context = manager1.contexts - self.assertIsInstance(context, list, "The constructor event contexts should be a list") - self.assertIsInstance(context[1], HedString, "The constructor event contexts has a correct element") - - def test_constructor1(self): - with self.assertRaises(ValueError) as cont: - HedContextManager(self.test_strings1, None) - self.assertEqual(cont.exception.args[0], 
"ContextRequiresSchema") - - def test_iter(self): - hed_strings, definitions = get_assembled(self.input_data, self.sidecar1, self.schema, extra_def_dicts=None, - join_columns=True, shrink_defs=True, expand_defs=False) - manager1 = HedContextManager(hed_strings, self.schema) - i = 0 - for hed, context in manager1.iter_context(): - self.assertEqual(hed, manager1.hed_strings[i]) - self.assertEqual(context, manager1.contexts[i]) - i = i + 1 - - def test_constructor_from_assembled(self): - hed_strings, definitions = get_assembled(self.input_data, self.sidecar1, self.schema, extra_def_dicts=None, - join_columns=True, shrink_defs=True, expand_defs=False) - manager1 = HedContextManager(hed_strings, self.schema) - self.assertEqual(len(manager1.hed_strings), 200, - "The constructor for assembled strings has expected # of strings") - self.assertEqual(len(manager1.onset_list), 261, - "The constructor for assembled strings has onset_list of correct length") - - def test_constructor_unmatched(self): - with self.assertRaises(HedFileError) as context: - HedContextManager(self.test_strings2, self.schema) - self.assertEqual(context.exception.args[0], 'UnmatchedOffset') - - def test_constructor_multiple_values(self): - manager = HedContextManager(self.test_strings3, self.schema) - self.assertEqual(len(manager.onset_list), 3, "Constructor should have right number of onsets") - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/tools/analysis/test_hed_tag_counts.py b/tests/tools/analysis/test_hed_tag_counts.py index 5f2eebc27..2821abd55 100644 --- a/tests/tools/analysis/test_hed_tag_counts.py +++ b/tests/tools/analysis/test_hed_tag_counts.py @@ -78,7 +78,7 @@ def test_organize_tags(self): hed_strings, definitions = get_assembled(self.input_data, self.sidecar1, self.hed_schema, extra_def_dicts=None, join_columns=True, shrink_defs=False, expand_defs=True) - # definitions = input_data.get_definitions().gathered_defs + # type_defs = 
input_data.get_definitions().gathered_defs for hed in hed_strings: counts.update_event_counts(hed, 'run-1') self.assertIsInstance(counts.tag_dict, dict) diff --git a/tests/tools/analysis/test_hed_tag_manager.py b/tests/tools/analysis/test_hed_tag_manager.py new file mode 100644 index 000000000..cca0b551b --- /dev/null +++ b/tests/tools/analysis/test_hed_tag_manager.py @@ -0,0 +1,172 @@ +import os +import unittest +from pandas import DataFrame +from hed.models import DefinitionDict +from hed.models.hed_string import HedString +from hed.models.tabular_input import TabularInput +from hed.schema.hed_schema_io import load_schema_version +from hed.tools.analysis.event_manager import EventManager +from hed.tools.analysis.hed_tag_manager import HedTagManager + + +class Test(unittest.TestCase): + + @classmethod + def setUpClass(cls): + schema = load_schema_version(xml_version="8.2.0") + # Set up the definition dictionary + defs = [HedString('(Definition/Cond1, (Condition-variable/Var1, Circle, Square))', hed_schema=schema), + HedString('(Definition/Cond2, (condition-variable/Var2, Condition-variable/Apple, Triangle, Sphere))', + hed_schema=schema), + HedString('(Definition/Cond3/#, (Condition-variable/Var3, Label/#, Ellipse, Cross))', + hed_schema=schema), + HedString('(Definition/Cond4, (Condition-variable, Apple, Banana))', hed_schema=schema), + HedString('(Definition/Cond5, (Condition-variable/Lumber, Apple, Banana))', hed_schema=schema), + HedString('(Definition/Cond6/#, (Condition-variable/Lumber, Label/#, Apple, Banana))', + hed_schema=schema)] + def_dict = DefinitionDict() + for value in defs: + def_dict.check_for_definitions(value) + + test_strings1 = ["Sensory-event,(Def/Cond1,(Red, Blue, Condition-variable/Trouble),Onset)", + "(Def/Cond2,Onset),Green,Yellow, Def/Cond5, Def/Cond6/4", + "(Def/Cond1, Offset)", + "White, Black, Condition-variable/Wonder, Condition-variable/Fast", + "", + "(Def/Cond2, Onset)", + "(Def/Cond3/4.3, Onset)", + "Arm, Leg, 
Condition-variable/Fast"] + test_onsets1 = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0] + df1 = DataFrame(test_onsets1, columns=['onset']) + df1['HED'] = test_strings1 + input_data1 = TabularInput(df1) + cls.event_man1 = EventManager(input_data1, schema, extra_defs=def_dict) + test_strings2 = ["Def/Cond2, (Def/Cond6/4, Onset), (Def/Cond6/7.8, Onset), Def/Cond6/Alpha", + "Yellow", + "Def/Cond2, (Def/Cond6/4, Onset)", + "Def/Cond2, Def/Cond6/5.2 (Def/Cond6/7.8, Offset)", + "Def/Cond2, Def/Cond6/4"] + test_onsets2 = [0.0, 1.0, 2.0, 3.0, 4.0] + df2 = DataFrame(test_onsets2, columns=['onset']) + df2['HED'] = test_strings2 + input_data2 = TabularInput(df2) + cls.event_man2 = EventManager(input_data2, schema, extra_defs=def_dict) + test_strings3 = ['(Def/Cond3, Offset)'] + test_onsets3 = [0.0] + df3 = DataFrame(test_onsets3, columns=['onset']) + df3['HED'] = test_strings3 + cls.input_data3 = TabularInput(df3) + bids_root_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), + '../../data/bids_tests/eeg_ds003645s_hed')) + events_path = os.path.realpath(os.path.join(bids_root_path, + 'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv')) + sidecar_path = os.path.realpath(os.path.join(bids_root_path, 'task-FacePerception_events.json')) + cls.input_data = TabularInput(events_path, sidecar_path) + cls.schema = schema + cls.def_dict = def_dict + + # def test_constructor(self): + # type_var = HedType(self.event_man1, 'test-it') + # self.assertIsInstance(type_var, HedType, "Constructor should create a HedType from an event manager") + # self.assertEqual(len(type_var._type_map), 8, + # "Constructor ConditionVariables should have the right length") + + def test_constructor_from_tabular_input(self): + event_man = EventManager(self.input_data, self.schema) + tag_man1 = HedTagManager(EventManager(self.input_data, self.schema)) + self.assertIsInstance(tag_man1, HedTagManager) + hed_objs1a = tag_man1.get_hed_objs(include_context=False, replace_defs=False) + 
hed_objs1b = tag_man1.get_hed_objs(include_context=True, replace_defs=False) + hed_objs1c = tag_man1.get_hed_objs(include_context=False, replace_defs=True) + hed_objs1d = tag_man1.get_hed_objs(include_context=True, replace_defs=True) + tag_man2 = HedTagManager(event_man, remove_types=['Condition-variable', 'Task']) + hed_objs2a = tag_man2.get_hed_objs(include_context=False, replace_defs=False) + hed_objs2b = tag_man2.get_hed_objs(include_context=True, replace_defs=False) + hed_objs1c = tag_man2.get_hed_objs(include_context=False, replace_defs=True) + hed_objs1d = tag_man2.get_hed_objs(include_context=True, replace_defs=True) + self.assertIsInstance(tag_man2, HedTagManager) + self.assertIsInstance(tag_man2, HedTagManager) + + def test_get_hed_objs(self): + event_man = EventManager(self.input_data, self.schema) + tag_man1 = HedTagManager(EventManager(self.input_data, self.schema)) + # tag_man = HedTagManager(event_man, remove_types=['Condition-variable', 'Task']) + # hed_objs = tag_man.get_hed_objs() + # self.assertIsInstance(hed_objs, list) + # self.assertEqual(len(hed_objs), len(event_man.onsets)) + + # def test_constructor_variable_caps(self): + # sidecar1 = Sidecar(self.sidecar_path, name='face_sub1_json') + # input_data = TabularInput(self.events_path, sidecar1, name="face_sub1_events") + # event_man = EventManager(input_data, self.schema) + # var_manager = HedType(event_man, 'run-01') + # self.assertIsInstance(var_manager, HedType, "Constructor should create a HedTypeManager variable caps") + # + # def test_constructor_multiple_values(self): + # type_var = HedType(self.event_man2, 'test-it') + # self.assertIsInstance(type_var, HedType, "Constructor should create a HedType from an event manager") + # self.assertEqual(len(type_var._type_map), 3, + # "Constructor should have right number of type_variables if multiple") + # + # def test_constructor_unmatched(self): + # with self.assertRaises(KeyError) as context: + # event_man = EventManager(self.input_data3, 
self.schema, extra_defs=self.def_dict) + # HedType(event_man, 'run-01') + # self.assertEqual(context.exception.args[0], 'cond3') + # + # def test_get_variable_factors(self): + # sidecar1 = Sidecar(self.sidecar_path, name='face_sub1_json') + # input_data = TabularInput(self.events_path, sidecar1, name="face_sub1_events") + # event_man = EventManager(input_data, self.schema) + # var_manager = HedType(event_man, 'run-01') + # df_new1 = var_manager.get_type_factors() + # self.assertIsInstance(df_new1, DataFrame) + # self.assertEqual(len(df_new1), 200) + # self.assertEqual(len(df_new1.columns), 7) + # df_new2 = var_manager.get_type_factors(type_values=["face-type"]) + # self.assertEqual(len(df_new2), 200) + # self.assertEqual(len(df_new2.columns), 3) + # df_new3 = var_manager.get_type_factors(type_values=["junk"]) + # self.assertIsNone(df_new3) + # + # def test_str(self): + # sidecar1 = Sidecar(self.sidecar_path, name='face_sub1_json') + # input_data = TabularInput(self.events_path, sidecar1, name="face_sub1_events") + # event_man = EventManager(input_data, self.schema) + # var_manager = HedType(event_man, 'run-01') + # new_str = str(var_manager) + # self.assertIsInstance(new_str, str) + # + # def test_summarize_variables(self): + # sidecar1 = Sidecar(self.sidecar_path, name='face_sub1_json') + # input_data = TabularInput(self.events_path, sidecar1, name="face_sub1_events") + # event_man = EventManager(input_data, self.schema) + # var_manager = HedType(event_man, 'run-01') + # summary = var_manager.get_summary() + # self.assertIsInstance(summary, dict, "get_summary produces a dictionary if not json") + # self.assertEqual(len(summary), 3, "Summarize_variables has right number of condition type_variables") + # self.assertIn("key-assignment", summary, "get_summary has a correct key") + # + # def test_extract_definition_variables(self): + # var_manager = HedType(self.event_man1, 'run-01') + # var_levels = var_manager._type_map['var3'].levels + # self.assertNotIn('cond3/7', 
var_levels, + # "_extract_definition_variables before extraction def/cond3/7 not in levels") + # tag = HedTag("Def/Cond3/7", hed_schema=self.schema) + # var_manager._extract_definition_variables(tag, 5) + # self.assertIn('cond3/7', var_levels, + # "_extract_definition_variables after extraction def/cond3/7 not in levels") + # + # def test_get_variable_names(self): + # conditions1 = HedType(self.event_man1, 'run-01') + # list1 = conditions1.get_type_value_names() + # self.assertEqual(len(list1), 8, "get_variable_tags list should have the right length") + # + # def test_get_variable_def_names(self): + # conditions1 = HedType(self.event_man1, 'run-01') + # list1 = conditions1.get_type_def_names() + # self.assertEqual(len(list1), 5, "get_type_def_names list should have the right length") + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/tools/analysis/test_hed_type.py b/tests/tools/analysis/test_hed_type.py new file mode 100644 index 000000000..662b82ccb --- /dev/null +++ b/tests/tools/analysis/test_hed_type.py @@ -0,0 +1,157 @@ +import os +import unittest +from pandas import DataFrame +from hed.models import DefinitionDict +from hed.models.hed_string import HedString +from hed.models.hed_tag import HedTag +from hed.models.sidecar import Sidecar +from hed.models.tabular_input import TabularInput +from hed.schema.hed_schema_io import load_schema_version +from hed.tools.analysis.event_manager import EventManager +from hed.tools.analysis.hed_type import HedType + + +class Test(unittest.TestCase): + + @classmethod + def setUpClass(cls): + schema = load_schema_version(xml_version="8.2.0") + # Set up the definition dictionary + defs = [HedString('(Definition/Cond1, (Condition-variable/Var1, Circle, Square))', hed_schema=schema), + HedString('(Definition/Cond2, (condition-variable/Var2, Condition-variable/Apple, Triangle, Sphere))', + hed_schema=schema), + HedString('(Definition/Cond3/#, (Condition-variable/Var3, Label/#, Ellipse, Cross))', + 
hed_schema=schema), + HedString('(Definition/Cond4, (Condition-variable, Rectangle, Triangle))', hed_schema=schema), + HedString('(Definition/Cond5, (Condition-variable/Lumber, Action, Sensory-presentation))', + hed_schema=schema), + HedString('(Definition/Cond6/#, (Condition-variable/Lumber, Label/#, Agent, Move))', + hed_schema=schema)] + def_dict = DefinitionDict() + for value in defs: + def_dict.check_for_definitions(value) + + test_strings1 = ["Sensory-event,(Def/Cond1,(Elbow, Hip, Condition-variable/Trouble),Onset)", + "(Def/Cond2,Onset),Green,Yellow, Def/Cond5, Def/Cond6/4", + "(Def/Cond1, Offset)", + "White, Black, Condition-variable/Wonder, Condition-variable/Fast", + "", + "(Def/Cond2, Onset)", + "(Def/Cond3/4.3, Onset)", + "Upper-arm, Head, Condition-variable/Fast"] + test_onsets1 = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0] + df1 = DataFrame(test_onsets1, columns=['onset']) + df1['HED'] = test_strings1 + input_data1 = TabularInput(df1) + cls.event_man1 = EventManager(input_data1, schema, extra_defs=def_dict) + test_strings2 = ["Def/Cond2, (Def/Cond6/4, Onset), (Def/Cond6/7.8, Onset), Def/Cond6/Alpha", + "Yellow", + "Def/Cond2, (Def/Cond6/4, Onset)", + "Def/Cond2, Def/Cond6/5.2 (Def/Cond6/7.8, Offset)", + "Def/Cond2, Def/Cond6/4"] + test_onsets2 = [0.0, 1.0, 2.0, 3.0, 4.0] + df2 = DataFrame(test_onsets2, columns=['onset']) + df2['HED'] = test_strings2 + input_data2 = TabularInput(df2) + cls.event_man2 = EventManager(input_data2, schema, extra_defs=def_dict) + test_strings3 = ['(Def/Cond3, Offset)'] + test_onsets3 = [0.0] + df3 = DataFrame(test_onsets3, columns=['onset']) + df3['HED'] = test_strings3 + cls.input_data3 = TabularInput(df3) + bids_root_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), + '../../data/bids_tests/eeg_ds003645s_hed')) + cls.events_path = os.path.realpath(os.path.join(bids_root_path, + 'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv')) + cls.sidecar_path = 
os.path.realpath(os.path.join(bids_root_path, 'task-FacePerception_events.json')) + cls.schema = schema + cls.def_dict = def_dict + + def test_constructor(self): + type_var = HedType(self.event_man1, 'test-it') + self.assertIsInstance(type_var, HedType, "Constructor should create a HedType from an event manager") + self.assertEqual(len(type_var._type_map), 8, + "Constructor ConditionVariables should have the right length") + + def test_constructor_from_tabular_input(self): + sidecar1 = Sidecar(self.sidecar_path, name='face_sub1_json') + input_data = TabularInput(self.events_path, sidecar=sidecar1, name="face_sub1_events") + event_man = EventManager(input_data, self.schema) + var_man = HedType(event_man, 'face') + self.assertIsInstance(var_man, HedType, "Constructor should create a HedTypeManager from a tabular input") + + def test_constructor_variable_caps(self): + sidecar1 = Sidecar(self.sidecar_path, name='face_sub1_json') + input_data = TabularInput(self.events_path, sidecar1, name="face_sub1_events") + event_man = EventManager(input_data, self.schema) + var_manager = HedType(event_man, 'run-01') + self.assertIsInstance(var_manager, HedType, "Constructor should create a HedTypeManager variable caps") + + def test_constructor_multiple_values(self): + type_var = HedType(self.event_man2, 'test-it') + self.assertIsInstance(type_var, HedType, "Constructor should create a HedType from an event manager") + self.assertEqual(len(type_var._type_map), 3, + "Constructor should have right number of type_variables if multiple") + + def test_constructor_unmatched(self): + with self.assertRaises(KeyError) as context: + event_man = EventManager(self.input_data3, self.schema, extra_defs=self.def_dict) + HedType(event_man, 'run-01') + self.assertEqual(context.exception.args[0], 'cond3') + + def test_get_variable_factors(self): + sidecar1 = Sidecar(self.sidecar_path, name='face_sub1_json') + input_data = TabularInput(self.events_path, sidecar1, name="face_sub1_events") + event_man 
= EventManager(input_data, self.schema) + var_manager = HedType(event_man, 'run-01') + df_new1 = var_manager.get_type_factors() + self.assertIsInstance(df_new1, DataFrame) + self.assertEqual(len(df_new1), 200) + self.assertEqual(len(df_new1.columns), 7) + df_new2 = var_manager.get_type_factors(type_values=["face-type"]) + self.assertEqual(len(df_new2), 200) + self.assertEqual(len(df_new2.columns), 3) + df_new3 = var_manager.get_type_factors(type_values=["junk"]) + self.assertIsNone(df_new3) + + def test_str(self): + sidecar1 = Sidecar(self.sidecar_path, name='face_sub1_json') + input_data = TabularInput(self.events_path, sidecar1, name="face_sub1_events") + event_man = EventManager(input_data, self.schema) + var_manager = HedType(event_man, 'run-01') + new_str = str(var_manager) + self.assertIsInstance(new_str, str) + + def test_summarize_variables(self): + sidecar1 = Sidecar(self.sidecar_path, name='face_sub1_json') + input_data = TabularInput(self.events_path, sidecar1, name="face_sub1_events") + event_man = EventManager(input_data, self.schema) + var_manager = HedType(event_man, 'run-01') + summary = var_manager.get_summary() + self.assertIsInstance(summary, dict, "get_summary produces a dictionary if not json") + self.assertEqual(len(summary), 3, "Summarize_variables has right number of condition type_variables") + self.assertIn("key-assignment", summary, "get_summary has a correct key") + + def test_extract_definition_variables(self): + var_manager = HedType(self.event_man1, 'run-01') + var_levels = var_manager._type_map['var3'].levels + self.assertNotIn('cond3/7', var_levels, + "_extract_definition_variables before extraction def/cond3/7 not in levels") + tag = HedTag("Def/Cond3/7", hed_schema=self.schema) + var_manager._extract_definition_variables(tag, 5) + self.assertIn('cond3/7', var_levels, + "_extract_definition_variables after extraction def/cond3/7 not in levels") + + def test_get_variable_names(self): + conditions1 = HedType(self.event_man1, 
'run-01') + list1 = conditions1.get_type_value_names() + self.assertEqual(len(list1), 8, "get_variable_tags list should have the right length") + + def test_get_variable_def_names(self): + conditions1 = HedType(self.event_man1, 'run-01') + list1 = conditions1.get_type_def_names() + self.assertEqual(len(list1), 5, "get_type_def_names list should have the right length") + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/tools/analysis/test_hed_type_counts.py b/tests/tools/analysis/test_hed_type_counts.py index c4fd22cab..d8c380bdb 100644 --- a/tests/tools/analysis/test_hed_type_counts.py +++ b/tests/tools/analysis/test_hed_type_counts.py @@ -3,10 +3,9 @@ from hed.models.sidecar import Sidecar from hed.models.tabular_input import TabularInput from hed.schema.hed_schema_io import load_schema_version -from hed.tools.analysis.hed_context_manager import HedContextManager -from hed.tools.analysis.hed_type_values import HedTypeValues +from hed.tools.analysis.event_manager import EventManager +from hed.tools.analysis.hed_type import HedType from hed.tools.analysis.hed_type_counts import HedTypeCount, HedTypeCounts -from hed.models.df_util import get_assembled class Test(unittest.TestCase): @@ -21,10 +20,7 @@ def setUpClass(cls): sidecar_path = os.path.realpath(os.path.join(bids_root_path, 'task-FacePerception_events.json')) sidecar1 = Sidecar(sidecar_path, name='face_sub1_json') input_data = TabularInput(events_path, sidecar=sidecar1, name="face_sub1_events") - hed_strings1, definitions1 = get_assembled(input_data, sidecar1, schema, extra_def_dicts=None, - join_columns=True, shrink_defs=True, expand_defs=False) - cls.var_type1 = HedTypeValues(HedContextManager(hed_strings1, schema), definitions1, 'run-01', - type_tag='condition-variable') + cls.var_type1 = HedType(EventManager(input_data, schema), 'run-01', type_tag='condition-variable') def test_type_count_one_level(self): type_counts1 = HedTypeCounts('Dummy', "condition-variable") @@ -62,7 +58,7 @@ def 
test_get_summary_multiple_levels(self): self.assertEqual(face_type.total_events, 400) self.assertEqual(face_type.events, 104) self.assertEqual(len(face_type.files), 2) - counts.add_descriptions(self.var_type1.definitions) + counts.add_descriptions(self.var_type1.type_defs) self.assertTrue(face_type.level_counts['famous-face-cond']['description']) diff --git a/tests/tools/analysis/test_hed_type_definitions.py b/tests/tools/analysis/test_hed_type_definitions.py deleted file mode 100644 index 7388d1228..000000000 --- a/tests/tools/analysis/test_hed_type_definitions.py +++ /dev/null @@ -1,114 +0,0 @@ -import os -import unittest -from hed.models import DefinitionEntry -from hed.models.hed_string import HedString -from hed.models.hed_tag import HedTag -from hed.models.sidecar import Sidecar -from hed.models.tabular_input import TabularInput -from hed.tools.analysis.hed_type_definitions import HedTypeDefinitions -from hed.schema.hed_schema_io import load_schema_version - - -class Test(unittest.TestCase): - - @classmethod - def setUpClass(cls): - schema = load_schema_version(xml_version="8.1.0") - cls.test_strings1 = [HedString('Sensory-event,(Def/Cond1,(Red, Blue),Onset),(Def/Cond2,Onset),Green,Yellow', - hed_schema=schema), - HedString('(Def/Cond1, Offset)', hed_schema=schema), - HedString('White, Black, Condition-variable/Wonder, Condition-variable/Fast', - hed_schema=schema), - HedString('', hed_schema=schema), - HedString('(Def/Cond2, Onset)', hed_schema=schema), - HedString('(Def/Cond3/4.3, Onset)', hed_schema=schema), - HedString('Arm, Leg, Condition-variable/Fast', hed_schema=schema)] - def1 = HedString('(Condition-variable/Var1, Circle, Square, Description/This is def1)', hed_schema=schema) - def2 = HedString('(condition-variable/Var2, Condition-variable/Apple, Triangle, Sphere)', hed_schema=schema) - def3 = HedString('(Organizational-property/Condition-variable/Var3, Physical-length/#, Ellipse, Cross)', - hed_schema=schema) - def4 = 
HedString('(Condition-variable, Apple, Banana, Description/This is def4)', hed_schema=schema) - def5 = HedString('(Condition-variable/Lumber, Apple, Banana, Description/This is def5)', hed_schema=schema) - def6 = HedString('(Condition-variable/Lumber, Label/#, Apple, Banana, Description/This is def6)', hed_schema=schema) - cls.definitions1 = {'Cond1': DefinitionEntry('Cond1', def1, False, None), - 'Cond2': DefinitionEntry('Cond2', def2, False, None), - 'Cond3': DefinitionEntry('Cond3', def3, True, None), - 'Cond4': DefinitionEntry('Cond4', def4, False, None), - 'Cond5': DefinitionEntry('Cond5', def5, False, None), - 'Cond6': DefinitionEntry('Cond6', def6, True, None) - } - bids_root_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../../data/bids_tests/eeg_ds003645s_hed')) - events_path = os.path.realpath(os.path.join(bids_root_path, - 'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv')) - sidecar_path = os.path.realpath(os.path.join(bids_root_path, 'task-FacePerception_events.json')) - sidecar1 = Sidecar(sidecar_path, name='face_sub1_json') - cls.input_data = TabularInput(events_path, sidecar=sidecar1, name="face_sub1_events") - cls.schema = schema - cls.sidecar1 = sidecar1 - - def test_constructor(self): - def_man = HedTypeDefinitions(self.definitions1, self.schema) - self.assertIsInstance(def_man, HedTypeDefinitions, - "Constructor should create a HedTypeDefinitions directly from a dict") - self.assertEqual(len(def_man.def_map), 6, "Constructor condition_map should have the right length") - self.assertEqual(len(def_man.def_map), len(def_man.definitions), - "Constructor condition_map should be the same length as the definitions dictionary") - - def test_constructor_from_sidecar(self): - definitions = self.sidecar1.get_def_dict(self.schema) - def_man = HedTypeDefinitions(definitions, self.schema) - self.assertIsInstance(def_man, HedTypeDefinitions, - "Constructor should create a HedTypeDefinitions from a tabular input") 
- self.assertEqual(len(def_man.def_map), 17, "Constructor condition_map should have the right length") - self.assertEqual(len(def_man.def_map), len(def_man.definitions), - "Constructor condition_map should be the same length as the definitions dictionary") - - def test_get_vars(self): - def_man = HedTypeDefinitions(self.definitions1, self.schema) - item1 = HedString("Sensory-event,((Red,Blue)),", self.schema) - vars1 = def_man.get_type_values(item1) - self.assertFalse(vars1, "get_type_values should return None if no condition type_variables") - item2 = HedString(f"Sensory-event,(Def/Cond1,(Red,Blue,Condition-variable/Trouble))", self.schema) - vars2 = def_man.get_type_values(item2) - self.assertEqual(len(vars2), 1, "get_type_values should return correct number of condition type_variables") - item3 = HedString(f"Sensory-event,(Def/Cond1,(Red,Blue,Condition-variable/Trouble))," - f"(Def/Cond2),Green,Yellow,Def/Cond5, Def/Cond6/4, Description/Tell me", self.schema) - vars3 = def_man.get_type_values(item3) - self.assertEqual(len(vars3), 5, "get_type_values should return multiple condition type_variables") - - def test_get_def_names(self): - def_man = HedTypeDefinitions(self.definitions1, self.schema) - a = def_man.get_def_names(HedTag('Def/Cond3/4', hed_schema=self.schema)) - self.assertEqual(len(a), 1, "get_def_names returns 1 item if single tag") - self.assertEqual(a[0], 'cond3', "get_def_names returns the correct item if single tag") - b = def_man.get_def_names(HedTag('Def/Cond3/4', hed_schema=self.schema), no_value=False) - self.assertEqual(len(b), 1, "get_def_names returns 1 item if single tag") - self.assertEqual(b[0], 'cond3/4', "get_def_names returns the correct item if single tag") - c = def_man.get_def_names(HedString('(Def/Cond3/5,(Red, Blue))', hed_schema=self.schema)) - self.assertEqual(len(c), 1, "get_def_names returns 1 item if single group def") - self.assertEqual(c[0], 'cond3', "get_def_names returns the correct item if single group def") - d = 
def_man.get_def_names(HedString('(Def/Cond3/6,(Red, Blue, Def/Cond1), Def/Cond2)', hed_schema=self.schema), - no_value=False) - self.assertEqual(len(d), 3, "get_def_names returns right number of items if multiple defs") - self.assertEqual(d[0], 'cond3/6', "get_def_names returns the correct item if multiple def") - e = def_man.get_def_names(HedString('((Red, Blue, (Green), Black))', hed_schema=self.schema)) - self.assertFalse(e, "get_def_names returns no items if no defs") - - def test_split_name(self): - name1, val1 = HedTypeDefinitions.split_name('') - self.assertIsNone(name1, "split_name should return None split name for empty name") - self.assertIsNone(val1, "split_name should return None split value for empty name") - name2, val2 = HedTypeDefinitions.split_name('lumber') - self.assertEqual(name2, 'lumber', 'split_name should return name if no split value') - self.assertEqual(val2, '', 'split_name should return empty string if no split value') - name3, val3 = HedTypeDefinitions.split_name('Lumber/5.23', lowercase=False) - self.assertEqual(name3, 'Lumber', 'split_name should return name if split value') - self.assertEqual(val3, '5.23', 'split_name should return value as string if split value') - name4, val4 = HedTypeDefinitions.split_name('Lumber/5.23') - self.assertEqual(name4, 'lumber', 'split_name should return name if split value') - self.assertEqual(val4, '5.23', 'split_name should return value as string if split value') - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/tools/analysis/test_hed_type_defs.py b/tests/tools/analysis/test_hed_type_defs.py new file mode 100644 index 000000000..9e64c3298 --- /dev/null +++ b/tests/tools/analysis/test_hed_type_defs.py @@ -0,0 +1,132 @@ +import os +import unittest +from hed.models import DefinitionDict +from hed.models.hed_string import HedString +from hed.models.hed_tag import HedTag +from hed.models.sidecar import Sidecar +from hed.models.tabular_input import TabularInput +from 
hed.tools.analysis.hed_type_defs import HedTypeDefs +from hed.schema.hed_schema_io import load_schema_version + + +class Test(unittest.TestCase): + + @classmethod + def setUpClass(cls): + schema = load_schema_version(xml_version="8.1.0") + defs = [HedString('(Definition/Cond1, (Condition-variable/Var1, Circle, Square))', hed_schema=schema), + HedString('(Definition/Cond2, (condition-variable/Var2, Condition-variable/Apple, Triangle, Sphere))', + hed_schema=schema), + HedString('(Definition/Cond3/#, (Condition-variable/Var3, Label/#, Ellipse, Cross))', + hed_schema=schema), + HedString('(Definition/Cond4, (Condition-variable, Rectangle, Triangle))', hed_schema=schema), + HedString('(Definition/Cond5, (Condition-variable/Lumber, Action, Sensory-presentation))', + hed_schema=schema), + HedString('(Definition/Cond6/#, (Condition-variable/Lumber, Label/#, Agent, Move))', + hed_schema=schema)] + def_dict = DefinitionDict() + for value in defs: + def_dict.check_for_definitions(value) + + cls.test_strings1 = ["Sensory-event,(Def/Cond1,(Elbow, Hip, Condition-variable/Trouble),Onset)", + "(Def/Cond2,Onset),Green,Yellow, Def/Cond5, Def/Cond6/4", + "(Def/Cond1, Offset)", + "White, Black, Condition-variable/Wonder, Condition-variable/Fast", + "", + "(Def/Cond2, Onset)", + "(Def/Cond3/4.3, Onset)", + "Upper-arm, Head, Condition-variable/Fast"] + cls.definitions1 = def_dict + bids_root_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), + '../../data/bids_tests/eeg_ds003645s_hed')) + events_path = os.path.realpath(os.path.join(bids_root_path, + 'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv')) + sidecar_path = os.path.realpath(os.path.join(bids_root_path, 'task-FacePerception_events.json')) + sidecar1 = Sidecar(sidecar_path, name='face_sub1_json') + cls.input_data = TabularInput(events_path, sidecar=sidecar1, name="face_sub1_events") + cls.schema = schema + cls.sidecar1 = sidecar1 + + def test_constructor(self): + def_man = 
HedTypeDefs(self.definitions1) + self.assertIsInstance(def_man, HedTypeDefs, + "Constructor should create a HedTypeDefinitions directly from a dict") + self.assertEqual(len(def_man.def_map), 6, "Constructor condition_map should have the right length") + self.assertEqual(len(def_man.def_map), len(def_man.definitions), + "Constructor condition_map should be the same length as the type_defs dictionary") + + def test_constructor_from_sidecar(self): + definitions = self.sidecar1.get_def_dict(self.schema) + def_man = HedTypeDefs(definitions) + self.assertIsInstance(def_man, HedTypeDefs, + "Constructor should create a HedTypeDefinitions from a tabular input") + self.assertEqual(len(def_man.def_map), 8, "Constructor condition_map should have the right length") + self.assertEqual(len(def_man.definitions), len(definitions)) + defs = def_man.type_def_names + self.assertIsInstance(defs, list) + self.assertEqual(len(defs), 8) + + def test_constructor_from_tabular(self): + def_dict = self.input_data.get_def_dict(self.schema) + def_man = HedTypeDefs(def_dict, type_tag="Condition-variable") + self.assertIsInstance(def_man, HedTypeDefs) + self.assertEqual(len(def_man.def_map), 8) + self.assertEqual(len(def_man.type_map), 3) + self.assertEqual(len(def_man.type_def_names), 8) + + def test_get_type_values_tabular(self): + def_dict = self.input_data.get_def_dict(self.schema) + def_man = HedTypeDefs(def_dict, type_tag="Condition-variable") + test_str = HedString("Sensory-event, Def/Right-sym-cond", self.schema) + values1 = def_man.get_type_values(test_str) + self.assertIsInstance(values1, list) + self.assertEqual(1, len(values1)) + + def test_get_type_values(self): + def_man = HedTypeDefs(self.definitions1) + item1 = HedString("Sensory-event,((Red,Blue)),", self.schema) + vars1 = def_man.get_type_values(item1) + self.assertFalse(vars1, "get_type_values should return None if no condition type_variables") + item2 = 
HedString(f"Sensory-event,(Def/Cond1,(Red,Blue,Condition-variable/Trouble))", self.schema) + vars2 = def_man.get_type_values(item2) + self.assertEqual(1, len(vars2), "get_type_values should return correct number of condition type_variables") + item3 = HedString(f"Sensory-event,(Def/Cond1,(Red,Blue,Condition-variable/Trouble))," + f"(Def/Cond2),Green,Yellow,Def/Cond5, Def/Cond6/4, Description/Tell me", self.schema) + vars3 = def_man.get_type_values(item3) + self.assertEqual(len(vars3), 5, "get_type_values should return multiple condition type_variables") + + def test_extract_def_names(self): + def_man = HedTypeDefs(self.definitions1) + a = def_man.extract_def_names(HedTag('Def/Cond3/4', hed_schema=self.schema)) + self.assertEqual(len(a), 1, "get_def_names returns 1 item if single tag") + self.assertEqual(a[0], 'cond3', "get_def_names returns the correct item if single tag") + b = def_man.extract_def_names(HedTag('Def/Cond3/4', hed_schema=self.schema), no_value=False) + self.assertEqual(len(b), 1, "get_def_names returns 1 item if single tag") + self.assertEqual(b[0], 'cond3/4', "get_def_names returns the correct item if single tag") + c = def_man.extract_def_names(HedString('(Def/Cond3/5,(Red, Blue))', hed_schema=self.schema)) + self.assertEqual(len(c), 1, "get_def_names returns 1 item if single group def") + self.assertEqual(c[0], 'cond3', "get_def_names returns the correct item if single group def") + d = def_man.extract_def_names(HedString('(Def/Cond3/6,(Red, Blue, Def/Cond1), Def/Cond2)', + hed_schema=self.schema), no_value=False) + self.assertEqual(len(d), 3, "get_def_names returns right number of items if multiple defs") + self.assertEqual(d[0], 'cond3/6', "get_def_names returns the correct item if multiple def") + e = def_man.extract_def_names(HedString('((Red, Blue, (Green), Black))', hed_schema=self.schema)) + self.assertFalse(e, "get_def_names returns no items if no defs") + + def test_split_name(self): + name1, val1 = HedTypeDefs.split_name('') + 
self.assertIsNone(name1, "split_name should return None split name for empty name") + self.assertIsNone(val1, "split_name should return None split value for empty name") + name2, val2 = HedTypeDefs.split_name('lumber') + self.assertEqual(name2, 'lumber', 'split_name should return name if no split value') + self.assertEqual(val2, '', 'split_name should return empty string if no split value') + name3, val3 = HedTypeDefs.split_name('Lumber/5.23', lowercase=False) + self.assertEqual(name3, 'Lumber', 'split_name should return name if split value') + self.assertEqual(val3, '5.23', 'split_name should return value as string if split value') + name4, val4 = HedTypeDefs.split_name('Lumber/5.23') + self.assertEqual(name4, 'lumber', 'split_name should return name if split value') + self.assertEqual(val4, '5.23', 'split_name should return value as string if split value') + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/tools/analysis/test_hed_type_factors.py b/tests/tools/analysis/test_hed_type_factors.py index 378617a12..d9e47eb22 100644 --- a/tests/tools/analysis/test_hed_type_factors.py +++ b/tests/tools/analysis/test_hed_type_factors.py @@ -1,16 +1,14 @@ import os import unittest -import pandas as pd -from hed.errors.exceptions import HedFileError -from hed.models import DefinitionEntry +from pandas import DataFrame +from hed.models import DefinitionDict from hed.models.hed_string import HedString -from hed.models.sidecar import Sidecar from hed.models.tabular_input import TabularInput from hed.schema.hed_schema_io import load_schema_version -from hed.tools.analysis.hed_context_manager import HedContextManager -from hed.tools.analysis.hed_type_values import HedTypeValues +from hed.tools.analysis.event_manager import EventManager +from hed.tools.analysis.hed_type import HedType +from hed.errors.exceptions import HedFileError from hed.tools.analysis.hed_type_factors import HedTypeFactors -from hed.models.df_util import get_assembled class 
Test(unittest.TestCase): @@ -19,89 +17,93 @@ class Test(unittest.TestCase): @classmethod def setUpClass(cls): schema = load_schema_version(xml_version="8.1.0") - cls.test_strings1 = [HedString("Sensory-event,(Def/Cond1,(Red, Blue, Condition-variable/Trouble),Onset)," - "(Def/Cond2,Onset),Green,Yellow, Def/Cond5, Def/Cond6/4", hed_schema=schema), - HedString('(Def/Cond1, Offset)', hed_schema=schema), - HedString('White, Black, Condition-variable/Wonder, Condition-variable/Fast', - hed_schema=schema), - HedString('', hed_schema=schema), - HedString('(Def/Cond2, Onset)', hed_schema=schema), - HedString('(Def/Cond3/4.3, Onset)', hed_schema=schema), - HedString('Arm, Leg, Condition-variable/Fast', hed_schema=schema)] - cls.test_strings2 = [HedString("Def/Cond2, (Def/Cond6/4, Onset), (Def/Cond6/7.8, Onset), Def/Cond6/Alpha", - hed_schema=schema), - HedString("Yellow", hed_schema=schema), - HedString("Def/Cond2, (Def/Cond6/4, Onset)", hed_schema=schema), - HedString("Def/Cond2, Def/Cond6/5.2 (Def/Cond6/7.8, Offset)", hed_schema=schema), - HedString("Def/Cond2, Def/Cond6/4", hed_schema=schema)] - - def1 = HedString('(Condition-variable/Var1, Circle, Square)', hed_schema=schema) - def2 = HedString('(condition-variable/Var2, Condition-variable/Apple, Triangle, Sphere)', hed_schema=schema) - def3 = HedString('(Organizational-property/Condition-variable/Var3, Physical-length/#, Ellipse, Cross)', - hed_schema=schema) - def4 = HedString('(Condition-variable, Apple, Banana)', hed_schema=schema) - def5 = HedString('(Condition-variable/Lumber, Apple, Banana)', hed_schema=schema) - def6 = HedString('(Condition-variable/Lumber, Label/#, Apple, Banana)', hed_schema=schema) - cls.defs = {'Cond1': DefinitionEntry('Cond1', def1, False, None), - 'Cond2': DefinitionEntry('Cond2', def2, False, None), - 'Cond3': DefinitionEntry('Cond3', def3, True, None), - 'Cond4': DefinitionEntry('Cond4', def4, False, None), - 'Cond5': DefinitionEntry('Cond5', def5, False, None), - 'Cond6': 
DefinitionEntry('Cond6', def6, True, None) - } - - cls.test_strings3 = [HedString('(Def/Cond3, Offset)', hed_schema=schema)] - + # Set up the definition dictionary + defs = [HedString('(Definition/Cond1, (Condition-variable/Var1, Circle, Square))', hed_schema=schema), + HedString('(Definition/Cond2, (condition-variable/Var2, Condition-variable/Apple, Triangle, Sphere))', + hed_schema=schema), + HedString( + '(Definition/Cond3/#, (Organizational-property/Condition-variable/Var3, Label/#, Ellipse, Cross))', + hed_schema=schema), + HedString('(Definition/Cond4, (Condition-variable, Apple, Banana))', hed_schema=schema), + HedString('(Definition/Cond5, (Condition-variable/Lumber, Apple, Banana))', hed_schema=schema), + HedString('(Definition/Cond6/#, (Condition-variable/Lumber, Label/#, Apple, Banana))', + hed_schema=schema)] + def_dict = DefinitionDict() + for value in defs: + def_dict.check_for_definitions(value) + test_strings1 = ["Sensory-event,(Def/Cond1,(Red, Blue, Condition-variable/Trouble),Onset)", + "(Def/Cond2,Onset),Green,Yellow, Def/Cond5, Def/Cond6/4", + "(Def/Cond1, Offset)", + "White, Black, Condition-variable/Wonder, Condition-variable/Fast", + "", + "(Def/Cond2, Onset)", + "(Def/Cond3/4.3, Onset)", + "Arm, Leg, Condition-variable/Fast"] + test_onsets1 = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0] + df1 = DataFrame(test_onsets1, columns=['onset']) + df1['HED'] = test_strings1 + input_data1 = TabularInput(df1) + cls.event_man1 = EventManager(input_data1, schema, extra_defs=def_dict) + cls.input_data1 = input_data1 + test_strings2 = ["Def/Cond2, (Def/Cond6/4, Onset), (Def/Cond6/7.8, Onset), Def/Cond6/Alpha", + "Yellow", + "Def/Cond2, (Def/Cond6/4, Onset)", + "Def/Cond2, Def/Cond6/5.2 (Def/Cond6/7.8, Offset)", + "Def/Cond2, Def/Cond6/4"] + test_onsets2 = [0.0, 1.0, 2.0, 3.0, 4.0] + df2 = DataFrame(test_onsets2, columns=['onset']) + df2['HED'] = test_strings2 + input_data2 = TabularInput(df2) + cls.event_man2 = EventManager(input_data2, schema, 
extra_defs=def_dict) + test_strings3 = ['(Def/Cond3, Offset)'] + test_onsets3 = [0.0] + df3 = DataFrame(test_onsets3, columns=['onset']) + df3['HED'] = test_strings3 + cls.input_data3 = TabularInput(df3) bids_root_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../../data/bids_tests/eeg_ds003645s_hed')) + '../../data/bids_tests/eeg_ds003645s_hed')) events_path = os.path.realpath(os.path.join(bids_root_path, - 'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv')) + 'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv')) sidecar_path = os.path.realpath(os.path.join(bids_root_path, 'task-FacePerception_events.json')) - sidecar1 = Sidecar(sidecar_path, name='face_sub1_json') - cls.input_data = TabularInput(events_path, sidecar=sidecar1, name="face_sub1_events") - cls.sidecar1 = sidecar1 cls.schema = schema + cls.tab_input = TabularInput(events_path, sidecar_path,) def test_with_mixed(self): - var_manager = HedTypeValues(HedContextManager(self.test_strings1, self.schema), self.defs, 'run-01') + var_manager = HedType(self.event_man1, 'run-01') var_facts = var_manager.get_type_value_factors('fast') self.assertIsInstance(var_facts, HedTypeFactors) df = var_facts.get_factors() - self.assertIsInstance(df, pd.DataFrame) - self.assertEqual(len(df), len(self.test_strings1)) + self.assertIsInstance(df, DataFrame) + self.assertEqual(len(df), len(self.event_man1.event_list)) self.assertEqual(len(df.columns), 1) summary1 = var_facts.get_summary() self.assertIsInstance(summary1, dict) def test_tabular_input(self): - hed_strings, definitions = get_assembled(self.input_data, self.sidecar1, self.schema, extra_def_dicts=None, - join_columns=True, shrink_defs=True, expand_defs=False) - var_manager = HedTypeValues(HedContextManager(hed_strings, self.schema), definitions, 'run-01') - self.assertIsInstance(var_manager, HedTypeValues, - "Constructor should create a HedTypeManager from a tabular input") + var_manager = 
HedType(EventManager(self.tab_input, self.schema), 'run-01') + self.assertIsInstance(var_manager, HedType) var_fact = var_manager.get_type_value_factors('face-type') self.assertIsInstance(var_fact, HedTypeFactors) this_str = str(var_fact) self.assertIsInstance(this_str, str) self.assertTrue(len(this_str)) fact2 = var_fact.get_factors() - self.assertIsInstance(fact2, pd.DataFrame) + self.assertIsInstance(fact2, DataFrame) df2 = var_fact._one_hot_to_categorical(fact2, ["unfamiliar-face-cond", "baloney"]) self.assertEqual(len(df2), 200) self.assertEqual(len(df2.columns), 1) def test_constructor_multiple_values(self): - var_manager = HedTypeValues(HedContextManager(self.test_strings2, self.schema), self.defs, 'run-01') - self.assertIsInstance(var_manager, HedTypeValues, - "Constructor should create a HedTypeManager from strings") - self.assertEqual(len(var_manager._type_value_map), 3, + var_manager = HedType(self.event_man2, 'run-01') + self.assertIsInstance(var_manager, HedType) + self.assertEqual(len(var_manager._type_map), 3, "Constructor should have right number of type_variables if multiple") var_fact1 = var_manager.get_type_value_factors('var2') self.assertIsInstance(var_fact1, HedTypeFactors) var_fact2 = var_manager.get_type_value_factors('lumber') fact2 = var_fact2.get_factors() - self.assertIsInstance(fact2, pd.DataFrame) - self.assertEqual(len(fact2), len(self.test_strings2)) + self.assertIsInstance(fact2, DataFrame) + self.assertEqual(len(fact2), len(self.event_man2.event_list)) with self.assertRaises(HedFileError) as context: var_fact2.get_factors(factor_encoding="categorical") self.assertEqual(context.exception.args[0], "MultipleFactorSameEvent") @@ -110,15 +112,14 @@ def test_constructor_multiple_values(self): self.assertEqual(context.exception.args[0], "BadFactorEncoding") def test_constructor_unmatched(self): - with self.assertRaises(HedFileError) as context: - HedTypeValues(HedContextManager(self.test_strings3, self.schema), self.defs, 'run-01') - 
self.assertEqual(context.exception.args[0], 'UnmatchedOffset') + with self.assertRaises(KeyError) as context: + HedType(EventManager(self.input_data3, self.schema), 'run-01') + self.assertEqual(context.exception.args[0], 'cond3') def test_variable_summary(self): - var_manager = HedTypeValues(HedContextManager(self.test_strings2, self.schema), self.defs, 'run-01') - self.assertIsInstance(var_manager, HedTypeValues, - "Constructor should create a HedTypeManager from strings") - self.assertEqual(len(var_manager._type_value_map), 3, + var_manager = HedType(self.event_man2, 'run-01') + self.assertIsInstance(var_manager, HedType) + self.assertEqual(len(var_manager._type_map), 3, "Constructor should have right number of type_variables if multiple") for variable in var_manager.get_type_value_names(): var_sum = var_manager.get_type_value_factors(variable) @@ -126,24 +127,23 @@ def test_variable_summary(self): self.assertIsInstance(summary, dict, "get_summary returns a dictionary summary") def test_get_variable_factors(self): - var_manager = HedTypeValues(HedContextManager(self.test_strings2, self.schema), self.defs, 'run-01') - self.assertIsInstance(var_manager, HedTypeValues, - "Constructor should create a HedTypeManager from strings") - self.assertEqual(len(var_manager._type_value_map), 3, + var_manager = HedType(self.event_man2, 'run-01') + self.assertIsInstance(var_manager, HedType) + self.assertEqual(len(var_manager._type_map), 3, "Constructor should have right number of type_variables if multiple") for variable in var_manager.get_type_value_names(): var_sum = var_manager.get_type_value_factors(variable) summary = var_sum.get_summary() factors = var_sum.get_factors() - self.assertIsInstance(factors, pd.DataFrame, "get_factors contains dataframe.") + self.assertIsInstance(factors, DataFrame, "get_factors contains dataframe.") self.assertEqual(len(factors), var_sum.number_elements, "get_factors has factors of same length as number of elements") - if not 
var_manager._type_value_map[variable].levels: + if not var_manager._type_map[variable].levels: self.assertEqual(len(factors.columns), 1) else: self.assertEqual(len(factors.columns), summary["levels"], 'get_factors has factors levels') - self.assertEqual(len(factors.columns), len(var_manager._type_value_map[variable].levels)) + self.assertEqual(len(factors.columns), len(var_manager._type_map[variable].levels)) def test_count_events(self): list1 = [0, 2, 6, 1, 2, 0, 0] @@ -158,9 +158,7 @@ def test_count_events(self): self.assertIsNone(max_multiple2, "_count_level_events should not have a max multiple for empty list") def test_get_summary(self): - hed_strings, definitions = get_assembled(self.input_data, self.sidecar1, self.schema, extra_def_dicts=None, - join_columns=True, shrink_defs=True, expand_defs=False) - var_manager = HedTypeValues(HedContextManager(hed_strings, self.schema), definitions, 'run-01') + var_manager = HedType(EventManager(self.tab_input, self.schema), 'run-01') var_key = var_manager.get_type_value_factors('key-assignment') sum_key = var_key.get_summary() self.assertEqual(sum_key['events'], 200, "get_summary has right number of events") @@ -172,7 +170,8 @@ def test_get_summary(self): self.assertEqual(sum_key['events'], 52, "get_summary has right number of events") self.assertEqual(sum_key['max_refs_per_event'], 1, "Get_summary has right multiple maximum") self.assertIsInstance(sum_key['level_counts'], dict, "get_summary level counts is a dictionary") - self.assertEqual(sum_key['level_counts']['unfamiliar-face-cond'], 20, "get_summary level counts value correct.") + self.assertEqual(sum_key['level_counts']['unfamiliar-face-cond'], + 20, "get_summary level counts value correct.") if __name__ == '__main__': diff --git a/tests/tools/analysis/test_hed_type_manager.py b/tests/tools/analysis/test_hed_type_manager.py index bec7d4d88..951c847a6 100644 --- a/tests/tools/analysis/test_hed_type_manager.py +++ b/tests/tools/analysis/test_hed_type_manager.py @@ 
-3,10 +3,10 @@ from hed.models.sidecar import Sidecar from hed.models.tabular_input import TabularInput from hed.schema.hed_schema_io import load_schema_version -from hed.tools.analysis.hed_type_values import HedTypeValues +from hed.tools.analysis.hed_type import HedType +from hed.tools.analysis.event_manager import EventManager from hed.tools.analysis.hed_type_factors import HedTypeFactors from hed.tools.analysis.hed_type_manager import HedTypeManager -from hed.models.df_util import get_assembled class Test(unittest.TestCase): @@ -20,50 +20,42 @@ def setUp(self): sidecar_path = os.path.realpath(os.path.join(bids_root_path, 'task-FacePerception_events.json')) sidecar1 = Sidecar(sidecar_path, name='face_sub1_json') self.input_data = TabularInput(events_path, sidecar=sidecar1, name="face_sub1_events") - self.hed_strings, self.definitions = get_assembled(self.input_data, sidecar1, schema, - extra_def_dicts=None, - join_columns=True, shrink_defs=True, expand_defs=False) - self.sidecar1 = sidecar1 self.schema = schema def test_constructor(self): - var_manager = HedTypeManager(self.hed_strings, self.schema, self.definitions) + var_manager = HedTypeManager(EventManager(self.input_data, self.schema)) self.assertIsInstance(var_manager, HedTypeManager, "Constructor should create a HedTypeManager from a tabular input") - self.assertEqual(len(var_manager.context_manager.hed_strings), len(var_manager.context_manager.contexts), + self.assertEqual(len(var_manager.event_manager.hed_strings), len(var_manager.event_manager.onsets), "Variable managers have context same length as hed_strings") - self.assertFalse(var_manager._type_tag_map, "constructor has empty map") + self.assertFalse(var_manager._type_map, "constructor has empty map") def test_summarize(self): - var_manager = HedTypeManager(self.hed_strings, self.schema, self.definitions) + var_manager = HedTypeManager(EventManager(self.input_data, self.schema)) self.assertIsInstance(var_manager, HedTypeManager, "Constructor should 
create a HedTypeManager from a tabular input") - self.assertEqual(len(var_manager.context_manager.hed_strings), len(var_manager.context_manager.contexts), - "Variable managers have context same length as hed_strings") - self.assertFalse(var_manager._type_tag_map, "constructor has empty map") + # ToDo: Test summarize def test_add_type_variable(self): - var_manager = HedTypeManager(self.hed_strings, self.schema, self.definitions) - self.assertFalse(var_manager._type_tag_map, "constructor has empty map") - var_manager.add_type_variable("Condition-variable") - self.assertEqual(len(var_manager._type_tag_map), 1, + var_manager = HedTypeManager(EventManager(self.input_data, self.schema)) + self.assertFalse(var_manager._type_map, "constructor has empty map") + var_manager.add_type("Condition-variable") + self.assertEqual(len(var_manager._type_map), 1, "add_type_variable has 1 element map after one type added") - self.assertIn("condition-variable", var_manager._type_tag_map, + self.assertIn("condition-variable", var_manager._type_map, "add_type_variable converts type elements to lower case") - var_manager.add_type_variable("Condition-variable") - self.assertEqual(len(var_manager._type_tag_map), 1, + var_manager.add_type("Condition-variable") + self.assertEqual(len(var_manager._type_map), 1, "add_type_variable has 1 element map after same type is added twice") - var_manager.add_type_variable("task") - self.assertEqual(len(var_manager._type_tag_map), 2, + var_manager.add_type("task") + self.assertEqual(len(var_manager._type_map), 2, "add_type_variable has 2 element map after two types are added") def test_get_factor_vectors(self): - hed_strings, definitions = get_assembled(self.input_data, self.sidecar1, self.schema, extra_def_dicts=None, - join_columns=True, shrink_defs=True, expand_defs=False) - base_length = len(hed_strings) - var_manager = HedTypeManager(hed_strings, self.schema, definitions) - var_manager.add_type_variable("Condition-variable") - 
var_manager.add_type_variable("task") + var_manager = HedTypeManager(EventManager(self.input_data, self.schema)) + base_length = len(self.input_data.dataframe) + var_manager.add_type("Condition-variable") + var_manager.add_type("task") df_cond = var_manager.get_factor_vectors("condition-variable") df_task = var_manager.get_factor_vectors("task") self.assertEqual(len(df_cond), base_length, "get_factor_vectors returns df same length as original") @@ -73,51 +65,40 @@ def test_get_factor_vectors(self): df_baloney = var_manager.get_factor_vectors("baloney") self.assertIsNone(df_baloney, "get_factor_vectors returns None if no factors") - def test_get_type_variable(self): - hed_strings, definitions = get_assembled(self.input_data, self.sidecar1, self.schema, extra_def_dicts=None, - join_columns=True, shrink_defs=True, expand_defs=False) - var_manager = HedTypeManager(hed_strings, self.schema, definitions) - var_manager.add_type_variable("Condition-variable") - type_var = var_manager.get_type_variable("condition-variable") - self.assertIsInstance(type_var, HedTypeValues, - "get_type_variable returns a HedTypeValues if the key exists") - type_var = var_manager.get_type_variable("baloney") + def test_get_types(self): + var_manager = HedTypeManager(EventManager(self.input_data, self.schema)) + var_manager.add_type("Condition-variable") + type_var = var_manager.get_type("condition-variable") + self.assertIsInstance(type_var, HedType, "get_type returns a HedType if the key exists") + type_var = var_manager.get_type("baloney") self.assertIsNone(type_var, "get_type_variable returns None if the key does not exist") - def test_get_type_variable_def_names(self): - hed_strings, definitions = get_assembled(self.input_data, self.sidecar1, self.schema, extra_def_dicts=None, - join_columns=True, shrink_defs=True, expand_defs=False) - var_manager = HedTypeManager(hed_strings, self.schema, definitions) - var_manager.add_type_variable("Condition-variable") - def_names = 
var_manager.get_type_tag_def_names("condition-variable") + def test_get_type_def_names(self): + var_manager = HedTypeManager(EventManager(self.input_data, self.schema)) + var_manager.add_type("Condition-variable") + def_names = var_manager.get_type_def_names("condition-variable") self.assertEqual(len(def_names), 7, "get_type_tag_def_names has right length if condition-variable exists") self.assertIn('scrambled-face-cond', def_names, "get_type_tag_def_names returns a list with a correct value if condition-variable exists") - def_names = var_manager.get_type_tag_def_names("baloney") + def_names = var_manager.get_type_def_names("baloney") self.assertFalse(def_names, "get_type_tag_def_names returns empty if the type does not exist") - def test_get_variable_type_map(self): - hed_strings, definitions = get_assembled(self.input_data, self.sidecar1, self.schema, extra_def_dicts=None, - join_columns=True, shrink_defs=True, expand_defs=False) - var_manager = HedTypeManager(hed_strings, self.schema, definitions) - var_manager.add_type_variable("Condition-variable") - this_var = var_manager.get_type_variable("condition-variable") - self.assertIsInstance(this_var, HedTypeValues, - "get_type_variable_map returns a non-empty map when key lower case") + def test_get_type(self): + var_manager = HedTypeManager(EventManager(self.input_data, self.schema)) + var_manager.add_type("Condition-variable") + this_var = var_manager.get_type("condition-variable") + self.assertIsInstance(this_var, HedType, "get_type returns a non-empty map when key lower case") self.assertEqual(len(this_var.type_variables), 3, "get_type_variable_map map has right length when key lower case") - this_var2 = var_manager.get_type_variable("Condition-variable") - self.assertIsInstance(this_var2, HedTypeValues, - "get_type_variable_map returns a non-empty map when key upper case") + this_var2 = var_manager.get_type("Condition-variable") + self.assertIsInstance(this_var2, HedType, "get_type returns a non-empty map 
when key upper case") self.assertEqual(len(this_var2.type_variables), 3, "get_type_variable_map map has right length when key upper case") def test_get_type_variable_factor(self): - hed_strings, definitions = get_assembled(self.input_data, self.sidecar1, self.schema, extra_def_dicts=None, - join_columns=True, shrink_defs=True, expand_defs=False) - var_manager = HedTypeManager(hed_strings, self.schema, definitions) - var_manager.add_type_variable("Condition-variable") + var_manager = HedTypeManager(EventManager(self.input_data, self.schema)) + var_manager.add_type("Condition-variable") var_factor1 = var_manager.get_type_tag_factor("condition-variable", "key-assignment") self.assertIsInstance(var_factor1, HedTypeFactors, "get_type_tag_factor returns a HedTypeFactors if type variable factor exists") @@ -126,29 +107,25 @@ def test_get_type_variable_factor(self): var_factor3 = var_manager.get_type_tag_factor("baloney1", "key-assignment") self.assertIsNone(var_factor3, "get_type_tag_factor returns None if type variable does not exist") - def test_type_variables(self): - hed_strings, definitions = get_assembled(self.input_data, self.sidecar1, self.schema, extra_def_dicts=None, - join_columns=True, shrink_defs=True, expand_defs=False) - var_manager = HedTypeManager(hed_strings, self.schema, definitions) - vars1 = var_manager.type_variables + def test_types(self): + var_manager = HedTypeManager(EventManager(self.input_data, self.schema)) + vars1 = var_manager.types self.assertFalse(vars1, "type_variables is empty if no types have been added") - var_manager.add_type_variable("Condition-variable") - var_manager.add_type_variable("task") - vars2 = var_manager.type_variables + var_manager.add_type("Condition-variable") + var_manager.add_type("task") + vars2 = var_manager.types self.assertIsInstance(vars2, list, "type_variables returns a list ") self.assertEqual(len(vars2), 2, "type_variables return list is right length") def test_summarize_all(self): - hed_strings, definitions 
= get_assembled(self.input_data, self.sidecar1, self.schema, extra_def_dicts=None, - join_columns=True, shrink_defs=True, expand_defs=False) - var_manager = HedTypeManager(hed_strings, self.schema, definitions) + var_manager = HedTypeManager(EventManager(self.input_data, self.schema)) summary1 = var_manager.summarize_all() self.assertIsInstance(summary1, dict, "summarize_all returns a dictionary when nothing has been added") self.assertFalse(summary1, "summarize_all return dictionary is empty when nothing has been added") - vars1 = var_manager.type_variables + vars1 = var_manager.types self.assertFalse(vars1, "type_variables is empty if no types have been added") - var_manager.add_type_variable("Condition-variable") - var_manager.add_type_variable("task") + var_manager.add_type("Condition-variable") + var_manager.add_type("task") summary2 = var_manager.summarize_all() self.assertIsInstance(summary2, dict, "summarize_all returns a dictionary after additions") self.assertEqual(len(summary2), 2, diff --git a/tests/tools/analysis/test_hed_type_values.py b/tests/tools/analysis/test_hed_type_values.py deleted file mode 100644 index d8428e23c..000000000 --- a/tests/tools/analysis/test_hed_type_values.py +++ /dev/null @@ -1,171 +0,0 @@ -import os -import unittest -from pandas import DataFrame -from hed.errors.exceptions import HedFileError -from hed.models import DefinitionEntry -from hed.models.hed_string import HedString -from hed.models.hed_tag import HedTag -from hed.models.sidecar import Sidecar -from hed.models.tabular_input import TabularInput -from hed.schema.hed_schema_io import load_schema_version -from hed.tools.analysis.hed_context_manager import HedContextManager -from hed.tools.analysis.hed_type_values import HedTypeValues -from hed.models.df_util import get_assembled - - -class Test(unittest.TestCase): - - @classmethod - def setUpClass(cls): - schema = load_schema_version(xml_version="8.1.0") - cls.test_strings1 = ["Sensory-event,(Def/Cond1,(Red, Blue, 
Condition-variable/Trouble),Onset)," - "(Def/Cond2,Onset),Green,Yellow, Def/Cond5, Def/Cond6/4", - '(Def/Cond1, Offset)', - 'White, Black, Condition-variable/Wonder, Condition-variable/Fast', - '', - '(Def/Cond2, Onset)', - '(Def/Cond3/4.3, Onset)', - 'Arm, Leg, Condition-variable/Fast'] - cls.test_strings2 = ["Def/Cond2, (Def/Cond6/4, Onset), (Def/Cond6/7.8, Onset), Def/Cond6/Alpha", - "Yellow", - "Def/Cond2, (Def/Cond6/4, Onset)", - "Def/Cond2, Def/Cond6/5.2 (Def/Cond6/7.8, Offset)", - "Def/Cond2, Def/Cond6/4"] - cls.test_strings3 = ['(Def/Cond3, Offset)'] - - def1 = HedString('(Condition-variable/Var1, Circle, Square)', hed_schema=schema) - def2 = HedString('(condition-variable/Var2, Condition-variable/Apple, Triangle, Sphere)', hed_schema=schema) - def3 = HedString('(Organizational-property/Condition-variable/Var3, Physical-length/#, Ellipse, Cross)', - hed_schema=schema) - def4 = HedString('(Condition-variable, Apple, Banana)', hed_schema=schema) - def5 = HedString('(Condition-variable/Lumber, Apple, Banana)', hed_schema=schema) - def6 = HedString('(Condition-variable/Lumber, Label/#, Apple, Banana)', hed_schema=schema) - cls.defs = {'Cond1': DefinitionEntry('Cond1', def1, False, None), - 'Cond2': DefinitionEntry('Cond2', def2, False, None), - 'Cond3': DefinitionEntry('Cond3', def3, True, None), - 'Cond4': DefinitionEntry('Cond4', def4, False, None), - 'Cond5': DefinitionEntry('Cond5', def5, False, None), - 'Cond6': DefinitionEntry('Cond6', def6, True, None) - } - - bids_root_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../../data/bids_tests/eeg_ds003645s_hed')) - cls.events_path = os.path.realpath(os.path.join(bids_root_path, - 'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv')) - cls.sidecar_path = os.path.realpath(os.path.join(bids_root_path, 'task-FacePerception_events.json')) - cls.schema = schema - - def test_constructor(self): - strings1 = [HedString(hed, hed_schema=self.schema) for hed in 
self.test_strings1] - con_man = HedContextManager(strings1, hed_schema=self.schema) - type_var = HedTypeValues(con_man, self.defs, 'run-01') - self.assertIsInstance(type_var, HedTypeValues, - "Constructor should create a HedTypeManager from strings") - self.assertEqual(len(type_var._type_value_map), 8, - "Constructor ConditionVariables should have the right length") - - def test_constructor_from_tabular_input(self): - sidecar1 = Sidecar(self.sidecar_path, name='face_sub1_json') - input_data = TabularInput(self.events_path, sidecar=sidecar1, name="face_sub1_events") - test_strings1, definitions = get_assembled(input_data, sidecar1, self.schema, extra_def_dicts=None, - join_columns=True, shrink_defs=True, expand_defs=False) - var_manager = HedTypeValues(HedContextManager(test_strings1, self.schema), definitions, 'run-01') - self.assertIsInstance(var_manager, HedTypeValues, - "Constructor should create a HedTypeManager from a tabular input") - - def test_constructor_variable_caps(self): - sidecar1 = Sidecar(self.sidecar_path, name='face_sub1_json') - input_data = TabularInput(self.events_path, sidecar1, name="face_sub1_events") - test_strings1, definitions = get_assembled(input_data, sidecar1, self.schema, extra_def_dicts=None, - join_columns=True, shrink_defs=True, expand_defs=False) - var_manager = HedTypeValues(HedContextManager(test_strings1, self.schema), - definitions, 'run-01', type_tag="Condition-variable") - self.assertIsInstance(var_manager, HedTypeValues, - "Constructor should create a HedTypeManager variable caps") - - def test_constructor_variable_task(self): - sidecar1 = Sidecar(self.sidecar_path, name='face_sub1_json') - input_data = TabularInput(self.events_path, sidecar=sidecar1, name="face_sub1_events") - test_strings1, definitions = get_assembled(input_data, sidecar1, self.schema, extra_def_dicts=None, - join_columns=True, shrink_defs=True, expand_defs=False) - var_manager = HedTypeValues(HedContextManager(test_strings1, self.schema), - definitions, 
'run-01', type_tag="task") - self.assertIsInstance(var_manager, HedTypeValues, - "Constructor should create a HedTypeManager variable task") - - def test_constructor_multiple_values(self): - hed_strings = [HedString(hed, self.schema) for hed in self.test_strings2] - var_manager = HedTypeValues(HedContextManager(hed_strings, self.schema), self.defs, 'run-01') - self.assertIsInstance(var_manager, HedTypeValues, - "Constructor should create a HedTypeManager from strings") - self.assertEqual(len(var_manager._type_value_map), 3, - "Constructor should have right number of type_variables if multiple") - - def test_constructor_unmatched(self): - hed_strings = [HedString(hed, self.schema) for hed in self.test_strings3] - with self.assertRaises(HedFileError) as context: - HedTypeValues(HedContextManager(hed_strings, self.schema), self.defs, 'run-01') - self.assertEqual(context.exception.args[0], 'UnmatchedOffset') - - def test_get_variable_factors(self): - sidecar1 = Sidecar(self.sidecar_path, name='face_sub1_json') - input_data = TabularInput(self.events_path, sidecar1, name="face_sub1_events") - test_strings1, definitions = get_assembled(input_data, sidecar1, self.schema, extra_def_dicts=None, - join_columns=True, shrink_defs=True, expand_defs=False) - var_manager = HedTypeValues(HedContextManager(test_strings1, self.schema), definitions, 'run-01') - df_new1 = var_manager.get_type_factors() - self.assertIsInstance(df_new1, DataFrame) - self.assertEqual(len(df_new1), 200) - self.assertEqual(len(df_new1.columns), 7) - df_new2 = var_manager.get_type_factors(type_values=["face-type"]) - self.assertEqual(len(df_new2), 200) - self.assertEqual(len(df_new2.columns), 3) - df_new3 = var_manager.get_type_factors(type_values=["junk"]) - self.assertIsNone(df_new3) - - def test_str(self): - sidecar1 = Sidecar(self.sidecar_path, name='face_sub1_json') - input_data = TabularInput(self.events_path, sidecar1, name="face_sub1_events") - test_strings1, definitions = get_assembled(input_data, 
sidecar1, self.schema, extra_def_dicts=None, - join_columns=True, shrink_defs=True, expand_defs=False) - var_manager = HedTypeValues(HedContextManager(test_strings1, self.schema), definitions, 'run-01') - new_str = str(var_manager) - self.assertIsInstance(new_str, str) - - def test_summarize_variables(self): - sidecar1 = Sidecar(self.sidecar_path, name='face_sub1_json') - input_data = TabularInput(self.events_path, sidecar1, name="face_sub1_events") - test_strings1, definitions = get_assembled(input_data, sidecar1, self.schema, extra_def_dicts=None, - join_columns=True, shrink_defs=True, expand_defs=False) - var_manager = HedTypeValues(HedContextManager(test_strings1, self.schema), definitions, 'run-01') - summary = var_manager.get_summary() - self.assertIsInstance(summary, dict, "get_summary produces a dictionary if not json") - self.assertEqual(len(summary), 3, "Summarize_variables has right number of condition type_variables") - self.assertIn("key-assignment", summary, "get_summary has a correct key") - - def test_extract_definition_variables(self): - hed_strings = [HedString(hed, self.schema) for hed in self.test_strings1] - var_manager = HedTypeValues(HedContextManager(hed_strings, self.schema), self.defs, 'run-01') - var_levels = var_manager._type_value_map['var3'].levels - self.assertNotIn('cond3/7', var_levels, - "_extract_definition_variables before extraction def/cond3/7 not in levels") - tag = HedTag("Def/Cond3/7", hed_schema=self.schema) - var_manager._extract_definition_variables(tag, 5) - self.assertIn('cond3/7', var_levels, - "_extract_definition_variables after extraction def/cond3/7 not in levels") - - def test_get_variable_names(self): - hed_strings = [HedString(hed, self.schema) for hed in self.test_strings1] - conditions1 = HedTypeValues(HedContextManager(hed_strings, self.schema), self.defs, 'run-01') - list1 = conditions1.get_type_value_names() - self.assertEqual(len(list1), 8, "get_variable_tags list should have the right length") - - def 
test_get_variable_def_names(self): - hed_strings = [HedString(hed, self.schema) for hed in self.test_strings1] - conditions1 = HedTypeValues(HedContextManager(hed_strings, self.schema), self.defs, 'run-01') - list1 = conditions1.get_type_def_names() - self.assertEqual(len(list1), 5, "get_type_def_names list should have the right length") - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/tools/analysis/test_tabular_summary.py b/tests/tools/analysis/test_tabular_summary.py index b983c6f8b..40c7ad8db 100644 --- a/tests/tools/analysis/test_tabular_summary.py +++ b/tests/tools/analysis/test_tabular_summary.py @@ -36,18 +36,25 @@ def test_extract_summary(self): tab1 = TabularSummary() stern_df = get_new_dataframe(self.stern_map_path) tab1.update(stern_df) - sum_info = tab1.get_summary() - new_tab1 = TabularSummary.extract_summary(sum_info) + sum_info1 = tab1.get_summary() + self.assertIsInstance(sum_info1, dict) + self.assertEqual(len(sum_info1['Categorical columns']), 4) + new_tab1 = TabularSummary.extract_summary(sum_info1) + self.assertIsInstance(new_tab1, TabularSummary) tab2 = TabularSummary(value_cols=['letter'], skip_cols=['event_type']) + sum_info2 = tab2.get_summary() + self.assertIsInstance(sum_info2, dict) + new_tab2 = TabularSummary.extract_summary(sum_info2) + self.assertIsInstance(new_tab2, TabularSummary) tabular_info = {} - new_tab = TabularSummary.extract_summary(tabular_info) - self.assertIsInstance(new_tab, TabularSummary) + new_tab3 = TabularSummary.extract_summary(tabular_info) + self.assertIsInstance(new_tab3, TabularSummary) def test_extract_summary_empty(self): tabular_info = {} new_tab = TabularSummary.extract_summary(tabular_info) self.assertIsInstance(new_tab, TabularSummary) - + def test_get_number_unique_values(self): dict1 = TabularSummary() wh_df = get_new_dataframe(self.wh_events_path) @@ -218,7 +225,7 @@ def test_update_summary(self): tab.update(df, name=name) self.assertEqual(tab.total_events, 200) 
self.assertEqual(tab.total_files, 1) - tab_all.update_summary(tab) + tab_all.update_summary(tab) self.assertEqual(len(files_bids), tab_all.total_files) self.assertEqual(len(files_bids)*200, tab_all.total_events) diff --git a/tests/tools/analysis/test_temporal_event.py b/tests/tools/analysis/test_temporal_event.py index a05545bd6..cbf6cf135 100644 --- a/tests/tools/analysis/test_temporal_event.py +++ b/tests/tools/analysis/test_temporal_event.py @@ -3,7 +3,6 @@ from hed.schema.hed_schema_io import load_schema_version from hed.models import HedString, HedGroup, Sidecar, TabularInput -from hed.models.df_util import get_assembled from hed.tools.analysis.temporal_event import TemporalEvent from hed.tools.analysis.event_manager import EventManager @@ -26,12 +25,12 @@ def setUpClass(cls): cls.schema = schema def test_constructor_no_group(self): - test1 = HedString("(Onset, Def/Blech)", hed_schema=self.schema) + test1 = HedString("(Onset, def/blech)", hed_schema=self.schema) groups = test1.find_top_level_tags(["onset"], include_groups=1) te = TemporalEvent(groups[0], 3, 4.5) self.assertEqual(te.start_index, 3) self.assertEqual(te.start_time, 4.5) - self.assertEqual(te.anchor, 'blech') + self.assertEqual(te.anchor, 'Def/blech') self.assertFalse(te.internal_group) def test_constructor_group(self): @@ -41,7 +40,7 @@ def test_constructor_group(self): self.assertEqual(te.start_index, 3) self.assertEqual(te.start_time, 4.5) self.assertTrue(te.internal_group) - self.assertEqual(te.anchor, 'blech/54.3') + self.assertEqual(te.anchor, 'Def/Blech/54.3') self.assertIsInstance(te.internal_group, HedGroup) def test_constructor_on_files(self): diff --git a/tests/tools/bids/test_bids_file_dictionary.py b/tests/tools/bids/test_bids_file_dictionary.py index ef7744feb..0262ce665 100644 --- a/tests/tools/bids/test_bids_file_dictionary.py +++ b/tests/tools/bids/test_bids_file_dictionary.py @@ -118,5 +118,6 @@ def test_correct_file(self): BidsFileDictionary._correct_file(["junk.tsv"]) 
self.assertEqual(context.exception.args[0], "BadBidsFileArgument") + if __name__ == '__main__': unittest.main() diff --git a/tests/tools/bids/test_bids_file_group.py b/tests/tools/bids/test_bids_file_group.py index 4d4302b72..d1a66dc0f 100644 --- a/tests/tools/bids/test_bids_file_group.py +++ b/tests/tools/bids/test_bids_file_group.py @@ -3,7 +3,6 @@ from hed.schema.hed_schema_io import load_schema from hed.tools.analysis.tabular_summary import TabularSummary from hed.tools.bids.bids_file_group import BidsFileGroup -from hed.validator.hed_validator import HedValidator # TODO: Add test when exclude directories have files of the type needed (such as JSON in code directory). diff --git a/tests/tools/bids/test_bids_sidecar_file.py b/tests/tools/bids/test_bids_sidecar_file.py index 1be5b4e02..003658afd 100644 --- a/tests/tools/bids/test_bids_sidecar_file.py +++ b/tests/tools/bids/test_bids_sidecar_file.py @@ -79,7 +79,7 @@ def test_set_contents(self): self.assertFalse(sidecar1.contents, "set_contents before has no contents") self.assertFalse(sidecar1.has_hed, "set_contents before has_hed false") sidecar1.set_contents() - self.assertIsInstance(sidecar1.contents, Sidecar, "set_contents creates a sidecar on setcontents") + self.assertIsInstance(sidecar1.contents, Sidecar, "set_contents creates a sidecar on set_contents") self.assertTrue(sidecar1.has_hed, "set_contents before has_hed false") a = sidecar1.contents sidecar1.set_contents({'HED': 'xyz'}) diff --git a/tests/tools/remodeling/cli/test_run_remodel.py b/tests/tools/remodeling/cli/test_run_remodel.py index 893794e45..37df0fef5 100644 --- a/tests/tools/remodeling/cli/test_run_remodel.py +++ b/tests/tools/remodeling/cli/test_run_remodel.py @@ -79,12 +79,12 @@ def test_main_bids(self): def test_main_bids_alt_path(self): work_path = os.path.realpath(os.path.join(self.extract_path, 'temp')) arg_list = [self.data_root, self.summary_model_path, '-x', 'derivatives', 'stimuli', '-r', '8.1.0', - '-j', self.sidecar_path, '-w', 
work_path] - + '-j', self.sidecar_path, '-w', work_path] + with patch('sys.stdout', new=io.StringIO()) as fp: main(arg_list) self.assertFalse(fp.getvalue()) - + def test_main_bids_verbose_bad_task(self): arg_list = [self.data_root, self.model_path, '-x', 'derivatives', 'stimuli', '-b', '-t', 'junk', '-v'] with patch('sys.stdout', new=io.StringIO()) as fp: diff --git a/tests/tools/remodeling/cli/test_run_remodel_restore.py b/tests/tools/remodeling/cli/test_run_remodel_restore.py index c18dcbcfd..c3645f3ae 100644 --- a/tests/tools/remodeling/cli/test_run_remodel_restore.py +++ b/tests/tools/remodeling/cli/test_run_remodel_restore.py @@ -5,7 +5,6 @@ from hed.errors import HedFileError from hed.tools.remodeling.cli.run_remodel_backup import main as back_main from hed.tools.remodeling.cli.run_remodel_restore import main -from hed.tools.remodeling.backup_manager import BackupManager from hed.tools.util.io_util import get_file_list @@ -63,13 +62,13 @@ def test_restore_alt_loc(self): os.remove(os.path.realpath(os.path.join(self.test_root_back1, 'top_level.tsv'))) files2 = get_file_list(self.test_root_back1, exclude_dirs=['derivatives']) self.assertFalse(files2, "run_restore starts with the right number of files.") - arg_list = [self.test_root_back1, '-n', 'back1', '-w', alt_path,] + arg_list = [self.test_root_back1, '-n', 'back1', '-w', alt_path] main(arg_list) files3 = get_file_list(self.test_root_back1, exclude_dirs=['derivatives']) self.assertEqual(len(files3)+1, len(files1), "run_restore restores all the files after") if os.path.exists(alt_path): - shutil.rmtree(alt_path) + shutil.rmtree(alt_path) if __name__ == '__main__': diff --git a/tests/tools/remodeling/operations/test_convert_columns_op.py b/tests/tools/remodeling/operations/test_convert_columns_op.py index 01a27f949..48d177b0f 100644 --- a/tests/tools/remodeling/operations/test_convert_columns_op.py +++ b/tests/tools/remodeling/operations/test_convert_columns_op.py @@ -1,8 +1,5 @@ -import pandas as pd -import 
numpy as np import unittest from hed.tools.remodeling.operations.convert_columns_op import ConvertColumnsOp -from hed.tools.remodeling.dispatcher import Dispatcher class Test(unittest.TestCase): diff --git a/tests/tools/remodeling/operations/test_factor_hed_type_op.py b/tests/tools/remodeling/operations/test_factor_hed_type_op.py index e43e0e803..8af8a3b7d 100644 --- a/tests/tools/remodeling/operations/test_factor_hed_type_op.py +++ b/tests/tools/remodeling/operations/test_factor_hed_type_op.py @@ -14,9 +14,11 @@ class Test(unittest.TestCase): def setUpClass(cls): path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../../data/remodel_tests/')) - cls.data_path = os.path.realpath(os.path.join(path, 'sub-002_task-FacePerception_run-1_events.tsv')) + data_path = os.path.realpath(os.path.join(path, 'sub-002_task-FacePerception_run-1_events.tsv')) cls.json_path = os.path.realpath(os.path.join(path, 'task-FacePerception_events.json')) - cls.dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions=['8.1.0']) + dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions=['8.1.0']) + cls.df_test = dispatch.prep_data(dispatch.get_data_file(data_path)) + cls.dispatch = dispatch @classmethod def tearDownClass(cls): @@ -31,7 +33,7 @@ def setUp(self): def test_valid(self): # Test correct when all valid and no unwanted information op = FactorHedTypeOp(self.base_parameters) - df_new = op.do_op(self.dispatch, self.data_path, 'subj2_run1', sidecar=self.json_path) + df_new = op.do_op(self.dispatch, self.df_test, 'subj2_run1', sidecar=self.json_path) self.assertEqual(len(df_new), 200, "factor_hed_type_op length is correct") self.assertEqual(len(df_new.columns), 17, "factor_hed_type_op has correct number of columns") @@ -39,10 +41,8 @@ def test_valid_specific_column(self): parms = self.base_parameters parms["type_values"] = ["key-assignment"] op = FactorHedTypeOp(parms) - dispatch = Dispatcher([], data_root=None, 
backup_name=None, hed_versions='8.1.0') - df_new = dispatch.get_data_file(self.data_path) - df_new = op.do_op(dispatch, dispatch.prep_data(df_new), 'run-01', sidecar=self.json_path) - df_new = dispatch.post_proc_data(df_new) + df_new = op.do_op(self.dispatch, self.df_test, 'run-01', sidecar=self.json_path) + df_new = self.dispatch.post_proc_data(df_new) self.assertEqual(len(df_new), 200, "factor_hed_type_op length is correct when type_values specified") self.assertEqual(len(df_new.columns), 11, "factor_hed_type_op has correct number of columns when type_values specified") diff --git a/tests/tools/remodeling/operations/test_summarize_hed_tags_op.py b/tests/tools/remodeling/operations/test_summarize_hed_tags_op.py index f6f88fe5e..3e2a2d508 100644 --- a/tests/tools/remodeling/operations/test_summarize_hed_tags_op.py +++ b/tests/tools/remodeling/operations/test_summarize_hed_tags_op.py @@ -2,6 +2,12 @@ import os import unittest import pandas as pd +from hed.models import TabularInput, Sidecar +from hed.schema import load_schema_version +from hed.tools.analysis.hed_tag_counts import HedTagCounts +from hed.tools.analysis.event_manager import EventManager +from hed.tools.analysis.hed_tag_manager import HedTagManager +from io import StringIO from hed.models.df_util import get_assembled from hed.tools.remodeling.dispatcher import Dispatcher from hed.tools.remodeling.operations.summarize_hed_tags_op import SummarizeHedTagsOp, HedTagSummary @@ -27,8 +33,11 @@ def setUpClass(cls): "Objects": ["Item"], "Properties": ["Property"] }, - "expand_context": False, + "include_context": False, + "replace_defs": False, + "remove_types": ["Condition-variable", "Task"] } + cls.base_parameters = base_parameters cls.json_parms = json.dumps(base_parameters) @classmethod @@ -39,7 +48,10 @@ def test_constructor(self): parms = json.loads(self.json_parms) sum_op1 = SummarizeHedTagsOp(parms) self.assertIsInstance(sum_op1, SummarizeHedTagsOp, "constructor creates an object of the correct type") - 
parms["expand_context"] = "" + + def test_constructor_bad_params(self): + parms = json.loads(self.json_parms) + parms["include_context"] = "" with self.assertRaises(TypeError) as context: SummarizeHedTagsOp(parms) self.assertEqual(context.exception.args[0], "BadType") @@ -49,7 +61,7 @@ def test_constructor(self): SummarizeHedTagsOp(parms2) self.assertEqual(context.exception.args[0], "BadParameter") - def test_do_op(self): + def test_do_op_no_replace_no_context_remove_on(self): dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions=['8.1.0']) parms = json.loads(self.json_parms) sum_op = SummarizeHedTagsOp(parms) @@ -60,17 +72,81 @@ def test_do_op(self): self.assertEqual(10, len(df_new.columns), "summarize_hed_type_op has correct number of columns") self.assertIn(sum_op.summary_name, dispatch.summary_dicts) self.assertIsInstance(dispatch.summary_dicts[sum_op.summary_name], HedTagSummary) - x = dispatch.summary_dicts[sum_op.summary_name].summary_dict['subj2_run1'] - self.assertEqual(len(dispatch.summary_dicts[sum_op.summary_name].summary_dict['subj2_run1'].tag_dict), 47) + counts = dispatch.summary_dicts[sum_op.summary_name].summary_dict['subj2_run1'] + self.assertIsInstance(counts, HedTagCounts) + self.assertEqual(len(counts.tag_dict), 16) + self.assertIn('def', counts.tag_dict) + self.assertNotIn('task', counts.tag_dict) + self.assertNotIn('condition-variable', counts.tag_dict) df_new = sum_op.do_op(dispatch, dispatch.prep_data(df), 'subj2_run2', sidecar=self.json_path) - self.assertEqual(len(dispatch.summary_dicts[sum_op.summary_name].summary_dict['subj2_run2'].tag_dict), 47) + self.assertEqual(len(dispatch.summary_dicts[sum_op.summary_name].summary_dict['subj2_run2'].tag_dict), 16) + + def test_do_op_options(self): + dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions=['8.2.0']) + df = pd.read_csv(self.data_path, delimiter='\t', header=0, keep_default_na=False, na_values=",null") + + # # no replace, no context, types removed 
+ # parms1 = json.loads(self.json_parms) + # parms1["summary_name"] = "tag summary 1" + # sum_op1 = SummarizeHedTagsOp(parms1) + # df_new1 = sum_op1.do_op(dispatch, dispatch.prep_data(df), 'subj2_run1', sidecar=self.json_path) + # self.assertIsInstance(sum_op1, SummarizeHedTagsOp, "constructor creates an object of the correct type") + # self.assertEqual(200, len(df_new1), "summarize_hed_type_op dataframe length is correct") + # self.assertEqual(10, len(df_new1.columns), "summarize_hed_type_op has correct number of columns") + # self.assertIn(sum_op1.summary_name, dispatch.summary_dicts) + # self.assertIsInstance(dispatch.summary_dicts[sum_op1.summary_name], HedTagSummary) + # counts1 = dispatch.summary_dicts[sum_op1.summary_name].summary_dict['subj2_run1'] + # self.assertIsInstance(counts1, HedTagCounts) + # self.assertEqual(len(counts1.tag_dict), 16) + # self.assertNotIn('event-context', counts1.tag_dict) + # self.assertIn('def', counts1.tag_dict) + # self.assertNotIn('task', counts1.tag_dict) + # self.assertNotIn('condition-variable', counts1.tag_dict) + # + # # no replace, context, types removed + # parms2 = json.loads(self.json_parms) + # parms2["include_context"] = True + # parms2["summary_name"] = "tag summary 2" + # sum_op2 = SummarizeHedTagsOp(parms2) + # df_new2 = sum_op2.do_op(dispatch, dispatch.prep_data(df), 'subj2_run1', sidecar=self.json_path) + # self.assertIsInstance(sum_op2, SummarizeHedTagsOp, "constructor creates an object of the correct type") + # self.assertEqual(200, len(df_new2), "summarize_hed_type_op dataframe length is correct") + # self.assertEqual(10, len(df_new2.columns), "summarize_hed_type_op has correct number of columns") + # self.assertIn(sum_op2.summary_name, dispatch.summary_dicts) + # self.assertIsInstance(dispatch.summary_dicts[sum_op2.summary_name], HedTagSummary) + # counts2 = dispatch.summary_dicts[sum_op2.summary_name].summary_dict['subj2_run1'] + # self.assertIsInstance(counts2, HedTagCounts) + # 
self.assertEqual(len(counts2.tag_dict), len(counts1.tag_dict) + 1) + # self.assertIn('event-context', counts2.tag_dict) + # self.assertIn('def', counts2.tag_dict) + # self.assertNotIn('task', counts2.tag_dict) + # self.assertNotIn('condition-variable', counts2.tag_dict) + + # no replace, context, types removed + parms3 = json.loads(self.json_parms) + parms3["include_context"] = True + parms3["replace_defs"] = True + parms3["summary_name"] = "tag summary 3" + sum_op3 = SummarizeHedTagsOp(parms3) + df_new3 = sum_op3.do_op(dispatch, dispatch.prep_data(df), 'subj2_run1', sidecar=self.json_path) + self.assertIsInstance(sum_op3, SummarizeHedTagsOp, "constructor creates an object of the correct type") + self.assertEqual(200, len(df_new3), "summarize_hed_type_op dataframe length is correct") + self.assertEqual(10, len(df_new3.columns), "summarize_hed_type_op has correct number of columns") + self.assertIn(sum_op3.summary_name, dispatch.summary_dicts) + self.assertIsInstance(dispatch.summary_dicts[sum_op3.summary_name], HedTagSummary) + counts3 = dispatch.summary_dicts[sum_op3.summary_name].summary_dict['subj2_run1'] + self.assertIsInstance(counts3, HedTagCounts) + self.assertEqual(32, len(counts3.tag_dict)) + # self.assertIn('event-context', counts3.tag_dict) TODO: Fix this + self.assertNotIn('def', counts3.tag_dict) + self.assertNotIn('task', counts3.tag_dict) + self.assertNotIn('condition-variable', counts3.tag_dict) def test_quick3(self): - from hed.models import TabularInput, Sidecar - from hed.schema import load_schema_version - from hed.tools.analysis.hed_tag_counts import HedTagCounts - from io import StringIO - my_schema = load_schema_version('8.1.0') + include_context = True + replace_defs = True + remove_types = [] + my_schema = load_schema_version('8.2.0') my_json = { "code": { "HED": { @@ -89,19 +165,16 @@ def test_quick3(self): data = [[0.5, 0, 'code1', 'Description/This is a test, Label/Temp, (Def/Blech1, Green)'], [0.6, 0, 'code2', 'Sensory-event, 
((Description/Animal, Condition-variable/Blech))']] df = pd.DataFrame(data, columns=['onset', 'duration', 'code', 'HED']) - input_data = TabularInput(df, sidecar=my_sidecar) + input_data = TabularInput(df, sidecar=my_sidecar, name="myName") + tag_man = HedTagManager(EventManager(input_data, my_schema), remove_types=remove_types) counts = HedTagCounts('myName', 2) summary_dict = {} - hed_strings, definitions = get_assembled(input_data, my_sidecar, my_schema, extra_def_dicts=None, join_columns=True, - shrink_defs=False, expand_defs=True) - for hed in hed_strings: - counts.update_event_counts(hed, 'myName') - summary_dict['myName'] = counts + # hed_objs = tag_man.get_hed_objs(include_context=include_context, replace_defs=replace_defs) + # for hed in hed_objs: + # counts.update_event_counts(hed, 'myName') + # summary_dict['myName'] = counts def test_quick4(self): - from hed.models import TabularInput, Sidecar - from hed.schema import load_schema_version - from hed.tools.analysis.hed_tag_counts import HedTagCounts path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../../data/remodel_tests/')) data_path = os.path.realpath(os.path.join(path, 'sub-002_task-FacePerception_run-1_events.tsv')) @@ -183,7 +256,7 @@ def test_sample_example(self): "Participant-response"], "Objects": ["Item"] }, - "expand_context": False + "include_context": False }}] sample_data = [[0.0776, 0.5083, 'go', 'n/a', 0.565, 'correct', 'right', 'female'], diff --git a/tests/validator/test_hed_validator.py b/tests/validator/test_hed_validator.py index 0e3bcdfab..e5338d6d0 100644 --- a/tests/validator/test_hed_validator.py +++ b/tests/validator/test_hed_validator.py @@ -107,7 +107,7 @@ def test_complex_file_validation_invalid(self): def test_complex_file_validation_invalid_definitions_removed(self): # todo: update this/remove - # This verifies definitions are being removed from sidecar strings before being added, or it will produce + # This verifies type_defs are being 
removed from sidecar strings before being added, or it will produce # extra errors. schema_path = os.path.realpath(os.path.join(os.path.dirname(__file__), '../data/validator_tests/bids_schema.mediawiki'))