# Message functions from hed/errors/error_messages.py, in their post-patch form.
# Only functions whose decorator, signature and body are all visible in the patch are included.


# NOTE(review): the "val_error_val_error_" prefix is doubled; it looks like a search-and-replace
# slip. The name is left as is here in case it is referenced elsewhere - confirm and rename.
@hed_tag_error(ValidationErrors.INVALID_VALUE_CLASS_CHARACTER, has_sub_tag=False,
               actual_code=ValidationErrors.CHARACTER_INVALID)
def val_error_val_error_invalid_value_class_character(tag, problem_tag, value_class):
    """ Return the message for a character that is invalid for the tag's value class. """
    return f"Invalid character '{problem_tag}' in tag '{tag}' for value class '{value_class}'"


@hed_tag_error(ValidationErrors.INVALID_VALUE_CLASS_VALUE, has_sub_tag=True,
               actual_code=ValidationErrors.VALUE_INVALID)
def val_error_invalid_value_class_value(tag, problem_tag, value_class):
    """ Return the message for a value that is invalid for the tag's value class.

    Notes:
        - problem_tag is accepted but does not appear in the message.
    """
    return f"'{tag}' has an invalid value portion for value class '{value_class}'"


@hed_tag_error(ValidationErrors.HED_PLACEHOLDER_OUT_OF_CONTEXT, has_sub_tag=False,
               actual_code=ValidationErrors.PLACEHOLDER_INVALID)
def val_error_hed_placeholder_out_of_context(tag):
    """ Return the message for a '#' placeholder that is misplaced or was never replaced. """
    return f"'{tag}' has a '#' placeholder where it is not allowed or where it should have been replaced with a value."


@hed_tag_error(ValidationErrors.CURLY_BRACE_UNSUPPORTED_HERE, has_sub_tag=False,
               actual_code=SidecarErrors.SIDECAR_BRACES_INVALID)
def val_error_curly_brace_unsupported_here(tag, problem_tag):
    """ Return the message for curly braces used where they are not permitted. """
    return (f"Curly braces are only permitted in sidecars, fully wrapping text in place of a tag. "
            f"Invalid character '{problem_tag}' in tag '{tag}'")


@hed_error(ValidationErrors.ONSETS_UNORDERED, default_severity=ErrorSeverity.WARNING)
def val_error_onsets_unordered():
    """ Return the warning message for onsets that are not in increasing order. """
    return "Onsets need to be temporally increasing and defined for many downstream tools to work."


@hed_error(ValidationErrors.HED_RESERVED_TAG_REPEATED, actual_code=ValidationErrors.TAG_GROUP_ERROR)
def val_error_duplicate_reserved_tag(tag, group):
    """ Return the message for a repeated reserved tag or several reserved tags in one group. """
    return f'Repeated reserved tag "{tag}" or multiple reserved tags in group "{group}"'


@hed_error(ValidationErrors.HED_RESERVED_TAG_GROUP_ERROR, actual_code=ValidationErrors.TAG_GROUP_ERROR)
def val_error_group_for_reserved_tag(group, group_count):
    """ Return the message for a reserved-tag group with the wrong number of subgroups.

    Parameters:
        group: The offending group; it is interpolated into the message.
        group_count: The number of non-def-expand subgroups that were found.

    Returns:
        str: The formatted message.
    """
    # The two halves of this message used to end and start with a literal double quote,
    # which left stray '"' characters in the middle and at the end of the text.
    return (f'The number of non-def-expand subgroups for group "{group}" is {group_count}, '
            f'which does not meet reserved tag requirements.')


@hed_tag_error(ValidationErrors.HED_TOP_LEVEL_TAG, actual_code=ValidationErrors.TAG_GROUP_ERROR)
def val_error_top_level_tag(tag):
    """ Return the message for a top-level-only tag found somewhere else. """
    return f'Tag "{tag}" must be in a top level group but was found in another location.'


@hed_tag_error(ValidationErrors.HED_TAG_GROUP_TAG, actual_code=ValidationErrors.TAG_GROUP_ERROR)
def val_error_tag_group_tag(tag):
    """ Return the message for a group-only tag found outside a group. """
    return f'Tag "{tag}" that must be in a group was found in another location.'


@hed_tag_error(ValidationErrors.HED_TAGS_NOT_ALLOWED, actual_code=ValidationErrors.TAG_GROUP_ERROR)
def val_error_tags_in_group_with_reserved(tag, group):
    """ Return the message for a tag that may not share a group with the group's other contents. """
    return f'Tag "{tag}" is not allowed with the other tag(s) or Def-expand sub-group in group "{group}"'


@hed_error(ValidationErrors.REQUIRED_TAG_MISSING)
def val_warning_required_prefix_missing(tag_namespace):
    """ Return the message for a missing tag with the given namespace. """
    return f"Tag with namespace '{tag_namespace}' is required"


@hed_error(SidecarErrors.BLANK_HED_STRING)
def sidecar_error_blank_hed_string():
    """ Return the message for a Value or Category column that has no HED string. """
    return "No HED string found for Value or Category column."


@hed_error(SidecarErrors.SIDECAR_HED_USED, actual_code=ValidationErrors.SIDECAR_INVALID)
def sidecar_hed_used():
    """ Return the message for 'HED' used as a sidecar name in an unexpected place. """
    return "'HED' is a reserved name and cannot be used as a sidecar except in expected places."


@hed_error(SidecarErrors.SIDECAR_HED_USED_COLUMN, actual_code=ValidationErrors.SIDECAR_INVALID)
def sidecar_hed_used_column():
    """ Return the message for 'HED' used as a sidecar column name. """
    return "'HED' is a reserved name and cannot be used as a sidecar column name"


@hed_tag_error(TemporalErrors.ONSET_NO_DEF_TAG_FOUND, actual_code=ValidationErrors.TEMPORAL_TAG_ERROR)
def onset_no_def_found(tag):
    """ Return the message for a temporal tag without exactly one def tag or def-expand group. """
    return f"'{tag}' tag has no def tag or def-expand group or too many when 1 is required in string."


@hed_tag_error(TemporalErrors.DURATION_WRONG_NUMBER_GROUPS, actual_code=ValidationErrors.TEMPORAL_TAG_ERROR)
def onset_duration_wrong_number_groups(tag, tag_list):
    """ Return the message for a duration or delay tag with the wrong number of child groups.

    Parameters:
        tag: The duration or delay tag; it is interpolated into the message.
        tag_list (iterable): The groups that were found; each is converted with str().

    Returns:
        str: The formatted message, including the count and the string forms of tag_list.
    """
    tag_list_strings = [str(a_tag) for a_tag in tag_list]
    # A space was missing after the quoted tag and between the two sentences.
    return (f"A duration and/or delay tag '{tag}' should have exactly one child group. "
            f"Found {len(tag_list_strings)}: {tag_list_strings}")


@hed_tag_error(TemporalErrors.DURATION_HAS_OTHER_TAGS, actual_code=ValidationErrors.TEMPORAL_TAG_ERROR)
def onset_duration_has_other_tags(tag):
    """ Return the message for a tag that is grouped directly with Duration or Delay. """
    return f"Tag '{tag}' should not be grouped with Duration or Delay. Context tags should be in a sub-group."
diff --git a/hed/errors/error_types.py b/hed/errors/error_types.py index 1ca1e047d..ebe58acbc 100644 --- a/hed/errors/error_types.py +++ b/hed/errors/error_types.py @@ -49,7 +49,6 @@ class ValidationErrors: TAG_REQUIRES_CHILD = 'TAG_REQUIRES_CHILD' TILDES_UNSUPPORTED = 'TILDES_UNSUPPORTED' UNITS_INVALID = 'UNITS_INVALID' - UNITS_MISSING = 'UNITS_MISSING' VERSION_DEPRECATED = 'VERSION_DEPRECATED' VALUE_INVALID = 'VALUE_INVALID' @@ -63,6 +62,8 @@ class ValidationErrors: HED_DEF_EXPAND_VALUE_MISSING = "HED_DEF_EXPAND_VALUE_MISSING" HED_DEF_EXPAND_VALUE_EXTRA = "HED_DEF_EXPAND_VALUE_EXTRA" + HED_RESERVED_TAG_REPEATED = 'HED_RESERVED_TAG_REPEATED' + HED_RESERVED_TAG_GROUP_ERROR = 'HED_RESERVED_TAG_GROUP_ERROR' HED_TAG_REPEATED = 'HED_TAG_REPEATED' HED_TAG_REPEATED_GROUP = 'HED_TAG_REPEATED_GROUP' @@ -71,10 +72,10 @@ class ValidationErrors: NODE_NAME_EMPTY = 'NODE_NAME_EMPTY' HED_LIBRARY_UNMATCHED = "HED_LIBRARY_UNMATCHED" - HED_TOP_LEVEL_TAG = "HED_TOP_LEVEL_TAG" HED_MULTIPLE_TOP_TAGS = "HED_MULTIPLE_TOP_TAGS" HED_TAG_GROUP_TAG = "HED_TAG_GROUP_TAG" + HED_TAGS_NOT_ALLOWED = "HED_TAGS_NOT_ALLOWED" HED_GROUP_EMPTY = 'HED_GROUP_EMPTY' # end internal codes @@ -92,7 +93,8 @@ class ValidationErrors: INVALID_VALUE_CLASS_VALUE = 'INVALID_VALUE_CLASS_VALUE' INVALID_TAG_CHARACTER = 'invalidTagCharacter' - CURLY_BRACE_UNSUPPORTED_HERE = "CURLY_BRACE_UNSUPPORTED_HERE" + HED_PLACEHOLDER_OUT_OF_CONTEXT = 'HED_PLACEHOLDER_OUT_OF_CONTEXT' + CURLY_BRACE_UNSUPPORTED_HERE = 'CURLY_BRACE_UNSUPPORTED_HERE' ONSETS_UNORDERED = "ONSETS_UNORDERED" @@ -103,9 +105,9 @@ class SidecarErrors: INVALID_POUND_SIGNS_VALUE = 'invalidNumberPoundSigns' INVALID_POUND_SIGNS_CATEGORY = 'tooManyPoundSigns' UNKNOWN_COLUMN_TYPE = 'sidecarUnknownColumn' - SIDECAR_HED_USED_COLUMN = 'SIDECAR_HED_USED_COLUMN' + SIDECAR_HED_USED_COLUMN = 'sidecar_hed_used_column' SIDECAR_NA_USED = 'SIDECAR_NA_USED' - SIDECAR_HED_USED = 'SIDECAR_HED_USED' + SIDECAR_HED_USED = 'sidecar_hed_used' SIDECAR_BRACES_INVALID = 
"SIDECAR_BRACES_INVALID" diff --git a/hed/models/base_input.py b/hed/models/base_input.py index 2b9c819f4..c483ca5e0 100644 --- a/hed/models/base_input.py +++ b/hed/models/base_input.py @@ -482,10 +482,11 @@ def _load_excel_file(self, file, has_column_names): loaded_worksheet = self.get_worksheet(self._worksheet_name) self._dataframe = self._get_dataframe_from_worksheet(loaded_worksheet, has_column_names) except Exception as e: - raise HedFileError(HedExceptions.INVALID_FILE_FORMAT, f"Failed to load Excel file: {str(e)}", self.name) from e + raise HedFileError(HedExceptions.INVALID_FILE_FORMAT, + f"Failed to load Excel file: {str(e)}", self.name) from e def _load_text_file(self, file, pandas_header): - """ Load an text file""" + """ Load a text file""" if isinstance(file, str) and os.path.exists(file) and os.path.getsize(file) == 0: self._dataframe = pd.DataFrame() # Handle empty file return diff --git a/hed/models/def_expand_gather.py b/hed/models/def_expand_gather.py index 4f0b58cbb..038f066b8 100644 --- a/hed/models/def_expand_gather.py +++ b/hed/models/def_expand_gather.py @@ -1,208 +1,229 @@ -""" -Classes to resolve ambiguities, gather, expand definitions. -""" -import pandas as pd -from hed.models.definition_dict import DefinitionDict -from hed.models.definition_entry import DefinitionEntry -from hed.models.hed_string import HedString - - -class AmbiguousDef: - """ Determine whether expanded definitions are consistent. 
""" - def __init__(self): - self.actual_defs = [] - self.placeholder_defs = [] - - def add_def(self, def_tag, def_expand_group): - group_tag = def_expand_group.get_first_group() - def_extension = def_tag.extension.split('/')[-1] - self.actual_defs.append(group_tag) - group_tag = group_tag.copy() - matching_tags = [tag for tag in group_tag.get_all_tags() if - tag.extension == def_extension] - - for tag in matching_tags: - tag.extension = "#" - self.placeholder_defs.append(group_tag) - - def validate(self): - """ Validate the given ambiguous definition. - - Returns: - bool: True if this is a valid definition with exactly 1 placeholder. - - raises: - ValueError: Raised if this is an invalid(not ambiguous) definition. - """ - # todo: improve this and get_group - placeholder_group = self.get_group() - if not placeholder_group: - raise ValueError("Invalid Definition") - placeholder_mask = [(tag.extension == "#") for tag in placeholder_group.get_all_tags()] - all_tags_list = [group.get_all_tags() for group in self.actual_defs] - for tags, placeholder in zip(zip(*all_tags_list), placeholder_mask): - if placeholder: - continue - - tag_set = set(tag.extension for tag in tags) - if len(tag_set) > 1: - raise ValueError("Invalid Definition") - - return placeholder_mask.count(True) == 1 - - @staticmethod - def _get_matching_value(tags): - """ Get the matching value for a set of HedTag extensions. - - Parameters: - tags (iterator): The list of HedTags to find a matching value for. - - Returns: - str or None: The matching value if found, None otherwise. 
- """ - extensions = [tag.extension for tag in tags] - unique_extensions = set(extensions) - - if len(unique_extensions) == 1: - return unique_extensions.pop() - elif "#" in unique_extensions: - unique_extensions.remove("#") - if len(unique_extensions) == 1: - return unique_extensions.pop() - return None - - def get_group(self): - new_group = self.placeholder_defs[0].copy() - - all_tags_list = [group.get_all_tags() for group in self.placeholder_defs] - for tags, new_tag in zip(zip(*all_tags_list), new_group.get_all_tags()): - matching_val = self._get_matching_value(tags) - if matching_val is None: - return None - new_tag.extension = matching_val - - return new_group - - -class DefExpandGatherer: - """ Gather definitions from a series of def-expands, including possibly ambiguous ones. """ - def __init__(self, hed_schema, known_defs=None, ambiguous_defs=None, errors=None): - """Initialize the DefExpandGatherer class. - - Parameters: - hed_schema (HedSchema): The HED schema to be used for processing. - known_defs (str or list or DefinitionDict): A dictionary of known definitions. - ambiguous_defs (dict, optional): A dictionary of ambiguous def-expand definitions. - - """ - self.hed_schema = hed_schema - self.ambiguous_defs = ambiguous_defs if ambiguous_defs else {} - self.errors = errors if errors else {} - self.def_dict = DefinitionDict(known_defs, self.hed_schema) - - def process_def_expands(self, hed_strings, known_defs=None): - """Process the HED strings containing def-expand tags. - - Parameters: - hed_strings (pd.Series or list): A Pandas Series or list of HED strings to be processed. - known_defs (dict, optional): A dictionary of known definitions to be added. - - Returns: - tuple: A tuple containing the DefinitionDict, ambiguous definitions, and errors. 
- """ - if not isinstance(hed_strings, pd.Series): - hed_strings = pd.Series(hed_strings) - - def_expand_mask = hed_strings.str.contains('Def-Expand/', case=False) - - if known_defs: - self.def_dict.add_definitions(known_defs, self.hed_schema) - for i in hed_strings[def_expand_mask].index: - string = hed_strings.loc[i] - self._process_def_expand(string) - - return self.def_dict, self.ambiguous_defs, self.errors - - def _process_def_expand(self, string): - """Process a single HED string to extract definitions and handle known and ambiguous definitions. - - Parameters: - string (str): The HED string to be processed. - """ - hed_str = HedString(string, self.hed_schema) - - for def_tag, def_expand_group, def_group in hed_str.find_def_tags(recursive=True): - if def_tag == def_expand_group: - continue - - if not self._handle_known_definition(def_tag, def_expand_group, def_group): - self._handle_ambiguous_definition(def_tag, def_expand_group) - - def _handle_known_definition(self, def_tag, def_expand_group, def_group): - """Handle known def-expand tag in a HED string. - - Parameters: - def_tag (HedTag): The def-expand tag. - def_expand_group (HedGroup): The group containing the def-expand tag. - def_group (HedGroup): The group containing the def-expand group. - - Returns: - bool: True if the def-expand tag is known and handled, False otherwise. 
- """ - def_tag_name = def_tag.extension.split('/')[0] - def_group_contents = self.def_dict._get_definition_contents(def_tag) - def_expand_group.sort() - - if def_group_contents: - if def_group_contents != def_expand_group: - self.errors.setdefault(def_tag_name.casefold(), []).append(def_expand_group.get_first_group()) - return True - - has_extension = "/" in def_tag.extension - if not has_extension: - group_tag = def_expand_group.get_first_group() - self.def_dict.defs[def_tag_name.casefold()] = DefinitionEntry(name=def_tag_name, contents=group_tag, - takes_value=False, - source_context=[]) - return True - - # this is needed for the cases where we have a definition with errors, but it's not a known definition. - if def_tag_name.casefold() in self.errors: - self.errors.setdefault(f"{def_tag_name.casefold()}", []).append(def_expand_group.get_first_group()) - return True - - return False - - def _handle_ambiguous_definition(self, def_tag, def_expand_group): - """ Handle ambiguous def-expand tag in a HED string. - - Parameters: - def_tag (HedTag): The def-expand tag. - def_expand_group (HedGroup): The group containing the def-expand tag. - """ - def_tag_name = def_tag.extension.split('/')[0] - these_defs = self.ambiguous_defs.setdefault(def_tag_name.casefold(), AmbiguousDef()) - these_defs.add_def(def_tag, def_expand_group) - - try: - if these_defs.validate(): - new_contents = these_defs.get_group() - self.def_dict.defs[def_tag_name.casefold()] = DefinitionEntry(name=def_tag_name, contents=new_contents, - takes_value=True, - source_context=[]) - del self.ambiguous_defs[def_tag_name.casefold()] - except ValueError: - for ambiguous_def in these_defs.placeholder_defs: - self.errors.setdefault(def_tag_name.casefold(), []).append(ambiguous_def) - del self.ambiguous_defs[def_tag_name.casefold()] - - return - - @staticmethod - def get_ambiguous_group(ambiguous_def): - """Turn an entry in the ambiguous_defs dict into a single HedGroup. 
class AmbiguousDef:
    """ Determine whether expanded definitions are consistent.

    Notes:
        - Stores the contents of every Def-expand seen for one definition name, keyed by the value
          that appeared in that expansion.
        - resolve_definition uses the stored contents to work out which tag held the value.

    """
    def __init__(self):
        # 'Definition/<name>/#'; set when the first expansion is added.
        self.def_tag_name = None
        # Maps the value used in an expansion to a copy of that expansion's contents.
        self.actual_contents = {}
        # Short base tag names that could hold the value in every expansion added so far.
        self.matching_names = None
        # Contents with the value replaced by '#'; set by a successful resolve_definition.
        self.resolved_definition = None

    def add_def(self, def_tag, def_expand_group):
        """ Adds a definition to this ambiguous definition.

        Parameters:
            def_tag (HedTag): The Def-expand tag representing this definition.
            def_expand_group (HedGroup): The Definition group including the tag and contents.

        Raises:
            ValueError: If the contents differ from those already stored for the same value,
                if no takes-value tag in the contents has the value as its extension, or if no
                tag name is a possible placeholder in every expansion added so far.

        Notes:
            - The extension of def_tag must have the form name/value. DefExpandGatherer only
              reaches this method for extensions that contain a '/'.

        """
        orig_group = def_expand_group.get_first_group()
        def_extension = def_tag.extension.split('/')
        def_value = def_extension[1]
        existing_contents = self.actual_contents.get(def_value, None)
        if existing_contents and existing_contents != orig_group:
            raise ValueError("Invalid Definition")
        elif existing_contents:
            # This value was already recorded with identical contents; nothing new to learn.
            return
        # Stored before the checks below, so a rejected expansion is still available for reporting.
        self.actual_contents[def_value] = orig_group.copy()
        if self.def_tag_name is None:
            self.def_tag_name = 'Definition/' + def_extension[0] + '/#'
        matching_tags = [tag for tag in orig_group.get_all_tags() if
                         tag.extension == def_value and tag.is_takes_value_tag()]
        if not matching_tags:
            raise ValueError("Invalid Definition")
        matching_names = {tag.short_base_tag for tag in matching_tags}
        # Only names that are candidates in every expansion can be the placeholder tag.
        if self.matching_names is not None:
            self.matching_names = self.matching_names & matching_names
        else:
            self.matching_names = matching_names
        if len(self.matching_names) == 0:
            raise ValueError("Invalid Definition")

    def resolve_definition(self):
        """ Try to resolve the definition based on the information available.

        Returns:
            bool: True if exactly one tag can be the placeholder, in which case resolved_definition
                is set. False if nothing has been added yet or the choice is still ambiguous.

        Raises:
            ValueError: If no tag is consistent with every stored expansion, or if more than one
                but fewer tags than stored expansions are consistent.

        Notes:
            - If the definition has already been resolved, this rechecks based on the information.

        """
        contents_items = list(self.actual_contents.items())
        if not contents_items:
            # Nothing recorded, so there is nothing to resolve (indexing the first entry would fail).
            return False
        candidate_contents = contents_items[0][1].copy()
        # NOTE(review): relies on HedGroup.find_tags returning the tag objects that live inside
        # candidate_contents, so that editing tag.extension changes candidate_contents - confirm.
        match_tags = candidate_contents.find_tags(self.matching_names, True, include_groups=0)
        candidate_tags = []
        for tag in match_tags:
            is_match = True
            tag_extension = tag.extension
            for other_value, other_contents in contents_items[1:]:
                # Substitute the other expansion's value into this tag and compare whole contents.
                tag.extension = other_value
                is_match = candidate_contents == other_contents
                tag.extension = tag_extension
                if not is_match:
                    break
            if is_match:
                candidate_tags.append(tag)
        if len(candidate_tags) == 1:
            candidate_tags[0].extension = '#'
            self.resolved_definition = candidate_contents
            return True
        if len(candidate_tags) == 0 or (1 < len(candidate_tags) < len(contents_items)):
            raise ValueError("Invalid Definition")
        return False

    def get_definition_string(self):
        """ Return the resolved definition as a string.

        Returns:
            str or None: '(<def_tag_name>, <resolved contents>)', or None if the name or the
                resolved contents have not been set.

        """
        if self.def_tag_name is None or self.resolved_definition is None:
            return None
        return f"({self.def_tag_name}, {str(self.resolved_definition)})"


class DefExpandGatherer:
    """ Gather definitions from a series of def-expands, including possibly ambiguous ones.

    Notes: The def-dict contains the known definitions. After validation, it also contains resolved definitions.
    The errors contain the definition contents that are known to be in error. The ambiguous_defs contain the definitions
    that cannot be resolved based on the data.

    """
    def __init__(self, hed_schema, known_defs=None, ambiguous_defs=None, errors=None):
        """Initialize the DefExpandGatherer class.

        Parameters:
            hed_schema (HedSchema): The HED schema to be used for processing.
            known_defs (str or list or DefinitionDict): A dictionary of known definitions.
            ambiguous_defs (dict, optional): A dictionary of ambiguous def-expand definitions.
            errors (dict, optional): Maps a casefolded definition name to a list of contents in error.

        Notes:
            - An empty ambiguous_defs or errors dict is replaced by a new dict rather than reused.

        """
        self.hed_schema = hed_schema
        self.ambiguous_defs = ambiguous_defs if ambiguous_defs else {}
        self.errors = errors if errors else {}
        self.def_dict = DefinitionDict(known_defs, self.hed_schema)

    def process_def_expands(self, hed_strings, known_defs=None):
        """Process the HED strings containing def-expand tags.

        Parameters:
            hed_strings (pd.Series or list): A Pandas Series or list of HED strings to be processed.
            known_defs (dict, optional): A dictionary of known definitions to be added.

        Returns:
            tuple: A tuple containing the DefinitionDict, ambiguous definitions, and errors.
        """
        if not isinstance(hed_strings, pd.Series):
            hed_strings = pd.Series(hed_strings)

        # na=False keeps the mask boolean when an entry is missing; a mask that contains NaN
        # cannot be used for indexing.
        def_expand_mask = hed_strings.str.contains('Def-Expand/', case=False, na=False)

        if known_defs:
            self.def_dict.add_definitions(known_defs, self.hed_schema)
        for i in hed_strings[def_expand_mask].index:
            string = hed_strings.loc[i]
            self._process_def_expand(string)
        # Resolution is deferred until every string has been seen.
        self._resolve_ambiguous()
        return self.def_dict, self.ambiguous_defs, self.errors

    def _process_def_expand(self, string):
        """Process a single HED string to extract definitions and handle known and ambiguous definitions.

        Parameters:
            string (str): The HED string to be processed.
        """
        hed_str = HedString(string, self.hed_schema)
        hed_str.sort()
        for def_tag, def_expand_group, def_group in hed_str.find_def_tags(recursive=True):
            # NOTE(review): presumably a plain Def tag is reported as its own group - confirm
            # against HedString.find_def_tags.
            if def_tag == def_expand_group:
                continue

            if not self._handle_known_definition(def_tag, def_expand_group):
                self._handle_ambiguous_definition(def_tag, def_expand_group)

    def _handle_known_definition(self, def_tag, def_expand_group):
        """Handle known def-expand tag in a HED string.

        Parameters:
            def_tag (HedTag): The def-expand tag.
            def_expand_group (HedGroup): The group containing the entire Def-expand tag and its group.

        Returns:
            bool: True if the def-expand tag is known and handled, False otherwise.
        """
        def_tag_name = def_tag.extension.split('/')[0]
        def_key = def_tag_name.casefold()
        def_group_contents = self.def_dict._get_definition_contents(def_tag)
        def_expand_group.sort()

        # If this definition is already known, make sure it agrees.
        if def_group_contents:
            if def_group_contents != def_expand_group:
                self.errors.setdefault(def_key, []).append(def_expand_group.get_first_group())
            return True

        # Without a value there is no placeholder to find, so the contents are the definition.
        if "/" not in def_tag.extension:
            group_tag = def_expand_group.get_first_group()
            self.def_dict.defs[def_key] = DefinitionEntry(name=def_tag_name, contents=group_tag,
                                                          takes_value=False,
                                                          source_context=[])
            return True

        # this is needed for the cases where we have a definition with errors, but it's not a known definition.
        if def_key in self.errors:
            self.errors[def_key].append(def_expand_group.get_first_group())
            return True

        return False

    def _handle_ambiguous_definition(self, def_tag, def_expand_group):
        """ Handle ambiguous def-expand tag in a HED string.

        Parameters:
            def_tag (HedTag): The def-expand tag.
            def_expand_group (HedGroup): The group containing the def-expand tag.

        Notes:
            - If the expansion is rejected, the contents stored so far for this name are moved
              to the errors and the name is no longer tracked as ambiguous.
        """
        def_key = def_tag.extension.split('/')[0].casefold()

        # Get (or create) the AmbiguousDef object associated with this def_tag name.
        these_defs = self.ambiguous_defs.setdefault(def_key, AmbiguousDef())
        try:
            these_defs.add_def(def_tag, def_expand_group)
        except ValueError:
            self.errors.setdefault(def_key, []).extend(these_defs.actual_contents.values())
            del self.ambiguous_defs[def_key]

    def _resolve_ambiguous(self):
        """ Do a final validation on each ambiguous group.

        Notes:
            - Resolved definitions are added to the def_dict and removed from ambiguous_defs.
            - If found to be invalid, the ambiguous definition contents are transferred to the errors.
            - Definitions that cannot be decided yet stay in ambiguous_defs.

        """
        # Removals are collected and applied afterwards because a dict must not change size
        # while it is being iterated.
        delete_list = []
        for def_name, ambiguous_def in self.ambiguous_defs.items():
            try:
                if not ambiguous_def.resolve_definition():
                    continue
                def_string = ambiguous_def.get_definition_string()
                if def_string is None:
                    # Skip just this entry. Returning here would abandon the remaining
                    # definitions and the deletions already queued in delete_list.
                    continue
                self.def_dict.add_definitions(def_string, self.hed_schema)
                delete_list.append(def_name)
            except ValueError:
                self.errors.setdefault(def_name, []).extend(ambiguous_def.actual_contents.values())
                delete_list.append(def_name)

        for def_name in delete_list:
            del self.ambiguous_defs[def_name]
new_def_issues, context - def _validate_placeholders(self, def_tag_name, group, def_takes_value, error_handler): + @staticmethod + def _validate_placeholders(def_tag_name, group, def_takes_value, error_handler): + """ Check the definition for the correct placeholders (exactly 1 placeholder when takes value). + + Parameters: + def_tag_name (str): The name of the definition without any Definition tag or value. + group (HedGroup): The contents of the definition. + def_takes_value (bool): True if the definition takes a value (should have #). + error_handler (ErrorHandler or None): Error context used to identify where definitions are found. + + Returns: + list: List of issues encountered in checking for definitions. Each issue is a dictionary. + """ new_issues = [] placeholder_tags = [] tags_with_issues = [] + + # Find the tags that have # in their strings and return issues of count > 1. if group: for tag in group.get_all_tags(): count = str(tag).count("#") @@ -186,7 +201,7 @@ def _validate_placeholders(self, def_tag_name, group, def_takes_value, error_han def_name=def_tag_name, tag_list=tags_with_issues, expected_count=1 if def_takes_value else 0) - + # Make sure placeholder count is correct. if (len(placeholder_tags) == 1) != def_takes_value: new_issues += ErrorHandler.format_error_with_context(error_handler, DefinitionErrors.WRONG_NUMBER_PLACEHOLDER_TAGS, @@ -195,6 +210,7 @@ def _validate_placeholders(self, def_tag_name, group, def_takes_value, error_han expected_count=1 if def_takes_value else 0) return new_issues + # Make sure that the tag with the placeholder is allowed to take a value. 
if def_takes_value: placeholder_tag = placeholder_tags[0] if not placeholder_tag.is_takes_value_tag(): @@ -205,16 +221,21 @@ def _validate_placeholders(self, def_tag_name, group, def_takes_value, error_han return new_issues - def _find_group(self, definition_tag, group, error_handler): + @staticmethod + def _find_group(definition_tag, group, error_handler): + """ Check the definition for the correct placeholders (exactly 1 placeholder when takes value). + + Parameters: + definition_tag (HedTag): The Definition tag itself. + group (HedGroup): The entire definition group include the Definition tag. + error_handler (ErrorHandler or None): Error context used to identify where definitions are found. + + Returns: + list: List of issues encountered in checking for definitions. Each issue is a dictionary. + """ # initial validation groups = group.groups() issues = [] - # tags = group.tags() - # if len(tags) != 1: - # issues += \ - # ErrorHandler.format_error_with_context(error_handler, - # DefinitionErrors.WRONG_NUMBER_TAGS, - # def_name=definition_tag.extension, tag_list=tags) if len(groups) > 1: issues += \ ErrorHandler.format_error_with_context(error_handler, @@ -237,7 +258,8 @@ def _find_group(self, definition_tag, group, error_handler): return group_tag, issues - def _validate_contents(self, definition_tag, group, error_handler): + @staticmethod + def _validate_contents(definition_tag, group, error_handler): issues = [] if group: def_keys = {DefTagNames.DEF_KEY, DefTagNames.DEF_EXPAND_KEY, DefTagNames.DEFINITION_KEY} diff --git a/hed/models/definition_entry.py b/hed/models/definition_entry.py index 492272acf..41255c256 100644 --- a/hed/models/definition_entry.py +++ b/hed/models/definition_entry.py @@ -11,7 +11,7 @@ def __init__(self, name, contents, takes_value, source_context): Parameters: name (str): The label portion of this name (not including Definition/). - contents (HedGroup): The contents of this definition. 
+ contents (HedGroup): The contents of this definition (which could be None). takes_value (bool): If True, expects ONE tag to have a single # sign in it. source_context (list, None): List (stack) of dictionaries giving context for reporting errors. """ diff --git a/hed/models/df_util.py b/hed/models/df_util.py index c7caf16a7..609ab5c84 100644 --- a/hed/models/df_util.py +++ b/hed/models/df_util.py @@ -253,12 +253,10 @@ def filter_series_by_onset(series, onsets): Returns: Series or Dataframe: the series with rows filtered together. """ - #indexed_dict = _indexed_dict_from_onsets(pd.to_numeric(onsets, errors='coerce')) - #return _filter_by_index_list(series, indexed_dict=indexed_dict) + indexed_dict = _indexed_dict_from_onsets(pd.to_numeric(onsets, errors='coerce')) - y = _filter_by_index_list(series, indexed_dict=indexed_dict) + y = _filter_by_index_list(series, indexed_dict=indexed_dict) return y - # return _filter_by_index_list(series, indexed_dict=indexed_dict) def _indexed_dict_from_onsets(onsets): @@ -268,7 +266,7 @@ def _indexed_dict_from_onsets(onsets): indexed_dict = defaultdict(list) for i, onset in enumerate(onsets): - if math.isnan(onset): # Ignore NaNs + if math.isnan(onset): # Ignore NaNs continue if abs(onset - current_onset) > tol: current_onset = onset @@ -276,19 +274,6 @@ def _indexed_dict_from_onsets(onsets): return indexed_dict -# def _indexed_dict_from_onsets(onsets): -# """Finds series of consecutive lines with the same(or close enough) onset""" -# current_onset = -1000000.0 -# tol = 1e-9 -# from collections import defaultdict -# indexed_dict = defaultdict(list) -# for i, onset in enumerate(onsets): -# if abs(onset - current_onset) > tol: -# current_onset = onset -# indexed_dict[current_onset].append(i) -# -# return indexed_dict - def _filter_by_index_list(original_data, indexed_dict): """Filters a series or dataframe by the indexed_dict, joining lines as indicated""" diff --git a/hed/models/hed_tag.py b/hed/models/hed_tag.py index 
647f64639..405288c05 100644 --- a/hed/models/hed_tag.py +++ b/hed/models/hed_tag.py @@ -1,662 +1,664 @@ -""" A single HED tag. """ -from hed.schema.hed_schema_constants import HedKey -import copy -from hed.models.model_constants import DefTagNames - - -class HedTag: - """ A single HED tag. - - Notes: - - HedTag is a smart class in that it keeps track of its original value and positioning - as well as pointers to the relevant HED schema information, if relevant. - - """ - - def __init__(self, hed_string, hed_schema, span=None, def_dict=None): - """ Creates a HedTag. - - Parameters: - hed_string (str): Source HED string for this tag. - hed_schema (HedSchema): A parameter for calculating canonical forms on creation. - span (int, int): The start and end indexes of the tag in the hed_string. - def_dict(DefinitionDict or None): The def dict to use to identify def/def expand tags. - """ - self._hed_string = hed_string - if span is None: - span = (0, len(hed_string)) - # This is the span into the original HED string for this tag - self.span = span - - # If this is present, use this as the org tag for most purposes. - # This is not generally used anymore, but you can use it to replace a tag in place. - self._tag = None - - self._namespace = self._get_schema_namespace(self.org_tag) - - # This is the schema this tag was converted to. - self._schema = None - self._schema_entry = None - - self._extension_value = "" - self._parent = None - - self._expandable = None - self._expanded = False - - self.tag_terms = None # tuple of all the terms in this tag Lowercase. - self._calculate_to_canonical_forms(hed_schema) - - self._def_entry = None - if def_dict: - if self.short_base_tag in {DefTagNames.DEF_KEY, DefTagNames.DEF_EXPAND_KEY}: - self._def_entry = def_dict.get_definition_entry(self) - - def copy(self): - """ Return a deep copy of this tag. - - Returns: - HedTag: The copied group. 
- - """ - save_parent = self._parent - self._parent = None - return_copy = copy.deepcopy(self) - self._parent = save_parent - return return_copy - - @property - def schema_namespace(self): - """ Library namespace for this tag if one exists. - - Returns: - namespace (str): The library namespace, including the colon. - - """ - return self._namespace - - @property - def short_tag(self): - """ Short form including value or extension. - - Returns: - short_tag (str): The short form of the tag, including value or extension. - - """ - if self._schema_entry: - return f"{self._namespace}{self._schema_entry.short_tag_name}{self._extension_value}" - - return str(self) - - @property - def base_tag(self): - """ Long form without value or extension. - - Returns: - base_tag (str): The long form of the tag, without value or extension. - """ - if self._schema_entry: - return self._schema_entry.long_tag_name - return str(self) - - @property - def short_base_tag(self): - """ Short form without value or extension. - - Returns: - base_tag (str): The short non-extension port of a tag. - - Notes: - - ParentNodes/Def/DefName would return just "Def". - - """ - if self._schema_entry: - return self._schema_entry.short_tag_name - return str(self) - - @short_base_tag.setter - def short_base_tag(self, new_tag_val): - """ Change base tag, leaving extension or value. - - Parameters: - new_tag_val (str): The new short_base_tag for this tag. - - :raises ValueError: - - If the tag wasn't already identified. - - Note: - - Generally this is used to swap def to def-expand. - """ - if self._schema_entry: - tag_entry = None - if self._schema: - if self.is_takes_value_tag(): - new_tag_val = new_tag_val + "/#" - tag_entry = self._schema.get_tag_entry(new_tag_val, schema_namespace=self.schema_namespace) - - self._schema_entry = tag_entry - else: - raise ValueError("Cannot set unidentified tags") - - @property - def org_base_tag(self): - """ Original form without value or extension. 
- - Returns: - base_tag (str): The original form of the tag, without value or extension. - - Notes: - - Warning: This could be empty if the original tag had a name_prefix prepended. - e.g. a column where "Label/" is prepended, thus the column value has zero base portion. - """ - if self._schema_entry: - extension_len = len(self._extension_value) - if not extension_len: - return self.tag - - org_len = len(self.tag) - if org_len == extension_len: - return "" - - return self.tag[:org_len - extension_len] - return str(self) - - def tag_modified(self): - """ Return True if tag has been modified from original. - - Returns: - bool: Return True if the tag is modified. - - Notes: - - Modifications can include adding a column name_prefix. - - """ - return bool(self._tag) - - @property - def tag(self): - """ Returns the tag. - - Returns the original tag if no user form set. - - Returns: - tag (str): The custom set user form of the tag. - - """ - if self._tag: - return self._tag - - return self.org_tag - - @tag.setter - def tag(self, new_tag_val): - """ Allow you to overwrite the tag output text. - - Parameters: - new_tag_val (str): New (implicitly long form) of tag to set. - - Notes: - - You probably don't actually want to call this. - """ - self._tag = new_tag_val - self._schema_entry = None - self._calculate_to_canonical_forms(self._schema) - - @property - def extension(self): - """ Get the extension or value of tag. - - Generally this is just the portion after the last slash. - Returns an empty string if no extension or value. - - Returns: - str: The tag name. - - Notes: - - This tag must have been computed first. - - """ - if self._extension_value: - return self._extension_value[1:] - - return "" - - @extension.setter - def extension(self, x): - self._extension_value = f"/{x}" - - @property - def long_tag(self): - """ Long form including value or extension. - - Returns: - str: The long form of this tag. 
- - """ - if self._schema_entry: - return f"{self._namespace}{self._schema_entry.long_tag_name}{self._extension_value}" - return str(self) - - @property - def org_tag(self): - """ Return the original unmodified tag. - - Returns: - str: The original unmodified tag. - - """ - return self._hed_string[self.span[0]:self.span[1]] - - @property - def expanded(self): - """Return if this is currently expanded or not. - - Will always be False unless expandable is set. This is primarily used for Def/Def-expand tags at present. - - Returns: - bool: Returns True if this is currently expanded. - """ - return self._expanded - - @property - def expandable(self): - """Return what this expands to. - - This is primarily used for Def/Def-expand tags at present. - - Lazily set the first time it's called. - - Returns: - HedGroup or HedTag or None: Returns the expanded form of this tag. - """ - if self._expandable is None and self._def_entry: - save_parent = self._parent - tag_label, _, placeholder = self.extension.partition('/') - - def_contents = self._def_entry.get_definition(self, placeholder_value=placeholder) - self._parent = save_parent - if def_contents is not None: - self._expandable = def_contents - self._expanded = self.short_base_tag == DefTagNames.DEF_EXPAND_KEY - return self._expandable - - def is_column_ref(self): - """ Return if this tag is a column reference from a sidecar. - - You should only see these if you are directly accessing sidecar strings, tools should remove them otherwise. - - Returns: - bool: Returns True if this is a column ref. - """ - return self.org_tag.startswith('{') and self.org_tag.endswith('}') - - def __str__(self): - """ Convert this HedTag to a string. - - Returns: - str: The original tag if we haven't set a new tag.(e.g. short to long). 
- - """ - if self._schema_entry: - return self.short_tag - - if self._tag: - return self._tag - - return self._hed_string[self.span[0]:self.span[1]] - - def lower(self): - """ Convenience function, equivalent to str(self).lower(). """ - return str(self).lower() - - def casefold(self): - """ Convenience function, equivalent to str(self).casefold(). """ - return str(self).casefold() - - def _calculate_to_canonical_forms(self, hed_schema): - """ Update internal state based on schema. - - Parameters: - hed_schema (HedSchema or HedSchemaGroup): The schema to use to validate this tag. - - Returns: - list: A list of issues found during conversion. Each element is a dictionary. - - """ - tag_entry, remainder, tag_issues = hed_schema.find_tag_entry(self, self.schema_namespace) - self._schema_entry = tag_entry - self._schema = hed_schema - if self._schema_entry: - self.tag_terms = self._schema_entry.tag_terms - if remainder: - self._extension_value = remainder - else: - self.tag_terms = tuple() - - return tag_issues - - def get_stripped_unit_value(self, extension_text): - """ Return the extension divided into value and units, if the units are valid. - - Parameters: - extension_text (str): The text to split, in case it's a portion of a tag. - - Returns: - stripped_unit_value (str): The extension portion with the units removed. - unit (str or None): None if no valid unit found. - - Examples: - 'Duration/3 ms' will return '3' - - """ - tag_unit_classes = self.unit_classes - stripped_value, unit, _ = HedTag._get_tag_units_portion(extension_text, tag_unit_classes) - if stripped_value: - return stripped_value, unit - - return self.extension, None - - def value_as_default_unit(self): - """ Return the value converted to default units if possible. - - Returns None if the units are invalid.(No default unit or invalid). - - Returns: - value (float or None): The extension value as default units. - If there are no default units, returns None. 
- - Examples: - 'Duration/300 ms' will return .3 - - """ - tag_unit_classes = self.unit_classes - value, _, units = self.extension.rpartition(" ") - if not value: - stripped_value = units - unit_entry = self.default_unit - unit = unit_entry.name - else: - stripped_value, unit, unit_entry = HedTag._get_tag_units_portion(self.extension, tag_unit_classes) - - if stripped_value: - if unit_entry.get_conversion_factor(unit) is not None: - return float(stripped_value) * unit_entry.get_conversion_factor(unit) - - @property - def unit_classes(self): - """ Return a dict of all the unit classes this tag accepts. - - Returns: - unit_classes (dict): A dict of unit classes this tag accepts. - - Notes: - - Returns empty dict if this is not a unit class tag. - - The dictionary has unit name as the key and HedSchemaEntry as value. - - """ - if self._schema_entry: - return self._schema_entry.unit_classes - return {} - - @property - def value_classes(self): - """ Return a dict of all the value classes this tag accepts. - - Returns: - dict: A dictionary of HedSchemaEntry value classes this tag accepts. - - Notes: - - Returns empty dict if this is not a value class. - - The dictionary has unit name as the key and HedSchemaEntry as value. - - """ - if self._schema_entry: - return self._schema_entry.value_classes - return {} - - @property - def attributes(self): - """ Return a dict of all the attributes this tag has. - - Returns empty dict if this is not a value tag. - - Returns: - dict: A dict of attributes this tag has. - - Notes: - - Returns empty dict if this is not a unit class tag. - - The dictionary has unit name as the key and HedSchemaEntry as value. - - """ - if self._schema_entry: - return self._schema_entry.attributes - return {} - - def tag_exists_in_schema(self): - """ Return whether the schema entry for this tag exists. - - Returns: - bool: True if this tag exists. - - Notes: - - This does NOT assure this is a valid tag. 
- """ - return bool(self._schema_entry) - - def is_takes_value_tag(self): - """ Return True if this is a takes value tag. - - Returns: - bool: True if this is a takes value tag. - - """ - if self._schema_entry: - return self._schema_entry.has_attribute(HedKey.TakesValue) - return False - - def is_unit_class_tag(self): - """ Return True if this is a unit class tag. - - Returns: - bool: True if this is a unit class tag. - - """ - if self._schema_entry: - return bool(self._schema_entry.unit_classes) - return False - - def is_value_class_tag(self): - """ Return True if this is a value class tag. - - Returns: - bool: True if this is a tag with a value class. - - """ - if self._schema_entry: - return bool(self._schema_entry.value_classes) - return False - - def is_basic_tag(self): - """ Return True if a known tag with no extension or value. - - Returns: - bool: True if this is a known tag without extension or value. - - """ - return bool(self._schema_entry and not self.extension) - - def has_attribute(self, attribute): - """ Return True if this is an attribute this tag has. - - Parameters: - attribute (str): Name of the attribute. - - Returns: - bool: True if this tag has the attribute. - - """ - if self._schema_entry: - return self._schema_entry.has_attribute(attribute) - return False - - def get_tag_unit_class_units(self): - """ Get the unit class units associated with a particular tag. - - Returns: - list: A list containing the unit class units associated with a particular tag or an empty list. - - """ - units = [] - unit_classes = self.unit_classes - for unit_class_entry in unit_classes.values(): - units += unit_class_entry.units.keys() - - return units - - @property - def default_unit(self): - """ Get the default unit class unit for this tag. - - Only a tag with a single unit class can have default units. 
- - Returns: - unit(UnitEntry or None): the default unit entry for this tag, or None - """ - # todo: Make this cached - unit_classes = self.unit_classes.values() - if len(unit_classes) == 1: - first_unit_class_entry = list(unit_classes)[0] - default_unit = first_unit_class_entry.has_attribute(HedKey.DefaultUnits, return_value=True) - return first_unit_class_entry.units.get(default_unit, None) - - def base_tag_has_attribute(self, tag_attribute): - """ Check to see if the tag has a specific attribute. - - This is primarily used to check for things like TopLevelTag on Definitions and similar. - - Parameters: - tag_attribute (str): A tag attribute. - - Returns: - bool: True if the tag has the specified attribute. False, if otherwise. - - """ - if not self._schema_entry: - return False - - return self._schema_entry.base_tag_has_attribute(tag_attribute) - - @staticmethod - def _get_schema_namespace(org_tag): - """ Finds the library namespace for the tag. - - Parameters: - org_tag (str): A string representing a tag. - - Returns: - str: Library namespace string or empty. - - """ - first_slash = org_tag.find("/") - first_colon = org_tag.find(":") - - if first_colon != -1: - if first_slash != -1 and first_colon > first_slash: - return "" - - return org_tag[:first_colon + 1] - return "" - - @staticmethod - def _get_tag_units_portion(extension_text, tag_unit_classes): - """ Check that this string has valid units and remove them. - - Parameters: - tag_unit_classes (dict): Dictionary of valid UnitClassEntry objects for this tag. - - Returns: - stripped_value (str or None): The value with the units removed. - This is filled in if there are no units as well. 
- unit (UnitEntry or None): The matching unit entry if one is found - """ - value, _, units = extension_text.rpartition(" ") - if not units: - return None, None, None - - for unit_class_entry in tag_unit_classes.values(): - possible_match = unit_class_entry.get_derivative_unit_entry(units) - if possible_match and not possible_match.has_attribute(HedKey.UnitPrefix): - return value, units, possible_match - - # Repeat the above, but as a prefix - possible_match = unit_class_entry.get_derivative_unit_entry(value) - if possible_match and possible_match.has_attribute(HedKey.UnitPrefix): - return units, value, possible_match - - return None, None, None - - def is_placeholder(self): - """Returns if this tag has a placeholder in it. - - Returns: - has_placeholder(bool): True if it has a placeholder - """ - if "#" in self.org_tag or "#" in self._extension_value: - return True - return False - - def replace_placeholder(self, placeholder_value): - """ If tag has a placeholder character(#), replace with value. - - Parameters: - placeholder_value (str): Value to replace placeholder with. - - """ - if self.is_placeholder(): - if self._schema_entry: - self._extension_value = self._extension_value.replace("#", placeholder_value) - else: - self._tag = self.tag.replace("#", placeholder_value) - - def __hash__(self): - if self._schema_entry: - return hash( - self._namespace + self._schema_entry.short_tag_name.casefold() + self._extension_value.casefold()) - else: - return hash(self.casefold()) - - def __eq__(self, other): - if self is other: - return True - - if isinstance(other, str): - return self.casefold() == other.casefold() - - if not isinstance(other, HedTag): - return False - - if self.short_tag == other.short_tag: - return True - - if self.org_tag.casefold() == other.org_tag.casefold(): - return True - return False - - def __deepcopy__(self, memo): - # Check if the object has already been copied. 
- if id(self) in memo: - return memo[id(self)] - - # create a new instance of HedTag class - new_tag = self.__class__.__new__(self.__class__) - new_tag.__dict__.update(self.__dict__) - - # add the new object to the memo dictionary - memo[id(self)] = new_tag - - # Deep copy the attributes that need it(most notably, we don't copy schema/schema entry) - new_tag._parent = copy.deepcopy(self._parent, memo) - new_tag._expandable = copy.deepcopy(self._expandable, memo) - new_tag._expanded = copy.deepcopy(self._expanded, memo) - - return new_tag +""" A single HED tag. """ +from hed.schema.hed_schema_constants import HedKey +import copy +from hed.models.model_constants import DefTagNames + + +class HedTag: + """ A single HED tag. + + Notes: + - HedTag is a smart class in that it keeps track of its original value and positioning + as well as pointers to the relevant HED schema information, if relevant. + + """ + + def __init__(self, hed_string, hed_schema, span=None, def_dict=None): + """ Creates a HedTag. + + Parameters: + hed_string (str): Source HED string for this tag. + hed_schema (HedSchema): A parameter for calculating canonical forms on creation. + span (int, int): The start and end indexes of the tag in the hed_string. + def_dict(DefinitionDict or None): The def dict to use to identify def/def expand tags. + """ + self._hed_string = hed_string + if span is None: + span = (0, len(hed_string)) + # This is the span into the original HED string for this tag + self.span = span + + # If this is present, use this as the org tag for most purposes. + # This is not generally used anymore, but you can use it to replace a tag in place. + self._tag = None + + self._namespace = self._get_schema_namespace(self.org_tag) + + # This is the schema this tag was converted to. 
+ self._schema = None + self._schema_entry = None + + self._extension_value = "" + self._parent = None + + self._expandable = None + self._expanded = False + + self.tag_terms = None # tuple of all the terms in this tag Lowercase. + self._calculate_to_canonical_forms(hed_schema) + + self._def_entry = None + if def_dict: + if self.short_base_tag in {DefTagNames.DEF_KEY, DefTagNames.DEF_EXPAND_KEY}: + self._def_entry = def_dict.get_definition_entry(self) + + def copy(self): + """ Return a deep copy of this tag. + + Returns: + HedTag: The copied group. + + """ + save_parent = self._parent + self._parent = None + return_copy = copy.deepcopy(self) + self._parent = save_parent + return return_copy + + @property + def schema_namespace(self): + """ Library namespace for this tag if one exists. + + Returns: + namespace (str): The library namespace, including the colon. + + """ + return self._namespace + + @property + def short_tag(self): + """ Short form including value or extension. + + Returns: + str: The short form of the tag, including value or extension. + + """ + if self._schema_entry: + return f"{self._namespace}{self._schema_entry.short_tag_name}{self._extension_value}" + + return str(self) + + @property + def base_tag(self): + """ Long form without value or extension. + + Returns: + base_tag (str): The long form of the tag, without value or extension. + """ + if self._schema_entry: + return self._schema_entry.long_tag_name + return str(self) + + @property + def short_base_tag(self): + """ Short form without value or extension. + + Returns: + str: The short non-extension port of a tag. + + Notes: + - ParentNodes/Def/DefName would return just "Def". + + """ + if self._schema_entry: + return self._schema_entry.short_tag_name + return str(self) + + @short_base_tag.setter + def short_base_tag(self, new_tag_val): + """ Change base tag, leaving extension or value. + + Parameters: + new_tag_val (str): The new short_base_tag for this tag. 
+ + :raises ValueError: + - If the tag wasn't already identified. + + Note: + - Generally this is used to swap def to def-expand. + """ + if self._schema_entry: + tag_entry = None + if self._schema: + if self.is_takes_value_tag(): + new_tag_val = new_tag_val + "/#" + tag_entry = self._schema.get_tag_entry(new_tag_val, schema_namespace=self.schema_namespace) + + self._schema_entry = tag_entry + else: + raise ValueError("Cannot set unidentified tags") + + @property + def org_base_tag(self): + """ Original form without value or extension. + + Returns: + str: The original form of the tag, without value or extension. + + Notes: + - Warning: This could be empty if the original tag had a name_prefix prepended. + e.g. a column where "Label/" is prepended, thus the column value has zero base portion. + """ + if self._schema_entry: + extension_len = len(self._extension_value) + if not extension_len: + return self.tag + + org_len = len(self.tag) + if org_len == extension_len: + return "" + + return self.tag[:org_len - extension_len] + return str(self) + + def tag_modified(self): + """ Return True if tag has been modified from original. + + Returns: + bool: Return True if the tag is modified. + + Notes: + - Modifications can include adding a column name_prefix. + + """ + return bool(self._tag) + + @property + def tag(self): + """ Returns the tag. + + Returns the original tag if no user form set. + + Returns: + str: The custom set user form of the tag. + + """ + if self._tag: + return self._tag + + return self.org_tag + + @tag.setter + def tag(self, new_tag_val): + """ Allow you to overwrite the tag output text. + + Parameters: + new_tag_val (str): New (implicitly long form) of tag to set. + + Notes: + - You probably don't actually want to call this. + """ + self._tag = new_tag_val + self._schema_entry = None + self._calculate_to_canonical_forms(self._schema) + + @property + def extension(self): + """ Get the extension or value of tag. 
+ + Generally this is just the portion after the last slash. + Returns an empty string if no extension or value. + + Returns: + str: The tag name. + + Notes: + - This tag must have been computed first. + + """ + if self._extension_value: + return self._extension_value[1:] + + return "" + + @extension.setter + def extension(self, x): + self._extension_value = f"/{x}" + + @property + def long_tag(self): + """ Long form including value or extension. + + Returns: + str: The long form of this tag. + + """ + if self._schema_entry: + return f"{self._namespace}{self._schema_entry.long_tag_name}{self._extension_value}" + return str(self) + + @property + def org_tag(self): + """ Return the original unmodified tag. + + Returns: + str: The original unmodified tag. + + """ + return self._hed_string[self.span[0]:self.span[1]] + + @property + def expanded(self): + """Return if this is currently expanded or not. + + Will always be False unless expandable is set. This is primarily used for Def/Def-expand tags at present. + + Returns: + bool: True if this is currently expanded. + """ + return self._expanded + + @property + def expandable(self): + """Return what this expands to. + + This is primarily used for Def/Def-expand tags at present. + + Lazily set the first time it's called. + + Returns: + HedGroup or HedTag or None: Returns the expanded form of this tag. + """ + if self._expandable is None and self._def_entry: + save_parent = self._parent + tag_label, _, placeholder = self.extension.partition('/') + + def_contents = self._def_entry.get_definition(self, placeholder_value=placeholder) + self._parent = save_parent + if def_contents is not None: + self._expandable = def_contents + self._expanded = self.short_base_tag == DefTagNames.DEF_EXPAND_KEY + return self._expandable + + def is_column_ref(self): + """ Return if this tag is a column reference from a sidecar. + + You should only see these if you are directly accessing sidecar strings, tools should remove them otherwise. 
+ + Returns: + bool: Returns True if this is a column ref. + """ + return self.org_tag.startswith('{') and self.org_tag.endswith('}') + + def __str__(self): + """ Convert this HedTag to a string. + + Returns: + str: The original tag if we haven't set a new tag.(e.g. short to long). + + """ + if self._schema_entry: + return self.short_tag + + if self._tag: + return self._tag + + return self._hed_string[self.span[0]:self.span[1]] + + def lower(self): + """ Convenience function, equivalent to str(self).lower(). """ + return str(self).lower() + + def casefold(self): + """ Convenience function, equivalent to str(self).casefold(). """ + return str(self).casefold() + + def _calculate_to_canonical_forms(self, hed_schema): + """ Update internal state based on schema. + + Parameters: + hed_schema (HedSchema or HedSchemaGroup): The schema to use to validate this tag. + + Returns: + list: A list of issues found during conversion. Each element is a dictionary. + + """ + tag_entry, remainder, tag_issues = hed_schema.find_tag_entry(self, self.schema_namespace) + self._schema_entry = tag_entry + self._schema = hed_schema + if self._schema_entry: + self.tag_terms = self._schema_entry.tag_terms + if remainder: + self._extension_value = remainder + else: + self.tag_terms = tuple() + + return tag_issues + + def get_stripped_unit_value(self, extension_text): + """ Return the extension divided into value and units, if the units are valid. + + Parameters: + extension_text (str): The text to split, in case it's a portion of a tag. + + Returns: + str or None: The extension portion with the units removed or None if invalid units. + str or None: The units or None if no units of the right unit class are found. 
+ + Examples: + 'Duration/3 ms' will return ('3', 'ms') + + """ + tag_unit_classes = self.unit_classes + stripped_value, units, match = HedTag._get_tag_units_portion(extension_text, tag_unit_classes) + if stripped_value and match: + return stripped_value, units + elif units and not match: + return None, units + return extension_text, None + + def value_as_default_unit(self): + """ Return the value converted to default units if possible or None if invalid. + + Returns: + float or None: The extension value in default units. + If no default units it assumes that the extension value is in default units. + + Examples: + 'Duration/300 ms' will return .3 + + """ + tag_unit_classes = self.unit_classes + stripped_value, unit, unit_entry = HedTag._get_tag_units_portion(self.extension, tag_unit_classes) + if not stripped_value: + return None + if unit and not unit_entry: + return None + if unit and unit_entry and unit_entry.get_conversion_factor(unit) is not None: + return float(stripped_value) * unit_entry.get_conversion_factor(unit) + return float(stripped_value) + + @property + def unit_classes(self): + """ Return a dict of all the unit classes this tag accepts. + + Returns: + dict: A dict of unit classes this tag accepts. + + Notes: + - Returns empty dict if this is not a unit class tag. + - The dictionary has unit name as the key and HedSchemaEntry as value. + + """ + if self._schema_entry: + return self._schema_entry.unit_classes + return {} + + @property + def value_classes(self): + """ Return a dict of all the value classes this tag accepts. + + Returns: + dict: A dictionary of HedSchemaEntry value classes this tag accepts. + + Notes: + - Returns empty dict if this is not a value class. + - The dictionary has unit name as the key and HedSchemaEntry as value. + + """ + if self._schema_entry: + return self._schema_entry.value_classes + return {} + + @property + def attributes(self): + """ Return a dict of all the attributes this tag has. 
+ + Returns empty dict if this is not a value tag. + + Returns: + dict: A dict of attributes this tag has. + + Notes: + - Returns empty dict if this is not a unit class tag. + - The dictionary has unit name as the key and HedSchemaEntry as value. + + """ + if self._schema_entry: + return self._schema_entry.attributes + return {} + + def tag_exists_in_schema(self): + """ Return whether the schema entry for this tag exists. + + Returns: + bool: True if this tag exists. + + Notes: + - This does NOT assure this is a valid tag. + """ + return bool(self._schema_entry) + + def is_takes_value_tag(self): + """ Return True if this is a takes value tag. + + Returns: + bool: True if this is a takes value tag. + + """ + if self._schema_entry: + return self._schema_entry.has_attribute(HedKey.TakesValue) + return False + + def is_unit_class_tag(self): + """ Return True if this is a unit class tag. + + Returns: + bool: True if this is a unit class tag. + + """ + if self._schema_entry: + return bool(self._schema_entry.unit_classes) + return False + + def is_value_class_tag(self): + """ Return True if this is a value class tag. + + Returns: + bool: True if this is a tag with a value class. + + """ + if self._schema_entry: + return bool(self._schema_entry.value_classes) + return False + + def is_basic_tag(self): + """ Return True if a known tag with no extension or value. + + Returns: + bool: True if this is a known tag without extension or value. + + """ + return bool(self._schema_entry and not self.extension) + + def has_attribute(self, attribute): + """ Return True if this is an attribute this tag has. + + Parameters: + attribute (str): Name of the attribute. + + Returns: + bool: True if this tag has the attribute. + + """ + if self._schema_entry: + return self._schema_entry.has_attribute(attribute) + return False + + def get_tag_unit_class_units(self): + """ Get the unit class units associated with a particular tag. 
+ + Returns: + list: A list containing the unit class units associated with a particular tag or an empty list. + + """ + units = [] + unit_classes = self.unit_classes + for unit_class_entry in unit_classes.values(): + units += unit_class_entry.units.keys() + + return units + + @property + def default_unit(self): + """ Get the default unit class unit for this tag. + + Only a tag with a single unit class can have default units. + + Returns: + unit(UnitEntry or None): the default unit entry for this tag, or None + """ + # todo: Make this cached + unit_classes = self.unit_classes.values() + if len(unit_classes) == 1: + first_unit_class_entry = list(unit_classes)[0] + default_unit = first_unit_class_entry.has_attribute(HedKey.DefaultUnits, return_value=True) + return first_unit_class_entry.units.get(default_unit, None) + + def base_tag_has_attribute(self, tag_attribute): + """ Check to see if the tag has a specific attribute. + + This is primarily used to check for things like TopLevelTag on Definitions and similar. + + Parameters: + tag_attribute (str): A tag attribute. + + Returns: + bool: True if the tag has the specified attribute. False, if otherwise. + + """ + if not self._schema_entry: + return False + + return self._schema_entry.base_tag_has_attribute(tag_attribute) + + @staticmethod + def _get_schema_namespace(org_tag): + """ Finds the library namespace for the tag. + + Parameters: + org_tag (str): A string representing a tag. + + Returns: + str: Library namespace string or empty. + + """ + first_slash = org_tag.find("/") + first_colon = org_tag.find(":") + + if first_colon != -1: + if first_slash != -1 and first_colon > first_slash: + return "" + + return org_tag[:first_colon + 1] + return "" + + @staticmethod + def _get_tag_units_portion(extension_text, tag_unit_classes): + """ Split a value portion into value, units and its valid unitEntry (if any). + + Parameters: + extension_text (str): A string representing the value portion of a tag with unit classes. 
+ tag_unit_classes (dict): Dictionary of valid UnitClassEntry objects for this tag. + + Returns: + stripped_value (str or None): The value with the units removed. + This is filled in if there are no units as well. + units (str or None); The units string or None if no units. + unitEntry (UnitEntry or None): The matching unit entry if one is found + + Notes: + value, None, None -- value portion has no units. + value, units, unitEntry -- value portion has value and valid units. + value, units, None -- value portion has a value and invalid units. + + """ + value, _, units = extension_text.partition(" ") + if not units: + return value, None, None + + for unit_class_entry in tag_unit_classes.values(): + possible_match = unit_class_entry.get_derivative_unit_entry(units) + if possible_match: + return value, units, possible_match + return value, units, None + + def is_placeholder(self): + """Returns if this tag has a placeholder in it. + + Returns: + has_placeholder(bool): True if it has a placeholder + """ + if "#" in self.org_tag or "#" in self._extension_value: + return True + return False + + def replace_placeholder(self, placeholder_value): + """ If tag has a placeholder character(#), replace with value. + + Parameters: + placeholder_value (str): Value to replace placeholder with. 
+ + """ + if self.is_placeholder(): + if self._schema_entry: + tag = self.tag.replace('#', placeholder_value) + self._extension_value = self._extension_value.replace("#", placeholder_value) + self.tag = tag + else: + self._tag = self.tag.replace("#", placeholder_value) + + def get_normalized_str(self): + if self._schema_entry: + return self._namespace + self._schema_entry.short_tag_name.casefold() + self._extension_value.casefold() + else: + return self.casefold() + + def __hash__(self): + return hash(self.get_normalized_str()) + + def __eq__(self, other): + if self is other: + return True + + if isinstance(other, str): + return self.casefold() == other.casefold() + + if not isinstance(other, HedTag): + return False + + if self.short_tag == other.short_tag: + return True + + if self.org_tag.casefold() == other.org_tag.casefold(): + return True + return False + + def __deepcopy__(self, memo): + # Check if the object has already been copied. + if id(self) in memo: + return memo[id(self)] + + # create a new instance of HedTag class + new_tag = self.__class__.__new__(self.__class__) + new_tag.__dict__.update(self.__dict__) + + # add the new object to the memo dictionary + memo[id(self)] = new_tag + + # Deep copy the attributes that need it(most notably, we don't copy schema/schema entry) + new_tag._parent = copy.deepcopy(self._parent, memo) + new_tag._expandable = copy.deepcopy(self._expandable, memo) + new_tag._expanded = copy.deepcopy(self._expanded, memo) + + return new_tag diff --git a/hed/models/model_constants.py b/hed/models/model_constants.py index bbc249c0c..a1407b6f4 100644 --- a/hed/models/model_constants.py +++ b/hed/models/model_constants.py @@ -1,20 +1,21 @@ -""" Defined constants for definitions, def labels, and expanded labels. """ - - -class DefTagNames: - """ Source names for definitions, def labels, and expanded labels. 
""" - - DEF_KEY = 'Def' - DEF_EXPAND_KEY = 'Def-expand' - DEFINITION_KEY = "Definition" - - ONSET_KEY = "Onset" - OFFSET_KEY = "Offset" - INSET_KEY = "Inset" - DURATION_KEY = "Duration" - DELAY_KEY = "Delay" - - TEMPORAL_KEYS = {ONSET_KEY, OFFSET_KEY, INSET_KEY} - DURATION_KEYS = {DURATION_KEY, DELAY_KEY} - - ALL_TIME_KEYS = TEMPORAL_KEYS.union(DURATION_KEYS) +""" Defined constants for definitions, def labels, and expanded labels. """ + + +class DefTagNames: + """ Source names for definitions, def labels, and expanded labels. """ + + DEF_KEY = 'Def' + DEF_EXPAND_KEY = 'Def-expand' + DEFINITION_KEY = "Definition" + + ONSET_KEY = "Onset" + OFFSET_KEY = "Offset" + INSET_KEY = "Inset" + DURATION_KEY = "Duration" + DELAY_KEY = "Delay" + + TEMPORAL_KEYS = {ONSET_KEY, OFFSET_KEY, INSET_KEY} + DURATION_KEYS = {DURATION_KEY, DELAY_KEY} + + ALL_TIME_KEYS = TEMPORAL_KEYS.union(DURATION_KEYS) + TIMELINE_KEYS = {ONSET_KEY, OFFSET_KEY, INSET_KEY, DELAY_KEY} diff --git a/hed/tools/remodeling/operations/summarize_definitions_op.py b/hed/tools/remodeling/operations/summarize_definitions_op.py index 65503fce2..5be41102d 100644 --- a/hed/tools/remodeling/operations/summarize_definitions_op.py +++ b/hed/tools/remodeling/operations/summarize_definitions_op.py @@ -152,12 +152,13 @@ def get_details_dict(self, def_gatherer): """ known_defs_summary = self._build_summary_dict(def_gatherer.def_dict, "Known Definitions", None, display_description=True) - ambiguous_defs_summary = self._build_summary_dict(def_gatherer.ambiguous_defs, "Ambiguous Definitions", - def_gatherer.get_ambiguous_group) + # ambiguous_defs_summary = self._build_summary_dict(def_gatherer.ambiguous_defs, "Ambiguous Definitions", + # def_gatherer.get_ambiguous_group) + # ambiguous_defs_summary = {} + # TODO: Summary of ambiguous definitions is not implemented errors_summary = self._build_summary_dict( def_gatherer.errors, "Errors", None) - known_defs_summary.update(ambiguous_defs_summary) 
known_defs_summary.update(errors_summary) return {"Name": "", "Total events": 0, "Total files": 0, "Files": [], "Specifics": known_defs_summary} # return known_defs_summary diff --git a/hed/validator/util/class_regex.json b/hed/validator/data/class_regex.json similarity index 100% rename from hed/validator/util/class_regex.json rename to hed/validator/data/class_regex.json diff --git a/hed/validator/data/reservedTags.json b/hed/validator/data/reservedTags.json new file mode 100644 index 000000000..34b40e43b --- /dev/null +++ b/hed/validator/data/reservedTags.json @@ -0,0 +1,128 @@ +{ + "Definition": { + "name": "Definition", + "noExtension": true, + "allowValue": true, + "allowTwoLevelValue": true, + "requireValue": true, + "exclusive": true, + "tagGroup": true, + "topLevelTagGroup": true, + "maxNonDefSubgroups": 1, + "minNonDefSubgroups": 0, + "ERROR_CODE": "DEFINITION_INVALID", + "noSpliceInGroup": true, + "forbiddenSubgroupTags": ["Def", "Def-expand"], + "requiresTimeline": false, + "requiresDef": false, + "otherAllowedNonDefTags": [] + }, + "Delay": { + "name": "Delay", + "noExtension": true, + "allowValue": true, + "allowTwoLevelValue": false, + "requireValue": true, + "exclusive": false, + "tagGroup": true, + "topLevelTagGroup": true, + "maxNonDefSubgroups": 1, + "minNonDefSubgroups": 0, + "ERROR_CODE": "TEMPORAL_TAG_ERROR", + "noSpliceInGroup": false, + "forbiddenSubgroupTags": [], + "requiresTimeline": true, + "requiresDef": false, + "otherAllowedNonDefTags": ["Duration", "Onset", "Offset", "Inset"] + }, + "Duration": { + "name": "Duration", + "noExtension": true, + "allowValue": true, + "allowTwoLevelValue": false, + "requireValue": true, + "exclusive": false, + "tagGroup": true, + "topLevelTagGroup": true, + "maxNonDefSubgroups": 1, + "minNonDefSubgroups": 1, + "ERROR_CODE": "TEMPORAL_TAG_ERROR", + "noSpliceInGroup": false, + "forbiddenSubgroupTags": [], + "requiresTimeline": false, + "requiresDef": false, + "otherAllowedNonDefTags": ["Delay"] + }, + 
"Event-context": { + "name": "Event-context", + "noExtension": true, + "allowValue": false, + "allowTwoLevelValue": false, + "requireValue": false, + "exclusive": false, + "tagGroup": true, + "topLevelTagGroup": true, + "maxNonDefSubgroups": null, + "minNonDefSubgroups": 0, + "ERROR_CODE": "TAG_GROUP_ERROR", + "noSpliceInGroup": true, + "forbiddenSubgroupTags": [], + "requiresTimeline": false, + "requiresDef": false, + "otherAllowedNonDefTags": [] + }, + "Inset": { + "name": "Inset", + "noExtension": true, + "allowValue": false, + "allowTwoLevelValue": false, + "requireValue": false, + "exclusive": false, + "tagGroup": true, + "topLevelTagGroup": true, + "maxNonDefSubgroups": 1, + "minNonDefSubgroups": 0, + "ERROR_CODE": "TEMPORAL_TAG_ERROR", + "noSpliceInGroup": false, + "forbiddenSubgroupTags": [], + "requiresTimeline": true, + "requiresDef": true, + "otherAllowedNonDefTags": ["Delay"] + }, + "Offset": { + "name": "Offset", + "noExtension": true, + "allowValue": false, + "allowTwoLevelValue": false, + "requireValue": false, + "exclusive": false, + "tagGroup": true, + "topLevelTagGroup": true, + "maxNonDefSubgroups": 0, + "minNonDefSubgroups": 0, + "ERROR_CODE": "TEMPORAL_TAG_ERROR", + "noSpliceInGroup": false, + "forbiddenSubgroupTags": [], + "requiresTimeline": true, + "requiresDef": true, + "otherAllowedNonDefTags": ["Delay"] + }, + "Onset": { + "name": "Onset", + "noExtension": true, + "allowValue": false, + "allowTwoLevelValue": false, + "requireValue": false, + "exclusive": false, + "tagGroup": true, + "topLevelTagGroup": true, + "maxNonDefSubgroups": 1, + "minNonDefSubgroups": 0, + "ERROR_CODE": "TEMPORAL_TAG_ERROR", + "noSpliceInGroup": false, + "forbiddenSubgroupTags": [], + "requiresTimeline": true, + "requiresDef": true, + "otherAllowedNonDefTags": ["Delay"] + } +} diff --git a/hed/validator/def_validator.py b/hed/validator/def_validator.py index 26731edc2..f06d3d77e 100644 --- a/hed/validator/def_validator.py +++ b/hed/validator/def_validator.py @@ 
-26,7 +26,7 @@ def validate_def_tags(self, hed_string_obj, hed_validator=None): """ Validate Def/Def-Expand tags. Parameters: - hed_string_obj (HedString): The hed string to process. + hed_string_obj (HedString): The HED string to process. hed_validator (HedValidator): Used to validate the placeholder replacement. Returns: list: Issues found related to validating defs. Each issue is a dictionary. @@ -71,33 +71,30 @@ def _validate_def_contents(self, def_tag, def_expand_group, hed_validator): def_expand_group (HedGroup or HedTag): Source group for this def-expand tag. Same as def_tag if this is not a def-expand tag. hed_validator (HedValidator): Used to validate the placeholder replacement. - + TODO: Figure out whether the hed_validator is needed as a parameter. Returns: - issues(list): Issues found from validating placeholders. + list: Issues found from validating placeholders. """ - def_issues = [] is_def_expand_tag = def_expand_group != def_tag tag_label, _, placeholder = def_tag.extension.partition('/') - label_tag_lower = tag_label.casefold() + + # Check if def_entry in def_dicts. 
def_entry = self.defs.get(label_tag_lower) - if def_entry is None: + if def_entry is None or def_entry.takes_value == (not placeholder): error_code = ValidationErrors.HED_DEF_UNMATCHED if is_def_expand_tag: error_code = ValidationErrors.HED_DEF_EXPAND_UNMATCHED - def_issues += ErrorHandler.format_error(error_code, tag=def_tag) - else: - def_contents = def_entry.get_definition(def_tag, placeholder_value=placeholder, - return_copy_of_tag=True) - if def_contents is not None: - if is_def_expand_tag and def_expand_group != def_contents: - def_issues += ErrorHandler.format_error(ValidationErrors.HED_DEF_EXPAND_INVALID, - tag=def_tag, actual_def=def_contents, - found_def=def_expand_group) - else: - def_issues += self._report_missing_or_invalid_value(def_tag, def_entry, is_def_expand_tag) + return ErrorHandler.format_error(error_code, tag=def_tag) - return def_issues + # Check the special case of a definition without contents. + def_contents = def_entry.get_definition(def_tag, placeholder_value=placeholder, return_copy_of_tag=True) + if is_def_expand_tag and def_expand_group != def_contents: + return ErrorHandler.format_error(ValidationErrors.HED_DEF_EXPAND_INVALID, + tag=def_tag, actual_def=def_contents, + found_def=def_expand_group) + + return [] def validate_def_value_units(self, def_tag, hed_validator, allow_placeholders=False): """Equivalent to HedValidator.validate_units for the special case of a Def or Def-expand tag""" @@ -109,13 +106,17 @@ def validate_def_value_units(self, def_tag, hed_validator, allow_placeholders=Fa if def_entry is None: return [] + # Make sure that there aren't any errant placeholders. 
+ if not allow_placeholders and '#' in placeholder: + return ErrorHandler.format_error(ValidationErrors.HED_PLACEHOLDER_OUT_OF_CONTEXT, tag=def_tag.tag) + + # Set the appropriate error code error_code = ValidationErrors.DEF_INVALID if is_def_expand_tag: error_code = ValidationErrors.DEF_EXPAND_INVALID # Validate the def name vs the name class - def_issues = hed_validator._unit_validator._check_value_class(def_tag, tag_label, report_as=None, - error_code=error_code, index_offset=0) + def_issues = hed_validator._unit_validator._check_value_class(def_tag, tag_label, report_as=None) # def_issues += hed_validator.validate_units(def_tag, # tag_label, # error_code=error_code) @@ -123,11 +124,6 @@ def validate_def_value_units(self, def_tag, hed_validator, allow_placeholders=Fa def_contents = def_entry.get_definition(def_tag, placeholder_value=placeholder, return_copy_of_tag=True) if def_contents and def_entry.takes_value and hed_validator: placeholder_tag = def_contents.get_first_group().find_placeholder_tag() - # Handle the case where they're adding a unit as part of a placeholder. 
eg Speed/# mph - if placeholder_tag: - placeholder = placeholder_tag.extension - if placeholder.startswith('# '): - placeholder = placeholder[2:] def_issues += hed_validator.validate_units(placeholder_tag, placeholder, report_as=def_tag, @@ -193,7 +189,8 @@ def validate_onset_offset(self, hed_string_obj): return onset_issues - def _find_onset_tags(self, hed_string_obj): + @staticmethod + def _find_onset_tags(hed_string_obj): return hed_string_obj.find_top_level_tags(anchor_tags=DefTagNames.TEMPORAL_KEYS) def _handle_onset_or_offset(self, def_tag): diff --git a/hed/validator/hed_validator.py b/hed/validator/hed_validator.py index dffd88fa8..875f4491b 100644 --- a/hed/validator/hed_validator.py +++ b/hed/validator/hed_validator.py @@ -80,11 +80,18 @@ def run_basic_checks(self, hed_string, allow_placeholders): return issues def run_full_string_checks(self, hed_string): - issues = [] - issues += self._group_validator.run_all_tags_validators(hed_string) - issues += self._group_validator.run_tag_level_validators(hed_string) - issues += self._def_validator.validate_onset_offset(hed_string) - return issues + checks = [ + self._group_validator.run_all_tags_validators, + self._group_validator.run_tag_level_validators, + self._def_validator.validate_onset_offset, + ] + + for check in checks: + issues = check(hed_string) # Call each function with `hed_string` + if issues: + return issues + + return [] # Return an empty list if no issues are found # Todo: mark semi private/actually private below this def _run_validate_tag_characters(self, original_tag, allow_placeholders): @@ -164,14 +171,11 @@ def validate_units(self, original_tag, validate_text=None, report_as=None, error issues += self._unit_validator.check_tag_unit_class_units_are_valid(original_tag, validate_text, report_as=report_as, - error_code=error_code, - index_offset=index_offset) + error_code=error_code) elif original_tag.is_value_class_tag(): issues += self._unit_validator.check_tag_value_class_valid(original_tag, 
validate_text, - report_as=report_as, - error_code=error_code, - index_offset=index_offset) + report_as=report_as) elif original_tag.extension: issues += self._char_validator.check_for_invalid_extension_chars(original_tag, validate_text, diff --git a/hed/validator/onset_validator.py b/hed/validator/onset_validator.py index 1d7a04dd6..33eaf2a57 100644 --- a/hed/validator/onset_validator.py +++ b/hed/validator/onset_validator.py @@ -64,15 +64,15 @@ def _handle_onset_or_offset(self, def_tag, onset_offset_tag): @staticmethod def check_for_banned_tags(hed_string): - """ Returns an issue for every tag found from the banned list + """ Returns an issue for every tag found from the banned list (for files without onset column). Parameters: - hed_string(HedString): the string to check + hed_string(HedString): The string to check. Returns: list: The validation issues associated with the characters. Each issue is dictionary. """ - banned_tag_list = DefTagNames.ALL_TIME_KEYS + banned_tag_list = DefTagNames.TIMELINE_KEYS issues = [] for tag in hed_string.get_all_tags(): if tag.short_base_tag in banned_tag_list: diff --git a/hed/validator/reserved_checker.py b/hed/validator/reserved_checker.py new file mode 100644 index 000000000..539f92d7f --- /dev/null +++ b/hed/validator/reserved_checker.py @@ -0,0 +1,179 @@ +import json +import os +import math +from threading import Lock +from collections import defaultdict +from hed.errors.error_types import ValidationErrors, TemporalErrors +from hed.errors.error_reporter import ErrorHandler + + +class ReservedChecker: + _instance = None + _lock = Lock() + reserved_reqs_path = os.path.join(os.path.dirname(__file__), "data/reservedTags.json") + + def __new__(cls): + if cls._instance is None: + with cls._lock: + if cls._instance is None: + cls._instance = super(ReservedChecker, cls).__new__(cls) + cls._instance._initialize() + return cls._instance + + def _initialize(self): + # Load the JSON file during the first instantiation + if not 
hasattr(self, "reserved_map"): + with open(self.reserved_reqs_path, 'r') as file: + self.reserved_map = json.load(file) + self._initialize_special_tags() + + @staticmethod + def get_instance(): + if ReservedChecker._instance is None: + ReservedChecker._instance = ReservedChecker() + return ReservedChecker._instance + + def _initialize_special_tags(self): + self.special_names = set(self.reserved_map.keys()) + self.require_value_tags = self._get_special_tags_by_property("requireValue") + self.no_extension_tags = self._get_special_tags_by_property("noExtension") + self.allow_two_level_value_tags = self._get_special_tags_by_property("allowTwoLevelValue") + self.top_group_tags = self._get_special_tags_by_property("topLevelTagGroup") + self.requires_def_tags = self._get_special_tags_by_property("requiresDef") + self.group_tags = self._get_special_tags_by_property("tagGroup") + self.exclusive_tags = self._get_special_tags_by_property("exclusive") + self.timelineTags = self._get_special_tags_by_property("requiresTimeline") + self.no_splice_in_group = self._get_special_tags_by_property("noSpliceInGroup") + self.has_forbidden_subgroup_tags = { + value["name"] + for value in self.reserved_map.values() + if len(value.get("forbiddenSubgroupTags", [])) > 0 + } + + def _get_special_tags_by_property(self, property_name): + return { + value["name"] + for value in self.reserved_map.values() + if value.get(property_name) is True + } + + def get_reserved(self, group): + reserved_tags = [tag for tag in group.tags() if tag.short_base_tag in self.special_names] + return reserved_tags + + @staticmethod + def _get_duplicates(tag_list): + grouped_tags = defaultdict(list) + for tag in tag_list: + grouped_tags[tag.short_base_tag].append(tag) + return grouped_tags + + def check_reserved_compatibility(self, group, reserved_tags): + """ Check to make sure that the reserved tags can be used together and no duplicates. + + Parameters: + group (HedTagGroup): A group to be checked. 
+ reserved_tags (list of HedTag): A list of reserved tags in this group. + + """ + # Make sure there are no duplicate reserved tags + grouped = self._get_duplicates(reserved_tags) + multiples = [key for key, items in grouped.items() if len(items) > 1] + if len(multiples) > 0: + return ErrorHandler.format_error(ValidationErrors.HED_RESERVED_TAG_REPEATED, + tag=grouped[multiples[0]][1], group=group) + # Test compatibility among the reserved tags + for tag in reserved_tags: + incompatible_tag = self.get_incompatible(tag, reserved_tags) + if incompatible_tag: + return ErrorHandler.format_error(ValidationErrors.HED_TAGS_NOT_ALLOWED, tag=incompatible_tag[0], + group=group) + return [] + + def check_tag_requirements(self, group, reserved_tags): + """ Check the tag requirements within the group. + + Parameters: + group (HedTagGroup): A group to be checked. + reserved_tags (list of HedTag): A list of reserved tags in this group. + + Notes: This is only called when there are some reserved incompatible tags. 
+ """ + [requires_defs, defs] = self.get_def_information(group, reserved_tags) + if len(requires_defs) > 1: + return ErrorHandler.format_error(ValidationErrors.HED_RESERVED_TAG_REPEATED, tag=requires_defs[0], + group=group) + if len(requires_defs) == 1 and len(defs) != 1: + return ErrorHandler.format_error(TemporalErrors.ONSET_NO_DEF_TAG_FOUND, tag=requires_defs[0]) + + if len(requires_defs) == 0 and len(defs) != 0: + return ErrorHandler.format_error(ValidationErrors.HED_TAGS_NOT_ALLOWED, tag=reserved_tags[0], group=group) + + other_tags = [tag for tag in group.tags() if tag not in reserved_tags and tag not in defs] + if len(other_tags) > 0: + return ErrorHandler.format_error(ValidationErrors.HED_TAGS_NOT_ALLOWED, tag=other_tags[0], group=group) + + # Check the subgroup requirements + other_groups = [group for group in group.groups() if group not in defs] + min_allowed, max_allowed = self.get_group_requirements(reserved_tags) + if not math.isinf(max_allowed) and len(other_groups) > max_allowed: + return ErrorHandler.format_error(ValidationErrors.HED_RESERVED_TAG_GROUP_ERROR, group=group, + group_count=str(len(other_groups))) + if group.is_group and not math.isinf(max_allowed) and min_allowed > len(other_groups): + return ErrorHandler.format_error(ValidationErrors.HED_RESERVED_TAG_GROUP_ERROR, group=group, + group_count=str(len(other_groups))) + return [] + + def get_group_requirements(self, reserved_tags): + """ Returns the maximum and minimum number of groups required for these reserved tags. + + Parameters: + reserved_tags (list of HedTag): The reserved tags to be checked. 
+ + Returns: + tuple (max_required, min_required) + + """ + max_allowed = float('inf') + min_allowed = float('-inf') + for tag in reserved_tags: + requirements = self.reserved_map[tag.short_base_tag] + this_min = requirements['minNonDefSubgroups'] + if this_min is not None and this_min > min_allowed: + min_allowed = this_min + this_max = requirements['maxNonDefSubgroups'] + if this_max is not None and this_max < max_allowed: + max_allowed = this_max + if max_allowed < min_allowed and len(reserved_tags) > 1: + min_allowed = max_allowed + return min_allowed, max_allowed + + def get_def_information(self, group, reserved_tags): + requires_defs = [tag for tag in reserved_tags if tag.short_base_tag in self.requires_def_tags] + defs = group.find_def_tags(recursive=False, include_groups=1) + return [requires_defs, defs] + + def get_incompatible(self, tag, reserved_tags): + """ Return the list of tags that cannot be in the same group with tag. + + Parameters: + tag (HedTag) - reserved tag to be tested. + reserved_tags (list of HedTag) - reserved tags (no duplicates) + + Returns: + list of HedTag + + """ + requirements = self.reserved_map[tag.short_base_tag] + other_allowed = requirements["otherAllowedNonDefTags"] + incompatible = [this_tag for this_tag in reserved_tags + if this_tag.short_base_tag not in other_allowed and this_tag != tag] + return incompatible + + # Additional methods for other checks should be implemented here following similar patterns. 
+ + +if __name__ == "__main__": + checker = ReservedChecker.get_instance() + print("ReservedChecker initialized successfully.") + print(checker.special_names) diff --git a/hed/validator/sidecar_validator.py b/hed/validator/sidecar_validator.py index 5bbf821f0..e0c2782ef 100644 --- a/hed/validator/sidecar_validator.py +++ b/hed/validator/sidecar_validator.py @@ -89,6 +89,7 @@ def validate(self, sidecar, extra_def_dicts=None, name=None, error_handler=None) # Only do full string checks on full columns, not partial ref columns. if not is_ref_column: + # TODO: Figure out why this pattern is giving lint errors. refs = re.findall("\{([a-z_\-0-9]+)\}", hed_string, re.IGNORECASE) refs_strings = {data.column_name: data.get_hed_strings() for data in sidecar} if "HED" not in refs_strings: diff --git a/hed/validator/specialTags.json b/hed/validator/specialTags.json deleted file mode 100644 index 7190340a5..000000000 --- a/hed/validator/specialTags.json +++ /dev/null @@ -1,121 +0,0 @@ -{ - "Definition": { - "child": true, - "requireChild": true, - "tagGroup": true, - "topLevelTagGroup": true, - "maxNumberSubgroups": 1, - "minNumberSubgroups": 0, - "ERROR_CODE": "DEFINITION_INVALID", - "subgroupTagsNotAllowed": [ - "Def", - "Def-expand", - "Event-context", - "Definition", - "Onset", - "Inset", - "Offset", - "Delay", - "Duration" - ], - "defTagRequired": false, - "otherAllowedTags": [] - }, - "Def": { - "child": true, - "tagGroup": false, - "topLevelTagGroup": false, - "maxNumberSubgroups": null, - "minNumberSubgroups": null, - "ERROR_CODE": "DEF_INVALID", - "subgroupTagsNotAllowed": [], - "defTagRequired": false, - "otherAllowedTags": null - }, - "Def-expand": { - "child": true, - "tagGroup": true, - "topLevelTagGroup": false, - "maxNumberSubgroups": 1, - "minNumberSubgroups": 0, - "ERROR_CODE": "DEF_EXPAND_INVALID", - "subgroupTagsNotAllowed": [ - "Def", - "Def-expand", - "Event-context", - "Definition", - "Onset", - "Inset", - "Offset", - "Delay", - "Duration" - ], - 
"defTagRequired": false, - "otherAllowedTags": [] - }, - "Onset": { - "child": false, - "tagGroup": true, - "topLevelTagGroup": true, - "maxNumberSubgroups": 1, - "minNumberSubgroups": 0, - "ERROR_CODE": "TEMPORAL_TAG_ERROR", - "subgroupTagsNotAllowed": ["Event-context", "Definition", "Onset", "Inset", "Offset", "Delay", "Duration"], - "defTagRequired": true, - "otherAllowedTags": [] - }, - "Inset": { - "child": false, - "tagGroup": true, - "topLevelTagGroup": true, - "maxNumberSubgroups": 1, - "minNumberSubgroups": 0, - "ERROR_CODE": "TEMPORAL_TAG_ERROR", - "subgroupTagsNotAllowed": ["Event-context", "Definition", "Onset", "Inset", "Offset", "Delay", "Duration"], - "defTagRequired": true, - "otherAllowedTags": [] - }, - "Offset": { - "child": false, - "tagGroup": true, - "topLevelTagGroup": true, - "maxNumberSubgroups": 0, - "minNumberSubgroups": 0, - "ERROR_CODE": "TEMPORAL_TAG_ERROR", - "subgroupTagsNotAllowed": [], - "defTagRequired": true, - "otherAllowedTags": [] - }, - "Delay": { - "child": true, - "tagGroup": true, - "topLevelTagGroup": true, - "maxNumberSubgroups": 1, - "minNumberSubgroups": 1, - "ERROR_CODE": "TEMPORAL_TAG_ERROR", - "subgroupTagsNotAllowed": ["Event-context", "Definition", "Onset", "Inset", "Offset", "Delay", "Duration"], - "defTagRequired": false, - "otherAllowedTags": ["Duration"] - }, - "Duration": { - "child": true, - "topLevelTagGroup": true, - "maxNumberSubgroups": 1, - "minNumberSubgroups": 1, - "ERROR_CODE": "TEMPORAL_TAG_ERROR", - "subgroupTagsNotAllowed": ["Event-context", "Definition", "Onset", "Inset", "Offset", "Delay", "Duration"], - "defTagRequired": false, - "otherAllowedTags": ["Delay"] - }, - "Event-context": { - "child": false, - "tagGroup": true, - "topLevelTagGroup": true, - "maxNumberSubgroups": null, - "minNumberSubgroups": 0, - "ERROR_CODE": "TAG_GROUP_ERROR", - "subgroupTagsNotAllowed": ["Event-context", "Definition", "Onset", "Inset", "Offset", "Delay", "Duration"], - "defTagRequired": false, - 
"otherAllowedTags": [] - } -} diff --git a/hed/validator/spreadsheet_validator.py b/hed/validator/spreadsheet_validator.py index 9fd47443c..b219a11b4 100644 --- a/hed/validator/spreadsheet_validator.py +++ b/hed/validator/spreadsheet_validator.py @@ -1,6 +1,7 @@ """ Validates spreadsheet tabular data. """ import copy import pandas as pd +import math from hed.models.base_input import BaseInput from hed.errors.error_types import ColumnErrors, ErrorContext, ValidationErrors from hed.errors.error_reporter import ErrorHandler @@ -16,6 +17,8 @@ class SpreadsheetValidator: + ONSET_TOLERANCE = 10-7 + def __init__(self, hed_schema): """ Constructor for the SpreadsheetValidator class. @@ -79,6 +82,7 @@ def validate(self, data, def_dicts=None, name=None, error_handler=None): issues += self._run_checks(df, error_handler=error_handler, row_adj=row_adj, onset_mask=onset_mask) if self._onset_validator: issues += self._run_onset_checks(onsets, error_handler=error_handler, row_adj=row_adj) + issues += self._recheck_duplicates(onsets, error_handler=error_handler, row_adj=row_adj) error_handler.pop_error_context() issues = sort_issues(issues) @@ -118,6 +122,7 @@ def _run_checks(self, hed_df, error_handler, row_adj, onset_mask=None): error_handler.pop_error_context() # Row continue + # Continue on if not a timeline file row_string = HedString.from_hed_strings(row_strings) if row_string: @@ -149,8 +154,55 @@ def _run_onset_checks(self, onset_filtered, error_handler, row_adj): error_handler.pop_error_context() # Row return issues - def _run_onset_nan_checks(self, onsets, error_handler, row_adj): - return + def _recheck_duplicates(self, onset_filtered, error_handler, row_adj): + issues = [] + for i in range(len(onset_filtered) - 1): + current_row = onset_filtered.iloc[i] + next_row = onset_filtered.iloc[i + 1] + + # Skip if the HED column is empty or there was already an error + if not current_row["HED"] or \ + (current_row["original_index"] in self.invalid_original_rows) or \ + (not 
self._is_within_tolerance(next_row["onset"], current_row["onset"])): + continue + + # At least two rows have been merged with their onsets recognized as the same. + error_handler.push_error_context(ErrorContext.ROW, current_row.original_index + row_adj) + row_string = HedString(current_row.HED, self._schema, self._hed_validator._def_validator) + error_handler.push_error_context(ErrorContext.HED_STRING, row_string) + new_column_issues = self._hed_validator.run_full_string_checks(row_string) + error_handler.add_context_and_filter(new_column_issues) + error_handler.pop_error_context() # HedString + issues += new_column_issues + error_handler.pop_error_context() # Row + + return issues + + def _is_within_tolerance(self, onset1, onset2): + """ + Checks if two onset strings are within the specified tolerance. + + Parameters: + onset1 (str): The first onset value as a string. + onset2 (str): The second onset value as a string. + + Returns: + bool: True if the values are within tolerance and valid, False otherwise. 
+ """ + try: + # Convert to floats + onset1 = float(onset1) + onset2 = float(onset2) + + # Check if both values are finite + if not (math.isfinite(onset1) and math.isfinite(onset2)): + return False + + # Check if the difference is within tolerance + return abs(onset1 - onset2) <= self.ONSET_TOLERANCE + except ValueError: + # Return False if either value is not convertible to a float + return False def _validate_column_structure(self, base_input, error_handler, row_adj): """ diff --git a/hed/validator/util/char_util.py b/hed/validator/util/char_util.py index 7e1d90555..8966ff4ee 100644 --- a/hed/validator/util/char_util.py +++ b/hed/validator/util/char_util.py @@ -6,7 +6,7 @@ from hed.errors.error_reporter import ErrorHandler from hed.errors.error_types import ValidationErrors -CLASS_REX_FILENAME = 'class_regex.json' +CLASS_REX_FILENAME = '../data/class_regex.json' class CharValidator: @@ -172,13 +172,13 @@ def __init__(self, modern_allowed_char_rules=False): super().__init__(modern_allowed_char_rules) self._rex_dict = self._get_rex_dict() - def get_problem_chars(self, input_string, class_name): + def get_problem_chars(self, in_str, cname): # List to store problem indices and characters bad_indices = [] # Retrieve the allowed character classes for the given class_name - allowed_classes = self._rex_dict["class_chars"].get(class_name, []) + allowed_classes = self._rex_dict["class_chars"].get(cname, []) if not allowed_classes: return bad_indices # Combine the corresponding regular expressions from the char_regex section @@ -191,19 +191,19 @@ def get_problem_chars(self, input_string, class_name): compiled_regex = re.compile(combined_regex) # Iterate through the input string, checking each character - for index, char in enumerate(input_string): + for index, char in enumerate(in_str): # If the character doesn't match the combined regex, it's a problem if not compiled_regex.match(char): bad_indices.append((index, char)) return bad_indices - def is_valid_value(self, 
input_string, class_name): + def is_valid_value(self, in_string, cname): # Retrieve the allowed character classes for the given class_name - class_regex = self._rex_dict["class_words"].get(class_name, []) + class_regex = self._rex_dict["class_words"].get(cname, []) if not class_regex: return True - match = re.match(class_regex, input_string) + match = re.match(class_regex, in_string) match = match if match else False return match diff --git a/hed/validator/util/class_util.py b/hed/validator/util/class_util.py index 310f63c9e..26558b443 100644 --- a/hed/validator/util/class_util.py +++ b/hed/validator/util/class_util.py @@ -45,8 +45,7 @@ def _get_default_value_class_validators(self): return validator_dict - def check_tag_unit_class_units_are_valid(self, original_tag, validate_text, report_as=None, error_code=None, - index_offset=0): + def check_tag_unit_class_units_are_valid(self, original_tag, validate_text, report_as=None, error_code=None): """ Report incorrect unit class or units. Parameters: @@ -54,24 +53,23 @@ def check_tag_unit_class_units_are_valid(self, original_tag, validate_text, repo validate_text (str): The text to validate. report_as (HedTag): Report errors as coming from this tag, rather than original_tag. error_code (str): Override error codes. - index_offset (int): Offset into the extension validate_text starts at. Returns: list: Validation issues. Each issue is a dictionary. """ validation_issues = [] if original_tag.is_unit_class_tag(): - stripped_value, unit = original_tag.get_stripped_unit_value(validate_text) - # that are prefixes like $. Right now those are marked as unit invalid AND value_invalid. 
- bad_units = " " in stripped_value - if bad_units: - stripped_value = stripped_value.split(" ")[0] + # Check the units first + stripped_value, units = original_tag.get_stripped_unit_value(validate_text) + if not stripped_value: + validation_issues += self._report_bad_units(original_tag, report_as) + return validation_issues - validation_issues += self._check_value_class(original_tag, stripped_value, report_as, error_code, - index_offset) - if not unit: - validation_issues += self._check_units(original_tag, bad_units, report_as) + # Check the value classes + validation_issues += self._check_value_class(original_tag, stripped_value, report_as) + if validation_issues: + return validation_issues # We don't want to give this overall error twice if error_code and validation_issues and not any(error_code == issue['code'] for issue in validation_issues): @@ -81,56 +79,18 @@ def check_tag_unit_class_units_are_valid(self, original_tag, validate_text, repo return validation_issues - def check_tag_value_class_valid(self, original_tag, validate_text, report_as=None, error_code=None, - index_offset=0): + def check_tag_value_class_valid(self, original_tag, validate_text, report_as=None): """ Report an invalid value portion. Parameters: original_tag (HedTag): The original tag that is used to report the error. validate_text (str): The text to validate. report_as (HedTag): Report errors as coming from this tag, rather than original_tag. - error_code (str): Override error codes. - index_offset(int): Offset into the extension validate_text starts at. Returns: list: Validation issues. """ - return self._check_value_class(original_tag, validate_text, report_as, error_code, index_offset) - - # def _get_tag_problem_indexes(self, original_tag, stripped_value, validation=True): - # """ Return list of problem indices for error messages. - # - # Parameters: - # original_tag (HedTag): The original tag that is used to report the error. 
- # stripped_value (str): value without units - # validation (bool): - # - # Returns: - # list: List of int locations in which error occurred. - # """ - # indexes = [] - # # Extra +1 for the slash - # start_index = original_tag.extension.find(stripped_value) + len(original_tag.org_base_tag) + 1 - # if start_index == -1: - # return indexes - # - # if not validation: - # return schema_validation_util_deprecated._get_disallowed_character_indexes(stripped_value, start_index) - # - # return self._get_problem_indices(stripped_value, "nameClass", start_index=start_index) - # classes = list(original_tag.value_classes.keys()) - # problems = {} - # okay_count = len(classes) - # for class_name in classes: - # indices = self._char_validator.get_problem_chars(stripped_value, class_name) - # if indices: - # problems[class_name] = [(char, index + start_index) for index, char in indices] - # else: - # okay_count -= 1 - # if okay_count: # At least one value class has an issue - # return problems - # else: - # return {} + return self._check_value_class(original_tag, validate_text, report_as) def _get_problem_indices(self, stripped_value, class_name, start_index=0): indices = self._char_validator.get_problem_chars(stripped_value, class_name) @@ -140,39 +100,13 @@ def _get_problem_indices(self, stripped_value, class_name, start_index=0): # value_classes = original_tag.value_classes.values() # allowed_characters = schema_validation_util.get_allowed_characters(original_tag.value_classes.values()) - # return schema_validation_util.get_problem_indexes(stripped_value, allowed_characters, index_adj=start_index) - - # @staticmethod - # def get_problem_index(validation_string, reg_ex, index_adj=0): - # """Finds indexes with values not in character set - # - # Parameters: - # validation_string(str): The string to check characters in - # character_set(set): the list of valid characters(or the value "nonascii" as a set entry) - # index_adj(int): the value to adjust the reported indices by, if 
this isn't the start of a string. - # - # Returns: - # index_list(tuple of (str, int)): The list of problematic characters and indices - # """ - # invalid_positions = [] - # - # # Iterate over the string, check each character - # for i, char in enumerate(validation_string): - # if not re.match(reg_ex, char): - # # If the character does not match, record its position and value - # invalid_positions.append((i, char)) - # - # return invalid_positions - - def _check_value_class(self, original_tag, stripped_value, report_as, error_code=None, index_offset=0): + def _check_value_class(self, original_tag, stripped_value, report_as): """ Return any issues found if this is a value tag, Parameters: original_tag (HedTag): The original tag that is used to report the error. stripped_value (str): value without units report_as (HedTag): Report as this tag. - error_code(str): The code to override the error as. Again mostly for def/def-expand tags. - index_offset(int): Offset into the extension validate_text starts at. Returns: list: List of dictionaries of validation issues. 
@@ -197,12 +131,12 @@ def _check_value_class(self, original_tag, stripped_value, report_as, error_code char_errors[class_name] = self._get_problem_indices(stripped_value, class_name, start_index=start_index) if class_valid[class_name] and not char_errors[class_name]: # We have found a valid class return [] - index_adj = len(report_as.org_base_tag) - len(original_tag.org_base_tag) - validation_issues = self.report_value_errors(char_errors, class_valid, report_as, index_adj) + + validation_issues = self.report_value_errors(char_errors, class_valid, report_as) return validation_issues @staticmethod - def report_value_errors(error_dict, class_valid, report_as, index_adj): + def report_value_errors(error_dict, class_valid, report_as): validation_issues = [] for class_name, errors in error_dict.items(): if not errors and class_valid[class_name]: @@ -212,72 +146,28 @@ def report_value_errors(error_dict, class_valid, report_as, index_adj): index_in_tag=0, index_in_tag_end=len(report_as.org_tag), value_class=class_name, tag=report_as) elif errors: - validation_issues.extend(UnitValueValidator.report_value_char_errors(class_name, errors, - report_as, index_adj)) + validation_issues.extend(UnitValueValidator.report_value_char_errors(class_name, errors, report_as)) return validation_issues @staticmethod - def report_value_char_errors(class_name, errors, report_as, index_adj): + def report_value_char_errors(class_name, errors, report_as): validation_issues = [] for value in errors: - index = value[1] + index_adj if value[0] in "{}": validation_issues += ErrorHandler.format_error(ValidationErrors.CURLY_BRACE_UNSUPPORTED_HERE, - tag=report_as, index_in_tag=index, - index_in_tag_end=index + 1) + tag=report_as, problem_tag=value[0]) else: validation_issues += ErrorHandler.format_error(ValidationErrors.INVALID_VALUE_CLASS_CHARACTER, - value_class=class_name, tag=report_as, - index_in_tag=index, index_in_tag_end=index + 1) + tag=report_as, value_class=class_name, + 
problem_tag=value[0]) return validation_issues - # @staticmethod - # def report_class_errors(error_dict, report_as): - # validation_issues = [] - # for class_name, errors in error_dict.items(): - - # def _check_value_class(self, original_tag, stripped_value, report_as, error_code=None, index_offset=0): - - # """ Return any issues found if this is a value tag, - # - # Parameters: - # original_tag (HedTag): The original tag that is used to report the error. - # stripped_value (str): value without units - # report_as (HedTag): Report as this tag. - # error_code(str): The code to override the error as. Again mostly for def/def-expand tags. - # index_offset(int): Offset into the extension validate_text starts at. - # - # Returns: - # list: List of dictionaries of validation issues. - # - # """ - # - # validation_issues = [] - # if original_tag.is_takes_value_tag(): - # report_as = report_as if report_as else original_tag - # problem_indexes = self._get_tag_problem_indexes(original_tag, stripped_value, self._validate_characters) - # for char, index in problem_indexes: - # tag_code = ValidationErrors.CURLY_BRACE_UNSUPPORTED_HERE if ( - # char in "{}") else ValidationErrors.INVALID_TAG_CHARACTER - # - # index_adj = len(report_as.org_base_tag) - len(original_tag.org_base_tag) - # index += index_adj + index_offset - # validation_issues += ErrorHandler.format_error(tag_code, - # tag=report_as, index_in_tag=index, - # index_in_tag_end=index + 1) - # if not self._validate_value_class_portion(original_tag, stripped_value): - # validation_issues += ErrorHandler.format_error(ValidationErrors.VALUE_INVALID, report_as) - # if error_code: - # validation_issues += ErrorHandler.format_error(ValidationErrors.VALUE_INVALID, - # report_as, actual_error=error_code) - # return validation_issues @staticmethod - def _check_units(original_tag, bad_units, report_as): - """Returns an issue noting this is either bad units, or missing units + def _report_bad_units(original_tag, report_as): + 
"""Returns an issue noting this is bad units Parameters: original_tag (HedTag): The original tag that is used to report the error. - bad_units (bool): Tag has units so check --- otherwise validate with default units. report_as (HedTag): Report as this tag. Returns: @@ -285,15 +175,8 @@ def _check_units(original_tag, bad_units, report_as): """ report_as = report_as if report_as else original_tag - if bad_units: - tag_unit_class_units = original_tag.get_tag_unit_class_units() - validation_issue = ErrorHandler.format_error(ValidationErrors.UNITS_INVALID, - tag=report_as, units=tag_unit_class_units) - else: - default_unit = original_tag.default_unit - validation_issue = ErrorHandler.format_error(ValidationErrors.UNITS_MISSING, - tag=report_as, default_unit=default_unit) - return validation_issue + tag_unit_class_units = original_tag.get_tag_unit_class_units() + return ErrorHandler.format_error(ValidationErrors.UNITS_INVALID, tag=report_as, units=tag_unit_class_units) def _validate_value_class_portion(self, original_tag, portion_to_validate): if portion_to_validate is None: diff --git a/hed/validator/util/dup_util.py b/hed/validator/util/dup_util.py new file mode 100644 index 000000000..e292eb77a --- /dev/null +++ b/hed/validator/util/dup_util.py @@ -0,0 +1,76 @@ +from hed.errors.error_reporter import ErrorHandler +from hed.models.hed_tag import HedTag +from hed.errors.error_types import ValidationErrors + + +class DuplicateChecker: + + def __init__(self): + """ Checker for duplications in HED groups. + + Notes: + This checker has an early out strategy -- it returns when it finds an error. + + """ + self.issues = [] + + def check_for_duplicates(self, group): + """ Find duplicates in a HED group and return the errors found. + + Parameters: + group (HedGroup): The HED group to be checked. + + Returns: + list: List of validation issues -- which might be empty if no duplicates detected. 
+ + + """ + self.issues = [] + self._get_recursive_hash(group) + return self.issues + + def get_hash(self, group): + """ Return the unique hash for the group as long as no duplicates. + + Parameters: + group (HedGroup): The HED group to be checked. + + Returns: + int or None: Unique hash or None if duplicates were detected within the group. + + """ + self.issues = [] + duplication_hash = self._get_recursive_hash(group) + return duplication_hash + + def _get_recursive_hash(self, group): + + if len(self.issues) > 0: + return None + group_hashes = set() + for child in group.children: + if isinstance(child, HedTag): + this_hash = hash(child) + else: + this_hash = self._get_recursive_hash(child) + if len(self.issues) > 0 or this_hash is None: + return None + if this_hash in group_hashes: + self.issues += self._get_duplication_error(child) + return None + group_hashes.add(this_hash) + return hash(frozenset(group_hashes)) + + @staticmethod + def _get_duplication_error(child): + if isinstance(child, HedTag): + return ErrorHandler.format_error(ValidationErrors.HED_TAG_REPEATED, child) + else: + found_group = child + base_steps_up = 0 + while isinstance(found_group, list): + found_group = found_group[0] + base_steps_up += 1 + for _ in range(base_steps_up): + found_group = found_group._parent + return ErrorHandler.format_error(ValidationErrors.HED_TAG_REPEATED_GROUP, found_group) diff --git a/hed/validator/util/group_util.py b/hed/validator/util/group_util.py index 87c1ccd84..fce381c2c 100644 --- a/hed/validator/util/group_util.py +++ b/hed/validator/util/group_util.py @@ -1,10 +1,11 @@ """ Validation of the HED tags as strings. 
""" - from hed.errors.error_reporter import ErrorHandler from hed.models.model_constants import DefTagNames from hed.schema.hed_schema_constants import HedKey from hed.models.hed_tag import HedTag from hed.errors.error_types import ValidationErrors, TemporalErrors +from hed.validator.reserved_checker import ReservedChecker +from hed.validator.util.dup_util import DuplicateChecker class GroupValidator: @@ -21,6 +22,8 @@ def __init__(self, hed_schema): if hed_schema is None: raise ValueError("HedSchema required for validation") self._hed_schema = hed_schema + self._reserved_checker = ReservedChecker.get_instance() + self._duplicate_checker = DuplicateChecker() def run_tag_level_validators(self, hed_string_obj): """ Report invalid groups at each level. @@ -34,17 +37,19 @@ def run_tag_level_validators(self, hed_string_obj): Notes: - This pertains to the top-level, all groups, and nested groups. """ - validation_issues = [] - for original_tag_group, is_top_level in hed_string_obj.get_all_groups(also_return_depth=True): - is_group = original_tag_group.is_group - if not original_tag_group and is_group: - validation_issues += ErrorHandler.format_error(ValidationErrors.HED_GROUP_EMPTY, - tag=original_tag_group) - validation_issues += self.check_tag_level_issue(original_tag_group.tags(), is_top_level, is_group) - validation_issues += self._check_for_duplicate_groups(hed_string_obj) - validation_issues += self.validate_duration_tags(hed_string_obj) - return validation_issues + checks = [ + self._check_group_relationships, + self._duplicate_checker.check_for_duplicates, + # self.validate_duration_tags, + ] + + for check in checks: + issues = check(hed_string_obj) # Call each function with `hed_string` + if issues: + return issues + + return [] # Return an empty list if no issues are found def run_all_tags_validators(self, hed_string_obj): """ Report invalid the multi-tag properties in a HED string, e.g. required tags. 
@@ -64,63 +69,132 @@ def run_all_tags_validators(self, hed_string_obj): # Mostly internal functions to check individual types of errors # =========================================================================+ + def _check_group_relationships(self, hed_string_obj): + """ Check the group relationships + + Parameters: + hed_string_obj (HedString): A HedString object. + + Returns: + list: Issues associated with each level in the HED string. Each issue is a dictionary. + + Notes: + - This pertains to the top-level, all groups, and nested groups. + """ + + for original_tag_group, is_top_level in hed_string_obj.get_all_groups(also_return_depth=True): + is_group = original_tag_group.is_group + + # Check for empty group anywhere this is fatal + if not original_tag_group and is_group: + return ErrorHandler.format_error(ValidationErrors.HED_GROUP_EMPTY, tag=original_tag_group) + + # If a tag should be in a group. If not at the top level, a fatal error occurs. + validation_issues = self.check_tag_level_issue(original_tag_group.tags(), is_top_level, is_group) + if len(validation_issues) > 0: + return validation_issues + + # If the reserved group requirements are not met, this is a fatal error. + validation_issues = self._check_reserved_group_requirements(original_tag_group) + if len(validation_issues) > 0: + return validation_issues + + return [] + + def _check_reserved_group_requirements(self, group): + """ This is called if group is top-level. + + Parameters: + group (HedGroup) - the HED group to test for special tags. + + Returns: + list: Validation issues. Each issue is a dictionary. 
+ """ + reserved_tags = self._reserved_checker.get_reserved(group) + if len(reserved_tags) == 0: + return [] + + # Check for compatibility of the reserved tags within this string + validation_issues = self._reserved_checker.check_reserved_compatibility(group, reserved_tags) + if len(validation_issues) > 0: + return validation_issues + + # Check for requires Def tags + validation_issues += self._reserved_checker.check_tag_requirements(group, reserved_tags) + if len(validation_issues) > 0: + return validation_issues + + # validation_errors = self._reserved_checker.check_reserved_duplicates(reserved_tags, group) + return validation_issues + @staticmethod def check_tag_level_issue(original_tag_list, is_top_level, is_group): """ Report tags incorrectly positioned in hierarchy. - Top-level groups can contain definitions, Onset, etc. tags. - Parameters: - original_tag_list (list): HedTags containing the original tags. + original_tag_list (list of HedTag): HedTags containing the original tags. is_top_level (bool): If True, this group is a "top level tag group". is_group (bool): If True group should be contained by parenthesis. Returns: list: Validation issues. Each issue is a dictionary. """ + validation_issues = GroupValidator._check_group_tag_attribute(original_tag_list, is_group) + if len(validation_issues) > 0: + return validation_issues + + top_level_tags = [tag for tag in original_tag_list if tag.base_tag_has_attribute(HedKey.TopLevelTagGroup)] + if not is_top_level: + validation_issues += GroupValidator._check_no_top_tags(top_level_tags) + return validation_issues + + @staticmethod + def _check_no_top_tags(tag_list): + """ Check there are no tags with the top level tag group attribute are in this list. + + Parameters: + tag_list (list of HedTag): List of HedTags in the group + + Returns: + list: Validation issues. Each issue is a dictionary. 
+ + """ validation_issues = [] - top_level_tags = [tag for tag in original_tag_list if - tag.base_tag_has_attribute(HedKey.TopLevelTagGroup)] - tag_group_tags = [tag for tag in original_tag_list if - tag.base_tag_has_attribute(HedKey.TagGroup)] - for tag_group_tag in tag_group_tags: - if not is_group: - validation_issues += ErrorHandler.format_error(ValidationErrors.HED_TAG_GROUP_TAG, - tag=tag_group_tag) - for top_level_tag in top_level_tags: - if not is_top_level: - actual_code = None - if top_level_tag.short_base_tag == DefTagNames.DEFINITION_KEY: - actual_code = ValidationErrors.DEFINITION_INVALID - elif top_level_tag.short_base_tag in DefTagNames.ALL_TIME_KEYS: - actual_code = ValidationErrors.TEMPORAL_TAG_ERROR # May split this out if we switch error - - if actual_code: - validation_issues += ErrorHandler.format_error(ValidationErrors.HED_TOP_LEVEL_TAG, - tag=top_level_tag, - actual_error=actual_code) + for top_level_tag in tag_list: + actual_code = None + if top_level_tag.short_base_tag == DefTagNames.DEFINITION_KEY: + actual_code = ValidationErrors.DEFINITION_INVALID + elif top_level_tag.short_base_tag in DefTagNames.ALL_TIME_KEYS: + actual_code = ValidationErrors.TEMPORAL_TAG_ERROR # May split this out if we switch error + + if actual_code: + validation_issues += ErrorHandler.format_error(ValidationErrors.HED_TOP_LEVEL_TAG, + tag=top_level_tag, + actual_error=actual_code) + else: validation_issues += ErrorHandler.format_error(ValidationErrors.HED_TOP_LEVEL_TAG, tag=top_level_tag) + return validation_issues - if is_top_level and len(top_level_tags) > 1: - validation_issue = False - short_tags = {tag.short_base_tag for tag in top_level_tags} - # Verify there's no duplicates, and that if there's two tags they are a delay and temporal tag. 
- if len(short_tags) != len(top_level_tags): - validation_issue = True - elif DefTagNames.DELAY_KEY not in short_tags or len(short_tags) != 2: - validation_issue = True - else: - short_tags.remove(DefTagNames.DELAY_KEY) - other_tag = next(iter(short_tags)) - if other_tag not in DefTagNames.ALL_TIME_KEYS: - validation_issue = True + @staticmethod + def _check_group_tag_attribute(tag_list, is_group): + """ Check that any tags in a list are in a group if they have tag-group attribute. - if validation_issue: - validation_issues += ErrorHandler.format_error(ValidationErrors.HED_MULTIPLE_TOP_TAGS, - tag=top_level_tags[0], - multiple_tags=top_level_tags[1:]) + Parameters: + tag_list (list of HedTag): List of HedTags in the group + is_group (boolean): True if the tags in tag_list are in parentheses at some level. + + Returns: + list: Validation issues. Each issue is a dictionary. + TODO: Incorporate the + """ + validation_issues = [] + tag_group_tags = [tag for tag in tag_list if tag.base_tag_has_attribute(HedKey.TagGroup)] + for tag_group_tag in tag_group_tags: + if not is_group: + validation_issues += ErrorHandler.format_error(ValidationErrors.HED_TAG_GROUP_TAG, + tag=tag_group_tag) return validation_issues def check_for_required_tags(self, tags): @@ -167,7 +241,7 @@ def validate_duration_tags(hed_string_obj): """ Validate Duration/Delay tag groups Parameters: - hed_string_obj (HedString): The hed string to check. + hed_string_obj (HedString): The HED string to check. 
Returns: list: Issues found in validating durations (i.e., extra tags or groups present, or a group missing) @@ -210,30 +284,3 @@ def _validate_tags_in_hed_string(self, tags): validation_issues += self.check_for_required_tags(tags) validation_issues += self.check_multiple_unique_tags_exist(tags) return validation_issues - - def _check_for_duplicate_groups_recursive(self, sorted_group, validation_issues): - prev_child = None - for child in sorted_group: - if child == prev_child: - if isinstance(child, HedTag): - error_code = ValidationErrors.HED_TAG_REPEATED - validation_issues += ErrorHandler.format_error(error_code, child) - else: - error_code = ValidationErrors.HED_TAG_REPEATED_GROUP - found_group = child - base_steps_up = 0 - while isinstance(found_group, list): - found_group = found_group[0] - base_steps_up += 1 - for _ in range(base_steps_up): - found_group = found_group._parent - validation_issues += ErrorHandler.format_error(error_code, found_group) - if not isinstance(child, HedTag): - self._check_for_duplicate_groups_recursive(child, validation_issues) - prev_child = child - - def _check_for_duplicate_groups(self, original_group): - sorted_group = original_group._sorted() - validation_issues = [] - self._check_for_duplicate_groups_recursive(sorted_group, validation_issues) - return validation_issues diff --git a/hed/validator/util/string_util.py b/hed/validator/util/string_util.py index d5639807f..c4aa19732 100644 --- a/hed/validator/util/string_util.py +++ b/hed/validator/util/string_util.py @@ -21,7 +21,7 @@ def check_count_tag_group_parentheses(hed_string): """ Report unmatched parentheses. Parameters: - hed_string (str): A hed string. + hed_string (str): A HED string. Returns: list: A list of validation list. Each issue is a dictionary. @@ -39,7 +39,7 @@ def check_delimiter_issues_in_hed_string(self, hed_string): """ Report missing commas or commas in value tags. Parameters: - hed_string (str): A hed string. + hed_string (str): A HED string. 
Returns: list: A validation issues list. Each issue is a dictionary. diff --git a/spec_tests/test_errors.py b/spec_tests/test_errors.py index e0b5aee25..0cc871615 100644 --- a/spec_tests/test_errors.py +++ b/spec_tests/test_errors.py @@ -21,6 +21,8 @@ # "character-invalid-non-printing appears": "Need to recheck how this is verified for textClass", "invalid-character-name-value-class-deprecated": "Removing support for 8.2.0 or earlier name classes" } +runAll = True +runOnly = {} class MyTestCase(unittest.TestCase): @@ -28,6 +30,7 @@ class MyTestCase(unittest.TestCase): def setUpClass(cls): test_dir = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'hed-specification/tests/json_tests')) + cls.test_dir = test_dir cls.test_files = [os.path.join(test_dir, f) for f in os.listdir(test_dir) if os.path.isfile(os.path.join(test_dir, f))] cls.fail_count = [] @@ -38,7 +41,7 @@ def setUpClass(cls): def tearDownClass(cls): pass - def run_single_test(self, test_file): + def run_single_test(self, test_file, test_name=None, test_type=None): with open(test_file, "r") as fp: test_info = json.load(fp) for info in test_info: @@ -51,9 +54,9 @@ def run_single_test(self, test_file): if name in skip_tests: print(f"Skipping {name} test because: {skip_tests[name]}") continue - - # if name != "library-invalid-bad_with-standard-version": - # continue + if test_name is not None and name != test_name: + print(f"Skipping {name} test because it is not the one specified") + continue description = info['description'] schema = info['schema'] check_for_warnings = info.get("warning", False) @@ -76,6 +79,8 @@ def run_single_test(self, test_file): else: def_dict = DefinitionDict() for section_name, section in info["tests"].items(): + if test_type is not None and test_type != section_name: + continue if section_name == "string_tests": self._run_single_string_test(section, schema, def_dict, error_code, all_codes, description, name, error_handler) @@ -128,7 +133,6 @@ def 
_run_single_string_test(self, info, schema, def_dict, error_code, all_codes, def _run_single_sidecar_test(self, info, schema, def_dict, error_code, all_codes, description, name, error_handler): for result, tests in info.items(): for test in tests: - # print(f"{error_code}: {name}") buffer = io.BytesIO(json.dumps(test).encode("utf-8")) sidecar = Sidecar(buffer) issues = sidecar.validate(hed_schema=schema, extra_def_dicts=def_dict, error_handler=error_handler) @@ -208,13 +212,17 @@ def _run_single_schema_test(self, info, error_code, all_codes, description, name def test_errors(self): for test_file in self.test_files: self.run_single_test(test_file) - # test_file = './temp.json' - # self.run_single_test(test_file) + print(f"{len(self.fail_count)} tests got an unexpected result") print("\n".join(self.fail_count)) self.assertEqual(len(self.fail_count), 0) + # def test_debug(self): + # test_file = os.path.realpath('./temp6.json') + # test_name = None + # test_type = None + # self.run_single_test(test_file, test_name, test_type) -if __name__ == '__main__': +if __name__ == '__main__': unittest.main() diff --git a/tests/data/spreadsheet_validator_tests/ExcelMultipleSheets1.tsv b/tests/data/spreadsheet_validator_tests/ExcelMultipleSheets1.tsv new file mode 100644 index 000000000..c0f368fa7 --- /dev/null +++ b/tests/data/spreadsheet_validator_tests/ExcelMultipleSheets1.tsv @@ -0,0 +1,5 @@ +Event code Short label Long name Description in text HED tags +251 PerturbLeft PerturbCarToLeft Vehicle undergoes a perturbation to left. "Experiment-control, Experimental-stimulus, (Controller-agent, (Operate, Car, (Turn, Leftward)))" +252 PerturbRight PerturbCarToRight Vehicle undergoes a perturbation to right. "Experiment-control, Experimental-stimulus, (Controller-agent, (Operate, Car, (Turn, Rightward)))" +253 ResponseOnset DriverStartsToCorrect Subject starts to respond to perturbation by steering vehicle back to center of the lane. 
"Agent-action, Participant-response, Correction, ((Human-agent, Experiment-participant), (Modify, (Car,Angle)))" +254 ResponseOffset DriverStopsCorrecting "Subject completes response to perturbation having steered the vehicle back to the center of the lane. Normally this would be tagged with temporal scope, but avoiding definitions here." "Agent-action, Participant-response, (Halt, Correction)" diff --git a/tests/models/test_definition_dict.py b/tests/models/test_definition_dict.py index 10130594a..f0b0f9de8 100644 --- a/tests/models/test_definition_dict.py +++ b/tests/models/test_definition_dict.py @@ -20,9 +20,6 @@ def check_def_base(self, test_strings, expected_issues): expected_params = expected_issues[test_key] expected_issue = self.format_errors_fully(ErrorHandler(), hed_string=hed_string_obj, params=expected_params) - # print(test_key) - # print(test_issues) - # print(expected_issue) self.assertCountEqual(test_issues, expected_issue, HedString(test_strings[test_key], self.hed_schema)) diff --git a/tests/models/test_df_util.py b/tests/models/test_df_util.py index 247a073b4..d682ad597 100644 --- a/tests/models/test_df_util.py +++ b/tests/models/test_df_util.py @@ -178,11 +178,11 @@ def test_basic_expand_detection(self): # all simple cases with no duplicates test_strings = [ "(Def-expand/A1/1, (Action/1, Acceleration/5, Item-count/3))", - "(Def-expand/A1/2, (Action/2, Acceleration/5, Item-count/3))", - "(Def-expand/B2/3, (Action/3, Collection/animals, Alert))", - "(Def-expand/B2/4, (Action/4, Collection/animals, Alert))", - "(Def-expand/C3/5, (Action/5, Joyful, Event))", - "(Def-expand/C3/6, (Action/6, Joyful, Event))" + "(Def-expand/A1/2, (Action/1, Acceleration/5, Item-count/3))", + "(Def-expand/B2/3, (Label/3, Collection/animals, Alert))", + "(Def-expand/B2/4, (Label/4, Collection/animals, Alert))", + "(Def-expand/C3/5, (Label/5, Joyful, Event))", + "(Def-expand/C3/6, (Label/6, Joyful, Event))" ] process_def_expands(test_strings, self.schema) @@ -190,26 
+190,26 @@ def test_mixed_detection(self): # Cases where you can only retroactively identify the first def-expand test_strings = [ # Basic example first just to verify - "(Def-expand/A1/1, (Action/1, Acceleration/5, Item-count/2))", - "(Def-expand/A1/2, (Action/2, Acceleration/5, Item-count/2))", + "(Def-expand/A1/1, (Label/1, Acceleration/5, Item-count/2))", + "(Def-expand/A1/2, (Label/2, Acceleration/5, Item-count/2))", # Out of order ambiguous - "(Def-expand/B2/3, (Action/3, Collection/animals, Acceleration/3))", - "(Def-expand/B2/4, (Action/4, Collection/animals, Acceleration/3))", + "(Def-expand/B2/3, (Label/3, Collection/animals, Acceleration/3))", + "(Def-expand/B2/4, (Label/4, Collection/animals, Acceleration/3))", # Multiple tags - "(Def-expand/C3/5, (Action/5, Acceleration/5, Item-count/5))", - "(Def-expand/C3/6, (Action/6, Acceleration/5, Item-count/5))", + "(Def-expand/C3/5, (Label/5, Acceleration/5, Item-count/5))", + "(Def-expand/C3/6, (Label/6, Acceleration/5, Item-count/5))", # Multiple tags2 - "(Def-expand/D4/7, (Action/7, Acceleration/7, Item-count/8))", - "(Def-expand/D4/8, (Action/8, Acceleration/7, Item-count/8))" + "(Def-expand/D4/7, (Label/7, Acceleration/7, Item-count/8))", + "(Def-expand/D4/8, (Label/8, Acceleration/7, Item-count/8))" # Multiple tags3 - "(Def-expand/D5/7, (Action/7, Acceleration/7, Item-count/8, Event))", - "(Def-expand/D5/8, (Action/8, Acceleration/7, Item-count/8, Event))" + "(Def-expand/D5/7, (Label/7, Acceleration/7, Item-count/8, Event))", + "(Def-expand/D5/8, (Label/8, Acceleration/7, Item-count/8, Event))" ] - def_dict, ambiguous_defs, _ = process_def_expands(test_strings, self.schema) + def_dict, ambiguous_defs, errors = process_def_expands(test_strings, self.schema) self.assertEqual(len(def_dict), 5) - def test_ambiguous_defs(self): - # Cases that can't be identified + def test_error_double_defs(self): + # One case can't be identified. Action doesn't count -- it doesn't take value. 
test_strings = [ "(Def-expand/A1/2, (Action/2, Acceleration/5, Item-count/2))", "(Def-expand/B2/3, (Action/3, Collection/animals, Acceleration/3))", @@ -217,6 +217,20 @@ def test_ambiguous_defs(self): "(Def-expand/D4/7, (Action/7, Acceleration/7, Item-count/8))", "(Def-expand/D5/7, (Action/7, Acceleration/7, Item-count/8, Event))", ] + def_dict, ambiguous_defs, errors = process_def_expands(test_strings, self.schema) + self.assertEqual(len(ambiguous_defs), 1) + self.assertEqual(len(def_dict), 4) + self.assertEqual(len(errors), 0) + + def test_ambiguous_defs(self): + # Cases that can't be identified + test_strings = [ + "(Def-expand/A1/2, (Label/2, Acceleration/5, Item-count/2))", + "(Def-expand/B2/3, (Label/3, Collection/animals, Acceleration/3))", + "(Def-expand/C3/5, (Label/5, Acceleration/5, Item-count/5))", + "(Def-expand/D4/7, (Label/7, Acceleration/7, Item-count/8))", + "(Def-expand/D5/7, (Label/7, Acceleration/7, Item-count/8, Event))", + ] _, ambiguous_defs, _ = process_def_expands(test_strings, self.schema) self.assertEqual(len(ambiguous_defs), 5) @@ -237,24 +251,35 @@ def test_ambiguous_conflicting_defs(self): def test_errors(self): # Basic recognition of conflicting errors test_strings = [ - "(Def-expand/A1/1, (Action/1, Age/5, Item-count/2))", - "(Def-expand/A1/2, (Action/2, Age/5, Item-count/2))", - "(Def-expand/A1/3, (Action/3, Age/5, Item-count/3))", + "(Def-expand/A1/1, (Acceleration/1, Age/5, Item-count/2))", + "(Def-expand/A1/2, (Acceleration/2, Age/5, Item-count/2))", + "(Def-expand/A1/3, (Acceleration/3, Age/5, Item-count/3))", ] _, _, errors = process_def_expands(test_strings, self.schema) self.assertEqual(len(errors), 1) - def test_errors_ambiguous(self): + def test_errors(self): # Verify we recognize errors when we had a def that can't be resolved. 
test_strings = [ - "(Def-expand/A1/1, (Action/1, Age/5, Item-count/1))", - "(Def-expand/A1/2, (Action/2, Age/5, Item-count/3))", - "(Def-expand/A1/3, (Action/3, Age/5, Item-count/3))", + "(Def-expand/A1/1, (Acceleration/1, Age/5, Item-count/1))", + "(Def-expand/A1/2, (Acceleration/2, Age/5, Item-count/3))", + "(Def-expand/A1/3, (Acceleration/2, Age/5, Item-count/3))", ] known, ambiguous, errors = process_def_expands(test_strings, self.schema) self.assertEqual(len(errors), 1) self.assertEqual(len(errors["a1"]), 3) + def test_errors_ambiguous(self): + # Verify we recognize errors when we had a def that can't be resolved. + test_strings = [ + "(Def-expand/A1/1, (Acceleration/1, Age/5, Item-count/1))", + "(Def-expand/A1/2, (Acceleration/2, Age/5, Item-count/2))", + "(Def-expand/A1/3, (Acceleration/3, Age/5, Item-count/3))", + ] + known, ambiguous, errors = process_def_expands(test_strings, self.schema) + self.assertEqual(len(errors), 1) + self.assertEqual(len(ambiguous), 0) + def test_errors_unresolved(self): # Verify we recognize errors when we had a def that can't be resolved. 
test_strings = [ @@ -263,54 +288,53 @@ def test_errors_unresolved(self): ] known, ambiguous, errors = process_def_expands(test_strings, self.schema) self.assertEqual(len(errors), 1) - self.assertEqual(len(errors["a1"]), 2) def test_def_expand_detection(self): test_strings = [ - "(Def-expand/A1/1, (Action/1, Acceleration/5, Item-Count/2))", - "(Def-expand/A1/2, (Action/2, Acceleration/5, Item-Count/2))", - "(Def-expand/B2/3, (Action/3, Collection/animals, Alert))", - "(Def-expand/B2/4, (Action/4, Collection/animals, Alert))", - "(Def-expand/C3/5, (Action/5, Joyful, Event))", - "(Def-expand/C3/6, (Action/6, Joyful, Event))", - "((Def-expand/A1/7, (Action/7, Acceleration/5, Item-Count/2)), Event, Acceleration/10)", - "((Def-expand/A1/8, (Action/8, Acceleration/5, Item-Count/2)), Collection/toys, Item-Count/5)", - "((Def-expand/B2/9, (Action/9, Collection/animals, Alert)), Event, Collection/plants)", - "((Def-expand/B2/10, (Action/10, Collection/animals, Alert)), Joyful, Item-Count/3)", - "((Def-expand/C3/11, (Action/11, Joyful, Event)), Collection/vehicles, Acceleration/20)", - "((Def-expand/C3/12, (Action/12, Joyful, Event)), Alert, Item-Count/8)", - "((Def-expand/A1/13, (Action/13, Acceleration/5, Item-Count/2)), " + - "(Def-expand/B2/13, (Action/13, Collection/animals, Alert)), Event)", - "((Def-expand/A1/14, (Action/14, Acceleration/5, Item-Count/2)), Joyful, " + - "(Def-expand/C3/14, (Action/14, Joyful, Event)))", - "(Def-expand/B2/15, (Action/15, Collection/animals, Alert)), (Def-expand/C3/15, " + - "(Action/15, Joyful, Event)), Acceleration/30", - "((Def-expand/A1/16, (Action/16, Acceleration/5, Item-Count/2)), " + - "(Def-expand/B2/16, (Action/16, Collection/animals, Alert)), Collection/food)", - "(Def-expand/C3/17, (Action/17, Joyful, Event)), (Def-expand/A1/17, " + - "(Action/17, Acceleration/5, Item-Count/2)), Item-Count/6", - "((Def-expand/B2/18, (Action/18, Collection/animals, Alert)), " + - "(Def-expand/C3/18, (Action/18, Joyful, Event)), Alert)", - 
"(Def-expand/D1/Apple, (Task/Apple, Collection/cars, Attribute/color))", - "(Def-expand/D1/Banana, (Task/Banana, Collection/cars, Attribute/color))", - "(Def-expand/E2/Carrot, (Collection/Carrot, Collection/plants, Attribute/type))", - "(Def-expand/E2/Dog, (Collection/Dog, Collection/plants, Attribute/type))", - "((Def-expand/D1/Elephant, (Task/Elephant, Collection/cars, Attribute/color)), " + - "(Def-expand/E2/Fox, (Collection/Fox, Collection/plants, Attribute/type)), Event)", - "((Def-expand/D1/Giraffe, (Task/Giraffe, Collection/cars, Attribute/color)), " + - "Joyful, (Def-expand/E2/Horse, (Collection/Horse, Collection/plants, Attribute/type)))", - "(Def-expand/D1/Iguana, (Task/Iguana, Collection/cars, Attribute/color)), " + - "(Def-expand/E2/Jaguar, (Collection/Jaguar, Collection/plants, Attribute/type)), Acceleration/30", - "(Def-expand/F1/Lion, (Task/Lion, Collection/boats, Attribute/length))", - "(Def-expand/F1/Monkey, (Task/Monkey, Collection/boats, Attribute/length))", - "(Def-expand/G2/Nest, (Collection/Nest, Collection/instruments, Attribute/material))", - "(Def-expand/G2/Octopus, (Collection/Octopus, Collection/instruments, Attribute/material))", - "((Def-expand/F1/Panda, (Task/Panda, Collection/boats, Attribute/length)), " + - "(Def-expand/G2/Quail, (Collection/Quail, Collection/instruments, Attribute/material)), Event)", - "((Def-expand/F1/Rabbit, (Task/Rabbit, Collection/boats, Attribute/length)), Joyful, " + - "(Def-expand/G2/Snake, (Collection/Snake, Collection/instruments, Attribute/material)))", - "(Def-expand/F1/Turtle, (Task/Turtle, Collection/boats, Attribute/length)), " + - "(Def-expand/G2/Umbrella, (Collection/Umbrella, Collection/instruments, Attribute/material))" + "(Def-expand/A1/1, (Action/1, Acceleration/5, Item-count/1))", + "(Def-expand/A1/2, (Action/1, Acceleration/5, Item-count/2))", + "(Def-expand/B2/3, (Item-count/3, Collection/animals, Alert))", + "(Def-expand/B2/4, (Item-count/4, Collection/animals, Alert))", + "(Def-expand/C3/5, 
(Item-count/5, Joyful, Event))", + "(Def-expand/C3/6, (Item-count/6, Joyful, Event))", + "((Def-expand/A1/7, (Item-count/7, Acceleration/5, Action/1)), Event, Acceleration/10)", + "((Def-expand/A1/8, (Item-count/8, Acceleration/5, Action/1)), Collection/toys, Item-Count/5)", + "((Def-expand/B2/9, (Item-count/9, Collection/animals, Alert)), Event, Collection/plants)", + "((Def-expand/B2/10, (Item-count/10, Collection/animals, Alert)), Joyful, Item-Count/3)", + "((Def-expand/C3/11, (Item-count/11, Joyful, Event)), Collection/vehicles, Acceleration/20)", + "((Def-expand/C3/12, (Item-count/12, Joyful, Event)), Alert, Item-Count/8)", + "((Def-expand/A1/13, (Item-count/13, Acceleration/5, Action/1)), " + + "(Def-expand/B2/13, (Item-count/13, Collection/animals, Alert)), Event)", + "((Def-expand/A1/14, (Item-count/14, Acceleration/5, Action/1), Joyful, " + + "(Def-expand/C3/14, (Item-count/14, Joyful, Event)))", + "(Def-expand/B2/15, (Item-count/15, Collection/animals, Alert)), (Def-expand/C3/15, " + + "(Item-count/15, Joyful, Event)), Acceleration/30", + "((Def-expand/A1/16, (Item-count/16, Acceleration/5, Action/1)), " + + "(Def-expand/B2/16, (Item-count/16, Collection/animals, Alert)), Collection/food)", + "(Def-expand/C3/17, (Item-count/17, Joyful, Event)), (Def-expand/A1/17, " + + "(Action/1, Acceleration/5, Item-Count/17)), Item-Count/6", + "((Def-expand/B2/18, (Item-count/18, Collection/animals, Alert)), " + + "(Def-expand/C3/18, (Item-count/18, Joyful, Event)), Alert)", + "(Def-expand/D1/Apple, (Task/Apple, Collection/cars, Red))", + "(Def-expand/D1/Banana, (Task/Banana, Collection/cars, Red))", + "(Def-expand/E2/Carrot, (Collection/Carrot, Collection/plants, Collection/Baloney))", + "(Def-expand/E2/Dog, (Collection/Dog, Collection/plants, Collection/Baloney))", + "((Def-expand/D1/Elephant, (Task/Elephant, Collection/cars, Red)), " + + "(Def-expand/E2/Fox, (Collection/Fox, Collection/plants, Collection/Baloney)), Event)", + "((Def-expand/D1/Giraffe, (Task/Giraffe, 
Collection/cars, Red)), " + + "Joyful, (Def-expand/E2/Horse, (Collection/Horse, Collection/plants, Collection/Baloney)))", + "(Def-expand/D1/Iguana, (Task/Iguana, Collection/cars, Red)), " + + "(Def-expand/E2/Jaguar, (Collection/Jaguar, Collection/plants, Collection/Baloney)), Acceleration/30", + "(Def-expand/F1/Lion, (Task/Lion, Collection/boats, Length/5))", + "(Def-expand/F1/Monkey, (Task/Monkey, Collection/boats, Length/5))", + "(Def-expand/G2/Nest, (Collection/Nest, Collection/instruments, Item))", + "(Def-expand/G2/Octopus, (Collection/Octopus, Collection/instruments, Item))", + "((Def-expand/F1/Panda, (Task/Panda, Collection/boats, Length/5)), " + + "(Def-expand/G2/Quail, (Collection/Quail, Collection/instruments, Item)), Event)", + "((Def-expand/F1/Rabbit, (Task/Rabbit, Collection/boats, Length/5)), Joyful, " + + "(Def-expand/G2/Snake, (Collection/Snake, Collection/instruments, Item)))", + "(Def-expand/F1/Turtle, (Task/Turtle, Collection/boats, Length/5)), " + + "(Def-expand/G2/Umbrella, (Collection/Umbrella, Collection/instruments, Item))" ] def_dict, ambiguous, errors = process_def_expands(test_strings, self.schema) diff --git a/tests/models/test_hed_tag.py b/tests/models/test_hed_tag.py index e1b62ac58..19ad3204a 100644 --- a/tests/models/test_hed_tag.py +++ b/tests/models/test_hed_tag.py @@ -122,44 +122,31 @@ def test_determine_tags_legal_units(self): self.assertEqual(no_unit_class_tag_result, []) def test_strip_off_units_from_value(self): - # todo: add this back in when we have a currency unit or make a test for one. 
- # dollars_string_no_space = HedTag('Participant/Effect/Cognitive/Reward/$25.99', schema=self.schema) - # dollars_string = HedTag('Participant/Effect/Cognitive/Reward/$ 25.99', schema=self.schema) - # dollars_string_invalid = HedTag('Participant/Effect/Cognitive/Reward/25.99$', schema=self.schema) volume_string_no_space = HedTag('Volume/100m^3', hed_schema=self.hed_schema) volume_string = HedTag('Volume/100 m^3', hed_schema=self.hed_schema) prefixed_volume_string = HedTag('Volume/100 cm^3', hed_schema=self.hed_schema) invalid_volume_string = HedTag('Volume/200 cm', hed_schema=self.hed_schema) invalid_distance_string = HedTag('Distance/200 M', hed_schema=self.hed_schema) - # currency_units = { - # 'currency':self.schema.unit_classes['currency'] - # } - volume_units = { - 'volume': self.hed_schema.unit_classes['volumeUnits'] - } - distance_units = { - 'distance': self.hed_schema.unit_classes['physicalLengthUnits'] - } - # stripped_dollars_string_no_space = dollars_string_no_space._get_tag_units_portion(currency_units) - # stripped_dollars_string = dollars_string._get_tag_units_portion(currency_units) - # stripped_dollars_string_invalid = dollars_string_invalid._get_tag_units_portion(currency_units) + volume_units = {'volume': self.hed_schema.unit_classes['volumeUnits']} + distance_units = { 'distance': self.hed_schema.unit_classes['physicalLengthUnits']} stripped_volume_string, _, _ = HedTag._get_tag_units_portion(volume_string.extension, volume_units) stripped_volume_string_no_space, _, _ = HedTag._get_tag_units_portion(volume_string_no_space.extension, volume_units) stripped_prefixed_volume_string, _, _ = HedTag._get_tag_units_portion(prefixed_volume_string.extension, volume_units) - stripped_invalid_volume_string, _, _ = HedTag._get_tag_units_portion(invalid_volume_string.extension, - volume_units) - stripped_invalid_distance_string, _, _ = HedTag._get_tag_units_portion(invalid_distance_string.extension, + stripped_invalid_volume_string, units_invalid, 
unit_entry_invalid = ( + HedTag._get_tag_units_portion(invalid_volume_string.extension, volume_units)) + stripped_invalid_distance_string, dist_invalid_units, dist_invalid_entry = HedTag._get_tag_units_portion(invalid_distance_string.extension, distance_units) - # self.assertEqual(stripped_dollars_string_no_space, None) - # self.assertEqual(stripped_dollars_string, '25.99') - # self.assertEqual(stripped_dollars_string_invalid, None) self.assertEqual(stripped_volume_string, '100') - self.assertEqual(stripped_volume_string_no_space, None) + self.assertEqual(stripped_volume_string_no_space, '100m^3') self.assertEqual(stripped_prefixed_volume_string, '100') - self.assertEqual(stripped_invalid_volume_string, None) - self.assertEqual(stripped_invalid_distance_string, None) + self.assertEqual(stripped_invalid_volume_string, '200') + self.assertEqual(units_invalid, 'cm') + self.assertEqual(unit_entry_invalid, None) + self.assertEqual(stripped_invalid_distance_string, '200') + self.assertEqual(dist_invalid_units, 'M') + self.assertEqual(dist_invalid_entry, None) def test_determine_allows_extensions(self): extension_tag1 = HedTag('boat', hed_schema=self.hed_schema) @@ -189,4 +176,4 @@ def test_get_as_default_units(self): self.assertEqual(300, tag4.value_as_default_unit()) tag5 = HedTag("IntensityTakesValue/300 cd", hed_schema=util_create_schemas.load_schema_intensity()) - self.assertEqual(None, tag5.value_as_default_unit()) + self.assertEqual(300, tag5.value_as_default_unit()) diff --git a/tests/validator/test_onset_validator.py b/tests/validator/test_onset_validator.py index 4264c5b53..d817dd704 100644 --- a/tests/validator/test_onset_validator.py +++ b/tests/validator/test_onset_validator.py @@ -58,32 +58,22 @@ def _test_issues_base(self, test_strings, test_issues, test_context, placeholder onset_issues += def_validator.validate_onset_offset(test_string) if not onset_issues: onset_issues += onset_validator.validate_temporal_relations(test_string) - onset_issues += 
GroupValidator.validate_duration_tags(test_string) error_handler.add_context_and_filter(onset_issues) test_string.shrink_defs() issues = self.format_errors_fully(error_handler, hed_string=test_string, params=expected_params) - # print(str(test_string)) - # print(str(onset_issues)) - # print(str(issues)) - # print(onset_validator._onsets) + error_handler.pop_error_context() self.assertEqual(len(onset_validator._onsets), context) self.assertCountEqual(onset_issues, issues) def _test_issues_no_context(self, test_strings, test_issues): hed_validator = HedValidator(self.hed_schema, self.def_dict_both) - for string, expected_params in zip(test_strings, test_issues): - test_string = HedString(string, self.hed_schema) - error_handler = ErrorHandler(check_for_warnings=False) - error_handler.push_error_context(ErrorContext.HED_STRING, test_string) + for index, this_test in enumerate(test_strings): + test_string = HedString(this_test, self.hed_schema) onset_issues = hed_validator.validate(test_string, False) - error_handler.add_context_and_filter(onset_issues) - issues = self.format_errors_fully(error_handler, hed_string=test_string, params=expected_params) - # print(str(onset_issues)) - # print(str(issues)) - error_handler.pop_error_context() - self.assertCountEqual(onset_issues, issues) + filtered_issues = self.filter_issues(onset_issues) + self.assertCountEqual(filtered_issues, test_issues[index]) def test_basic_onset_errors(self): test_strings = [ @@ -275,16 +265,14 @@ def test_onset_multiple_or_misplaced_errors(self): f"({self.placeholder_label_def_string},Onset, Offset)", ] test_issues = [ - self.format_error(ValidationErrors.HED_TOP_LEVEL_TAG, tag=1, - actual_error=ValidationErrors.TEMPORAL_TAG_ERROR) - + self.format_error(ValidationErrors.HED_TOP_LEVEL_TAG, tag=1), - self.format_error(ValidationErrors.HED_MULTIPLE_TOP_TAGS, tag=1, multiple_tags=["Onset"]) - + self.format_error(ValidationErrors.HED_TAG_REPEATED, tag=2) - + 
self.format_error(TemporalErrors.ONSET_TAG_OUTSIDE_OF_GROUP, tag=2, - def_tag="Def/TestDefPlaceholder/2471"), - self.format_error(ValidationErrors.HED_MULTIPLE_TOP_TAGS, tag=1, multiple_tags=["Offset"]) - + self.format_error(TemporalErrors.ONSET_TAG_OUTSIDE_OF_GROUP, tag=2, - def_tag="Def/TestDefPlaceholder/2471"), + [{'code': 'TEMPORAL_TAG_ERROR', + 'message': 'Tag "Onset" must be in a top level group but was found in another location.', 'severity': 1}], + [{'code': 'TAG_GROUP_ERROR', + 'message': 'Repeated reserved tag "Onset" or multiple reserved tags in group "(Def/TestDefPlaceholder/2471,Onset,Onset)"', + 'severity': 1}], + [{'code': 'TAG_GROUP_ERROR', + 'message': 'Tag "Offset" is not allowed with the other tag(s) or Def-expand sub-group in group "(Def/TestDefPlaceholder/2471,Onset,Offset)"', + 'severity': 1}] ] self._test_issues_no_context(test_strings, test_issues) @@ -320,7 +308,7 @@ def test_onset_two_in_one_line(self): self._test_issues_base(test_strings, test_issues, expected_context, placeholder_def_only=False) def test_check_for_banned_tags(self): - hed_string = HedString("Event, (Duration/Short, Label/Example)", self.hed_schema) + hed_string = HedString("Event, (Delay/5, (Label/Example))", self.hed_schema) issues = OnsetValidator.check_for_banned_tags(hed_string) self.assertEqual(len(issues), 1) @@ -328,9 +316,9 @@ def test_check_for_banned_tags(self): issues = OnsetValidator.check_for_banned_tags(hed_string) self.assertEqual(len(issues), 2) - hed_string = HedString("(Onset, Duration/Long), Label/Example", self.hed_schema) + hed_string = HedString("(Onset, Duration/5.0), Label/Example", self.hed_schema) issues = OnsetValidator.check_for_banned_tags(hed_string) - self.assertEqual(len(issues), 2) + self.assertEqual(len(issues), 1) if __name__ == '__main__': diff --git a/tests/validator/test_sidecar_validator.py b/tests/validator/test_sidecar_validator.py index 5a76cdef2..db06f1e23 100644 --- a/tests/validator/test_sidecar_validator.py +++ 
b/tests/validator/test_sidecar_validator.py @@ -32,7 +32,7 @@ def test_multicategory_refs(self): issues = sidecar.validate(self.hed_schema) # 3 issues are expected for repeated tags from stacking lines - self.assertEqual(len(issues), 3) + self.assertEqual(len(issues), 2) refs = sidecar.get_column_refs() self.assertEqual(len(refs), 2) diff --git a/tests/validator/test_tag_validator.py b/tests/validator/test_tag_validator.py index 557660d33..40fea653d 100644 --- a/tests/validator/test_tag_validator.py +++ b/tests/validator/test_tag_validator.py @@ -3,6 +3,7 @@ from hed.errors.error_types import ValidationErrors, DefinitionErrors from tests.validator.test_tag_validator_base import TestValidatorBase from hed.schema.hed_schema_io import load_schema_version +from hed import HedTag, HedString from functools import partial @@ -163,7 +164,7 @@ def test_child_required(self): def test_required_units(self): test_strings = { 'hasRequiredUnit': 'Duration/3 ms', - 'missingRequiredUnit': 'Duration/3', + 'missingUnit': 'Duration/3', 'notRequiredNoNumber': 'Age', 'notRequiredNumber': 'Age/0.5', 'notRequiredScientific': 'Age/5.2e-1', @@ -173,7 +174,7 @@ def test_required_units(self): } expected_results = { 'hasRequiredUnit': True, - 'missingRequiredUnit': False, + 'missingUnit': True, 'notRequiredNoNumber': True, 'notRequiredNumber': True, 'notRequiredScientific': True, @@ -183,8 +184,7 @@ def test_required_units(self): # legal_clock_time_units = ['hour:min', 'hour:min:sec'] expected_issues = { 'hasRequiredUnit': [], - 'missingRequiredUnit': self.format_error(ValidationErrors.UNITS_MISSING, tag=0, - default_unit='s'), + 'missingUnit': [], 'notRequiredNoNumber': [], 'notRequiredNumber': [], 'notRequiredScientific': [], @@ -436,21 +436,31 @@ def test_no_duplicates(self): } from hed import HedString expected_issues = { - 'topLevelDuplicate': self.format_error(ValidationErrors.HED_TAG_REPEATED, tag=1), - 'groupDuplicate': self.format_error(ValidationErrors.HED_TAG_REPEATED, tag=3), + 
'topLevelDuplicate': [ + {'code': 'TAG_EXPRESSION_REPEATED', 'message': 'Repeated tag - "Event/Sensory-event"', 'severity': 1} + ], + 'groupDuplicate': [ + {'code': 'TAG_EXPRESSION_REPEATED', 'message': 'Repeated tag - "Event/Sensory-event"', 'severity': 1} + ], 'legalDuplicate': [], 'noDuplicate': [], - 'duplicateGroup': self.format_error(ValidationErrors.HED_TAG_REPEATED_GROUP, - group=HedString("(Sensory-event, Man-made-object/VehicleTrain)", - self.hed_schema)), - 'duplicateSubGroup': self.format_error( - ValidationErrors.HED_TAG_REPEATED_GROUP, - group=HedString("(Event,(Sensory-event,Man-made-object/VehicleTrain))", self.hed_schema)), - 'duplicateSubGroupF': self.format_error( - ValidationErrors.HED_TAG_REPEATED_GROUP, - group=HedString("((Sensory-event,Man-made-object/VehicleTrain),Event)", self.hed_schema)), - } - self.validator_semantic(test_strings, expected_results, expected_issues, False) + 'duplicateGroup': [ + {'code': 'TAG_EXPRESSION_REPEATED', + 'message': 'Repeated group - "(Man-made-object/VehicleTrain,Sensory-event)"', + 'severity': 1} + ], + 'duplicateSubGroup': [ + {'code': 'TAG_EXPRESSION_REPEATED', + 'message': 'Repeated group - "(Event,(Man-made-object/VehicleTrain,Sensory-event))"', + 'severity': 1} + ], + 'duplicateSubGroupF': [ + {'code': 'TAG_EXPRESSION_REPEATED', + 'message': 'Repeated group - "((Man-made-object/VehicleTrain,Sensory-event),Event)"', + 'severity': 1} + ], + } + self.validator_semantic_new(test_strings, expected_results, expected_issues, False) def test_no_duplicates_semantic(self): test_strings = { @@ -467,9 +477,20 @@ def test_no_duplicates_semantic(self): } self.validator_semantic(test_strings, expected_results, expected_issues, False) - def test_topLevelTagGroup_validation(self): + def test_temp_validation(self): + test_strings = { + 'valid1': 'Event, (Event)', + } + expected_results = { + 'valid1': True, + } + expected_issues = { + 'valid1': [] + } + self.validator_semantic_new(test_strings, expected_results, 
expected_issues, False) + + def test_topLevelTagGroup_validation_new(self): test_strings = { - 'invalid1': 'Definition/InvalidDef', 'valid1': '(Definition/ValidDef)', 'valid2': '(Definition/ValidDef), (Definition/ValidDef2)', 'invalid2': '(Event, (Definition/InvalidDef2))', @@ -478,12 +499,11 @@ def test_topLevelTagGroup_validation(self): 'valid2TwoInOne': '(Duration/5.0 s, Delay, (Event))', 'invalid3InOne': '(Duration/5.0 s, Delay, Onset, (Event))', 'invalidDuration': '(Duration/5.0 s, Onset, (Event))', - 'validDelay': '(Delay, Onset, (Event))', + 'invalidDelay': '(Delay, Onset, (Event))', 'invalidDurationPair': '(Duration/5.0 s, Duration/3.0 s, (Event))', - 'invalidDelayPair': '(Delay/3.0 s, Delay, (Event))', + 'invalidDelayPair': '(Delay/3.0 s, Delay/2.0 s, (Event))', } expected_results = { - 'invalid1': False, 'valid1': True, 'valid2': True, 'invalid2': False, @@ -492,35 +512,114 @@ def test_topLevelTagGroup_validation(self): 'valid2TwoInOne': True, 'invalid3InOne': False, 'invalidDuration': False, - 'validDelay': True, + 'invalidDelay': False, 'invalidDurationPair': False, 'invalidDelayPair': False, } expected_issues = { - 'invalid1': self.format_error(ValidationErrors.HED_TOP_LEVEL_TAG, tag=0, - actual_error=ValidationErrors.DEFINITION_INVALID) - + self.format_error(ValidationErrors.HED_TOP_LEVEL_TAG, tag=0), 'valid1': [], 'valid2': [], - 'invalid2': self.format_error( - ValidationErrors.HED_TOP_LEVEL_TAG, tag=1, actual_error=ValidationErrors.DEFINITION_INVALID) + \ - self.format_error(ValidationErrors.HED_TOP_LEVEL_TAG, tag=1), - 'invalidTwoInOne': self.format_error(ValidationErrors.HED_MULTIPLE_TOP_TAGS, tag=0, - multiple_tags="Definition/InvalidDef3".split(", ")), - 'invalid2TwoInOne': self.format_error(ValidationErrors.HED_MULTIPLE_TOP_TAGS, tag=0, - multiple_tags="Onset".split(", ")), + 'invalid2': [ + {'code': 'DEFINITION_INVALID', + 'message': 'Tag "Definition/InvalidDef2" must be in a top level group but was found in another location.', + 'severity': 
1} + ], + 'invalidTwoInOne': [ + {'code': 'TAG_GROUP_ERROR', + 'message': 'Repeated reserved tag "Definition/InvalidDef3" or multiple reserved tags in group "(Definition/InvalidDef2,Definition/InvalidDef3)"', + 'severity': 1} + ], + 'invalid2TwoInOne': [ + {'code': 'TAG_GROUP_ERROR', + 'message': 'Tag "Onset" is not allowed with the other tag(s) or Def-expand sub-group in group "(Definition/InvalidDef2,Onset)"', + 'severity': 1} + ], 'valid2TwoInOne': [], - 'invalid3InOne': self.format_error(ValidationErrors.HED_MULTIPLE_TOP_TAGS, tag=0, - multiple_tags="Delay, Onset".split(", ")), - 'invalidDuration': self.format_error(ValidationErrors.HED_MULTIPLE_TOP_TAGS, tag=0, - multiple_tags="Onset".split(", ")), - 'validDelay': [], - 'invalidDurationPair': self.format_error(ValidationErrors.HED_MULTIPLE_TOP_TAGS, tag=0, - multiple_tags="Duration/3.0 s".split(", ")), - 'invalidDelayPair': self.format_error(ValidationErrors.HED_MULTIPLE_TOP_TAGS, tag=0, - multiple_tags="Delay".split(", ")), - } - self.validator_semantic(test_strings, expected_results, expected_issues, False) + 'invalid3InOne': [ + {'code': 'TAG_GROUP_ERROR', + 'message': 'Tag "Onset" is not allowed with the other tag(s) or Def-expand sub-group in group "(Duration/5.0 s,Delay,Onset,(Event))"', + 'severity': 1} + ], + 'invalidDuration': [ + {'code': 'TAG_GROUP_ERROR', + 'message': 'Tag "Onset" is not allowed with the other tag(s) or Def-expand sub-group in group "(Duration/5.0 s,Onset,(Event))"', + 'severity': 1} + ], + 'invalidDelay': [ + {'code': 'TEMPORAL_TAG_ERROR', + 'message': "'Onset' tag has no def tag or def-expand group or too many when 1 is required in string.", + 'severity': 1} + ], + 'invalidDurationPair': [ + {'code': 'TAG_GROUP_ERROR', + 'message': 'Repeated reserved tag "Duration/3.0 s" or multiple reserved tags in group "(Duration/5.0 s,Duration/3.0 s,(Event))"', + 'severity': 1} + ], + 'invalidDelayPair': [ + {'code': 'TAG_GROUP_ERROR', + 'message': 'Repeated reserved tag "Delay/2.0 s" or 
multiple reserved tags in group "(Delay/3.0 s,Delay/2.0 s,(Event))"', + 'severity': 1} + ], + } + self.validator_semantic_new(test_strings, expected_results, expected_issues, False) + + # def test_topLevelTagGroup_validation(self): + # test_strings = { + # 'invalid1': 'Definition/InvalidDef', + # 'valid1': '(Definition/ValidDef)', + # 'valid2': '(Definition/ValidDef), (Definition/ValidDef2)', + # 'invalid2': '(Event, (Definition/InvalidDef2))', + # 'invalidTwoInOne': '(Definition/InvalidDef2, Definition/InvalidDef3)', + # 'invalid2TwoInOne': '(Definition/InvalidDef2, Onset)', + # 'valid2TwoInOne': '(Duration/5.0 s, Delay, (Event))', + # 'invalid3InOne': '(Duration/5.0 s, Delay, Onset, (Event))', + # 'invalidDuration': '(Duration/5.0 s, Onset, (Event))', + # 'validDelay': '(Delay, Onset, (Event))', + # 'invalidDurationPair': '(Duration/5.0 s, Duration/3.0 s, (Event))', + # 'invalidDelayPair': '(Delay/3.0 s, Delay, (Event))', + # } + # expected_results = { + # 'invalid1': False, + # 'valid1': True, + # 'valid2': True, + # 'invalid2': False, + # 'invalidTwoInOne': False, + # 'invalid2TwoInOne': False, + # 'valid2TwoInOne': True, + # 'invalid3InOne': False, + # 'invalidDuration': False, + # 'validDelay': True, + # 'invalidDurationPair': False, + # 'invalidDelayPair': False, + # } + # expected_issues = { + # 'invalid1': self.format_error(ValidationErrors.HED_TOP_LEVEL_TAG, tag=0, + # actual_error=ValidationErrors.DEFINITION_INVALID) + # + self.format_error(ValidationErrors.HED_TOP_LEVEL_TAG, tag=0), + # 'valid1': [], + # 'valid2': [], + # 'invalid2': + # self.format_error(ValidationErrors.HED_TOP_LEVEL_TAG, tag=1, + # actual_error=ValidationErrors.DEFINITION_INVALID) + + # self.format_error(ValidationErrors.HED_TOP_LEVEL_TAG, tag=1), + # 'invalidTwoInOne': self.format_error(ValidationErrors.HED_RESERVED_TAG_REPEATED, + # tag=HedTag("Definition/InvalidDef3", hed_schema=self.hed_schema), + # group=HedString("(Definition/InvalidDef2, Definition/InvalidDef3)", + # 
hed_schema=self.hed_schema)), + # 'invalid2TwoInOne': self.format_error(ValidationErrors.HED_MULTIPLE_TOP_TAGS, tag=0, + # multiple_tags="Onset".split(", ")), + # 'valid2TwoInOne': [], + # 'invalid3InOne': self.format_error(ValidationErrors.HED_MULTIPLE_TOP_TAGS, tag=0, + # multiple_tags="Delay, Onset".split(", ")), + # 'invalidDuration': self.format_error(ValidationErrors.HED_MULTIPLE_TOP_TAGS, tag=0, + # multiple_tags="Onset".split(", ")), + # 'validDelay': [], + # 'invalidDurationPair': self.format_error(ValidationErrors.HED_MULTIPLE_TOP_TAGS, tag=0, + # multiple_tags="Duration/3.0 s".split(", ")), + # 'invalidDelayPair': self.format_error(ValidationErrors.HED_MULTIPLE_TOP_TAGS, tag=0, + # multiple_tags="Delay".split(", ")), + # } + # self.validator_semantic(test_strings, expected_results, expected_issues, False) def test_taggroup_validation(self): test_strings = { @@ -545,19 +644,32 @@ def test_taggroup_validation(self): 'semivalid2': True, } expected_issues = { - 'invalid1': self.format_error(ValidationErrors.HED_TAG_GROUP_TAG, - tag=0), - 'invalid2': self.format_error(ValidationErrors.HED_TAG_GROUP_TAG, - tag=0), - 'invalid3': self.format_error(ValidationErrors.HED_TAG_GROUP_TAG, - tag=2), + 'invalid1': [ + {'code': 'TAG_GROUP_ERROR', + 'message': 'Tag "Def-Expand/InvalidDef" that must be in a group was found in another location.', + 'severity': 1} + ], + 'invalid2': [ + {'code': 'TAG_GROUP_ERROR', + 'message': 'Tag "Def-Expand/InvalidDef" that must be in a group was found in another location.', + 'severity': 1} + ], + 'invalid3': [ + {'code': 'TAG_GROUP_ERROR', + 'message': 'Tag "Def-Expand/InvalidDef" that must be in a group was found in another location.', + 'severity': 1} + ], 'valid1': [], 'valid2': [], 'valid3': [], 'semivalid1': [], - 'semivalid2': [] + 'semivalid2': [ + {'code': 'TEMPORAL_TAG_ERROR', + 'message': "'Onset' tag has no def tag or def-expand group or too many when 1 is required in string.", + 'severity': 1} + ] } - 
self.validator_semantic(test_strings, expected_results, expected_issues, False) + self.validator_semantic_new(test_strings, expected_results, expected_issues, False) def test_empty_groups(self): test_strings = { @@ -990,37 +1102,6 @@ class TestHedSpecialUnits(TestHed): def string_obj_func(validator): return partial(validator._validate_individual_tags_in_hed_string) - def test_special_units(self): - test_strings = { - 'specialAllowedCharCurrency': 'Item/Currency-test/$ 100', - 'specialNotAllowedCharCurrency': 'Item/Currency-test/@ 100', - 'specialAllowedCharCurrencyAsSuffix': 'Item/Currency-test/100 $', - # Update tests - 8.0 currently has no clockTime nodes. - # 'properTime': 'Item/clockTime-test/08:30', - # 'invalidTime': 'Item/clockTime-test/54:54' - } - expected_results = { - # 'properTime': True, - # 'invalidTime': True, - 'specialAllowedCharCurrency': True, - 'specialNotAllowedCharCurrency': False, - 'specialAllowedCharCurrencyAsSuffix': False, - } - legal_currency_units = ['dollar', "$", "point"] - - expected_issues = { - # 'properTime': [], - # 'invalidTime': [], - 'specialAllowedCharCurrency': [], - 'specialNotAllowedCharCurrency': self.format_error("INVALID_VALUE_CLASS_VALUE", - value_class="numericClass", tag=0, index_in_tag=0, - index_in_tag_end=24) - + self.format_error(ValidationErrors.UNITS_INVALID, tag=0, units=legal_currency_units), - 'specialAllowedCharCurrencyAsSuffix': self.format_error(ValidationErrors.UNITS_INVALID, tag=0, - units=legal_currency_units), - } - self.validator_semantic(test_strings, expected_results, expected_issues, True) - class TestHedAllowedCharacters(TestHed): compute_forms = True @@ -1045,7 +1126,7 @@ def test_special_units(self): expected_issues = { 'ascii': [], 'illegalTab': self.format_error(ValidationErrors.INVALID_VALUE_CLASS_CHARACTER, tag=0, - index_in_tag=13, index_in_tag_end=14, value_class="textClass"), + problem_tag='\t', value_class="textClass"), 'allowTab': [] } self.validator_semantic(test_strings, 
expected_results, expected_issues, True) diff --git a/tests/validator/test_tag_validator_base.py b/tests/validator/test_tag_validator_base.py index 8b816f5b4..a3b990181 100644 --- a/tests/validator/test_tag_validator_base.py +++ b/tests/validator/test_tag_validator_base.py @@ -42,6 +42,11 @@ def format_error(self, error_type, *args, **kwargs): # return params return [params] + def filter_issues(self, issue_list): + if not issue_list: + return [] + return [{key: d[key] for key in ('code', 'message', 'severity') if key in d} for d in issue_list] + def format_errors_fully(self, error_handler, hed_string, params): formatted_errors = [] for code, args, kwargs in params: @@ -87,15 +92,35 @@ def validator_base(self, test_strings, expected_results, expected_issues, test_f params=expected_params) error_handler.add_context_and_filter(test_issues) test_result = not test_issues - - # print(str(expected_issue)) - # print(str(test_issues)) - # error_handler.pop_error_context() self.assertEqual(test_result, expected_result, test_strings[test_key]) self.assertCountEqual(test_issues, expected_issue, test_strings[test_key]) + def validator_base_new(self, test_strings, expected_results, expected_issues, test_function, + hed_schema, check_for_warnings=False): + # This does direct comparison of the issue before formatting or context. 
+ for test_key in test_strings: + # print(f"\n{test_key}: {test_strings[test_key]}") + hed_string_obj = HedString(test_strings[test_key], self.hed_schema) + test_issues = [] + if self.compute_forms: + test_issues += hed_string_obj._calculate_to_canonical_forms(hed_schema) + if not test_issues: + test_issues += test_function(hed_string_obj) + filtered_issues = self.filter_issues(test_issues) + # print(f"filtered: {str(filtered_issues)}") + these_issues = expected_issues[test_key] + self.assertEqual(len(filtered_issues), len(these_issues), + f"{test_strings[test_key]} should have the same number of issues.") + self.assertCountEqual(filtered_issues, these_issues, test_strings[test_key]) + def validator_semantic(self, test_strings, expected_results, expected_issues, check_for_warnings): validator = self.semantic_hed_input_reader self.validator_base(test_strings, expected_results, expected_issues, self.string_obj_func(validator), check_for_warnings=check_for_warnings, hed_schema=validator._hed_schema) + + def validator_semantic_new(self, test_strings, expected_results, expected_issues, check_for_warnings): + validator = self.semantic_hed_input_reader + self.validator_base_new(test_strings, expected_results, expected_issues, + self.string_obj_func(validator), check_for_warnings=check_for_warnings, + hed_schema=validator._hed_schema) diff --git a/tests/validator/test_tag_validator_library.py b/tests/validator/test_tag_validator_library.py index 3db111130..3fa606541 100644 --- a/tests/validator/test_tag_validator_library.py +++ b/tests/validator/test_tag_validator_library.py @@ -141,7 +141,7 @@ def test_child_required(self): def test_required_units(self): test_strings = { 'hasRequiredUnit': 'Duration/3 ms', - 'missingRequiredUnit': 'Duration/3', + 'missingUnit': 'Duration/3', 'notRequiredNoNumber': 'Age', 'notRequiredNumber': 'Age/0.5', 'notRequiredScientific': 'Age/5.2e-1', @@ -151,7 +151,7 @@ def test_required_units(self): } expected_results = { 'hasRequiredUnit': True, 
- 'missingRequiredUnit': False, + 'missingUnit': True, 'notRequiredNoNumber': True, 'notRequiredNumber': True, 'notRequiredScientific': True, @@ -161,8 +161,7 @@ def test_required_units(self): # legal_clock_time_units = ['hour:min', 'hour:min:sec'] expected_issues = { 'hasRequiredUnit': [], - 'missingRequiredUnit': self.format_error( - ValidationErrors.UNITS_MISSING, tag=0, default_unit='s'), + 'missingUnit': [], 'notRequiredNoNumber': [], 'notRequiredNumber': [], 'notRequiredScientific': [], @@ -370,22 +369,30 @@ def test_topLevelTagGroup_validation(self): 'invalid2TwoInOne': False, } expected_issues = { - 'invalid1': self.format_error(ValidationErrors.HED_TOP_LEVEL_TAG, - tag=0, actual_error=ValidationErrors.DEFINITION_INVALID) - + self.format_error(ValidationErrors.HED_TOP_LEVEL_TAG, tag=0), + 'invalid1': [ + {'code': 'DEFINITION_INVALID', + 'message': 'Tag "tl:Definition/InvalidDef" must be in a top level group but was found in another location.', + 'severity': 1} + ], 'valid1': [], 'valid2': [], - 'invalid2': self.format_error(ValidationErrors.HED_TOP_LEVEL_TAG, tag=1, - actual_error=ValidationErrors.DEFINITION_INVALID) - + self.format_error(ValidationErrors.HED_TOP_LEVEL_TAG, tag=1), - 'invalidTwoInOne': self.format_error( - ValidationErrors.HED_MULTIPLE_TOP_TAGS, tag=0, - multiple_tags="tl:Definition/InvalidDef3".split(", ")), - 'invalid2TwoInOne': self.format_error( - ValidationErrors.HED_MULTIPLE_TOP_TAGS, tag=0, - multiple_tags="tl:Onset".split(", ")), - } - self.validator_semantic(test_strings, expected_results, expected_issues, False) + 'invalid2': [ + {'code': 'DEFINITION_INVALID', + 'message': 'Tag "tl:Definition/InvalidDef2" must be in a top level group but was found in another location.', + 'severity': 1} + ], + 'invalidTwoInOne': [ + {'code': 'TAG_GROUP_ERROR', + 'message': 'Repeated reserved tag "tl:Definition/InvalidDef3" or multiple reserved tags in group "(tl:Definition/InvalidDef2,tl:Definition/InvalidDef3)"', + 'severity': 1} + ], + 
'invalid2TwoInOne': [ + {'code': 'TAG_GROUP_ERROR', + 'message': 'Tag "tl:Onset" is not allowed with the other tag(s) or Def-expand sub-group in group "(tl:Definition/InvalidDef2,tl:Onset)"', + 'severity': 1} + ], + } + self.validator_semantic_new(test_strings, expected_results, expected_issues, False) def test_taggroup_validation(self): test_strings = { @@ -395,7 +402,7 @@ def test_taggroup_validation(self): 'valid1': '(tl:Def-Expand/ValidDef)', 'valid2': '(tl:Def-Expand/ValidDef), (tl:Def-Expand/ValidDef2)', 'valid3': '(tl:Event, (tl:Def-Expand/InvalidDef2))', - # This case should possibly be flagged as invalid + # This case is flagged later as invalid definition 'semivalid1': '(tl:Def-Expand/InvalidDef2, tl:Def-Expand/InvalidDef3)', 'semivalid2': '(tl:Def-Expand/InvalidDef2, tl:Onset)', } @@ -406,20 +413,34 @@ def test_taggroup_validation(self): 'valid1': True, 'valid2': True, 'valid3': True, - 'semivalid1': True, + 'semivalid1': False, 'semivalid2': True, } expected_issues = { - 'invalid1': self.format_error(ValidationErrors.HED_TAG_GROUP_TAG, tag=0), - 'invalid2': self.format_error(ValidationErrors.HED_TAG_GROUP_TAG, tag=0), - 'invalid3': self.format_error(ValidationErrors.HED_TAG_GROUP_TAG, tag=2), + 'invalid1': [ + {'code': 'TAG_GROUP_ERROR', + 'message': 'Tag "tl:Def-Expand/InvalidDef" that must be in a group was found in another location.', + 'severity': 1} + ], + 'invalid2': [ + {'code': 'TAG_GROUP_ERROR', + 'message': 'Tag "tl:Def-Expand/InvalidDef" that must be in a group was found in another location.', + 'severity': 1} + ], + 'invalid3': [ + {'code': 'TAG_GROUP_ERROR', + 'message': 'Tag "tl:Def-Expand/InvalidDef" that must be in a group was found in another location.', + 'severity': 1} + ], 'valid1': [], 'valid2': [], 'valid3': [], 'semivalid1': [], - 'semivalid2': [] + 'semivalid2': [ + {'code': 'TEMPORAL_TAG_ERROR', + 'message': "'tl:Onset' tag has no def tag or def-expand group or too many when 1 is required in string.", 'severity': 1}] } - 
self.validator_semantic(test_strings, expected_results, expected_issues, False) + self.validator_semantic_new(test_strings, expected_results, expected_issues, False) class RequiredTags(TestHed3):