From 99ddadd1e59cb00e0e4c90171a2a4804a4655af5 Mon Sep 17 00:00:00 2001 From: IanCa Date: Thu, 30 Mar 2023 14:12:23 -0500 Subject: [PATCH 1/3] Update spec tests commit --- spec_tests/hed-specification | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec_tests/hed-specification b/spec_tests/hed-specification index 9e7d37eb4..9162f09a2 160000 --- a/spec_tests/hed-specification +++ b/spec_tests/hed-specification @@ -1 +1 @@ -Subproject commit 9e7d37eb497a44d0bf6068633077d94788e79792 +Subproject commit 9162f09a2c1f60c35ff68c6a52987f3c33381eb6 From 2b8551b5944a48ab2e0a422e2e7878581f6b3a78 Mon Sep 17 00:00:00 2001 From: IanCa Date: Thu, 30 Mar 2023 18:51:35 -0500 Subject: [PATCH 2/3] Rename HedTag.extension_or_value_portion -> .extension Add HedGroup.get_first_group() Make def/def-expands report unit errors as happening on themself Add first draft def_expand gathering --- hed/models/definition_dict.py | 14 ++- hed/models/definition_entry.py | 58 ++++++------ hed/models/df_util.py | 83 +++++++++++++++++ hed/models/hed_group.py | 30 +++++- hed/models/hed_tag.py | 20 +++- hed/tools/analysis/event_manager.py | 4 +- hed/tools/analysis/hed_context_manager.py | 2 +- hed/tools/analysis/hed_tag_counts.py | 2 +- hed/tools/analysis/hed_type_definitions.py | 8 +- hed/tools/analysis/hed_type_values.py | 4 +- hed/tools/analysis/temporal_event.py | 2 +- hed/validator/def_validator.py | 36 +++++--- hed/validator/hed_validator.py | 34 +++---- hed/validator/onset_validator.py | 2 +- hed/validator/tag_validator.py | 34 ++++--- spec_tests/hed-specification | 2 +- tests/models/test_base_input.py | 4 +- tests/models/test_definition_entry.py | 38 ++++---- tests/models/test_df_util.py | 101 ++++++++++++++++++++- 19 files changed, 360 insertions(+), 118 deletions(-) diff --git a/hed/models/definition_dict.py b/hed/models/definition_dict.py index 04cbfc440..fce00e1fa 100644 --- a/hed/models/definition_dict.py +++ b/hed/models/definition_dict.py @@ -54,14 +54,20 @@ def _add_definitions_from_dict(self, def_dict): def_dict (DefinitionDict): DefDict whose definitions should be added. """ - for def_tag, def_value in def_dict: + for def_tag, def_value in def_dict.items(): self._add_definition(def_tag, def_value) def get(self, def_name): return self.defs.get(def_name.lower()) def __iter__(self): - return iter(self.defs.items()) + return iter(self.defs) + + def __len__(self): + return len(self.defs) + + def items(self): + return self.defs.items() @property def issues(self): @@ -94,7 +100,7 @@ def check_for_definitions(self, hed_string_obj, error_handler=None): """ new_def_issues = [] for definition_tag, group in hed_string_obj.find_top_level_tags(anchor_tags={DefTagNames.DEFINITION_KEY}): - def_tag_name = definition_tag.extension_or_value_portion + def_tag_name = definition_tag.extension # initial validation groups = group.groups() @@ -224,7 +230,7 @@ def _get_definition_contents(self, def_tag): def_contents: HedGroup The contents to replace the previous def-tag with. """ - is_label_tag = def_tag.extension_or_value_portion + is_label_tag = def_tag.extension placeholder = None found_slash = is_label_tag.find("/") if found_slash != -1: diff --git a/hed/models/definition_entry.py b/hed/models/definition_entry.py index 27c89d33b..ba9f69b6a 100644 --- a/hed/models/definition_entry.py +++ b/hed/models/definition_entry.py @@ -18,13 +18,13 @@ def __init__(self, name, contents, takes_value, source_context): """ self.name = name if contents: - contents = contents.copy() + contents = contents.copy().sort() self.contents = contents self.takes_value = takes_value self.source_context = source_context - self.tag_dict = {} - if contents: - add_group_to_dict(contents, self.tag_dict) + # self.tag_dict = {} + # if contents: + # add_group_to_dict(contents, self.tag_dict) def get_definition(self, replace_tag, placeholder_value=None, return_copy_of_tag=False): """ Return a copy of the definition with the tag expanded and the placeholder plugged in. @@ -66,28 +66,28 @@ def get_definition(self, replace_tag, placeholder_value=None, return_copy_of_tag startpos=replace_tag.span[0], endpos=replace_tag.span[1], contents=output_contents) return f"{DefTagNames.DEF_EXPAND_ORG_KEY}/{name}", output_contents - -def add_group_to_dict(group, tag_dict=None): - """ Add the tags and their values from a HED group to a tag dictionary. - - Parameters: - group (HedGroup): Contents to add to the tag dict. - tag_dict (dict): The starting tag dictionary to add to. - - Returns: - dict: The updated tag_dict containing the tags with a list of values. - - Notes: - - Expects tags to have forms calculated already. - - """ - if tag_dict is None: - tag_dict = {} - for tag in group.get_all_tags(): - short_base_tag = tag.short_base_tag - value = tag.extension_or_value_portion - value_dict = tag_dict.get(short_base_tag, {}) - if value: - value_dict[value] = '' - tag_dict[short_base_tag] = value_dict - return tag_dict +# +# def add_group_to_dict(group, tag_dict=None): +# """ Add the tags and their values from a HED group to a tag dictionary. +# +# Parameters: +# group (HedGroup): Contents to add to the tag dict. +# tag_dict (dict): The starting tag dictionary to add to. +# +# Returns: +# dict: The updated tag_dict containing the tags with a list of values. +# +# Notes: +# - Expects tags to have forms calculated already. +# +# """ +# if tag_dict is None: +# tag_dict = {} +# for tag in group.get_all_tags(): +# short_base_tag = tag.short_base_tag +# value = tag.extension +# value_dict = tag_dict.get(short_base_tag, {}) +# if value: +# value_dict[value] = '' +# tag_dict[short_base_tag] = value_dict +# return tag_dict diff --git a/hed/models/df_util.py b/hed/models/df_util.py index 989299d2f..3027d1174 100644 --- a/hed/models/df_util.py +++ b/hed/models/df_util.py @@ -4,6 +4,8 @@ from hed.models.sidecar import Sidecar from hed.models.tabular_input import TabularInput from hed import HedString +from hed.models.definition_dict import DefinitionDict +from hed.models.definition_entry import DefinitionEntry def get_assembled(tabular_file, sidecar, hed_schema, extra_def_dicts=None, join_columns=True, @@ -136,3 +138,84 @@ def _shrink_defs(hed_string, hed_schema): def _expand_defs(hed_string, hed_schema, def_dict): from hed import HedString return str(HedString(hed_string, hed_schema, def_dict).expand_defs()) + + +def process_def_expands(hed_strings, hed_schema, known_defs=None, ambiguous_defs=None): + """ + Processes a list of HED strings according to a given HED schema, using known definitions and ambiguous definitions. + + Args: + hed_strings (list or pd.Series): A list of HED strings to process. + hed_schema (HedSchema): The schema to use + known_defs (DefinitionDict or list or str), optional): + A DefinitionDict or anything its constructor takes. These are the known definitions going in, that must + match perfectly. + ambiguous_defs (dict): A dictionary containing ambiguous definitions + format TBD. Currently def name key: list of lists of hed tags values + """ + if not isinstance(hed_strings, pd.Series): + hed_strings = pd.Series(hed_strings) + + if ambiguous_defs is None: + ambiguous_defs = {} + errors = {} + def_dict = DefinitionDict(known_defs) + + def_expand_mask = hed_strings.str.contains('Def-Expand/', case=False) + + # Iterate over the strings that contain def-expand tags + for i in hed_strings[def_expand_mask].index: + string = hed_strings.loc[i] + hed_str = HedString(string, hed_schema) + + for def_tag, def_expand_group, def_group in hed_str.find_def_tags(recursive=True): + if def_tag == def_expand_group: + continue + + def_tag_name = def_tag.extension.split('/')[0] + # First check for known definitions. If this is known, it's done either way. + def_group_contents = def_dict._get_definition_contents(def_tag) + def_expand_group.sort() + if def_group_contents: + if def_group_contents != def_expand_group: + errors.setdefault(def_tag_name.lower(), []).append(def_group) + continue + + has_extension = "/" in def_tag.extension + + # If there's no extension, this is fine. + if not has_extension: + group_tag = def_expand_group.get_first_group() + def_dict.defs[def_tag_name.lower()] = DefinitionEntry(name=def_tag_name, contents=group_tag, + takes_value=False, + source_context=[]) + else: + def_extension = def_tag.extension.split('/')[-1] + # Find any other tags in def_group.get_all_tags() with tags with the same extension + matching_tags = [tag for tag in def_expand_group.get_all_tags() if tag.extension == def_extension and tag != def_tag] + + for tag in matching_tags: + tag.extension = "#" + + group_tag = def_expand_group.get_first_group() + + these_defs = ambiguous_defs.setdefault(def_tag_name.lower(), []) + these_defs.append(group_tag) + + value_per_tag = [] + if len(these_defs) >= 1: + all_tags_list = [group.get_all_tags() for group in these_defs] + for tags in zip(*all_tags_list): + value_per_tag.append(next((tag.extension for tag in tags if tag.extension != "#"), None)) + ambiguous_values = value_per_tag.count(None) + if ambiguous_values == 1: + new_contents = group_tag.copy() + for tag, value in zip(new_contents.get_all_tags(), value_per_tag): + if value is not None: + tag.extension = f"/{value}" + def_dict.defs[def_tag_name.lower()] = DefinitionEntry(name=def_tag_name, contents=new_contents, + takes_value=True, + source_context=[]) + del ambiguous_defs[def_tag_name.lower()] + + return def_dict, ambiguous_defs, errors diff --git a/hed/models/hed_group.py b/hed/models/hed_group.py index 7273d956c..64b789ec4 100644 --- a/hed/models/hed_group.py +++ b/hed/models/hed_group.py @@ -157,15 +157,18 @@ def sorted(self, update_self=False): Returns: list: The list of all tags in this group, with subgroups being returned as further nested lists """ - output_list = [] + tag_list = [] + group_list = [] queue_list = list(self.children) for child in queue_list: if isinstance(child, HedTag): - output_list.append((child, child)) + tag_list.append((child, child)) else: - output_list.append((child, child.sorted(update_self))) + group_list.append((child, child.sorted(update_self))) - output_list.sort(key=lambda x: str(x[0])) + tag_list.sort(key=lambda x: str(x[0])) + group_list.sort(key=lambda x: str(x[0])) + output_list = tag_list + group_list if update_self: self._children = [x[0] for x in output_list] return [x[1] for x in output_list] @@ -260,6 +263,19 @@ def groups(self): """ return [group for group in self.children if isinstance(group, HedGroup)] + def get_first_group(self): + """ Returns the first group in this hed string or group. + + Useful for things like Def-expand where they only have a single group. + + Raises a ValueError if there are no groups. + + Returns: + HedGroup: The first group + + """ + return self.groups()[0] + def get_original_hed_string(self): """ Get the original hed string. @@ -343,7 +359,7 @@ def find_placeholder_tag(self): """ for tag in self.get_all_tags(): - if "#" in tag.org_tag: + if tag.is_placeholder(): return tag return None @@ -356,6 +372,10 @@ def __eq__(self, other): if self is other: return True + # Allow us to compare to a list of groups. + # Note this comparison will NOT check if the list has the outer parenthesis + if isinstance(other, list): + return self.children == other if isinstance(other, str): return str(self) == other if not isinstance(other, HedGroup) or self.children != other.children or self.is_group != other.is_group: diff --git a/hed/models/hed_tag.py b/hed/models/hed_tag.py index 689eeac1d..57fa7e3ad 100644 --- a/hed/models/hed_tag.py +++ b/hed/models/hed_tag.py @@ -215,7 +215,7 @@ def tag(self, new_tag_val): self.convert_to_canonical_forms(self._schema) @property - def extension_or_value_portion(self): + def extension(self): """ Get the extension or value of tag Generally this is just the portion after the last slash. @@ -233,6 +233,11 @@ def extension_or_value_portion(self): return "" + @extension.setter + def extension(self, x): + self._extension_value = f"/{x}" + + @property def long_tag(self): """ Long form including value or extension. @@ -347,7 +352,7 @@ def get_stripped_unit_value(self): if stripped_value: return stripped_value, unit - return self.extension_or_value_portion, None + return self.extension, None @property def unit_classes(self): @@ -450,7 +455,7 @@ def is_basic_tag(self): bool: True if this is a known tag without extension or value. """ - return bool(self._schema_entry and not self.extension_or_value_portion) + return bool(self._schema_entry and not self.extension) def has_attribute(self, attribute): """ Return true if this is an attribute this tag has. @@ -570,7 +575,7 @@ def _get_tag_units_portion(self, tag_unit_classes): stripped_value (str): The value with the units removed. """ - value, _, units = self.extension_or_value_portion.rpartition(" ") + value, _, units = self.extension.rpartition(" ") if not units: return None, None @@ -598,6 +603,11 @@ def _find_modifier_unit_entry(units, all_valid_unit_permutations): return possible_match + def is_placeholder(self): + if "#" in self.org_tag or "#" in self._extension_value: + return True + return False + def replace_placeholder(self, placeholder_value): """ If tag has a placeholder character(#), replace with value. @@ -605,7 +615,7 @@ def replace_placeholder(self, placeholder_value): placeholder_value (str): Value to replace placeholder with. """ - if "#" in self.org_tag: + if self.is_placeholder(): if self._schema_entry: self._extension_value = self._extension_value.replace("#", placeholder_value) else: diff --git a/hed/tools/analysis/event_manager.py b/hed/tools/analysis/event_manager.py index f8bf5e5f5..1f1a1acea 100644 --- a/hed/tools/analysis/event_manager.py +++ b/hed/tools/analysis/event_manager.py @@ -66,7 +66,7 @@ def _create_event_list(self): for tup in group_tuples: group = tup[1] anchor_tag = group.find_def_tags(recursive=False, include_groups=0)[0] - anchor = anchor_tag.extension_or_value_portion.lower() + anchor = anchor_tag.extension.lower() if anchor in onset_dict or tup[0].short_base_tag.lower() == "offset": temporal_event = onset_dict.pop(anchor) temporal_event.set_end(event_index, self.data.dataframe.loc[event_index, "onset"]) @@ -114,7 +114,7 @@ def _update_onset_list(self, group, onset_dict, event_index): - Modifies onset_dict and onset_list. """ # def_tags = group.find_def_tags(recursive=False, include_groups=0) - # name = def_tags[0].extension_or_value_portion + # name = def_tags[0].extension # onset_element = onset_dict.pop(name, None) # if onset_element: # onset_element.end_index = event_index diff --git a/hed/tools/analysis/hed_context_manager.py b/hed/tools/analysis/hed_context_manager.py index 72298de1f..1cee65138 100644 --- a/hed/tools/analysis/hed_context_manager.py +++ b/hed/tools/analysis/hed_context_manager.py @@ -129,7 +129,7 @@ def _update_onset_list(self, group, onset_dict, event_index, is_offset=False): - Modifies onset_dict and onset_list. """ def_tags = group.find_def_tags(recursive=False, include_groups=0) - name = def_tags[0].extension_or_value_portion + name = def_tags[0].extension onset_element = onset_dict.pop(name, None) if onset_element: onset_element.end_index = event_index diff --git a/hed/tools/analysis/hed_tag_counts.py b/hed/tools/analysis/hed_tag_counts.py index fe6347d2e..845e448b5 100644 --- a/hed/tools/analysis/hed_tag_counts.py +++ b/hed/tools/analysis/hed_tag_counts.py @@ -29,7 +29,7 @@ def set_value(self, hed_tag): """ if not hed_tag: return - value = hed_tag.extension_or_value_portion + value = hed_tag.extension if not value: value = None if value in self.value_dict: diff --git a/hed/tools/analysis/hed_type_definitions.py b/hed/tools/analysis/hed_type_definitions.py index 8d49dc060..1cd80c914 100644 --- a/hed/tools/analysis/hed_type_definitions.py +++ b/hed/tools/analysis/hed_type_definitions.py @@ -89,11 +89,11 @@ def _extract_entry_values(self, entry): for hed_tag in tag_list: hed_tag.convert_to_canonical_forms(self.hed_schema) if hed_tag.short_base_tag.lower() == 'description': - description = hed_tag.extension_or_value_portion + description = hed_tag.extension elif hed_tag.short_base_tag.lower() != self.type_tag: other_tags.append(hed_tag.short_base_tag) else: - value = hed_tag.extension_or_value_portion.lower() + value = hed_tag.extension.lower() if value: type_tag_values.append(value) else: @@ -113,9 +113,9 @@ def get_def_names(item, no_value=True): """ if isinstance(item, HedTag) and 'def' in item.tag_terms: - names = [item.extension_or_value_portion.lower()] + names = [item.extension.lower()] else: - names = [tag.extension_or_value_portion.lower() for tag in item.get_all_tags() if 'def' in tag.tag_terms] + names = [tag.extension.lower() for tag in item.get_all_tags() if 'def' in tag.tag_terms] if no_value: for index, name in enumerate(names): name, name_value = HedTypeDefinitions.split_name(name) diff --git a/hed/tools/analysis/hed_type_values.py b/hed/tools/analysis/hed_type_values.py index 2b6f71583..ceb0d954e 100644 --- a/hed/tools/analysis/hed_type_values.py +++ b/hed/tools/analysis/hed_type_values.py @@ -141,7 +141,7 @@ def _update_definition_variables(self, tag, hed_vars, index): This modifies the HedTypeFactors map. """ - level = tag.extension_or_value_portion.lower() + level = tag.extension.lower() for var_name in hed_vars: hed_var = self._type_value_map.get(var_name, None) if hed_var is None: @@ -185,7 +185,7 @@ def _update_variables(self, tag_list, index): """ for tag in tag_list: - tag_value = tag.extension_or_value_portion.lower() + tag_value = tag.extension.lower() if not tag_value: tag_value = self.type_tag hed_var = self._type_value_map.get(tag_value, None) diff --git a/hed/tools/analysis/temporal_event.py b/hed/tools/analysis/temporal_event.py index a8fcbbcde..a60243a8e 100644 --- a/hed/tools/analysis/temporal_event.py +++ b/hed/tools/analysis/temporal_event.py @@ -20,7 +20,7 @@ def set_end(self, end_index, end_time): def _split_group(self): for item in self.event_group.children: if isinstance(item, HedTag) and (item.short_tag.lower() != "onset"): - self.anchor = item.extension_or_value_portion.lower() + self.anchor = item.extension.lower() elif isinstance(item, HedTag): continue elif isinstance(item, HedGroup): diff --git a/hed/validator/def_validator.py b/hed/validator/def_validator.py index 5b18cd466..2a362af18 100644 --- a/hed/validator/def_validator.py +++ b/hed/validator/def_validator.py @@ -19,12 +19,12 @@ def __init__(self, def_dicts=None, hed_schema=None): """ super().__init__(def_dicts, hed_schema=hed_schema) - def validate_def_tags(self, hed_string_obj): + def validate_def_tags(self, hed_string_obj, tag_validator=None): """ Validate Def/Def-Expand tags. Parameters: hed_string_obj (HedString): The hed string to process. - + tag_validator (TagValidator): Used to validate the placeholder replacement. Returns: list: Issues found related to validating defs. Each issue is a dictionary. """ @@ -35,24 +35,24 @@ def validate_def_tags(self, hed_string_obj): def_issues = [] # We need to check for labels to expand in ALL groups for def_tag, def_expand_group, def_group in hed_string_obj.find_def_tags(recursive=True): - def_issues += self._validate_def_contents(def_tag, def_expand_group) + def_issues += self._validate_def_contents(def_tag, def_expand_group, tag_validator) return def_issues - def _validate_def_contents(self, def_tag, def_expand_group): + def _validate_def_contents(self, def_tag, def_expand_group, tag_validator): """ Check for issues with expanding a tag from Def to a Def-expand tag group Parameters: def_tag (HedTag): Source hed tag that may be a Def or Def-expand tag. def_expand_group (HedGroup or HedTag): Source group for this def-expand tag. Same as def_tag if this is not a def-expand tag. - + tag_validator (TagValidator): Used to validate the placeholder replacement. Returns: issues """ def_issues = [] - is_def_tag = def_expand_group is not def_tag - is_label_tag = def_tag.extension_or_value_portion + is_def_expand_tag = def_expand_group != def_tag + is_label_tag = def_tag.extension placeholder = None found_slash = is_label_tag.find("/") if found_slash != -1: @@ -63,25 +63,39 @@ def _validate_def_contents(self, def_tag, def_expand_group): def_entry = self.defs.get(label_tag_lower) if def_entry is None: error_code = ValidationErrors.HED_DEF_UNMATCHED - if is_def_tag: + if is_def_expand_tag: error_code = ValidationErrors.HED_DEF_EXPAND_UNMATCHED def_issues += ErrorHandler.format_error(error_code, tag=def_tag) else: def_tag_name, def_contents = def_entry.get_definition(def_tag, placeholder_value=placeholder, return_copy_of_tag=True) if def_tag_name: - if is_def_tag and def_expand_group != def_contents: + if is_def_expand_tag and def_expand_group != def_contents: def_issues += ErrorHandler.format_error(ValidationErrors.HED_DEF_EXPAND_INVALID, tag=def_tag, actual_def=def_contents, found_def=def_expand_group) + if def_entry.takes_value and tag_validator: + placeholder_tag = def_contents.find_placeholder_tag() + error_code = ValidationErrors.DEF_INVALID + if is_def_expand_tag: + error_code = ValidationErrors.DEF_EXPAND_INVALID + if placeholder_tag.is_unit_class_tag(): + def_issues += tag_validator.check_tag_unit_class_units_are_valid(placeholder_tag, + report_tag_as=def_tag, + error_code=error_code) + elif placeholder_tag.is_value_class_tag(): + def_issues += tag_validator.check_tag_value_class_valid(placeholder_tag, + report_tag_as=def_tag, + error_code=error_code) + elif def_entry.takes_value: error_code = ValidationErrors.HED_DEF_VALUE_MISSING - if is_def_tag: + if is_def_expand_tag: error_code = ValidationErrors.HED_DEF_EXPAND_VALUE_MISSING def_issues += ErrorHandler.format_error(error_code, tag=def_tag) else: error_code = ValidationErrors.HED_DEF_VALUE_EXTRA - if is_def_tag: + if is_def_expand_tag: error_code = ValidationErrors.HED_DEF_EXPAND_VALUE_EXTRA def_issues += ErrorHandler.format_error(error_code, tag=def_tag) diff --git a/hed/validator/hed_validator.py b/hed/validator/hed_validator.py index c7ce76adf..ac9746fc4 100644 --- a/hed/validator/hed_validator.py +++ b/hed/validator/hed_validator.py @@ -69,9 +69,11 @@ def run_basic_checks(self, hed_string, allow_placeholders): # e.g. checking units when a definition placeholder has units self._def_validator.construct_def_tags(hed_string) issues += self._validate_individual_tags_in_hed_string(hed_string, allow_placeholders=allow_placeholders) - if check_for_any_errors(issues): - return issues - issues += self._def_validator.validate_def_tags(hed_string) + # todo: maybe restore this. Issue is bad def-expand values are caught above, + # so the actual def-expand tag won't be checked if we bail early. + # if check_for_any_errors(issues): + # return issues + issues += self._def_validator.validate_def_tags(hed_string, self._tag_validator) if check_for_any_errors(issues): return issues issues += self._onset_validator.validate_onset_offset(hed_string) @@ -165,20 +167,20 @@ def _validate_individual_tags_in_hed_string(self, hed_string_obj, allow_placehol """ from hed.models.definition_dict import DefTagNames validation_issues = [] - def_groups = hed_string_obj.find_top_level_tags(anchor_tags={DefTagNames.DEFINITION_KEY}, include_groups=1) - all_def_groups = [group for sub_group in def_groups for group in sub_group.get_all_groups()] + definition_groups = hed_string_obj.find_top_level_tags(anchor_tags={DefTagNames.DEFINITION_KEY}, include_groups=1) + all_definition_groups = [group for sub_group in definition_groups for group in sub_group.get_all_groups()] for group in hed_string_obj.get_all_groups(): - is_definition = group in all_def_groups + is_definition = group in all_definition_groups for hed_tag in group.tags(): - if hed_tag.expandable and not hed_tag.expanded: - for tag in hed_tag.expandable.get_all_tags(): - validation_issues += self._tag_validator. \ - run_individual_tag_validators(tag, allow_placeholders=allow_placeholders, - is_definition=is_definition) - else: - validation_issues += self._tag_validator. \ - run_individual_tag_validators(hed_tag, - allow_placeholders=allow_placeholders, - is_definition=is_definition) + # if hed_tag.expandable and not hed_tag.expanded: + # for tag in hed_tag.expandable.get_all_tags(): + # validation_issues += self._tag_validator. \ + # run_individual_tag_validators(tag, allow_placeholders=allow_placeholders, + # is_definition=is_definition) + # else: + validation_issues += self._tag_validator. \ + run_individual_tag_validators(hed_tag, + allow_placeholders=allow_placeholders, + is_definition=is_definition) return validation_issues diff --git a/hed/validator/onset_validator.py b/hed/validator/onset_validator.py index 942f58efb..af5db6bec 100644 --- a/hed/validator/onset_validator.py +++ b/hed/validator/onset_validator.py @@ -70,7 +70,7 @@ def _find_onset_tags(self, hed_string_obj): def _handle_onset_or_offset(self, def_tag, onset_offset_tag): is_onset = onset_offset_tag.short_base_tag.lower() == DefTagNames.ONSET_KEY - full_def_name = def_name = def_tag.extension_or_value_portion + full_def_name = def_name = def_tag.extension placeholder = None found_slash = def_name.find("/") if found_slash != -1: diff --git a/hed/validator/tag_validator.py b/hed/validator/tag_validator.py index 4ecadfc5a..783b035b6 100644 --- a/hed/validator/tag_validator.py +++ b/hed/validator/tag_validator.py @@ -84,7 +84,7 @@ def run_individual_tag_validators(self, original_tag, allow_placeholders=False, validation_issues += self.check_tag_unit_class_units_are_valid(original_tag) elif original_tag.is_value_class_tag(): validation_issues += self.check_tag_value_class_valid(original_tag) - elif original_tag.extension_or_value_portion: + elif original_tag.extension: validation_issues += self.check_for_invalid_extension_chars(original_tag) if not allow_placeholders: @@ -278,12 +278,13 @@ def check_tag_exists_in_schema(self, original_tag): index_in_tag_end=None) return validation_issues - def check_tag_unit_class_units_are_valid(self, original_tag): + def check_tag_unit_class_units_are_valid(self, original_tag, report_tag_as=None, error_code=None): """ Report incorrect unit class or units. Parameters: original_tag (HedTag): The original tag that is used to report the error. - + report_tag_as (HedTag): Report errors as coming from this tag, rather than original_tag. + error_code (str): Override error codes to this Returns: list: Validation issues. Each issue is a dictionary. """ @@ -296,29 +297,36 @@ def check_tag_unit_class_units_are_valid(self, original_tag): if tag_validator_util.validate_numeric_value_class(stripped_value): default_unit = original_tag.get_unit_class_default_unit() validation_issues += ErrorHandler.format_error(ValidationErrors.HED_UNITS_DEFAULT_USED, - tag=original_tag, - default_unit=default_unit) + tag=report_tag_as if report_tag_as else original_tag, + default_unit=default_unit, + actual_error=error_code) else: tag_unit_class_units = original_tag.get_tag_unit_class_units() if tag_unit_class_units: + default_code = ValidationErrors.HED_UNITS_INVALID + if not error_code: + error_code = default_code validation_issues += ErrorHandler.format_error(ValidationErrors.HED_UNITS_INVALID, - original_tag, + actual_error=error_code, + tag=report_tag_as if report_tag_as else original_tag, units=tag_unit_class_units) return validation_issues - def check_tag_value_class_valid(self, original_tag): + def check_tag_value_class_valid(self, original_tag, report_tag_as=None, error_code=None): """ Report an invalid value portion. Parameters: original_tag (HedTag): The original tag that is used to report the error. - + report_tag_as (HedTag): Report errors as coming from this tag, rather than original_tag. + error_code (str): Override error codes to this Returns: list: Validation issues. """ validation_issues = [] - if not self._validate_value_class_portion(original_tag, original_tag.extension_or_value_portion): + if not self._validate_value_class_portion(original_tag, original_tag.extension): validation_issues += ErrorHandler.format_error(ValidationErrors.HED_VALUE_INVALID, - original_tag) + report_tag_as if report_tag_as else original_tag, + actual_error=error_code) return validation_issues @@ -349,7 +357,7 @@ def check_tag_unit_class_units_exist(self, original_tag): """ validation_issues = [] if original_tag.is_unit_class_tag(): - tag_unit_values = original_tag.extension_or_value_portion + tag_unit_values = original_tag.extension if tag_validator_util.validate_numeric_value_class(tag_unit_values): default_unit = original_tag.get_unit_class_default_unit() validation_issues += ErrorHandler.format_error(ValidationErrors.HED_UNITS_DEFAULT_USED, @@ -369,7 +377,7 @@ def check_for_invalid_extension_chars(self, original_tag): allowed_chars = self.TAG_ALLOWED_CHARS allowed_chars += self.DEFAULT_ALLOWED_PLACEHOLDER_CHARS allowed_chars += " " - return self._check_invalid_chars(original_tag.extension_or_value_portion, allowed_chars, original_tag, + return self._check_invalid_chars(original_tag.extension, allowed_chars, original_tag, starting_index=len(original_tag.org_base_tag) + 1) def check_capitalization(self, original_tag): @@ -554,7 +562,7 @@ def check_for_placeholder(self, original_tag, is_definition=False): validation_issues = [] if not is_definition: starting_index = len(original_tag.org_base_tag) + 1 - for i, character in enumerate(original_tag.extension_or_value_portion): + for i, character in enumerate(original_tag.extension): if character == "#": validation_issues += ErrorHandler.format_error(ValidationErrors.INVALID_TAG_CHARACTER, tag=original_tag, diff --git a/spec_tests/hed-specification b/spec_tests/hed-specification index 9162f09a2..8772db30c 160000 --- a/spec_tests/hed-specification +++ b/spec_tests/hed-specification @@ -1 +1 @@ -Subproject commit 9162f09a2c1f60c35ff68c6a52987f3c33381eb6 +Subproject commit 8772db30cf7c63a4fc224aac9e7daf504f276a1b diff --git a/tests/models/test_base_input.py b/tests/models/test_base_input.py index 30f3714aa..32615fb76 100644 --- a/tests/models/test_base_input.py +++ b/tests/models/test_base_input.py @@ -51,8 +51,8 @@ def test_gathered_defs(self): # todo: add unit tests for definitions in tsv file defs = DefinitionDict.get_as_strings(self.tabular_file._sidecar.extract_definitions(hed_schema=self.hed_schema)) expected_defs = { - 'jsonfiledef': '(Item/JsonDef1/#,Item/JsonDef1)', - 'jsonfiledef2': '(Item/JsonDef2/#,Item/JsonDef2)', + 'jsonfiledef': '(Item/JsonDef1,Item/JsonDef1/#)', + 'jsonfiledef2': '(Item/JsonDef2,Item/JsonDef2/#)', 'jsonfiledef3': '(Item/JsonDef3/#)', 'takesvaluedef': '(Age/#)', 'valueclassdef': '(Acceleration/#)' diff --git a/tests/models/test_definition_entry.py b/tests/models/test_definition_entry.py index e74af4a02..9ff70bc42 100644 --- a/tests/models/test_definition_entry.py +++ b/tests/models/test_definition_entry.py @@ -18,25 +18,25 @@ def setUpClass(cls): hed_schema=hed_schema) cls.hed_schema = hed_schema - def test_constructor(self): - def_entry1 = DefinitionEntry('Def1', self.def1, False, None) - self.assertIsInstance(def_entry1, DefinitionEntry) - self.assertIn('Condition-variable/Blech', def_entry1.tag_dict) - def_entry2 = DefinitionEntry('Def2', self.def2, False, None) - self.assertIsInstance(def_entry2, DefinitionEntry) - self.assertNotIn('Condition-variable/Blech', def_entry2.tag_dict) - def_entry3 = DefinitionEntry('Def3', self.def3, False, None) - self.assertIsInstance(def_entry3, DefinitionEntry) - self.assertIn('Condition-variable/Blech', def_entry3.tag_dict) - def_entry4 = DefinitionEntry('Def4', self.def4, False, None) - self.assertIsInstance(def_entry4, DefinitionEntry) - self.assertNotIn('Condition-variable/Blech', def_entry4.tag_dict) - def_entry3a = DefinitionEntry('Def3a', self.def3, True, None) - self.assertIsInstance(def_entry3a, DefinitionEntry) - self.assertIn('Condition-variable/Blech', def_entry3a.tag_dict) - def_entry4a = DefinitionEntry('Def4a', self.def4, True, None) - self.assertIsInstance(def_entry4a, DefinitionEntry) - self.assertNotIn('Condition-variable/Blech', def_entry4a.tag_dict) + # def test_constructor(self): + # def_entry1 = DefinitionEntry('Def1', self.def1, False, None) + # self.assertIsInstance(def_entry1, DefinitionEntry) + # self.assertIn('Condition-variable/Blech', def_entry1.tag_dict) + # def_entry2 = DefinitionEntry('Def2', self.def2, False, None) + # self.assertIsInstance(def_entry2, DefinitionEntry) + # self.assertNotIn('Condition-variable/Blech', def_entry2.tag_dict) + # def_entry3 = DefinitionEntry('Def3', self.def3, False, None) + # self.assertIsInstance(def_entry3, DefinitionEntry) + # self.assertIn('Condition-variable/Blech', def_entry3.tag_dict) + # def_entry4 = DefinitionEntry('Def4', self.def4, False, None) + # self.assertIsInstance(def_entry4, DefinitionEntry) + # self.assertNotIn('Condition-variable/Blech', def_entry4.tag_dict) + # def_entry3a = DefinitionEntry('Def3a', self.def3, True, None) + # self.assertIsInstance(def_entry3a, DefinitionEntry) + # self.assertIn('Condition-variable/Blech', def_entry3a.tag_dict) + # def_entry4a = DefinitionEntry('Def4a', self.def4, True, None) + # self.assertIsInstance(def_entry4a, DefinitionEntry) + # self.assertNotIn('Condition-variable/Blech', def_entry4a.tag_dict) def test_get_definition(self): def_entry1 = DefinitionEntry('Def1', self.def1, False, None) diff --git a/tests/models/test_df_util.py b/tests/models/test_df_util.py index 50a02e133..9c5126a34 100644 --- a/tests/models/test_df_util.py +++ b/tests/models/test_df_util.py @@ -3,7 +3,7 @@ from hed import load_schema_version -from hed.models.df_util import shrink_defs, expand_defs, convert_to_form +from hed.models.df_util import shrink_defs, expand_defs, convert_to_form, process_def_expands from hed import DefinitionDict @@ -153,3 +153,102 @@ def test_convert_to_form_multiple_tags_long(self): expected_df = pd.DataFrame({"column1": ["Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/CSS-color/White-color/Azure,Item/Biological-item/Anatomical-item/Body-part/Head/Face/Nose,Property/Data-property/Data-value/Spatiotemporal-value/Rate-of-change/Acceleration/4.5 m-per-s^2"]}) result = convert_to_form(df, self.schema, "long_tag", ['column1']) pd.testing.assert_frame_equal(result, expected_df) + + def test_basic_expand_detection(self): + # all simple cases with no duplicates + test_strings = [ + "(Def-expand/A1/1, (Action/1, Age/5, Item-count/3))", + "(Def-expand/A1/2, (Action/2, Age/5, Item-count/3))", + "(Def-expand/B2/3, (Action/3, Collection/animals, Alert))", + "(Def-expand/B2/4, (Action/4, Collection/animals, Alert))", + "(Def-expand/C3/5, (Action/5, Joyful, Event))", + "(Def-expand/C3/6, (Action/6, Joyful, Event))" + ] + process_def_expands(test_strings, self.schema) + + def test_mixed_detection(self): + # Cases where you can only retroactively identify the first def-expand + test_strings = [ + # Basic example first just to verify + "(Def-expand/A1/1, (Action/1, Age/5, Item-count/2))", + "(Def-expand/A1/2, (Action/2, Age/5, Item-count/2))", + # Out of order ambiguous + "(Def-expand/B2/3, (Action/3, Collection/animals, Age/3))", + "(Def-expand/B2/4, (Action/4, Collection/animals, Age/3))", + # Multiple tags + "(Def-expand/C3/5, (Action/5, Age/5, Item-count/5))", + "(Def-expand/C3/6, (Action/6, Age/5, Item-count/5))", + # Multiple tags2 + "(Def-expand/D4/7, (Action/7, Age/7, Item-count/8))", + "(Def-expand/D4/8, (Action/8, Age/7, Item-count/8))" + # Multiple tags3 + "(Def-expand/D5/7, (Action/7, Age/7, Item-count/8, Event))", + "(Def-expand/D5/8, (Action/8, Age/7, Item-count/8, Event))" + ] + def_dict, ambiguous_defs, _ = process_def_expands(test_strings, self.schema) + self.assertEqual(len(def_dict), 5) + + def test_ambiguous_defs(self): + # Cases that can't be identified + test_strings = [ + "(Def-expand/A1/2, (Action/2, Age/5, Item-count/2))", + "(Def-expand/B2/3, (Action/3, Collection/animals, Age/3))", + "(Def-expand/C3/5, (Action/5, Age/5, Item-count/5))", + "(Def-expand/D4/7, (Action/7, Age/7, Item-count/8))", + "(Def-expand/D5/7, (Action/7, Age/7, Item-count/8, Event))", + ] + _, ambiguous_defs, _ = process_def_expands(test_strings, self.schema) + self.assertEqual(len(ambiguous_defs), 5) + + def test_errors(self): + # Cases where you can only retroactively identify the first def-expand + test_strings = [ + "(Def-expand/A1/1, (Action/1, Age/5, Item-count/2))", + "(Def-expand/A1/2, (Action/2, Age/5, Item-count/2))", + "(Def-expand/A1/3, (Action/3, Age/5, Item-count/3))", + + ] + _, _, errors = process_def_expands(test_strings, self.schema) + self.assertEqual(len(errors), 1) + + def test_def_expand_detection(self): + test_strings = [ + "(Def-expand/A1/1, (Action/1, Age/5, Item-Count/2))", + "(Def-expand/A1/2, (Action/2, Age/5, Item-Count/2))", + "(Def-expand/B2/3, (Action/3, Collection/animals, Alert))", + "(Def-expand/B2/4, (Action/4, Collection/animals, Alert))", + "(Def-expand/C3/5, (Action/5, Joyful, Event))", + "(Def-expand/C3/6, (Action/6, Joyful, Event))", + "((Def-expand/A1/7, (Action/7, Age/5, Item-Count/2)), Event, Age/10)", + "((Def-expand/A1/8, (Action/8, Age/5, Item-Count/2)), Collection/toys, Item-Count/5)", + "((Def-expand/B2/9, (Action/9, Collection/animals, Alert)), Event, Collection/plants)", + "((Def-expand/B2/10, (Action/10, Collection/animals, Alert)), Joyful, Item-Count/3)", + "((Def-expand/C3/11, (Action/11, Joyful, Event)), Collection/vehicles, Age/20)", + "((Def-expand/C3/12, (Action/12, Joyful, Event)), Alert, Item-Count/8)", + "((Def-expand/A1/13, (Action/13, Age/5, Item-Count/2)), (Def-expand/B2/13, (Action/13, Collection/animals, Alert)), Event)", + "((Def-expand/A1/14, (Action/14, Age/5, Item-Count/2)), Joyful, (Def-expand/C3/14, (Action/14, Joyful, Event)))", + "(Def-expand/B2/15, (Action/15, Collection/animals, Alert), (Def-expand/C3/15, (Action/15, Joyful, Event)), Age/30)", + "((Def-expand/A1/16, (Action/16, Age/5, Item-Count/2)), (Def-expand/B2/16, (Action/16, Collection/animals, Alert)), Collection/food)", + "(Def-expand/C3/17, (Action/17, Joyful, Event)), (Def-expand/A1/17, (Action/17, Age/5, Item-Count/2)), Item-Count/6", + "((Def-expand/B2/18, (Action/18, Collection/animals, Alert)), (Def-expand/C3/18, (Action/18, Joyful, Event)), Alert)", + "(Def-expand/D1/Apple, (Task/Apple, Collection/cars, Attribute/color))", + "(Def-expand/D1/Banana, (Task/Banana, Collection/cars, Attribute/color))", + "(Def-expand/E2/Carrot, (Collection/Carrot, Collection/plants, Attribute/type))", + "(Def-expand/E2/Dog, (Collection/Dog, Collection/plants, Attribute/type))", + "((Def-expand/D1/Elephant, (Task/Elephant, Collection/cars, Attribute/color)), (Def-expand/E2/Fox, (Collection/Fox, Collection/plants, Attribute/type)), Event)", + "((Def-expand/D1/Giraffe, (Task/Giraffe, Collection/cars, Attribute/color)), Joyful, (Def-expand/E2/Horse, (Collection/Horse, Collection/plants, Attribute/type)))", + "(Def-expand/D1/Iguana, (Task/Iguana, Collection/cars, Attribute/color), (Def-expand/E2/Jaguar, (Collection/Jaguar, Collection/plants, Attribute/type)), Age/30)", + "(Def-expand/F1/Lion, (Task/Lion, Collection/boats, Attribute/length))", + "(Def-expand/F1/Monkey, (Task/Monkey, Collection/boats, Attribute/length))", + "(Def-expand/G2/Nest, (Collection/Nest, Collection/instruments, Attribute/material))", + "(Def-expand/G2/Octopus, (Collection/Octopus, Collection/instruments, Attribute/material))", + "((Def-expand/F1/Panda, (Task/Panda, Collection/boats, Attribute/length)), (Def-expand/G2/Quail, (Collection/Quail, Collection/instruments, Attribute/material)), Event)", + "((Def-expand/F1/Rabbit, (Task/Rabbit, Collection/boats, Attribute/length)), Joyful, (Def-expand/G2/Snake, (Collection/Snake, Collection/instruments, Attribute/material)))", + "(Def-expand/F1/Turtle, (Task/Turtle, Collection/boats, Attribute/length), (Def-expand/G2/Umbrella, (Collection/Umbrella, Collection/instruments, Attribute/material)))" + ] + + def_dict, ambiguous, errors = process_def_expands(test_strings, self.schema) + self.assertEqual(len(def_dict), 7) + self.assertEqual(len(ambiguous), 0) + self.assertEqual(len(errors), 0) + From 43dc4576c515baffb77ac7d504e531048987e739 Mon Sep 17 00:00:00 2001 From: IanCa Date: Thu, 30 Mar 2023 19:02:31 -0500 Subject: [PATCH 3/3] Fix tests --- tests/models/test_df_util.py | 6 +++--- tests/models/test_sidecar.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/models/test_df_util.py b/tests/models/test_df_util.py index 9c5126a34..5bcdfe704 100644 --- a/tests/models/test_df_util.py +++ b/tests/models/test_df_util.py @@ -227,7 +227,7 @@ def test_def_expand_detection(self): "((Def-expand/C3/12, (Action/12, Joyful, Event)), Alert, Item-Count/8)", "((Def-expand/A1/13, (Action/13, Age/5, Item-Count/2)), (Def-expand/B2/13, (Action/13, Collection/animals, Alert)), Event)", "((Def-expand/A1/14, (Action/14, Age/5, Item-Count/2)), Joyful, (Def-expand/C3/14, (Action/14, Joyful, Event)))", - "(Def-expand/B2/15, (Action/15, Collection/animals, Alert), (Def-expand/C3/15, (Action/15, Joyful, Event)), Age/30)", + "(Def-expand/B2/15, (Action/15, Collection/animals, Alert)), (Def-expand/C3/15, (Action/15, Joyful, Event)), Age/30", "((Def-expand/A1/16, (Action/16, Age/5, Item-Count/2)), (Def-expand/B2/16, (Action/16, Collection/animals, Alert)), Collection/food)", "(Def-expand/C3/17, (Action/17, Joyful, Event)), (Def-expand/A1/17, (Action/17, Age/5, Item-Count/2)), Item-Count/6", "((Def-expand/B2/18, (Action/18, Collection/animals, Alert)), (Def-expand/C3/18, (Action/18, Joyful, Event)), Alert)", @@ -237,14 +237,14 @@ def test_def_expand_detection(self): "(Def-expand/E2/Dog, (Collection/Dog, Collection/plants, Attribute/type))", "((Def-expand/D1/Elephant, (Task/Elephant, Collection/cars, Attribute/color)), (Def-expand/E2/Fox, (Collection/Fox, Collection/plants, Attribute/type)), Event)", "((Def-expand/D1/Giraffe, (Task/Giraffe, Collection/cars, Attribute/color)), Joyful, (Def-expand/E2/Horse, (Collection/Horse, Collection/plants, Attribute/type)))", - "(Def-expand/D1/Iguana, (Task/Iguana, Collection/cars, Attribute/color), (Def-expand/E2/Jaguar, (Collection/Jaguar, Collection/plants, Attribute/type)), Age/30)", + "(Def-expand/D1/Iguana, (Task/Iguana, Collection/cars, Attribute/color)), (Def-expand/E2/Jaguar, (Collection/Jaguar, Collection/plants, Attribute/type)), Age/30", "(Def-expand/F1/Lion, (Task/Lion, Collection/boats, Attribute/length))", "(Def-expand/F1/Monkey, (Task/Monkey, Collection/boats, Attribute/length))", "(Def-expand/G2/Nest, (Collection/Nest, Collection/instruments, Attribute/material))", "(Def-expand/G2/Octopus, (Collection/Octopus, Collection/instruments, Attribute/material))", "((Def-expand/F1/Panda, (Task/Panda, Collection/boats, Attribute/length)), (Def-expand/G2/Quail, (Collection/Quail, Collection/instruments, Attribute/material)), Event)", "((Def-expand/F1/Rabbit, (Task/Rabbit, Collection/boats, Attribute/length)), Joyful, (Def-expand/G2/Snake, (Collection/Snake, Collection/instruments, Attribute/material)))", - "(Def-expand/F1/Turtle, (Task/Turtle, Collection/boats, Attribute/length), (Def-expand/G2/Umbrella, (Collection/Umbrella, Collection/instruments, Attribute/material)))" + "(Def-expand/F1/Turtle, (Task/Turtle, Collection/boats, Attribute/length)), (Def-expand/G2/Umbrella, (Collection/Umbrella, Collection/instruments, Attribute/material))" ] def_dict, ambiguous, errors = process_def_expands(test_strings, self.schema) diff --git a/tests/models/test_sidecar.py b/tests/models/test_sidecar.py index 1925745ae..8d2d85c12 100644 --- a/tests/models/test_sidecar.py +++ b/tests/models/test_sidecar.py @@ -95,8 +95,8 @@ def test_validate_column_group(self): extra_def_dict.check_for_definitions(hed_string) validation_issues2 = self.json_without_definitions_sidecar.validate(self.hed_schema, extra_def_dicts=extra_def_dict) - # this removes one undef matched error and adds two extended tag warnings - self.assertEqual(len(validation_issues2), 9) + # this removes one undef matched error + self.assertEqual(len(validation_issues2), 7) def test_duplicate_def(self): sidecar = self.json_def_sidecar