From 43b9990809ed24e4edc63d2c9360f984fc4ece2c Mon Sep 17 00:00:00 2001 From: IanCa Date: Thu, 3 Aug 2023 19:37:46 -0500 Subject: [PATCH] Cleanup pass of misc model/schema code --- hed/models/base_input.py | 64 +++++++++++------------ hed/models/column_mapper.py | 1 + hed/models/definition_dict.py | 73 +++++++++++++-------------- hed/models/definition_entry.py | 14 ++---- hed/models/expression_parser.py | 43 +++++++++++++--- hed/models/hed_group.py | 84 +++++++++++++------------------ hed/schema/hed_schema_entry.py | 72 +++++++++++++------------- hed/validator/def_validator.py | 15 ++---- hed/validator/onset_validator.py | 8 +-- tests/models/test_base_input.py | 2 - tests/schema/test_schema_entry.py | 1 + 11 files changed, 191 insertions(+), 186 deletions(-) diff --git a/hed/models/base_input.py b/hed/models/base_input.py index 4c2e3a7bb..0e7190498 100644 --- a/hed/models/base_input.py +++ b/hed/models/base_input.py @@ -56,9 +56,7 @@ def __init__(self, file, file_type=None, worksheet_name=None, has_column_names=T # This is the loaded workbook if we loaded originally from an Excel file. self._loaded_workbook = None self._worksheet_name = worksheet_name - pandas_header = 0 - if not self._has_column_names: - pandas_header = None + self._dataframe = None input_type = file_type if isinstance(file, str): @@ -67,35 +65,8 @@ def __init__(self, file, file_type=None, worksheet_name=None, has_column_names=T if self.name is None: self._name = file - self._dataframe = None - - if isinstance(file, pandas.DataFrame): - self._dataframe = file.astype(str) - self._has_column_names = self._dataframe_has_names(self._dataframe) - elif not file: - raise HedFileError(HedExceptions.FILE_NOT_FOUND, "Empty file passed to BaseInput.", file) - elif input_type in self.TEXT_EXTENSION: - try: - self._dataframe = pandas.read_csv(file, delimiter='\t', header=pandas_header, - dtype=str, keep_default_na=True, na_values=["", "null"]) - except Exception as e: - raise HedFileError(HedExceptions.INVALID_FILE_FORMAT, str(e), self.name) from e - # Convert nan values to a known value - self._dataframe = self._dataframe.fillna("n/a") - elif input_type in self.EXCEL_EXTENSION: - try: - self._loaded_workbook = openpyxl.load_workbook(file) - loaded_worksheet = self.get_worksheet(self._worksheet_name) - self._dataframe = self._get_dataframe_from_worksheet(loaded_worksheet, has_column_names) - except Exception as e: - raise HedFileError(HedExceptions.GENERIC_ERROR, str(e), self.name) from e - else: - raise HedFileError(HedExceptions.INVALID_EXTENSION, "", file) - - if self._dataframe.size == 0: - raise HedFileError(HedExceptions.INVALID_DATAFRAME, "Invalid dataframe(malformed datafile, etc)", file) + self._open_dataframe_file(file, has_column_names, input_type) - # todo: Can we get rid of this behavior now that we're using pandas? column_issues = ColumnMapper.check_for_blank_names(self.columns, allow_blank_names=allow_blank_names) if column_issues: raise HedFileError(HedExceptions.BAD_COLUMN_NAMES, "Duplicate or blank columns found. See issues.", @@ -517,3 +488,34 @@ def get_column_refs(self): column_refs(list): A list of unique column refs found """ return [] + + def _open_dataframe_file(self, file, has_column_names, input_type): + pandas_header = 0 + if not has_column_names: + pandas_header = None + + if isinstance(file, pandas.DataFrame): + self._dataframe = file.astype(str) + self._has_column_names = self._dataframe_has_names(self._dataframe) + elif not file: + raise HedFileError(HedExceptions.FILE_NOT_FOUND, "Empty file passed to BaseInput.", file) + elif input_type in self.TEXT_EXTENSION: + try: + self._dataframe = pandas.read_csv(file, delimiter='\t', header=pandas_header, + dtype=str, keep_default_na=True, na_values=["", "null"]) + except Exception as e: + raise HedFileError(HedExceptions.INVALID_FILE_FORMAT, str(e), self.name) from e + # Convert nan values to a known value + self._dataframe = self._dataframe.fillna("n/a") + elif input_type in self.EXCEL_EXTENSION: + try: + self._loaded_workbook = openpyxl.load_workbook(file) + loaded_worksheet = self.get_worksheet(self._worksheet_name) + self._dataframe = self._get_dataframe_from_worksheet(loaded_worksheet, has_column_names) + except Exception as e: + raise HedFileError(HedExceptions.GENERIC_ERROR, str(e), self.name) from e + else: + raise HedFileError(HedExceptions.INVALID_EXTENSION, "", file) + + if self._dataframe.size == 0: + raise HedFileError(HedExceptions.INVALID_DATAFRAME, "Invalid dataframe(malformed datafile, etc)", file) diff --git a/hed/models/column_mapper.py b/hed/models/column_mapper.py index 1321e9d6d..761ab81a9 100644 --- a/hed/models/column_mapper.py +++ b/hed/models/column_mapper.py @@ -134,6 +134,7 @@ def check_for_blank_names(column_map, allow_blank_names): return [] issues = [] + for column_number, name in enumerate(column_map): if name is None or not name or name.startswith(PANDAS_COLUMN_PREFIX_TO_IGNORE): issues += ErrorHandler.format_error(ValidationErrors.HED_BLANK_COLUMN, column_number) diff --git a/hed/models/definition_dict.py b/hed/models/definition_dict.py index dda340eb3..45c5947c2 100644 --- a/hed/models/definition_dict.py +++ b/hed/models/definition_dict.py @@ -117,14 +117,9 @@ def check_for_definitions(self, hed_string_obj, error_handler=None): def_issues = [] for definition_tag, group in hed_string_obj.find_top_level_tags(anchor_tags={DefTagNames.DEFINITION_KEY}): group_tag, new_def_issues = self._find_group(definition_tag, group, error_handler) - def_tag_name = definition_tag.extension + def_tag_name, def_takes_value = self._strip_value_placeholder(definition_tag.extension) - def_takes_value = def_tag_name.lower().endswith("/#") - if def_takes_value: - def_tag_name = def_tag_name[:-len("/#")] - - def_tag_lower = def_tag_name.lower() - if "/" in def_tag_lower or "#" in def_tag_lower: + if "/" in def_tag_name or "#" in def_tag_name: new_def_issues += ErrorHandler.format_error_with_context(error_handler, DefinitionErrors.INVALID_DEFINITION_EXTENSION, tag=definition_tag, @@ -134,29 +129,42 @@ def check_for_definitions(self, hed_string_obj, error_handler=None): def_issues += new_def_issues continue - new_def_issues += self._validate_contents(definition_tag, group_tag, error_handler) + new_def_issues = self._validate_contents(definition_tag, group_tag, error_handler) new_def_issues += self._validate_placeholders(def_tag_name, group_tag, def_takes_value, error_handler) if new_def_issues: def_issues += new_def_issues continue - if error_handler: - context = error_handler.get_error_context_copy() - else: - context = [] - if def_tag_lower in self.defs: - new_def_issues += ErrorHandler.format_error_with_context(error_handler, - DefinitionErrors.DUPLICATE_DEFINITION, - def_name=def_tag_name) + new_def_issues, context = self._validate_name_and_context(def_tag_name, error_handler) + if new_def_issues: def_issues += new_def_issues continue - self.defs[def_tag_lower] = DefinitionEntry(name=def_tag_name, contents=group_tag, - takes_value=def_takes_value, - source_context=context) + + self.defs[def_tag_name.lower()] = DefinitionEntry(name=def_tag_name, contents=group_tag, + takes_value=def_takes_value, + source_context=context) return def_issues + def _strip_value_placeholder(self, def_tag_name): + def_takes_value = def_tag_name.lower().endswith("/#") + if def_takes_value: + def_tag_name = def_tag_name[:-len("/#")] + return def_tag_name, def_takes_value + + def _validate_name_and_context(self, def_tag_name, error_handler): + if error_handler: + context = error_handler.get_error_context_copy() + else: + context = [] + new_def_issues = [] + if def_tag_name.lower() in self.defs: + new_def_issues += ErrorHandler.format_error_with_context(error_handler, + DefinitionErrors.DUPLICATE_DEFINITION, + def_name=def_tag_name) + return new_def_issues, context + def _validate_placeholders(self, def_tag_name, group, def_takes_value, error_handler): new_issues = [] placeholder_tags = [] @@ -245,11 +253,8 @@ def construct_def_tags(self, hed_string_obj): Parameters: hed_string_obj(HedString): The hed string to identify definition contents in """ - for def_tag, def_expand_group, def_group in hed_string_obj.find_def_tags(recursive=True): - def_contents = self._get_definition_contents(def_tag) - if def_contents is not None: - def_tag._expandable = def_contents - def_tag._expanded = def_tag != def_expand_group + for tag in hed_string_obj.get_all_tags(): + self.construct_def_tag(tag) def construct_def_tag(self, hed_tag): """ Identify def/def-expand tag contents in the given HedTag. @@ -257,6 +262,8 @@ def construct_def_tag(self, hed_tag): Parameters: hed_tag(HedTag): The hed tag to identify definition contents in """ + # Finish tracking down why parent is set incorrectly on def tags sometimes + # It should be ALWAYS set if hed_tag.short_base_tag in {DefTagNames.DEF_ORG_KEY, DefTagNames.DEF_EXPAND_ORG_KEY}: save_parent = hed_tag._parent def_contents = self._get_definition_contents(hed_tag) @@ -277,24 +284,16 @@ def _get_definition_contents(self, def_tag): def_contents: HedGroup The contents to replace the previous def-tag with. """ - is_label_tag = def_tag.extension - placeholder = None - found_slash = is_label_tag.find("/") - if found_slash != -1: - placeholder = is_label_tag[found_slash + 1:] - is_label_tag = is_label_tag[:found_slash] - - label_tag_lower = is_label_tag.lower() + tag_label, _, placeholder = def_tag.extension.partition('/') + + label_tag_lower = tag_label.lower() def_entry = self.defs.get(label_tag_lower) if def_entry is None: # Could raise an error here? return None - else: - def_tag_name, def_contents = def_entry.get_definition(def_tag, placeholder_value=placeholder) - if def_tag_name: - return def_contents - return None + def_contents = def_entry.get_definition(def_tag, placeholder_value=placeholder) + return def_contents @staticmethod def get_as_strings(def_dict): diff --git a/hed/models/definition_entry.py b/hed/models/definition_entry.py index 23845709a..4f3aa8d74 100644 --- a/hed/models/definition_entry.py +++ b/hed/models/definition_entry.py @@ -36,35 +36,31 @@ def get_definition(self, replace_tag, placeholder_value=None, return_copy_of_tag return_copy_of_tag(bool): Set to true for validation Returns: - tuple: - str: The expanded def tag name - HedGroup: The contents of this definition(including the def tag itself) + HedGroup: The contents of this definition(including the def tag itself) :raises ValueError: - Something internally went wrong with finding the placeholder tag. This should not be possible. """ - if self.takes_value == (placeholder_value is None): - return None, [] + if self.takes_value == (not placeholder_value): + return None if return_copy_of_tag: replace_tag = replace_tag.copy() output_contents = [replace_tag] - name = self.name if self.contents: output_group = self.contents - if placeholder_value is not None: + if placeholder_value: output_group = copy.deepcopy(self.contents) placeholder_tag = output_group.find_placeholder_tag() if not placeholder_tag: raise ValueError("Internal error related to placeholders in definition mapping") - name = f"{name}/{placeholder_value}" placeholder_tag.replace_placeholder(placeholder_value) output_contents = [replace_tag, output_group] output_contents = HedGroup(replace_tag._hed_string, startpos=replace_tag.span[0], endpos=replace_tag.span[1], contents=output_contents) - return f"{DefTagNames.DEF_EXPAND_ORG_KEY}/{name}", output_contents + return output_contents def __str__(self): return str(self.contents) diff --git a/hed/models/expression_parser.py b/hed/models/expression_parser.py index 8a9806d42..d976fe6b8 100644 --- a/hed/models/expression_parser.py +++ b/hed/models/expression_parser.py @@ -78,10 +78,10 @@ def __init__(self, text): "(": Token.LogicalGroup, ")": Token.LogicalGroupEnd, "~": Token.LogicalNegation, - "?": Token.Wildcard, # Any tag or group - "??": Token.Wildcard, # Any tag - "???": Token.Wildcard, # Any Group - "{": Token.ExactMatch, # Nothing else + "?": Token.Wildcard, # Any tag or group + "??": Token.Wildcard, # Any tag + "???": Token.Wildcard, # Any Group + "{": Token.ExactMatch, # Nothing else "}": Token.ExactMatchEnd, # Nothing else "@": Token.NotInLine } @@ -218,6 +218,7 @@ def handle_expr(self, hed_group, exact=False): all_found_groups = [search_result(group, tag) for tag, group in groups_found] return all_found_groups + class ExpressionOr(Expression): def handle_expr(self, hed_group, exact=False): groups1 = self.left.handle_expr(hed_group, exact=exact) @@ -229,7 +230,7 @@ def handle_expr(self, hed_group, exact=False): for group in groups1: for other_group in groups2: if group.has_same_tags(other_group): - duplicates.append(group) + duplicates.append(group) groups1 = [group for group in groups1 if not any(other_group is group for other_group in duplicates)] @@ -245,12 +246,13 @@ def __str__(self): output_str += ")" return output_str + class ExpressionNegation(Expression): def handle_expr(self, hed_group, exact=False): found_groups = self.right.handle_expr(hed_group, exact=exact) # Todo: this may need more thought with respects to wildcards and negation - #negated_groups = [group for group in hed_group.get_all_groups() if group not in groups] + # negated_groups = [group for group in hed_group.get_all_groups() if group not in groups] # This simpler version works on python >= 3.9 # negated_groups = [search_result(group, []) for group in hed_group.get_all_groups() if group not in groups] # Python 3.7/8 compatible version. @@ -259,6 +261,7 @@ def handle_expr(self, hed_group, exact=False): return negated_groups + class ExpressionContainingGroup(Expression): def handle_expr(self, hed_group, exact=False): result = self.right.handle_expr(hed_group, exact=True) @@ -310,7 +313,32 @@ def handle_expr(self, hed_group, exact=False): class QueryParser: """Parse a search expression into a form than can be used to search a hed string.""" + def __init__(self, expression_string): + """Compiles a QueryParser for a particular expression, so it can be used to search hed strings. + + + Basic Input Examples: + + 'Event' - Finds any strings with Event, or a descendent tag of Event such as Sensory-event + + 'Event and Action' - Find any strings with Event and Action, including descendant tags + + 'Event or Action' - Same as above, but it has either + + '"Event"' - Finds the Event tag, but not any descendent tags + + 'Def/DefName/*' - Find Def/DefName instances with placeholders, regardless of the value of the placeholder + + 'Eve*' - Find any short tags that begin with Eve*, such as Event, but not Sensory-event + + '[Event and Action]' - Find a group that contains both Event and Action(at any level) + + '[[Event and Action]]' - Find a group with Event And Action at the same level. + + Parameters: + expression_string(str): The query string + """ self.tokens = [] self.at_token = -1 self.tree = self._parse(expression_string.lower()) @@ -360,7 +388,8 @@ def _handle_negation(self): return self._handle_grouping_op() def _handle_grouping_op(self): - next_token = self._next_token_is([Token.ContainingGroup, Token.LogicalGroup, Token.DescendantGroup, Token.ExactMatch]) + next_token = self._next_token_is( + [Token.ContainingGroup, Token.LogicalGroup, Token.DescendantGroup, Token.ExactMatch]) if next_token == Token.ContainingGroup: interior = self._handle_and_op() expr = ExpressionContainingGroup(next_token, right=interior) diff --git a/hed/models/hed_group.py b/hed/models/hed_group.py index 98e81df42..1f1c02d41 100644 --- a/hed/models/hed_group.py +++ b/hed/models/hed_group.py @@ -92,7 +92,7 @@ def remove(self, items_to_remove: Iterable[Union[HedTag, 'HedGroup']]): """ empty_groups = [] # Filter out duplicates - items_to_remove = {id(item):item for item in items_to_remove}.values() + items_to_remove = {id(item): item for item in items_to_remove}.values() for item in items_to_remove: group = item._parent @@ -363,7 +363,8 @@ def __eq__(self, other): return True def find_tags(self, search_tags, recursive=False, include_groups=2): - """ Find the tags and their containing groups. + """ Find the base tags and their containing groups. + This searches by short_base_tag, ignoring any ancestors or extensions/values. Parameters: search_tags (container): A container of short_base_tags to locate @@ -375,23 +376,16 @@ def find_tags(self, search_tags, recursive=False, include_groups=2): Returns: list: The contents of the list depends on the value of include_groups. - - Notes: - - - This can only find identified tags. - - By default, definition, def, def-expand, onset, and offset are identified, even without a schema. - """ found_tags = [] if recursive: - groups = self.get_all_groups() + tags = self.get_all_tags() else: - groups = (self, ) + tags = self.tags() - for sub_group in groups: - for tag in sub_group.tags(): - if tag.short_base_tag.lower() in search_tags: - found_tags.append((tag, sub_group)) + for tag in tags: + if tag.short_base_tag.lower() in search_tags: + found_tags.append((tag, tag._parent)) if include_groups == 0 or include_groups == 1: return [tag[include_groups] for tag in found_tags] @@ -400,6 +394,10 @@ def find_tags(self, search_tags, recursive=False, include_groups=2): def find_wildcard_tags(self, search_tags, recursive=False, include_groups=2): """ Find the tags and their containing groups. + This searches tag.short_tag, with an implicit wildcard on the end. + + e.g. "Eve" will find Event, but not Sensory-event + Parameters: search_tags (container): A container of the starts of short tags to search. recursive (bool): If true, also check subgroups. @@ -413,55 +411,46 @@ def find_wildcard_tags(self, search_tags, recursive=False, include_groups=2): """ found_tags = [] if recursive: - groups = self.get_all_groups() + tags = self.get_all_tags() else: - groups = (self, ) + tags = self.tags() - for sub_group in groups: - for tag in sub_group.tags(): - for search_tag in search_tags: - if tag.short_tag.lower().startswith(search_tag): - found_tags.append((tag, sub_group)) + for tag in tags: + for search_tag in search_tags: + if tag.short_tag.lower().startswith(search_tag): + found_tags.append((tag, tag._parent)) + # We can't find the same tag twice + break if include_groups == 0 or include_groups == 1: return [tag[include_groups] for tag in found_tags] return found_tags - def find_exact_tags(self, tags_or_groups, recursive=False, include_groups=1): - """ Find the given tags or groups. + def find_exact_tags(self, exact_tags, recursive=False, include_groups=1): + """ Find the given tags. This will only find complete matches, any extension or value must also match. Parameters: - tags_or_groups (HedTag, HedGroup): A container of tags to locate. + exact_tags (list of HedTag): A container of tags to locate. recursive (bool): If true, also check subgroups. include_groups(bool): 0, 1 or 2 If 0: Return only tags If 1: Return only groups If 2 or any other value: Return both Returns: - list: A list of HedGroups the given tags/groups were found in. - - Notes: - - If you pass in groups it will only find EXACT matches. - - This can only find identified tags. - - By default, definition, def, def-expand, onset, and offset are identified, even without a schema. - - If this is a HedGroup, order matters. (b, a) != (a, b) - + list: A list of tuples. The contents depend on the values of the include_group. """ found_tags = [] if recursive: - groups = self.get_all_groups() + tags = self.get_all_tags() else: - groups = (self,) + tags = self.tags() - for sub_group in groups: - for search_tag in tags_or_groups: - for tag in sub_group.children: - if tag == search_tag: - found_tags.append((tag, sub_group)) + for tag in tags: + if tag in exact_tags: + found_tags.append((tag, tag._parent)) if include_groups == 0 or include_groups == 1: return [tag[include_groups] for tag in found_tags] - return found_tags def find_def_tags(self, recursive=False, include_groups=3): @@ -481,7 +470,7 @@ def find_def_tags(self, recursive=False, include_groups=3): if recursive: groups = self.get_all_groups() else: - groups = (self, ) + groups = (self,) def_tags = [] for group in groups: @@ -516,16 +505,15 @@ def find_tags_with_term(self, term, recursive=False, include_groups=2): """ found_tags = [] if recursive: - groups = self.get_all_groups() + tags = self.get_all_tags() else: - groups = (self,) + tags = self.tags() search_for = term.lower() - for sub_group in groups: - for tag in sub_group.tags(): - if search_for in tag.tag_terms: - found_tags.append((tag, sub_group)) + for tag in tags: + if search_for in tag.tag_terms: + found_tags.append((tag, tag._parent)) if include_groups == 0 or include_groups == 1: return [tag[include_groups] for tag in found_tags] - return found_tags + return found_tags \ No newline at end of file diff --git a/hed/schema/hed_schema_entry.py b/hed/schema/hed_schema_entry.py index 408b8d984..2ea635050 100644 --- a/hed/schema/hed_schema_entry.py +++ b/hed/schema/hed_schema_entry.py @@ -115,10 +115,8 @@ def __eq__(self, other): return False if self.attributes != other.attributes: # We only want to compare known attributes - self_attr = {key: value for key, value in self.attributes.items() - if not self._unknown_attributes or key not in self._unknown_attributes} - other_attr = {key: value for key, value in other.attributes.items() - if not other._unknown_attributes or key not in other._unknown_attributes} + self_attr = self.get_known_attributes() + other_attr = other.get_known_attributes() if self_attr != other_attr: return False if self.description != other.description: @@ -131,6 +129,10 @@ def __hash__(self): def __str__(self): return self.name + def get_known_attributes(self): + return {key: value for key, value in self.attributes.items() + if not self._unknown_attributes or key not in self._unknown_attributes} + class UnitClassEntry(HedSchemaEntry): """ A single unit class entry in the HedSchema. """ @@ -235,6 +237,20 @@ def has_attribute(self, attribute, return_value=False): val = val is not None return val + def _check_inherited_attribute_internal(self, attribute): + """Gather up all instances of an attribute from this entry and any parent entries""" + attribute_values = [] + + iter_entry = self + while iter_entry is not None: + if iter_entry.takes_value_child_entry: + break + if attribute in iter_entry.attributes: + attribute_values.append(iter_entry.attributes[attribute]) + iter_entry = iter_entry._parent_tag + + return attribute_values + def _check_inherited_attribute(self, attribute, return_value=False, return_union=False): """ Checks for the existence of an attribute in this entry and its parents. @@ -253,29 +269,15 @@ def _check_inherited_attribute(self, attribute, return_value=False, return_union - The existence of an attribute does not guarantee its validity. - For string attributes, the values are joined with a comma as a delimiter from all ancestors. """ - if return_value: - attribute_values = [] - - iter_entry = self - while iter_entry is not None: - if iter_entry.takes_value_child_entry: - break - if attribute in iter_entry.attributes: - if return_value: - attribute_values.append(iter_entry.attributes[attribute]) - if not return_union: - return attribute_values[0] - else: - return True - iter_entry = iter_entry._parent_tag + attribute_values = self._check_inherited_attribute_internal(attribute) if return_value: if not attribute_values: return None - # Always return the union as a non-union would've been returned above - return ",".join(attribute_values) - else: - return False + if return_union: + return ",".join(attribute_values) + return attribute_values[0] + return bool(attribute_values) def base_tag_has_attribute(self, tag_attribute): """ Check if the base tag has a specific attribute. @@ -309,21 +311,19 @@ def parent_name(self): parent_name, _, child_name = self.name.rpartition("/") return parent_name + def _finalize_classes(self, schema, attribute_key, section_key): + result = {} + if attribute_key in self.attributes: + for attribute_name in self.attributes[attribute_key].split(","): + entry = schema._get_tag_entry(attribute_name, section_key) + if entry: + result[attribute_name] = entry + return result + def _finalize_takes_value_tag(self, schema): if self.name.endswith("/#"): - if HedKey.UnitClass in self.attributes: - self.unit_classes = {} - for unit_class_name in self.attributes[HedKey.UnitClass].split(","): - entry = schema._get_tag_entry(unit_class_name, HedSectionKey.UnitClasses) - if entry: - self.unit_classes[unit_class_name] = entry - - if HedKey.ValueClass in self.attributes: - self.value_classes = {} - for value_class_name in self.attributes[HedKey.ValueClass].split(","): - entry = schema._get_tag_entry(value_class_name, HedSectionKey.ValueClasses) - if entry: - self.value_classes[value_class_name] = entry + self.unit_classes = self._finalize_classes(schema, HedKey.UnitClass, HedSectionKey.UnitClasses) + self.value_classes = self._finalize_classes(schema, HedKey.ValueClass, HedSectionKey.ValueClasses) def _finalize_inherited_attributes(self): # Replace the list with a copy we can modify. diff --git a/hed/validator/def_validator.py b/hed/validator/def_validator.py index 0eb159976..fcafcf87b 100644 --- a/hed/validator/def_validator.py +++ b/hed/validator/def_validator.py @@ -103,14 +103,9 @@ def _validate_def_contents(self, def_tag, def_expand_group, tag_validator): """ def_issues = [] is_def_expand_tag = def_expand_group != def_tag - is_label_tag = def_tag.extension - placeholder = None - found_slash = is_label_tag.find("/") - if found_slash != -1: - placeholder = is_label_tag[found_slash + 1:] - is_label_tag = is_label_tag[:found_slash] - - label_tag_lower = is_label_tag.lower() + tag_label, _, placeholder = def_tag.extension.partition('/') + + label_tag_lower = tag_label.lower() def_entry = self.defs.get(label_tag_lower) if def_entry is None: error_code = ValidationErrors.HED_DEF_UNMATCHED @@ -118,9 +113,9 @@ def _validate_def_contents(self, def_tag, def_expand_group, tag_validator): error_code = ValidationErrors.HED_DEF_EXPAND_UNMATCHED def_issues += ErrorHandler.format_error(error_code, tag=def_tag) else: - def_tag_name, def_contents = def_entry.get_definition(def_tag, placeholder_value=placeholder, + def_contents = def_entry.get_definition(def_tag, placeholder_value=placeholder, return_copy_of_tag=True) - if def_tag_name: + if def_contents is not None: if is_def_expand_tag and def_expand_group != def_contents: def_issues += ErrorHandler.format_error(ValidationErrors.HED_DEF_EXPAND_INVALID, tag=def_tag, actual_def=def_contents, diff --git a/hed/validator/onset_validator.py b/hed/validator/onset_validator.py index f44b291ba..b1d928347 100644 --- a/hed/validator/onset_validator.py +++ b/hed/validator/onset_validator.py @@ -70,12 +70,8 @@ def _find_onset_tags(self, hed_string_obj): def _handle_onset_or_offset(self, def_tag, onset_offset_tag): is_onset = onset_offset_tag.short_base_tag == DefTagNames.ONSET_ORG_KEY - full_def_name = def_name = def_tag.extension - placeholder = None - found_slash = def_name.find("/") - if found_slash != -1: - placeholder = def_name[found_slash + 1:] - def_name = def_name[:found_slash] + full_def_name = def_tag.extension + def_name, _, placeholder = def_tag.extension.partition('/') def_entry = self._defs.get(def_name) if def_entry is None: diff --git a/tests/models/test_base_input.py b/tests/models/test_base_input.py index d93983d59..5f8b2bbab 100644 --- a/tests/models/test_base_input.py +++ b/tests/models/test_base_input.py @@ -75,7 +75,6 @@ def test_invalid_input_type_dict(self): BaseInput({'key': 'value'}) - class TestInsertColumns(unittest.TestCase): def test_insert_columns_simple(self): @@ -272,4 +271,3 @@ def test_combine_dataframe_with_mixed_values(self): result = BaseInput.combine_dataframe(loaded_df) expected = pd.Series(['apple, guitar', 'elephant, harmonica', 'cherry, fox', '', '']) self.assertTrue(result.equals(expected)) - diff --git a/tests/schema/test_schema_entry.py b/tests/schema/test_schema_entry.py index ae8812942..0985ce2b7 100644 --- a/tests/schema/test_schema_entry.py +++ b/tests/schema/test_schema_entry.py @@ -7,6 +7,7 @@ def __init__(self, attributes, parent=None): self._parent_tag = parent _check_inherited_attribute = HedTagEntry._check_inherited_attribute + _check_inherited_attribute_internal = HedTagEntry._check_inherited_attribute_internal class TestMockEntry(unittest.TestCase):