From 43b9990809ed24e4edc63d2c9360f984fc4ece2c Mon Sep 17 00:00:00 2001
From: IanCa <ianrcallanan@gmail.com>
Date: Thu, 3 Aug 2023 19:37:46 -0500
Subject: [PATCH] Cleanup pass of misc model/schema code

---
 hed/models/base_input.py          | 64 +++++++++++------------
 hed/models/column_mapper.py       |  1 +
 hed/models/definition_dict.py     | 73 +++++++++++++--------------
 hed/models/definition_entry.py    | 14 ++----
 hed/models/expression_parser.py   | 43 +++++++++++++---
 hed/models/hed_group.py           | 84 +++++++++++++------------------
 hed/schema/hed_schema_entry.py    | 72 +++++++++++++-------------
 hed/validator/def_validator.py    | 15 ++----
 hed/validator/onset_validator.py  |  8 +--
 tests/models/test_base_input.py   |  2 -
 tests/schema/test_schema_entry.py |  1 +
 11 files changed, 191 insertions(+), 186 deletions(-)

diff --git a/hed/models/base_input.py b/hed/models/base_input.py
index 4c2e3a7bb..0e7190498 100644
--- a/hed/models/base_input.py
+++ b/hed/models/base_input.py
@@ -56,9 +56,7 @@ def __init__(self, file, file_type=None, worksheet_name=None, has_column_names=T
         # This is the loaded workbook if we loaded originally from an Excel file.
         self._loaded_workbook = None
         self._worksheet_name = worksheet_name
-        pandas_header = 0
-        if not self._has_column_names:
-            pandas_header = None
+        self._dataframe = None
 
         input_type = file_type
         if isinstance(file, str):
@@ -67,35 +65,8 @@ def __init__(self, file, file_type=None, worksheet_name=None, has_column_names=T
             if self.name is None:
                 self._name = file
 
-        self._dataframe = None
-
-        if isinstance(file, pandas.DataFrame):
-            self._dataframe = file.astype(str)
-            self._has_column_names = self._dataframe_has_names(self._dataframe)
-        elif not file:
-            raise HedFileError(HedExceptions.FILE_NOT_FOUND, "Empty file passed to BaseInput.", file)
-        elif input_type in self.TEXT_EXTENSION:
-            try:
-                self._dataframe = pandas.read_csv(file, delimiter='\t', header=pandas_header,
-                                                  dtype=str, keep_default_na=True, na_values=["", "null"])
-            except Exception as e:
-                raise HedFileError(HedExceptions.INVALID_FILE_FORMAT, str(e), self.name) from e
-            # Convert nan values to a known value
-            self._dataframe = self._dataframe.fillna("n/a")
-        elif input_type in self.EXCEL_EXTENSION:
-            try:
-                self._loaded_workbook = openpyxl.load_workbook(file)
-                loaded_worksheet = self.get_worksheet(self._worksheet_name)
-                self._dataframe = self._get_dataframe_from_worksheet(loaded_worksheet, has_column_names)
-            except Exception as e:
-                raise HedFileError(HedExceptions.GENERIC_ERROR, str(e), self.name) from e
-        else:
-            raise HedFileError(HedExceptions.INVALID_EXTENSION, "", file)
-
-        if self._dataframe.size == 0:
-            raise HedFileError(HedExceptions.INVALID_DATAFRAME, "Invalid dataframe(malformed datafile, etc)", file)
+        self._open_dataframe_file(file, has_column_names, input_type)
 
-        # todo: Can we get rid of this behavior now that we're using pandas?
         column_issues = ColumnMapper.check_for_blank_names(self.columns, allow_blank_names=allow_blank_names)
         if column_issues:
             raise HedFileError(HedExceptions.BAD_COLUMN_NAMES, "Duplicate or blank columns found. See issues.",
@@ -517,3 +488,34 @@ def get_column_refs(self):
             column_refs(list): A list of unique column refs found
         """
         return []
+
+    def _open_dataframe_file(self, file, has_column_names, input_type):
+        pandas_header = 0
+        if not has_column_names:
+            pandas_header = None
+
+        if isinstance(file, pandas.DataFrame):
+            self._dataframe = file.astype(str)
+            self._has_column_names = self._dataframe_has_names(self._dataframe)
+        elif not file:
+            raise HedFileError(HedExceptions.FILE_NOT_FOUND, "Empty file passed to BaseInput.", file)
+        elif input_type in self.TEXT_EXTENSION:
+            try:
+                self._dataframe = pandas.read_csv(file, delimiter='\t', header=pandas_header,
+                                                  dtype=str, keep_default_na=True, na_values=["", "null"])
+            except Exception as e:
+                raise HedFileError(HedExceptions.INVALID_FILE_FORMAT, str(e), self.name) from e
+            # Convert nan values to a known value
+            self._dataframe = self._dataframe.fillna("n/a")
+        elif input_type in self.EXCEL_EXTENSION:
+            try:
+                self._loaded_workbook = openpyxl.load_workbook(file)
+                loaded_worksheet = self.get_worksheet(self._worksheet_name)
+                self._dataframe = self._get_dataframe_from_worksheet(loaded_worksheet, has_column_names)
+            except Exception as e:
+                raise HedFileError(HedExceptions.GENERIC_ERROR, str(e), self.name) from e
+        else:
+            raise HedFileError(HedExceptions.INVALID_EXTENSION, "", file)
+
+        if self._dataframe.size == 0:
+            raise HedFileError(HedExceptions.INVALID_DATAFRAME, "Invalid dataframe(malformed datafile, etc)", file)
diff --git a/hed/models/column_mapper.py b/hed/models/column_mapper.py
index 1321e9d6d..761ab81a9 100644
--- a/hed/models/column_mapper.py
+++ b/hed/models/column_mapper.py
@@ -134,6 +134,7 @@ def check_for_blank_names(column_map, allow_blank_names):
             return []
 
         issues = []
+
         for column_number, name in enumerate(column_map):
             if name is None or not name or name.startswith(PANDAS_COLUMN_PREFIX_TO_IGNORE):
                 issues += ErrorHandler.format_error(ValidationErrors.HED_BLANK_COLUMN, column_number)
diff --git a/hed/models/definition_dict.py b/hed/models/definition_dict.py
index dda340eb3..45c5947c2 100644
--- a/hed/models/definition_dict.py
+++ b/hed/models/definition_dict.py
@@ -117,14 +117,9 @@ def check_for_definitions(self, hed_string_obj, error_handler=None):
         def_issues = []
         for definition_tag, group in hed_string_obj.find_top_level_tags(anchor_tags={DefTagNames.DEFINITION_KEY}):
             group_tag, new_def_issues = self._find_group(definition_tag, group, error_handler)
-            def_tag_name = definition_tag.extension
+            def_tag_name, def_takes_value = self._strip_value_placeholder(definition_tag.extension)
 
-            def_takes_value = def_tag_name.lower().endswith("/#")
-            if def_takes_value:
-                def_tag_name = def_tag_name[:-len("/#")]
-
-            def_tag_lower = def_tag_name.lower()
-            if "/" in def_tag_lower or "#" in def_tag_lower:
+            if "/" in def_tag_name or "#" in def_tag_name:
                 new_def_issues += ErrorHandler.format_error_with_context(error_handler,
                                                                          DefinitionErrors.INVALID_DEFINITION_EXTENSION,
                                                                          tag=definition_tag,
@@ -134,29 +129,42 @@ def check_for_definitions(self, hed_string_obj, error_handler=None):
                 def_issues += new_def_issues
                 continue
 
-            new_def_issues += self._validate_contents(definition_tag, group_tag, error_handler)
+            new_def_issues = self._validate_contents(definition_tag, group_tag, error_handler)
             new_def_issues += self._validate_placeholders(def_tag_name, group_tag, def_takes_value, error_handler)
 
             if new_def_issues:
                 def_issues += new_def_issues
                 continue
 
-            if error_handler:
-                context = error_handler.get_error_context_copy()
-            else:
-                context = []
-            if def_tag_lower in self.defs:
-                new_def_issues += ErrorHandler.format_error_with_context(error_handler,
-                                                                         DefinitionErrors.DUPLICATE_DEFINITION,
-                                                                         def_name=def_tag_name)
+            new_def_issues, context = self._validate_name_and_context(def_tag_name, error_handler)
+            if new_def_issues:
                 def_issues += new_def_issues
                 continue
-            self.defs[def_tag_lower] = DefinitionEntry(name=def_tag_name, contents=group_tag,
-                                                       takes_value=def_takes_value,
-                                                       source_context=context)
+
+            self.defs[def_tag_name.lower()] = DefinitionEntry(name=def_tag_name, contents=group_tag,
+                                                              takes_value=def_takes_value,
+                                                              source_context=context)
 
         return def_issues
 
+    def _strip_value_placeholder(self, def_tag_name):
+        def_takes_value = def_tag_name.lower().endswith("/#")
+        if def_takes_value:
+            def_tag_name = def_tag_name[:-len("/#")]
+        return def_tag_name, def_takes_value
+
+    def _validate_name_and_context(self, def_tag_name, error_handler):
+        if error_handler:
+            context = error_handler.get_error_context_copy()
+        else:
+            context = []
+        new_def_issues = []
+        if def_tag_name.lower() in self.defs:
+            new_def_issues += ErrorHandler.format_error_with_context(error_handler,
+                                                                     DefinitionErrors.DUPLICATE_DEFINITION,
+                                                                     def_name=def_tag_name)
+        return new_def_issues, context
+
     def _validate_placeholders(self, def_tag_name, group, def_takes_value, error_handler):
         new_issues = []
         placeholder_tags = []
@@ -245,11 +253,8 @@ def construct_def_tags(self, hed_string_obj):
         Parameters:
             hed_string_obj(HedString): The hed string to identify definition contents in
         """
-        for def_tag, def_expand_group, def_group in hed_string_obj.find_def_tags(recursive=True):
-            def_contents = self._get_definition_contents(def_tag)
-            if def_contents is not None:
-                def_tag._expandable = def_contents
-                def_tag._expanded = def_tag != def_expand_group
+        for tag in hed_string_obj.get_all_tags():
+            self.construct_def_tag(tag)
 
     def construct_def_tag(self, hed_tag):
         """ Identify def/def-expand tag contents in the given HedTag.
@@ -257,6 +262,8 @@ def construct_def_tag(self, hed_tag):
         Parameters:
             hed_tag(HedTag): The hed tag to identify definition contents in
         """
+        # Finish tracking down why parent is set incorrectly on def tags sometimes
+        # It should be ALWAYS set
         if hed_tag.short_base_tag in {DefTagNames.DEF_ORG_KEY, DefTagNames.DEF_EXPAND_ORG_KEY}:
             save_parent = hed_tag._parent
             def_contents = self._get_definition_contents(hed_tag)
@@ -277,24 +284,16 @@ def _get_definition_contents(self, def_tag):
             def_contents: HedGroup
             The contents to replace the previous def-tag with.
         """
-        is_label_tag = def_tag.extension
-        placeholder = None
-        found_slash = is_label_tag.find("/")
-        if found_slash != -1:
-            placeholder = is_label_tag[found_slash + 1:]
-            is_label_tag = is_label_tag[:found_slash]
-
-        label_tag_lower = is_label_tag.lower()
+        tag_label, _, placeholder = def_tag.extension.partition('/')
+
+        label_tag_lower = tag_label.lower()
         def_entry = self.defs.get(label_tag_lower)
         if def_entry is None:
             # Could raise an error here?
             return None
-        else:
-            def_tag_name, def_contents = def_entry.get_definition(def_tag, placeholder_value=placeholder)
-            if def_tag_name:
-                return def_contents
 
-        return None
+        def_contents = def_entry.get_definition(def_tag, placeholder_value=placeholder)
+        return def_contents
 
     @staticmethod
     def get_as_strings(def_dict):
diff --git a/hed/models/definition_entry.py b/hed/models/definition_entry.py
index 23845709a..4f3aa8d74 100644
--- a/hed/models/definition_entry.py
+++ b/hed/models/definition_entry.py
@@ -36,35 +36,31 @@ def get_definition(self, replace_tag, placeholder_value=None, return_copy_of_tag
             return_copy_of_tag(bool): Set to true for validation
 
         Returns:
-            tuple:
-                str:          The expanded def tag name
-                HedGroup:     The contents of this definition(including the def tag itself)
+            HedGroup:     The contents of this definition(including the def tag itself)
 
         :raises ValueError:
             - Something internally went wrong with finding the placeholder tag.  This should not be possible.
         """
-        if self.takes_value == (placeholder_value is None):
-            return None, []
+        if self.takes_value == (not placeholder_value):
+            return None
 
         if return_copy_of_tag:
             replace_tag = replace_tag.copy()
         output_contents = [replace_tag]
-        name = self.name
         if self.contents:
             output_group = self.contents
-            if placeholder_value is not None:
+            if placeholder_value:
                 output_group = copy.deepcopy(self.contents)
                 placeholder_tag = output_group.find_placeholder_tag()
                 if not placeholder_tag:
                     raise ValueError("Internal error related to placeholders in definition mapping")
-                name = f"{name}/{placeholder_value}"
                 placeholder_tag.replace_placeholder(placeholder_value)
 
             output_contents = [replace_tag, output_group]
 
         output_contents = HedGroup(replace_tag._hed_string,
                                    startpos=replace_tag.span[0], endpos=replace_tag.span[1], contents=output_contents)
-        return f"{DefTagNames.DEF_EXPAND_ORG_KEY}/{name}", output_contents
+        return output_contents
 
     def __str__(self):
         return str(self.contents)
diff --git a/hed/models/expression_parser.py b/hed/models/expression_parser.py
index 8a9806d42..d976fe6b8 100644
--- a/hed/models/expression_parser.py
+++ b/hed/models/expression_parser.py
@@ -78,10 +78,10 @@ def __init__(self, text):
             "(": Token.LogicalGroup,
             ")": Token.LogicalGroupEnd,
             "~": Token.LogicalNegation,
-            "?": Token.Wildcard, # Any tag or group
-            "??": Token.Wildcard, # Any tag
-            "???": Token.Wildcard, # Any Group
-            "{": Token.ExactMatch, # Nothing else
+            "?": Token.Wildcard,  # Any tag or group
+            "??": Token.Wildcard,  # Any tag
+            "???": Token.Wildcard,  # Any Group
+            "{": Token.ExactMatch,  # Nothing else
             "}": Token.ExactMatchEnd,  # Nothing else
             "@": Token.NotInLine
         }
@@ -218,6 +218,7 @@ def handle_expr(self, hed_group, exact=False):
         all_found_groups = [search_result(group, tag) for tag, group in groups_found]
         return all_found_groups
 
+
 class ExpressionOr(Expression):
     def handle_expr(self, hed_group, exact=False):
         groups1 = self.left.handle_expr(hed_group, exact=exact)
@@ -229,7 +230,7 @@ def handle_expr(self, hed_group, exact=False):
         for group in groups1:
             for other_group in groups2:
                 if group.has_same_tags(other_group):
-                        duplicates.append(group)
+                    duplicates.append(group)
 
         groups1 = [group for group in groups1 if not any(other_group is group for other_group in duplicates)]
 
@@ -245,12 +246,13 @@ def __str__(self):
         output_str += ")"
         return output_str
 
+
 class ExpressionNegation(Expression):
     def handle_expr(self, hed_group, exact=False):
         found_groups = self.right.handle_expr(hed_group, exact=exact)
 
         # Todo: this may need more thought with respects to wildcards and negation
-        #negated_groups = [group for group in hed_group.get_all_groups() if group not in groups]
+        # negated_groups = [group for group in hed_group.get_all_groups() if group not in groups]
         # This simpler version works on python >= 3.9
         # negated_groups = [search_result(group, []) for group in hed_group.get_all_groups() if group not in groups]
         # Python 3.7/8 compatible version.
@@ -259,6 +261,7 @@ def handle_expr(self, hed_group, exact=False):
 
         return negated_groups
 
+
 class ExpressionContainingGroup(Expression):
     def handle_expr(self, hed_group, exact=False):
         result = self.right.handle_expr(hed_group, exact=True)
@@ -310,7 +313,32 @@ def handle_expr(self, hed_group, exact=False):
 
 class QueryParser:
     """Parse a search expression into a form than can be used to search a hed string."""
+
     def __init__(self, expression_string):
+        """Compiles a QueryParser for a particular expression, so it can be used to search hed strings.
+
+
+        Basic Input Examples:
+
+        'Event' - Finds any strings with Event, or a descendent tag of Event such as Sensory-event
+
+        'Event and Action' - Find any strings with Event and Action, including descendant tags
+
+        'Event or Action' - Same as above, but it has either
+
+        '"Event"' - Finds the Event tag, but not any descendent tags
+
+        'Def/DefName/*' - Find Def/DefName instances with placeholders, regardless of the value of the placeholder
+
+        'Eve*' - Find any short tags that begin with Eve*, such as Event, but not Sensory-event
+
+        '[Event and Action]' - Find a group that contains both Event and Action(at any level)
+
+        '[[Event and Action]]' - Find a group with Event And Action at the same level.
+
+        Parameters:
+            expression_string(str): The query string
+        """
         self.tokens = []
         self.at_token = -1
         self.tree = self._parse(expression_string.lower())
@@ -360,7 +388,8 @@ def _handle_negation(self):
             return self._handle_grouping_op()
 
     def _handle_grouping_op(self):
-        next_token = self._next_token_is([Token.ContainingGroup, Token.LogicalGroup, Token.DescendantGroup, Token.ExactMatch])
+        next_token = self._next_token_is(
+            [Token.ContainingGroup, Token.LogicalGroup, Token.DescendantGroup, Token.ExactMatch])
         if next_token == Token.ContainingGroup:
             interior = self._handle_and_op()
             expr = ExpressionContainingGroup(next_token, right=interior)
diff --git a/hed/models/hed_group.py b/hed/models/hed_group.py
index 98e81df42..1f1c02d41 100644
--- a/hed/models/hed_group.py
+++ b/hed/models/hed_group.py
@@ -92,7 +92,7 @@ def remove(self, items_to_remove: Iterable[Union[HedTag, 'HedGroup']]):
         """
         empty_groups = []
         # Filter out duplicates
-        items_to_remove = {id(item):item for item in items_to_remove}.values()
+        items_to_remove = {id(item): item for item in items_to_remove}.values()
 
         for item in items_to_remove:
             group = item._parent
@@ -363,7 +363,8 @@ def __eq__(self, other):
         return True
 
     def find_tags(self, search_tags, recursive=False, include_groups=2):
-        """ Find the tags and their containing groups.
+        """ Find the base tags and their containing groups.
+        This searches by short_base_tag, ignoring any ancestors or extensions/values.
 
         Parameters:
             search_tags (container):    A container of short_base_tags to locate
@@ -375,23 +376,16 @@ def find_tags(self, search_tags, recursive=False, include_groups=2):
 
         Returns:
             list: The contents of the list depends on the value of include_groups.
-
-        Notes:
-
-            - This can only find identified tags.
-            - By default, definition, def, def-expand, onset, and offset are identified, even without a schema.
-
         """
         found_tags = []
         if recursive:
-            groups = self.get_all_groups()
+            tags = self.get_all_tags()
         else:
-            groups = (self, )
+            tags = self.tags()
 
-        for sub_group in groups:
-            for tag in sub_group.tags():
-                if tag.short_base_tag.lower() in search_tags:
-                    found_tags.append((tag, sub_group))
+        for tag in tags:
+            if tag.short_base_tag.lower() in search_tags:
+                found_tags.append((tag, tag._parent))
 
         if include_groups == 0 or include_groups == 1:
             return [tag[include_groups] for tag in found_tags]
@@ -400,6 +394,10 @@ def find_tags(self, search_tags, recursive=False, include_groups=2):
     def find_wildcard_tags(self, search_tags, recursive=False, include_groups=2):
         """ Find the tags and their containing groups.
 
+            This searches tag.short_tag, with an implicit wildcard on the end.
+
+            e.g. "Eve" will find Event, but not Sensory-event
+
         Parameters:
             search_tags (container):    A container of the starts of short tags to search.
             recursive (bool):           If true, also check subgroups.
@@ -413,55 +411,46 @@ def find_wildcard_tags(self, search_tags, recursive=False, include_groups=2):
         """
         found_tags = []
         if recursive:
-            groups = self.get_all_groups()
+            tags = self.get_all_tags()
         else:
-            groups = (self, )
+            tags = self.tags()
 
-        for sub_group in groups:
-            for tag in sub_group.tags():
-                for search_tag in search_tags:
-                    if tag.short_tag.lower().startswith(search_tag):
-                        found_tags.append((tag, sub_group))
+        for tag in tags:
+            for search_tag in search_tags:
+                if tag.short_tag.lower().startswith(search_tag):
+                    found_tags.append((tag, tag._parent))
+                    # We can't find the same tag twice
+                    break
 
         if include_groups == 0 or include_groups == 1:
             return [tag[include_groups] for tag in found_tags]
         return found_tags
 
-    def find_exact_tags(self, tags_or_groups, recursive=False, include_groups=1):
-        """  Find the given tags or groups.
+    def find_exact_tags(self, exact_tags, recursive=False, include_groups=1):
+        """  Find the given tags.  This will only find complete matches, any extension or value must also match.
 
         Parameters:
-            tags_or_groups (HedTag, HedGroup): A container of tags to locate.
+            exact_tags (list of HedTag): A container of tags to locate.
             recursive (bool): If true, also check subgroups.
             include_groups(bool): 0, 1 or 2
                 If 0: Return only tags
                 If 1: Return only groups
                 If 2 or any other value: Return both
         Returns:
-            list: A list of HedGroups the given tags/groups were found in.
-
-        Notes:
-            - If you pass in groups it will only find EXACT matches.
-            - This can only find identified tags.
-            - By default, definition, def, def-expand, onset, and offset are identified, even without a schema.
-            - If this is a HedGroup, order matters.  (b, a) != (a, b)
-
+            list: A list of tuples. The contents depend on the values of the include_group.
         """
         found_tags = []
         if recursive:
-            groups = self.get_all_groups()
+            tags = self.get_all_tags()
         else:
-            groups = (self,)
+            tags = self.tags()
 
-        for sub_group in groups:
-            for search_tag in tags_or_groups:
-                for tag in sub_group.children:
-                    if tag == search_tag:
-                        found_tags.append((tag, sub_group))
+        for tag in tags:
+            if tag in exact_tags:
+                found_tags.append((tag, tag._parent))
 
         if include_groups == 0 or include_groups == 1:
             return [tag[include_groups] for tag in found_tags]
-
         return found_tags
 
     def find_def_tags(self, recursive=False, include_groups=3):
@@ -481,7 +470,7 @@ def find_def_tags(self, recursive=False, include_groups=3):
         if recursive:
             groups = self.get_all_groups()
         else:
-            groups = (self, )
+            groups = (self,)
 
         def_tags = []
         for group in groups:
@@ -516,16 +505,15 @@ def find_tags_with_term(self, term, recursive=False, include_groups=2):
         """
         found_tags = []
         if recursive:
-            groups = self.get_all_groups()
+            tags = self.get_all_tags()
         else:
-            groups = (self,)
+            tags = self.tags()
 
         search_for = term.lower()
-        for sub_group in groups:
-            for tag in sub_group.tags():
-                if search_for in tag.tag_terms:
-                    found_tags.append((tag, sub_group))
+        for tag in tags:
+            if search_for in tag.tag_terms:
+                found_tags.append((tag, tag._parent))
 
         if include_groups == 0 or include_groups == 1:
             return [tag[include_groups] for tag in found_tags]
-        return found_tags
+        return found_tags
\ No newline at end of file
diff --git a/hed/schema/hed_schema_entry.py b/hed/schema/hed_schema_entry.py
index 408b8d984..2ea635050 100644
--- a/hed/schema/hed_schema_entry.py
+++ b/hed/schema/hed_schema_entry.py
@@ -115,10 +115,8 @@ def __eq__(self, other):
             return False
         if self.attributes != other.attributes:
             # We only want to compare known attributes
-            self_attr = {key: value for key, value in self.attributes.items()
-                         if not self._unknown_attributes or key not in self._unknown_attributes}
-            other_attr = {key: value for key, value in other.attributes.items()
-                          if not other._unknown_attributes or key not in other._unknown_attributes}
+            self_attr = self.get_known_attributes()
+            other_attr = other.get_known_attributes()
             if self_attr != other_attr:
                 return False
         if self.description != other.description:
@@ -131,6 +129,10 @@ def __hash__(self):
     def __str__(self):
         return self.name
 
+    def get_known_attributes(self):
+        return {key: value for key, value in self.attributes.items()
+                if not self._unknown_attributes or key not in self._unknown_attributes}
+
 
 class UnitClassEntry(HedSchemaEntry):
     """ A single unit class entry in the HedSchema. """
@@ -235,6 +237,20 @@ def has_attribute(self, attribute, return_value=False):
             val = val is not None
         return val
 
+    def _check_inherited_attribute_internal(self, attribute):
+        """Gather up all instances of an attribute from this entry and any parent entries"""
+        attribute_values = []
+
+        iter_entry = self
+        while iter_entry is not None:
+            if iter_entry.takes_value_child_entry:
+                break
+            if attribute in iter_entry.attributes:
+                attribute_values.append(iter_entry.attributes[attribute])
+            iter_entry = iter_entry._parent_tag
+
+        return attribute_values
+
     def _check_inherited_attribute(self, attribute, return_value=False, return_union=False):
         """
         Checks for the existence of an attribute in this entry and its parents.
@@ -253,29 +269,15 @@ def _check_inherited_attribute(self, attribute, return_value=False, return_union
             - The existence of an attribute does not guarantee its validity.
             - For string attributes, the values are joined with a comma as a delimiter from all ancestors.
         """
-        if return_value:
-            attribute_values = []
-
-        iter_entry = self
-        while iter_entry is not None:
-            if iter_entry.takes_value_child_entry:
-                break
-            if attribute in iter_entry.attributes:
-                if return_value:
-                    attribute_values.append(iter_entry.attributes[attribute])
-                    if not return_union:
-                        return attribute_values[0]
-                else:
-                    return True
-            iter_entry = iter_entry._parent_tag
+        attribute_values = self._check_inherited_attribute_internal(attribute)
 
         if return_value:
             if not attribute_values:
                 return None
-            # Always return the union as a non-union would've been returned above
-            return ",".join(attribute_values)
-        else:
-            return False
+            if return_union:
+                return ",".join(attribute_values)
+            return attribute_values[0]
+        return bool(attribute_values)
 
     def base_tag_has_attribute(self, tag_attribute):
         """ Check if the base tag has a specific attribute.
@@ -309,21 +311,19 @@ def parent_name(self):
         parent_name, _, child_name = self.name.rpartition("/")
         return parent_name
 
+    def _finalize_classes(self, schema, attribute_key, section_key):
+        result = {}
+        if attribute_key in self.attributes:
+            for attribute_name in self.attributes[attribute_key].split(","):
+                entry = schema._get_tag_entry(attribute_name, section_key)
+                if entry:
+                    result[attribute_name] = entry
+        return result
+
     def _finalize_takes_value_tag(self, schema):
         if self.name.endswith("/#"):
-            if HedKey.UnitClass in self.attributes:
-                self.unit_classes = {}
-                for unit_class_name in self.attributes[HedKey.UnitClass].split(","):
-                    entry = schema._get_tag_entry(unit_class_name, HedSectionKey.UnitClasses)
-                    if entry:
-                        self.unit_classes[unit_class_name] = entry
-
-            if HedKey.ValueClass in self.attributes:
-                self.value_classes = {}
-                for value_class_name in self.attributes[HedKey.ValueClass].split(","):
-                    entry = schema._get_tag_entry(value_class_name, HedSectionKey.ValueClasses)
-                    if entry:
-                        self.value_classes[value_class_name] = entry
+            self.unit_classes = self._finalize_classes(schema, HedKey.UnitClass, HedSectionKey.UnitClasses)
+            self.value_classes = self._finalize_classes(schema, HedKey.ValueClass, HedSectionKey.ValueClasses)
 
     def _finalize_inherited_attributes(self):
         # Replace the list with a copy we can modify.
diff --git a/hed/validator/def_validator.py b/hed/validator/def_validator.py
index 0eb159976..fcafcf87b 100644
--- a/hed/validator/def_validator.py
+++ b/hed/validator/def_validator.py
@@ -103,14 +103,9 @@ def _validate_def_contents(self, def_tag, def_expand_group, tag_validator):
         """
         def_issues = []
         is_def_expand_tag = def_expand_group != def_tag
-        is_label_tag = def_tag.extension
-        placeholder = None
-        found_slash = is_label_tag.find("/")
-        if found_slash != -1:
-            placeholder = is_label_tag[found_slash + 1:]
-            is_label_tag = is_label_tag[:found_slash]
-
-        label_tag_lower = is_label_tag.lower()
+        tag_label, _, placeholder = def_tag.extension.partition('/')
+
+        label_tag_lower = tag_label.lower()
         def_entry = self.defs.get(label_tag_lower)
         if def_entry is None:
             error_code = ValidationErrors.HED_DEF_UNMATCHED
@@ -118,9 +113,9 @@ def _validate_def_contents(self, def_tag, def_expand_group, tag_validator):
                 error_code = ValidationErrors.HED_DEF_EXPAND_UNMATCHED
             def_issues += ErrorHandler.format_error(error_code, tag=def_tag)
         else:
-            def_tag_name, def_contents = def_entry.get_definition(def_tag, placeholder_value=placeholder,
+            def_contents = def_entry.get_definition(def_tag, placeholder_value=placeholder,
                                                                   return_copy_of_tag=True)
-            if def_tag_name:
+            if def_contents is not None:
                 if is_def_expand_tag and def_expand_group != def_contents:
                     def_issues += ErrorHandler.format_error(ValidationErrors.HED_DEF_EXPAND_INVALID,
                                                             tag=def_tag, actual_def=def_contents,
diff --git a/hed/validator/onset_validator.py b/hed/validator/onset_validator.py
index f44b291ba..b1d928347 100644
--- a/hed/validator/onset_validator.py
+++ b/hed/validator/onset_validator.py
@@ -70,12 +70,8 @@ def _find_onset_tags(self, hed_string_obj):
 
     def _handle_onset_or_offset(self, def_tag, onset_offset_tag):
         is_onset = onset_offset_tag.short_base_tag == DefTagNames.ONSET_ORG_KEY
-        full_def_name = def_name = def_tag.extension
-        placeholder = None
-        found_slash = def_name.find("/")
-        if found_slash != -1:
-            placeholder = def_name[found_slash + 1:]
-            def_name = def_name[:found_slash]
+        full_def_name = def_tag.extension
+        def_name, _, placeholder = def_tag.extension.partition('/')
 
         def_entry = self._defs.get(def_name)
         if def_entry is None:
diff --git a/tests/models/test_base_input.py b/tests/models/test_base_input.py
index d93983d59..5f8b2bbab 100644
--- a/tests/models/test_base_input.py
+++ b/tests/models/test_base_input.py
@@ -75,7 +75,6 @@ def test_invalid_input_type_dict(self):
             BaseInput({'key': 'value'})
 
 
-
 class TestInsertColumns(unittest.TestCase):
 
     def test_insert_columns_simple(self):
@@ -272,4 +271,3 @@ def test_combine_dataframe_with_mixed_values(self):
         result = BaseInput.combine_dataframe(loaded_df)
         expected = pd.Series(['apple, guitar', 'elephant, harmonica', 'cherry, fox', '', ''])
         self.assertTrue(result.equals(expected))
-
diff --git a/tests/schema/test_schema_entry.py b/tests/schema/test_schema_entry.py
index ae8812942..0985ce2b7 100644
--- a/tests/schema/test_schema_entry.py
+++ b/tests/schema/test_schema_entry.py
@@ -7,6 +7,7 @@ def __init__(self, attributes, parent=None):
         self._parent_tag = parent
 
     _check_inherited_attribute = HedTagEntry._check_inherited_attribute
+    _check_inherited_attribute_internal = HedTagEntry._check_inherited_attribute_internal
 
 
 class TestMockEntry(unittest.TestCase):