hed-standard · VisLab · Feb 17, 2024 · Feb 17, 2024
diff --git a/hed/models/hed_string.py b/hed/models/hed_string.py
@@ -207,8 +207,6 @@ def split_into_groups(hed_string, hed_schema, def_dict=None):
                     current_tag_group.append(HedGroup(hed_string, startpos + delimiter_index))
 
                 if delimiter_char is HedString.CLOSING_GROUP_CHARACTER:
-                    # if prev_delimiter == ",":
-                    #     raise ValueError(f"Closing parentheses in HED string {hed_string}")
                     # Terminate existing group, and save it off.
                     paren_end = startpos + delimiter_index + 1
 
@@ -296,22 +294,19 @@ def split_hed_string(hed_string):
 
             if char in tag_delimiters:
                 if found_symbol:
-                    # view_string = hed_string[last_end_pos: i]
                     if last_end_pos != i:
                         result_positions.append((False, (last_end_pos, i)))
                     last_end_pos = i
                 elif not found_symbol:
                     found_symbol = True
                     last_end_pos = i - current_spacing
-                    # view_string = hed_string[tag_start_pos: last_end_pos]
                     result_positions.append((True, (tag_start_pos, last_end_pos)))
                     current_spacing = 0
                     tag_start_pos = None
                 continue
 
             # If we have a current delimiter, end it here.
             if found_symbol and last_end_pos is not None:
-                # view_string = hed_string[last_end_pos: i]
                 if last_end_pos != i:
                     result_positions.append((False, (last_end_pos, i)))
                 last_end_pos = None
@@ -322,10 +317,8 @@ def split_hed_string(hed_string):
                 tag_start_pos = i
 
         if last_end_pos is not None and len(hed_string) != last_end_pos:
-            # view_string = hed_string[last_end_pos: len(hed_string)]
             result_positions.append((False, (last_end_pos, len(hed_string))))
         if tag_start_pos is not None:
-            # view_string = hed_string[tag_start_pos: len(hed_string)]
             result_positions.append((True, (tag_start_pos, len(hed_string) - current_spacing)))
             if current_spacing:
                 result_positions.append((False, (len(hed_string) - current_spacing, len(hed_string))))

diff --git a/hed/models/query_expressions.py b/hed/models/query_expressions.py
@@ -20,7 +20,8 @@ def __init__(self, token, left=None, right=None):
             self._match_mode = 2
             token.text = token.text.replace("*", "")
 
-    def _get_parent_groups(self, search_results):
+    @staticmethod
+    def _get_parent_groups(search_results):
         found_parent_groups = []
         if search_results:
             for group in search_results:
@@ -41,6 +42,14 @@ def __str__(self):
         return output_str
 
     def handle_expr(self, hed_group, exact=False):
+        """Handles parsing the given expression, recursively down the list as needed.
+
+           The base class implementation handles search terms.
+
+           Parameters:
+               hed_group(HedGroup): The object to search
+               exact(bool): If True, we are only looking for groups containing this term directly, not descendants.
+           """
         if self._match_mode == 2:
             groups_found = hed_group.find_wildcard_tags([self.token.text], recursive=True, include_groups=2)
         elif self._match_mode:
@@ -76,18 +85,28 @@ def handle_expr(self, hed_group, exact=False):
             return groups1
         groups2 = self.right.handle_expr(hed_group, exact=exact)
 
-        return self.merge_groups(groups1, groups2)
+        return self.merge_and_groups(groups1, groups2)
 
     @staticmethod
-    def merge_groups(groups1, groups2):
+    def merge_and_groups(groups1, groups2):
+        """Finds any shared results
+
+        Parameters:
+            groups1(list): a list of search results
+            groups2(list): a list of search results
+
+        Returns:
+            combined_groups(list): results whose group appears in both lists, narrowed down to those where none of the tags overlap
+        """
         return_list = []
         for group in groups1:
             for other_group in groups2:
                 if group.group is other_group.group:
                     # At this point any shared tags between the two groups invalidates it.
                     if any(tag is tag2 and tag is not None for tag in group.tags for tag2 in other_group.tags):
                         continue
-                    merged_result = group.merge_result(other_group)
+                    # Merge the two groups' tags into one new result, now that we've verified they're unique
+                    merged_result = group.merge_and_result(other_group)
 
                     dont_add = False
                     # This is trash and slow
@@ -195,7 +214,8 @@ def __init__(self, token, left=None, right=None):
         super().__init__(token, left, right)
         self.optional = "any"
 
-    def _filter_exact_matches(self, search_results):
+    @staticmethod
+    def _filter_exact_matches(search_results):
         filtered_list = []
         for group in search_results:
             if len(group.group.children) == len(group.tags):
@@ -215,7 +235,7 @@ def handle_expr(self, hed_group, exact=False):
         # Basically if we don't have an exact match above, do the more complex matching including optional
         if self.left:
             optional_groups = self.left.handle_expr(hed_group, exact=True)
-            found_groups = ExpressionAnd.merge_groups(found_groups, optional_groups)
+            found_groups = ExpressionAnd.merge_and_groups(found_groups, optional_groups)
 
         filtered_list = self._filter_exact_matches(found_groups)
         if filtered_list:

diff --git a/hed/models/query_handler.py b/hed/models/query_handler.py
@@ -47,26 +47,63 @@ def __init__(self, expression_string):
         self.tree = self._parse(expression_string.lower())
         self._org_string = expression_string
 
+    def search(self, hed_string_obj):
+        """Searches the given string for matches to this query
+
+        Parameters:
+            hed_string_obj (HedString): String to search
+
+        Returns:
+            list(SearchResult): Generally you should just treat this as a bool;
+                                it is truthy if a match was found.
+        """
+        current_node = self.tree
+
+        result = current_node.handle_expr(hed_string_obj)
+        return result
+
     def __str__(self):
         return str(self.tree)
 
     def _get_next_token(self):
+        """Advances the counter, then returns the token it now points at"""
         self.at_token += 1
         if self.at_token >= len(self.tokens):
             raise ValueError("Parse error in get next token")
         return self.tokens[self.at_token]
 
     def _next_token_is(self, kinds):
+        """Consumes and returns the next token if its kind is in kinds, otherwise returns None"""
         if self.at_token + 1 >= len(self.tokens):
             return None
         if self.tokens[self.at_token + 1].kind in kinds:
             return self._get_next_token()
         return None
 
-    def current_token(self):
-        if self.at_token + 1 >= len(self.tokens):
-            return None
-        return self.tokens[self.at_token].text
+    def _parse(self, expression_string):
+        """Parse the string and build an expression tree"""
+        self.tokens = self._tokenize(expression_string)
+
+        expr = self._handle_or_op()
+
+        if self.at_token + 1 != len(self.tokens):
+            raise ValueError("Parse error in search string")
+
+        return expr
+
+    @staticmethod
+    def _tokenize(expression_string):
+        """Tokenize the expression string into a list"""
+        grouping_re = r"\[\[|\[|\]\]|\]|}|{|:"
+        paren_re = r"\)|\(|~"
+        word_re = r"\?+|\band\b|\bor\b|,|[\"_\-a-zA-Z0-9/.^#\*@]+"
+        re_string = fr"({grouping_re}|{paren_re}|{word_re})"
+        token_re = re.compile(re_string)
+
+        tokens = token_re.findall(expression_string)
+        tokens = [Token(token) for token in tokens]
+
+        return tokens
 
     def _handle_and_op(self):
         expr = self._handle_negation()
@@ -79,10 +116,10 @@ def _handle_and_op(self):
         return expr
 
     def _handle_or_op(self):
-        expr = self._handle_and_op()  # Note: calling _handle_and_op here
+        expr = self._handle_and_op()
         next_token = self._next_token_is([Token.Or])
         while next_token:
-            right = self._handle_and_op()  # Note: calling _handle_and_op here
+            right = self._handle_and_op()
             if next_token.kind == Token.Or:
                 expr = ExpressionOr(next_token, expr, right)
             next_token = self._next_token_is([Token.Or])
@@ -143,31 +180,3 @@ def _handle_grouping_op(self):
                 expr = None
 
         return expr
-
-    def _parse(self, expression_string):
-        self.tokens = self._tokenize(expression_string)
-
-        expr = self._handle_or_op()
-
-        if self.at_token + 1 != len(self.tokens):
-            raise ValueError("Parse error in search string")
-
-        return expr
-
-    def _tokenize(self, expression_string):
-        grouping_re = r"\[\[|\[|\]\]|\]|}|{|:"
-        paren_re = r"\)|\(|~"
-        word_re = r"\?+|\band\b|\bor\b|,|[\"_\-a-zA-Z0-9/.^#\*@]+"
-        re_string = fr"({grouping_re}|{paren_re}|{word_re})"
-        token_re = re.compile(re_string)
-
-        tokens = token_re.findall(expression_string)
-        tokens = [Token(token) for token in tokens]
-
-        return tokens
-
-    def search(self, hed_string_obj):
-        current_node = self.tree
-
-        result = current_node.handle_expr(hed_string_obj)
-        return result
diff --git a/hed/models/query_service.py b/hed/models/query_service.py
@@ -21,7 +21,7 @@ def get_query_handlers(queries, query_names=None):
         return None, None, [f"EmptyQueries: The queries list must not be empty"]
     elif isinstance(queries, str):
         queries = [queries]
-    expression_parsers = [None for i in range(len(queries))]
+    expression_parsers = [None] * len(queries)
     issues = []
     if not query_names:
         query_names = [f"query_{index}" for index in range(len(queries))]

diff --git a/hed/models/query_util.py b/hed/models/query_util.py
@@ -12,12 +12,8 @@ def __init__(self, group, tag):
             new_tags = tag.copy()
         self.tags = new_tags
 
-    def __eq__(self, other):
-        if isinstance(other, SearchResult):
-            return self.group == other.group
-        return other == self.group
-
-    def merge_result(self, other):
+    def merge_and_result(self, other):
+        """Returns a new result, with the combined tags/groups from this and other."""
         # Returns a new
         new_tags = self.tags.copy()
         for tag in other.tags:
@@ -31,6 +27,7 @@ def merge_result(self, other):
         return SearchResult(self.group, new_tags)
 
     def has_same_tags(self, other):
+        """Checks if these two results have the same tags/groups by identity (not equality)"""
         if self.group != other.group:
             return False
 
@@ -42,16 +39,9 @@ def has_same_tags(self, other):
     def __str__(self):
         return str(self.group) + " Tags: " + "---".join([str(tag) for tag in self.tags])
 
-    def get_tags_only(self):
-        from hed import HedTag
-        return [tag for tag in self.tags if isinstance(tag, HedTag)]
-
-    def get_groups_only(self):
-        from hed import HedTag
-        return [tag for tag in self.tags if not isinstance(tag, HedTag)]
-
 
 class Token:
+    """Represents a single term/character"""
     And = 0
     Tag = 1
     DescendantGroup = 4