Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 0 additions & 7 deletions hed/models/hed_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,8 +207,6 @@ def split_into_groups(hed_string, hed_schema, def_dict=None):
current_tag_group.append(HedGroup(hed_string, startpos + delimiter_index))

if delimiter_char is HedString.CLOSING_GROUP_CHARACTER:
# if prev_delimiter == ",":
# raise ValueError(f"Closing parentheses in HED string {hed_string}")
# Terminate existing group, and save it off.
paren_end = startpos + delimiter_index + 1

Expand Down Expand Up @@ -296,22 +294,19 @@ def split_hed_string(hed_string):

if char in tag_delimiters:
if found_symbol:
# view_string = hed_string[last_end_pos: i]
if last_end_pos != i:
result_positions.append((False, (last_end_pos, i)))
last_end_pos = i
elif not found_symbol:
found_symbol = True
last_end_pos = i - current_spacing
# view_string = hed_string[tag_start_pos: last_end_pos]
result_positions.append((True, (tag_start_pos, last_end_pos)))
current_spacing = 0
tag_start_pos = None
continue

# If we have a current delimiter, end it here.
if found_symbol and last_end_pos is not None:
# view_string = hed_string[last_end_pos: i]
if last_end_pos != i:
result_positions.append((False, (last_end_pos, i)))
last_end_pos = None
Expand All @@ -322,10 +317,8 @@ def split_hed_string(hed_string):
tag_start_pos = i

if last_end_pos is not None and len(hed_string) != last_end_pos:
# view_string = hed_string[last_end_pos: len(hed_string)]
result_positions.append((False, (last_end_pos, len(hed_string))))
if tag_start_pos is not None:
# view_string = hed_string[tag_start_pos: len(hed_string)]
result_positions.append((True, (tag_start_pos, len(hed_string) - current_spacing)))
if current_spacing:
result_positions.append((False, (len(hed_string) - current_spacing, len(hed_string))))
Expand Down
32 changes: 26 additions & 6 deletions hed/models/query_expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ def __init__(self, token, left=None, right=None):
self._match_mode = 2
token.text = token.text.replace("*", "")

def _get_parent_groups(self, search_results):
@staticmethod
def _get_parent_groups(search_results):
found_parent_groups = []
if search_results:
for group in search_results:
Expand All @@ -41,6 +42,14 @@ def __str__(self):
return output_str

def handle_expr(self, hed_group, exact=False):
"""Handles parsing the given expression, recursively down the list as needed.

BaseClass implementation is search terms.

Parameters:
hed_group(HedGroup): The object to search
exact(bool): If True, we are only looking for groups containing this term directly, not descendants.
"""
if self._match_mode == 2:
groups_found = hed_group.find_wildcard_tags([self.token.text], recursive=True, include_groups=2)
elif self._match_mode:
Expand Down Expand Up @@ -76,18 +85,28 @@ def handle_expr(self, hed_group, exact=False):
return groups1
groups2 = self.right.handle_expr(hed_group, exact=exact)

return self.merge_groups(groups1, groups2)
return self.merge_and_groups(groups1, groups2)

@staticmethod
def merge_groups(groups1, groups2):
def merge_and_groups(groups1, groups2):
"""Finds any shared results

Parameters:
groups1(list): a list of search results
groups2(list): a list of search results

Returns:
combined_groups(list): groups in both lists narrowed down results to where none of the tags overlap
"""
return_list = []
for group in groups1:
for other_group in groups2:
if group.group is other_group.group:
# At this point any shared tags between the two groups invalidates it.
if any(tag is tag2 and tag is not None for tag in group.tags for tag2 in other_group.tags):
continue
merged_result = group.merge_result(other_group)
# Merge the two groups tags into one new result, now that we've verified they're unique
merged_result = group.merge_and_result(other_group)

dont_add = False
# This is trash and slow
Expand Down Expand Up @@ -195,7 +214,8 @@ def __init__(self, token, left=None, right=None):
super().__init__(token, left, right)
self.optional = "any"

def _filter_exact_matches(self, search_results):
@staticmethod
def _filter_exact_matches(search_results):
filtered_list = []
for group in search_results:
if len(group.group.children) == len(group.tags):
Expand All @@ -215,7 +235,7 @@ def handle_expr(self, hed_group, exact=False):
# Basically if we don't have an exact match above, do the more complex matching including optional
if self.left:
optional_groups = self.left.handle_expr(hed_group, exact=True)
found_groups = ExpressionAnd.merge_groups(found_groups, optional_groups)
found_groups = ExpressionAnd.merge_and_groups(found_groups, optional_groups)

filtered_list = self._filter_exact_matches(found_groups)
if filtered_list:
Expand Down
77 changes: 43 additions & 34 deletions hed/models/query_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,26 +47,63 @@ def __init__(self, expression_string):
self.tree = self._parse(expression_string.lower())
self._org_string = expression_string

def search(self, hed_string_obj):
    """Search the given HED string with this query's expression tree.

    Parameters:
        hed_string_obj (HedString): String to search

    Returns:
        list(SearchResult): The result of the root expression's handle_expr.
            Generally you should just treat this as a bool:
            truthy if a match was found.
    """
    # The root node recurses through the rest of the tree on its own.
    return self.tree.handle_expr(hed_string_obj)

def __str__(self):
    """Return the string form of the parsed expression tree."""
    return str(self.tree)

def _get_next_token(self):
    """Advance the token counter by one and return the token now under it.

    Returns:
        The token at the new position in self.tokens.

    Raises:
        ValueError: If advancing moves past the end of self.tokens.
            The counter has already been incremented when this is raised.
    """
    self.at_token += 1
    if self.at_token >= len(self.tokens):
        raise ValueError("Parse error in get next token")
    return self.tokens[self.at_token]

def _next_token_is(self, kinds):
    """Consume and return the next token if its kind is one of kinds.

    Parameters:
        kinds: Container of token kinds to accept.

    Returns:
        The next token (the counter is advanced via _get_next_token), or
        None if there is no next token or its kind is not in kinds. The
        counter is left untouched when None is returned.
    """
    upcoming = self.at_token + 1
    if upcoming >= len(self.tokens):
        return None
    if self.tokens[upcoming].kind in kinds:
        return self._get_next_token()
    return None

def current_token(self):
    """Return the text of the token at the current position.

    Returns:
        The text attribute of self.tokens[self.at_token], or None when
        self.at_token does not index a token (before the first token has
        been consumed, or past the end).
    """
    # Bounds-check the index that is actually read. Checking at_token + 1
    # hid the last token, and a negative counter wrapped around to the end
    # of the list.
    if not 0 <= self.at_token < len(self.tokens):
        return None
    return self.tokens[self.at_token].text
def _parse(self, expression_string):
    """Parse the string and build an expression tree.

    Parameters:
        expression_string (str): The query text to tokenize and parse.

    Returns:
        The expression returned by _handle_or_op.

    Raises:
        ValueError: If tokens remain unconsumed once parsing finishes.
    """
    self.tokens = self._tokenize(expression_string)

    expr = self._handle_or_op()

    # at_token is the index of the last consumed token, so a complete parse
    # leaves it on the final token.
    if self.at_token + 1 != len(self.tokens):
        raise ValueError("Parse error in search string")

    return expr

@staticmethod
def _tokenize(expression_string):
    """Tokenize the expression string into a list.

    Parameters:
        expression_string (str): The query text to split.

    Returns:
        list: One Token per regex match, in the order matched.
    """
    grouping_re = r"\[\[|\[|\]\]|\]|}|{|:"
    paren_re = r"\)|\(|~"
    word_re = r"\?+|\band\b|\bor\b|,|[\"_\-a-zA-Z0-9/.^#\*@]+"
    re_string = fr"({grouping_re}|{paren_re}|{word_re})"
    token_re = re.compile(re_string)

    # The pattern has a single capture group, so findall yields plain strings.
    return [Token(text) for text in token_re.findall(expression_string)]

def _handle_and_op(self):
expr = self._handle_negation()
Expand All @@ -79,10 +116,10 @@ def _handle_and_op(self):
return expr

def _handle_or_op(self):
expr = self._handle_and_op() # Note: calling _handle_and_op here
expr = self._handle_and_op()
next_token = self._next_token_is([Token.Or])
while next_token:
right = self._handle_and_op() # Note: calling _handle_and_op here
right = self._handle_and_op()
if next_token.kind == Token.Or:
expr = ExpressionOr(next_token, expr, right)
next_token = self._next_token_is([Token.Or])
Expand Down Expand Up @@ -143,31 +180,3 @@ def _handle_grouping_op(self):
expr = None

return expr

def _parse(self, expression_string):
    """Tokenize expression_string and parse it into an expression tree.

    Parameters:
        expression_string (str): The query text to tokenize and parse.

    Returns:
        The expression returned by _handle_or_op.

    Raises:
        ValueError: If tokens remain unconsumed once parsing finishes.
    """
    self.tokens = self._tokenize(expression_string)

    expr = self._handle_or_op()

    # A complete parse leaves at_token on the final token.
    if self.at_token + 1 != len(self.tokens):
        raise ValueError("Parse error in search string")

    return expr

def _tokenize(self, expression_string):
    """Split expression_string into a list of Token objects.

    Parameters:
        expression_string (str): The query text to split.

    Returns:
        list: One Token per regex match, in the order matched.
    """
    grouping_re = r"\[\[|\[|\]\]|\]|}|{|:"
    paren_re = r"\)|\(|~"
    word_re = r"\?+|\band\b|\bor\b|,|[\"_\-a-zA-Z0-9/.^#\*@]+"
    re_string = fr"({grouping_re}|{paren_re}|{word_re})"
    token_re = re.compile(re_string)

    # The pattern has a single capture group, so findall yields plain strings.
    return [Token(text) for text in token_re.findall(expression_string)]

def search(self, hed_string_obj):
    """Run this query's expression tree against hed_string_obj.

    Parameters:
        hed_string_obj: The object handed to the root expression's handle_expr.

    Returns:
        Whatever the root expression's handle_expr returns.
    """
    return self.tree.handle_expr(hed_string_obj)
2 changes: 1 addition & 1 deletion hed/models/query_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def get_query_handlers(queries, query_names=None):
return None, None, [f"EmptyQueries: The queries list must not be empty"]
elif isinstance(queries, str):
queries = [queries]
expression_parsers = [None for i in range(len(queries))]
expression_parsers = [None] * len(queries)
issues = []
if not query_names:
query_names = [f"query_{index}" for index in range(len(queries))]
Expand Down
18 changes: 4 additions & 14 deletions hed/models/query_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,8 @@ def __init__(self, group, tag):
new_tags = tag.copy()
self.tags = new_tags

def __eq__(self, other):
    """Compare by group only; the tags held by the result are ignored.

    Parameters:
        other: Another SearchResult, or any object to compare with self.group.

    Returns:
        The result of comparing the two groups when other is a SearchResult,
        otherwise the result of ``other == self.group``.
    """
    # NOTE(review): no __hash__ is visible alongside this method; if none is
    # defined on the class, instances are unhashable - confirm that is intended.
    if isinstance(other, SearchResult):
        return self.group == other.group
    return other == self.group

def merge_result(self, other):
def merge_and_result(self, other):
"""Returns a new result, with the combined tags/groups from this and other."""
# Returns a new
new_tags = self.tags.copy()
for tag in other.tags:
Expand All @@ -31,6 +27,7 @@ def merge_result(self, other):
return SearchResult(self.group, new_tags)

def has_same_tags(self, other):
"""Checks if these two results have the same tags/groups by identity(not equality)"""
if self.group != other.group:
return False

Expand All @@ -42,16 +39,9 @@ def has_same_tags(self, other):
def __str__(self):
    """Return the group followed by " Tags: " and the tags joined with "---"."""
    joined_tags = "---".join(str(tag) for tag in self.tags)
    return str(self.group) + " Tags: " + joined_tags

def get_tags_only(self):
    """Return the entries of self.tags that are HedTag instances.

    Returns:
        list: The matching entries, in their original order.
    """
    # Imported locally - presumably to avoid a circular import; confirm.
    from hed import HedTag
    return [tag for tag in self.tags if isinstance(tag, HedTag)]

def get_groups_only(self):
    """Return the entries of self.tags that are not HedTag instances.

    Returns:
        list: The matching entries, in their original order.
    """
    # Imported locally - presumably to avoid a circular import; confirm.
    from hed import HedTag
    return [tag for tag in self.tags if not isinstance(tag, HedTag)]


class Token:
"""Represents a single term/character"""
And = 0
Tag = 1
DescendantGroup = 4
Expand Down
Loading