From d7055cca40d860c2153642c66b9343b53fae1818 Mon Sep 17 00:00:00 2001 From: IanCa Date: Fri, 25 Aug 2023 18:19:39 -0500 Subject: [PATCH] Update searching. Add optional field to exact match notation {required_expression: optional_expression}. Ban negation of wildcards due to poorly defined behavior. --- hed/models/expression_parser.py | 40 +++++++++++++++++++++++--- tests/models/test_expression_parser.py | 37 +++++++++++++++++++++++- 2 files changed, 72 insertions(+), 5 deletions(-) diff --git a/hed/models/expression_parser.py b/hed/models/expression_parser.py index d976fe6b8..736ed625b 100644 --- a/hed/models/expression_parser.py +++ b/hed/models/expression_parser.py @@ -64,6 +64,7 @@ class Token: Wildcard = 10 ExactMatch = 11 ExactMatchEnd = 12 + ExactMatchOptional = 14 NotInLine = 13 # Not currently a token. In development and may become one. def __init__(self, text): @@ -83,6 +84,7 @@ def __init__(self, text): "???": Token.Wildcard, # Any Group "{": Token.ExactMatch, # Nothing else "}": Token.ExactMatchEnd, # Nothing else + ":": Token.ExactMatchOptional, "@": Token.NotInLine } self.kind = tokens.get(text, Token.Tag) @@ -158,7 +160,11 @@ def handle_expr(self, hed_group, exact=False): if not groups1: return groups1 groups2 = self.right.handle_expr(hed_group, exact=exact) - # this is slow... 
+ + return self.merge_groups(groups1, groups2) + + @staticmethod + def merge_groups(groups1, groups2): return_list = [] for group in groups1: for other_group in groups2: @@ -308,6 +314,20 @@ def handle_expr(self, hed_group, exact=False): if return_list: return return_list + # Basically if we don't have an exact match above, do the more complex matching including optional + if self.left: + optional_groups = self.left.handle_expr(hed_group, exact=True) + found_groups = ExpressionAnd.merge_groups(found_groups, optional_groups) + + if found_groups: + return_list = [] + for group in found_groups: + if len(group.group.children) == len(group.tags): + return_list.append(group) + + if return_list: + return return_list + return [] @@ -336,6 +356,11 @@ def __init__(self, expression_string): '[[Event and Action]]' - Find a group with Event And Action at the same level. + Practical Complex Example: + + [[{(Onset or Offset), (Def or [[Def-expand]]): ???}]] - A group with an onset tag, + a def tag or def-expand group, and an optional wildcard group + Parameters: expression_string(str): The query string """ @@ -382,6 +407,9 @@ def _handle_negation(self): next_token = self._next_token_is([Token.LogicalNegation]) if next_token == Token.LogicalNegation: interior = self._handle_grouping_op() + if "?" in str(interior): + raise ValueError("Cannot negate wildcards, or expressions that contain wildcards. " 
+ "Use {required_expression : optional_expression}.") expr = ExpressionNegation(next_token, right=interior) return expr else: @@ -411,8 +439,12 @@ def _handle_grouping_op(self): elif next_token == Token.ExactMatch: interior = self._handle_and_op() expr = ExpressionExactMatch(next_token, right=interior) - next_token = self._next_token_is([Token.ExactMatchEnd]) - if next_token != Token.ExactMatchEnd: + next_token = self._next_token_is([Token.ExactMatchEnd, Token.ExactMatchOptional]) + if next_token == Token.ExactMatchOptional: + optional_portion = self._handle_and_op() + expr.left = optional_portion + next_token = self._next_token_is([Token.ExactMatchEnd]) + if next_token is None: raise ValueError("Parse error: Missing closing curly bracket") else: next_token = self._get_next_token() @@ -434,7 +466,7 @@ def _parse(self, expression_string): return expr def _tokenize(self, expression_string): - grouping_re = r"\[\[|\[|\]\]|\]|}|{" + grouping_re = r"\[\[|\[|\]\]|\]|}|{|:" paren_re = r"\)|\(|~" word_re = r"\?+|\band\b|\bor\b|,|[\"_\-a-zA-Z0-9/.^#\*@]+" re_string = fr"({grouping_re}|{paren_re}|{word_re})" diff --git a/tests/models/test_expression_parser.py b/tests/models/test_expression_parser.py index 926338d8f..808114cf6 100644 --- a/tests/models/test_expression_parser.py +++ b/tests/models/test_expression_parser.py @@ -694,4 +694,39 @@ def test_not_in_line3(self): "(A, B, (C)), D": True, "(A, B, (C)), (D), E": True, } - self.base_test("@C or B", test_strings) \ No newline at end of file + self.base_test("@C or B", test_strings) + + def test_optional_exact_group(self): + test_strings = { + "A, C": True, + } + self.base_test("{a and (b or c)}", test_strings) + + test_strings = { + "A, B, C, D": True, + } + self.base_test("{a and b: c and d}", test_strings) + + test_strings = { + "A, B, C": True, + "A, B, C, D": False, + } + self.base_test("{a and b: c or d}", test_strings) + + test_strings = { + "A, C": True, + "A, D": True, + "A, B, C": False, + "A, B, C, D": False, + } 
+ self.base_test("{a or b: c or d}", test_strings) + + test_strings = { + "(Onset, (Def-expand/taco))": True, + "(Onset, (Def-expand/taco, (Label/DefContents)))": True, + "(Onset, (Def-expand/taco), (Label/OnsetContents))": True, + "(Onset, (Def-expand/taco), (Label/OnsetContents, Description/MoreContents))": True, + "Onset, (Def-expand/taco), (Label/OnsetContents)": False, + "(Onset, (Def-expand/taco), Label/OnsetContents)": False, + } + self.base_test("[[{(Onset or Offset), (Def or [[Def-expand]]): ???}]]", test_strings) \ No newline at end of file