From 9da77dede17619eeed294f030287d4ba324db747 Mon Sep 17 00:00:00 2001
From: Daniel Weeks <dweeks@apache.org>
Date: Sat, 21 Oct 2023 13:43:35 -0700
Subject: [PATCH 1/6] Update like statements to reflect sql behaviors

---
 pyiceberg/expressions/parser.py  | 13 ++++++++++++-
 tests/expressions/test_parser.py | 23 ++++++++++++++++++++++-
 2 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/pyiceberg/expressions/parser.py b/pyiceberg/expressions/parser.py
index 45805331be..f47f85e459 100644
--- a/pyiceberg/expressions/parser.py
+++ b/pyiceberg/expressions/parser.py
@@ -14,6 +14,7 @@
 #  KIND, either express or implied.  See the License for the
 #  specific language governing permissions and limitations
 #  under the License.
+import re
 from decimal import Decimal
 
 from pyparsing import (
@@ -78,6 +79,7 @@
 identifier = Word(alphas, alphanums + "_$").set_results_name("identifier")
 column = DelimitedList(identifier, delim=".", combine=False).set_results_name("column")
 
+like_regex = r'(?P<valid_wildcard>(?<!\\)%$)|(?P<invalid_wildcard>(?<!\\)%)'
 
 @column.set_parse_action
 def _(result: ParseResults) -> Reference:
@@ -217,7 +219,16 @@ def _(result: ParseResults) -> BooleanExpression:
 
 @starts_with.set_parse_action
 def _(result: ParseResults) -> BooleanExpression:
-    return StartsWith(result.column, result.raw_quoted_string)
+    literal_like: StringLiteral = result.raw_quoted_string
+
+    match = re.search(like_regex, literal_like.value)
+
+    if match and  match.groupdict()['invalid_wildcard']:
+        raise ValueError("LIKE expression only supports wildcard, '%', at the end of a string")
+    elif match and match.groupdict()['valid_wildcard']:
+        return StartsWith(result.column, StringLiteral(literal_like.value[:-1].replace('\\%', '%')))
+    else:
+        return EqualTo(result.column, StringLiteral(literal_like.value.replace('\\%', '%')))
 
 
 @not_starts_with.set_parse_action
diff --git a/tests/expressions/test_parser.py b/tests/expressions/test_parser.py
index 65415f2e9a..439f7c27ba 100644
--- a/tests/expressions/test_parser.py
+++ b/tests/expressions/test_parser.py
@@ -168,8 +168,29 @@ def test_multiple_and_or() -> None:
     ) == parser.parse("foo is not null and foo < 5 or (foo > 10 and foo < 100 and bar is null)")
 
 
+def test_like_equality() -> None:
+    assert EqualTo("foo", "data") == parser.parse("foo LIKE 'data'")
+    assert EqualTo("foo", "data%") == parser.parse("foo LIKE 'data\\%'")
+
+
 def test_starts_with() -> None:
-    assert StartsWith("foo", "data") == parser.parse("foo LIKE 'data'")
+    assert StartsWith("foo", "data") == parser.parse("foo LIKE 'data%'")
+    assert StartsWith("foo", "some % data") == parser.parse("foo LIKE 'some \\% data%'")
+    assert StartsWith("foo", "some data%") == parser.parse("foo LIKE 'some data\\%%'")
+
+
+def test_invalid_likes() -> None:
+    invalid_statements = [
+        "foo LIKE '%data%'",
+        "foo LIKE 'da%ta'"
+        "foo LIKE '%data'"
+    ]
+
+    for statement in invalid_statements:
+        with pytest.raises(ValueError) as exc_info:
+            parser.parse(statement)
+
+        assert "LIKE expression only supports wildcard, '%', at the end of a string" in str(exc_info)
 
 
 def test_not_starts_with() -> None:

From f07e21f8064f28ac50bb63789f18abddfb00721a Mon Sep 17 00:00:00 2001
From: Daniel Weeks <dweeks@apache.org>
Date: Sat, 21 Oct 2023 13:48:09 -0700
Subject: [PATCH 2/6] Codestyle

---
 pyiceberg/expressions/parser.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyiceberg/expressions/parser.py b/pyiceberg/expressions/parser.py
index f47f85e459..ed7af3701d 100644
--- a/pyiceberg/expressions/parser.py
+++ b/pyiceberg/expressions/parser.py
@@ -223,7 +223,7 @@ def _(result: ParseResults) -> BooleanExpression:
 
     match = re.search(like_regex, literal_like.value)
 
-    if match and  match.groupdict()['invalid_wildcard']:
+    if match and match.groupdict()['invalid_wildcard']:
         raise ValueError("LIKE expression only supports wildcard, '%', at the end of a string")
     elif match and match.groupdict()['valid_wildcard']:
         return StartsWith(result.column, StringLiteral(literal_like.value[:-1].replace('\\%', '%')))

From 94b735488734f9e7ac8d5a08162dada2d38cf608 Mon Sep 17 00:00:00 2001
From: Daniel Weeks <dweeks@apache.org>
Date: Sat, 21 Oct 2023 13:53:20 -0700
Subject: [PATCH 3/6] Codestyle

---
 pyiceberg/expressions/parser.py  | 1 +
 tests/expressions/test_parser.py | 6 +-----
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/pyiceberg/expressions/parser.py b/pyiceberg/expressions/parser.py
index ed7af3701d..594d406cd1 100644
--- a/pyiceberg/expressions/parser.py
+++ b/pyiceberg/expressions/parser.py
@@ -81,6 +81,7 @@
 
 like_regex = r'(?P<valid_wildcard>(?<!\\)%$)|(?P<invalid_wildcard>(?<!\\)%)'
 
+
 @column.set_parse_action
 def _(result: ParseResults) -> Reference:
     return Reference(result.column[-1])
diff --git a/tests/expressions/test_parser.py b/tests/expressions/test_parser.py
index 439f7c27ba..a41600f7f7 100644
--- a/tests/expressions/test_parser.py
+++ b/tests/expressions/test_parser.py
@@ -180,11 +180,7 @@ def test_starts_with() -> None:
 
 
 def test_invalid_likes() -> None:
-    invalid_statements = [
-        "foo LIKE '%data%'",
-        "foo LIKE 'da%ta'"
-        "foo LIKE '%data'"
-    ]
+    invalid_statements = ["foo LIKE '%data%'", "foo LIKE 'da%ta'" "foo LIKE '%data'"]
 
     for statement in invalid_statements:
         with pytest.raises(ValueError) as exc_info:

From d36c82a2c87593399497874bdd999ca39d865985 Mon Sep 17 00:00:00 2001
From: Daniel Weeks <dweeks@apache.org>
Date: Sat, 21 Oct 2023 14:29:23 -0700
Subject: [PATCH 4/6] Handle NotStartsWith

---
 pyiceberg/expressions/parser.py  | 17 ++++++++++-------
 tests/expressions/test_parser.py |  5 +++--
 2 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/pyiceberg/expressions/parser.py b/pyiceberg/expressions/parser.py
index 594d406cd1..c2280f74f5 100644
--- a/pyiceberg/expressions/parser.py
+++ b/pyiceberg/expressions/parser.py
@@ -52,7 +52,6 @@
     NotIn,
     NotNaN,
     NotNull,
-    NotStartsWith,
     Or,
     Reference,
     StartsWith,
@@ -220,23 +219,27 @@ def _(result: ParseResults) -> BooleanExpression:
 
 @starts_with.set_parse_action
 def _(result: ParseResults) -> BooleanExpression:
+    return _evaluate_like_statement(result)
+
+
+@not_starts_with.set_parse_action
+def _(result: ParseResults) -> BooleanExpression:
+    return _evaluate_like_statement(result).__invert__()
+
+
+def _evaluate_like_statement(result: ParseResults) -> BooleanExpression:
     literal_like: StringLiteral = result.raw_quoted_string
 
     match = re.search(like_regex, literal_like.value)
 
     if match and match.groupdict()['invalid_wildcard']:
-        raise ValueError("LIKE expression only supports wildcard, '%', at the end of a string")
+        raise ValueError("LIKE expressions only supports wildcard, '%', at the end of a string")
     elif match and match.groupdict()['valid_wildcard']:
         return StartsWith(result.column, StringLiteral(literal_like.value[:-1].replace('\\%', '%')))
     else:
         return EqualTo(result.column, StringLiteral(literal_like.value.replace('\\%', '%')))
 
 
-@not_starts_with.set_parse_action
-def _(result: ParseResults) -> BooleanExpression:
-    return NotStartsWith(result.column, result.raw_quoted_string)
-
-
 predicate = (comparison | in_check | null_check | nan_check | starts_check | boolean).set_results_name("predicate")
 
 
diff --git a/tests/expressions/test_parser.py b/tests/expressions/test_parser.py
index a41600f7f7..71157e7cfa 100644
--- a/tests/expressions/test_parser.py
+++ b/tests/expressions/test_parser.py
@@ -186,11 +186,12 @@ def test_invalid_likes() -> None:
         with pytest.raises(ValueError) as exc_info:
             parser.parse(statement)
 
-        assert "LIKE expression only supports wildcard, '%', at the end of a string" in str(exc_info)
+        assert "LIKE expressions only supports wildcard, '%', at the end of a string" in str(exc_info)
 
 
 def test_not_starts_with() -> None:
-    assert NotStartsWith("foo", "data") == parser.parse("foo NOT LIKE 'data'")
+    assert NotEqualTo("foo", "data") == parser.parse("foo NOT LIKE 'data'")
+    assert NotStartsWith("foo", "data") == parser.parse("foo NOT LIKE 'data%'")
 
 
 def test_with_function() -> None:

From cf7087cf4fbeb731eb38254bb0dc255ffaf90b54 Mon Sep 17 00:00:00 2001
From: Daniel Weeks <daniel.c.weeks@gmail.com>
Date: Sat, 21 Oct 2023 15:30:23 -0700
Subject: [PATCH 5/6] Update pyiceberg/expressions/parser.py

Co-authored-by: Fokko Driesprong <fokko@apache.org>
---
 pyiceberg/expressions/parser.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyiceberg/expressions/parser.py b/pyiceberg/expressions/parser.py
index c2280f74f5..8873907813 100644
--- a/pyiceberg/expressions/parser.py
+++ b/pyiceberg/expressions/parser.py
@@ -224,7 +224,7 @@ def _(result: ParseResults) -> BooleanExpression:
 
 @not_starts_with.set_parse_action
 def _(result: ParseResults) -> BooleanExpression:
-    return _evaluate_like_statement(result).__invert__()
+    return ~_evaluate_like_statement(result)
 
 
 def _evaluate_like_statement(result: ParseResults) -> BooleanExpression:

From 4794b606ccc1f26871c34acb7d2841e57f4a83a6 Mon Sep 17 00:00:00 2001
From: Daniel Weeks <daniel.c.weeks@gmail.com>
Date: Sat, 21 Oct 2023 15:30:53 -0700
Subject: [PATCH 6/6] Update tests/expressions/test_parser.py

Co-authored-by: Fokko Driesprong <fokko@apache.org>
---
 tests/expressions/test_parser.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/expressions/test_parser.py b/tests/expressions/test_parser.py
index 71157e7cfa..8257710f66 100644
--- a/tests/expressions/test_parser.py
+++ b/tests/expressions/test_parser.py
@@ -180,7 +180,7 @@ def test_starts_with() -> None:
 
 
 def test_invalid_likes() -> None:
-    invalid_statements = ["foo LIKE '%data%'", "foo LIKE 'da%ta'" "foo LIKE '%data'"]
+    invalid_statements = ["foo LIKE '%data%'", "foo LIKE 'da%ta'", "foo LIKE '%data'"]
 
     for statement in invalid_statements:
         with pytest.raises(ValueError) as exc_info: