From 0efe1353a69dd0b542ed944fbb7c4a0b70939b17 Mon Sep 17 00:00:00 2001 From: jayceslesar Date: Tue, 12 Dec 2023 16:08:22 -0500 Subject: [PATCH 1/5] first pass at `UnboundTransform` --- pyiceberg/expressions/parser.py | 14 +++++++++++++- pyiceberg/transforms.py | 27 ++++++++++++++++++++++++++- tests/expressions/test_parser.py | 4 ++++ 3 files changed, 43 insertions(+), 2 deletions(-) diff --git a/pyiceberg/expressions/parser.py b/pyiceberg/expressions/parser.py index 8873907813..d06e05d05d 100644 --- a/pyiceberg/expressions/parser.py +++ b/pyiceberg/expressions/parser.py @@ -62,6 +62,7 @@ LongLiteral, StringLiteral, ) +from pyiceberg.transforms import UnboundTransform from pyiceberg.typedef import L ParserElement.enablePackrat() @@ -74,6 +75,7 @@ NULL = CaselessKeyword("null") NAN = CaselessKeyword("nan") LIKE = CaselessKeyword("like") +CAST = CaselessKeyword("cast") identifier = Word(alphas, alphanums + "_$").set_results_name("identifier") column = DelimitedList(identifier, delim=".", combine=False).set_results_name("column") @@ -240,7 +242,17 @@ def _evaluate_like_statement(result: ParseResults) -> BooleanExpression: return EqualTo(result.column, StringLiteral(literal_like.value.replace('\\%', '%'))) -predicate = (comparison | in_check | null_check | nan_check | starts_check | boolean).set_results_name("predicate") +cast_expression = (CAST + "(" + column + "as" + identifier + Suppress(")")).set_results_name("cast") + + +@cast_expression.set_parse_action +def _(result: ParseResults) -> UnboundTransform[L]: + return UnboundTransform(Reference(result.column), result[-1]) + + +predicate = (cast_expression | comparison | in_check | null_check | nan_check | starts_check | boolean).set_results_name( + "predicate" +) def handle_not(result: ParseResults) -> Not: diff --git a/pyiceberg/transforms.py b/pyiceberg/transforms.py index b9afae9a7e..9cf43c050e 100644 --- a/pyiceberg/transforms.py +++ b/pyiceberg/transforms.py @@ -49,6 +49,7 @@ Reference, StartsWith, UnboundPredicate, + UnboundTerm, ) from pyiceberg.expressions.literals import ( DateLiteral, @@ -58,7 +59,8 @@ TimestampLiteral, literal, ) -from pyiceberg.typedef import IcebergRootModel, L +from pyiceberg.schema import Schema +from pyiceberg.typedef import IcebergRootModel, L, StructProtocol from pyiceberg.types import ( BinaryType, DateType, @@ -821,3 +823,26 @@ class BoundTransform(BoundTerm[L]): def __init__(self, term: BoundTerm[L], transform: Transform[L, Any]): self.term: BoundTerm[L] = term self.transform = transform + + def eval(self, struct: StructProtocol) -> L: + """Return the value at the referenced field's position in an object that abides by the StructProtocol.""" + return self.term.eval(struct) + + +class UnboundTransform(UnboundTerm[L]): + """An unbound transform expression.""" + + transform: Transform[L, Any] + + def __init__(self, term: UnboundTerm[L], transform: Transform[L, Any]): + self.term: UnboundTerm[L] = term + self.transform = transform + + def bind(self, schema: Schema, case_sensitive: bool = True) -> BoundTransform[L]: + bound_term = self.term.bind(schema, case_sensitive) + + if not self.transform.can_transform(bound_term.ref().field.field_type): + raise ValueError("some better error message") + + else: + return BoundTransform(bound_term, self.transform) diff --git a/tests/expressions/test_parser.py b/tests/expressions/test_parser.py index 3ce2f2226c..c3d3151434 100644 --- a/tests/expressions/test_parser.py +++ b/tests/expressions/test_parser.py @@ -199,3 +199,7 @@ def test_with_function() -> None: parser.parse("foo = 1 and lower(bar) = '2'") assert "Expected end of text, found 'and'" in str(exc_info) + + +def test_cast() -> None: + parser.parse("CAST(created_at as date)") From 91170d49ba58242a368f519d9b06aaeac9d4078e Mon Sep 17 00:00:00 2001 From: jayceslesar Date: Tue, 12 Dec 2023 16:14:27 -0500 Subject: [PATCH 2/5] add as_bound --- pyiceberg/transforms.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pyiceberg/transforms.py b/pyiceberg/transforms.py index 9cf43c050e..2469e24fcc 100644 --- a/pyiceberg/transforms.py +++ b/pyiceberg/transforms.py @@ -20,7 +20,7 @@ from abc import ABC, abstractmethod from enum import IntEnum from functools import singledispatch -from typing import Any, Callable, Generic, Optional, TypeVar +from typing import Any, Callable, Generic, Optional, Type, TypeVar from typing import Literal as LiteralType from uuid import UUID @@ -846,3 +846,7 @@ def bind(self, schema: Schema, case_sensitive: bool = True) -> BoundTransform[L] else: return BoundTransform(bound_term, self.transform) + + @property + def as_bound(self) -> Type[BoundTerm[L]]: + return BoundTerm[L] From 74bd769fd8316b9f134f1532ffcb66b18de86c89 Mon Sep 17 00:00:00 2001 From: jayceslesar Date: Tue, 12 Dec 2023 16:21:46 -0500 Subject: [PATCH 3/5] closer --- pyiceberg/transforms.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pyiceberg/transforms.py b/pyiceberg/transforms.py index 2469e24fcc..89df93e4a4 100644 --- a/pyiceberg/transforms.py +++ b/pyiceberg/transforms.py @@ -38,6 +38,7 @@ BoundNotIn, BoundNotStartsWith, BoundPredicate, + BoundReference, BoundSetPredicate, BoundStartsWith, BoundTerm, @@ -828,6 +829,10 @@ def eval(self, struct: StructProtocol) -> L: """Return the value at the referenced field's position in an object that abides by the StructProtocol.""" return self.term.eval(struct) + def ref(self) -> BoundReference[L]: + """Return the bound reference.""" + return self.term.ref() + class UnboundTransform(UnboundTerm[L]): """An unbound transform expression.""" From ccd50eb87ab5323a41ee9c8d1fe702d07292bd54 Mon Sep 17 00:00:00 2001 From: jayceslesar Date: Tue, 12 Dec 2023 16:30:53 -0500 Subject: [PATCH 4/5] add test --- pyiceberg/expressions/parser.py | 2 +- tests/expressions/test_parser.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pyiceberg/expressions/parser.py b/pyiceberg/expressions/parser.py index d06e05d05d..9116b3f835 100644 --- a/pyiceberg/expressions/parser.py +++ b/pyiceberg/expressions/parser.py @@ -247,7 +247,7 @@ def _evaluate_like_statement(result: ParseResults) -> BooleanExpression: @cast_expression.set_parse_action def _(result: ParseResults) -> UnboundTransform[L]: - return UnboundTransform(Reference(result.column), result[-1]) + return UnboundTransform(result.column, result[-1]) predicate = (cast_expression | comparison | in_check | null_check | nan_check | starts_check | boolean).set_results_name( diff --git a/tests/expressions/test_parser.py b/tests/expressions/test_parser.py index c3d3151434..d3120540c2 100644 --- a/tests/expressions/test_parser.py +++ b/tests/expressions/test_parser.py @@ -39,6 +39,7 @@ StartsWith, parser, ) +from pyiceberg.transforms import UnboundTransform, Reference def test_true() -> None: @@ -202,4 +203,5 @@ def test_with_function() -> None: def test_cast() -> None: - parser.parse("CAST(created_at as date)") + cast = parser.parse("CAST(created_at as date)") + assert cast.term == Reference("created_at") From f0bf24bc97e48036103160e2aa1227c895b87aa2 Mon Sep 17 00:00:00 2001 From: jayceslesar Date: Tue, 12 Dec 2023 16:31:16 -0500 Subject: [PATCH 5/5] lint --- tests/expressions/test_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/expressions/test_parser.py b/tests/expressions/test_parser.py index d3120540c2..fe958ad059 100644 --- a/tests/expressions/test_parser.py +++ b/tests/expressions/test_parser.py @@ -39,7 +39,7 @@ StartsWith, parser, ) -from pyiceberg.transforms import UnboundTransform, Reference +from pyiceberg.transforms import Reference def test_true() -> None: