-
Notifications
You must be signed in to change notification settings - Fork 3k
Closed
Labels
Description
Apache Iceberg version
None
Query engine
None
Please describe the bug 🐞
The following scan on the taxi dataset:
df = tbl.scan(row_filter=And(
GreaterThanOrEqual("tpep_pickup_datetime", "2021-04-01T00:00:00.000000+00:00"),
LessThan("tpep_pickup_datetime", "2021-05-01T00:00:00.000000+00:00")
)).to_arrow().to_pandas()
Results in the following exception:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In[38], line 3
1 from pyiceberg.expressions import And, GreaterThanOrEqual, LessThan
----> 3 df = tbl.scan(row_filter=And(
4 GreaterThanOrEqual("tpep_pickup_datetime", "2021-04-01T00:00:00.000000+00:00"),
5 LessThan("tpep_pickup_datetime", "2021-05-01T00:00:00.000000+00:00")
6 )).to_arrow().to_pandas()
8 df
File /usr/local/lib/python3.9/site-packages/pyiceberg/table/__init__.py:350, in DataScan.to_arrow(self)
347 fs = self.table.io.get_fs(scheme)
349 locations = []
--> 350 for task in self.plan_files():
351 if isinstance(task, FileScanTask):
352 _, path = PyArrowFileIO.parse_location(task.file.file_path)
File /usr/local/lib/python3.9/site-packages/pyiceberg/table/__init__.py:335, in DataScan.plan_files(self)
332 all_files = files(io.new_input(manifest.manifest_path))
333 matching_partition_files = filter(partition_filter, all_files)
--> 335 yield from (FileScanTask(file) for file in matching_partition_files)
File /usr/local/lib/python3.9/site-packages/pyiceberg/table/__init__.py:335, in <genexpr>(.0)
332 all_files = files(io.new_input(manifest.manifest_path))
333 matching_partition_files = filter(partition_filter, all_files)
--> 335 yield from (FileScanTask(file) for file in matching_partition_files)
File /usr/local/lib/python3.9/site-packages/pyiceberg/table/__init__.py:305, in DataScan._build_partition_evaluator.<locals>.<lambda>(data_file)
302 wrapper = _DictAsStruct(partition_type)
303 evaluator = visitors.expression_evaluator(partition_schema, partition_expr, self.case_sensitive)
--> 305 return lambda data_file: evaluator(wrapper.wrap(data_file.partition))
File /usr/local/lib/python3.9/site-packages/pyiceberg/expressions/visitors.py:437, in _ExpressionEvaluator.eval(self, struct)
435 def eval(self, struct: StructProtocol) -> bool:
436 self.struct = struct
--> 437 return visit(self.bound, self)
File /usr/local/lib/python3.9/functools.py:888, in singledispatch.<locals>.wrapper(*args, **kw)
884 if not args:
885 raise TypeError(f'{funcname} requires at least '
886 '1 positional argument')
--> 888 return dispatch(args[0].__class__)(*args, **kw)
File /usr/local/lib/python3.9/site-packages/pyiceberg/expressions/visitors.py:164, in _(obj, visitor)
161 @visit.register(And)
162 def _(obj: And, visitor: BooleanExpressionVisitor[T]) -> T:
163 """Visit an And boolean expression with a concrete BooleanExpressionVisitor"""
--> 164 left_result: T = visit(obj.left, visitor=visitor)
165 right_result: T = visit(obj.right, visitor=visitor)
166 return visitor.visit_and(left_result=left_result, right_result=right_result)
File /usr/local/lib/python3.9/functools.py:888, in singledispatch.<locals>.wrapper(*args, **kw)
884 if not args:
885 raise TypeError(f'{funcname} requires at least '
886 '1 positional argument')
--> 888 return dispatch(args[0].__class__)(*args, **kw)
File /usr/local/lib/python3.9/site-packages/pyiceberg/expressions/visitors.py:178, in _(obj, visitor)
175 @visit.register(BoundPredicate)
176 def _(obj: BoundPredicate[L], visitor: BooleanExpressionVisitor[T]) -> T:
177 """Visit a bound boolean expression with a concrete BooleanExpressionVisitor"""
--> 178 return visitor.visit_bound_predicate(predicate=obj)
File /usr/local/lib/python3.9/site-packages/pyiceberg/expressions/visitors.py:326, in BoundBooleanExpressionVisitor.visit_bound_predicate(self, predicate)
321 def visit_bound_predicate(self, predicate: BoundPredicate[L]) -> T:
322 """Visit a bound predicate
323 Args:
324 predicate (BoundPredicate[L]): A bound predicate
325 """
--> 326 return visit_bound_predicate(predicate, self)
File /usr/local/lib/python3.9/functools.py:888, in singledispatch.<locals>.wrapper(*args, **kw)
884 if not args:
885 raise TypeError(f'{funcname} requires at least '
886 '1 positional argument')
--> 888 return dispatch(args[0].__class__)(*args, **kw)
File /usr/local/lib/python3.9/site-packages/pyiceberg/expressions/visitors.py:377, in _(expr, visitor)
374 @visit_bound_predicate.register(BoundGreaterThanOrEqual)
375 def _(expr: BoundGreaterThanOrEqual[L], visitor: BoundBooleanExpressionVisitor[T]) -> T:
376 """Visit a bound GreaterThanOrEqual predicate"""
--> 377 return visitor.visit_greater_than_or_equal(term=expr.term, literal=expr.literal)
File /usr/local/lib/python3.9/site-packages/pyiceberg/expressions/visitors.py:466, in _ExpressionEvaluator.visit_greater_than_or_equal(self, term, literal)
465 def visit_greater_than_or_equal(self, term: BoundTerm[L], literal: Literal[L]) -> bool:
--> 466 return term.eval(self.struct) >= literal.value
TypeError: '>=' not supported between instances of 'datetime.date' and 'int'
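It looks like a conversion is missing: the evaluated partition value is a `datetime.date`, while the literal value is an `int`, so the two cannot be compared with `>=`.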