Apache Iceberg version
0.11.0.dev20260416005052
Please describe the bug 🐞
Hello,
I am running one of the CI builds of pyiceberg (0.11.0.dev20260416005052).
I run pyiceberg on a big dataset, and I needed the recent optimisation on overwrite: #3011
Without it, pyiceberg scans all the manifests.
Sadly, it introduced a regression: my batch now fails because the recursion in pyiceberg gets too deep.
Here is a minimal repro:
from pathlib import Path
from tempfile import TemporaryDirectory

import pyarrow as pa
from pyiceberg.catalog import load_catalog
from pyiceberg.partitioning import PartitionField, PartitionSpec
from pyiceberg.schema import Schema
from pyiceberg.transforms import IdentityTransform
from pyiceberg.types import NestedField
def make_row(partition_value: str, value: int) -> pa.Table:
    """Build a one-row Arrow table matching the repro table's two columns.

    Args:
        partition_value: Value placed in the ``date`` column.
        value: Value placed in the ``value`` column.

    Returns:
        A table with a ``large_string`` ``date`` column and an ``int64``
        ``value`` column, each holding exactly one row.
    """
    return pa.table(
        {
            "date": pa.array([partition_value], type=pa.large_string()),
            "value": pa.array([value], type=pa.int64()),
        }
    )
with TemporaryDirectory() as tmpdir:
    # The whole repro runs inside this context: the catalog below is pointed
    # at a warehouse directory created under the temporary directory.
    warehouse = Path(tmpdir, "warehouse")
    warehouse.mkdir(parents=True, exist_ok=True)
    warehouse_uri = f"file://{warehouse.resolve().as_posix()}"

    catalog = load_catalog(
        "local",
        type="in-memory",
        warehouse=warehouse_uri,
    )
    catalog.create_namespace("default")

    schema = Schema(
        NestedField(field_id=1, name="date", field_type="string", required=False),
        NestedField(field_id=2, name="value", field_type="long", required=False),
    )
    # Identity-partition the table on the "date" column (source_id=1).
    partition_spec = PartitionSpec(
        PartitionField(
            source_id=1,
            field_id=1000,
            transform=IdentityTransform(),
            name="date",
        )
    )
    table = catalog.create_table(
        "default.repro",
        schema=schema,
        partition_spec=partition_spec,
    )

    # Seed 512 distinct partition files.
    for i in range(512):
        table.append(make_row(f"2026-02-{i:04d}T00", 1))

    # Collect every data file the current table scan plans to read.
    files_to_delete = [task.file for task in table.scan().plan_files()]
    print(f"files_to_delete={len(files_to_delete)}")

    # Repro: open a transaction and do an overwrite, like our code path.
    with table.transaction() as tx:
        with tx.update_snapshot().overwrite() as overwrite_snapshot:
            for data_file in files_to_delete:
                overwrite_snapshot.delete_data_file(data_file)
This fails with the following error:
[... truncated ...]
File "C:\Dev\python\.venv\Lib\site-packages\pyiceberg\expressions\visitors.py", line 193, in _
left_result: T = visit(obj.left, visitor=visitor)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Program Files\Python311\Lib\functools.py", line 909, in wrapper
return dispatch(args[0].__class__)(*args, **kw)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Dev\python\.venv\Lib\site-packages\pyiceberg\expressions\visitors.py", line 193, in _
left_result: T = visit(obj.left, visitor=visitor)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Program Files\Python311\Lib\functools.py", line 909, in wrapper
return dispatch(args[0].__class__)(*args, **kw)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Dev\python\.venv\Lib\site-packages\pyiceberg\expressions\visitors.py", line 193, in _
left_result: T = visit(obj.left, visitor=visitor)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Program Files\Python311\Lib\functools.py", line 909, in wrapper
return dispatch(args[0].__class__)(*args, **kw)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Dev\python\.venv\Lib\site-packages\pyiceberg\expressions\visitors.py", line 193, in _
left_result: T = visit(obj.left, visitor=visitor)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Program Files\Python311\Lib\functools.py", line 909, in wrapper
return dispatch(args[0].__class__)(*args, **kw)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Dev\python\.venv\Lib\site-packages\pyiceberg\expressions\visitors.py", line 193, in _
left_result: T = visit(obj.left, visitor=visitor)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Program Files\Python311\Lib\functools.py", line 909, in wrapper
return dispatch(args[0].__class__)(*args, **kw)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Program Files\Python311\Lib\functools.py", line 832, in dispatch
impl = dispatch_cache[cls]
~~~~~~~~~~~~~~^^^^^
File "C:\Program Files\Python311\Lib\weakref.py", line 415, in __getitem__
return self.data[ref(key)]
~~~~~~~~~^^^^^^^^^^
RecursionError: maximum recursion depth exceeded in comparison
Willingness to contribute
Apache Iceberg version
0.11.0.dev20260416005052
Please describe the bug 🐞
Hello,
I am running one of the CI builds of pyiceberg (0.11.0.dev20260416005052).
I run pyiceberg on a big dataset, and I needed the recent optimisation on overwrite: #3011
Without it, pyiceberg scans all the manifests.
Sadly, it introduced a regression: my batch now fails because the recursion in pyiceberg gets too deep.
Here is a minimal repro:
This fails with the following error:
Willingness to contribute