diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py
index 8443315a64..6e71a40c2d 100644
--- a/pyiceberg/table/__init__.py
+++ b/pyiceberg/table/__init__.py
@@ -17,6 +17,7 @@
 from __future__ import annotations
 
 import itertools
+import uuid
 from abc import ABC, abstractmethod
 from copy import copy
 from dataclasses import dataclass
@@ -498,6 +499,14 @@ def location(self) -> str:
         """Return the table's base location."""
         return self.metadata.location
 
+    def new_snapshot_id(self) -> int:
+        """Generate a new snapshot-id that's not in use."""
+        snapshot_id = _generate_snapshot_id()
+        while self.snapshot_by_id(snapshot_id) is not None:
+            snapshot_id = _generate_snapshot_id()
+
+        return snapshot_id
+
     def current_snapshot(self) -> Optional[Snapshot]:
         """Get the current snapshot for this table, or None if there is no current snapshot."""
         if snapshot_id := self.metadata.current_snapshot_id:
@@ -1566,3 +1575,19 @@ def _add_and_move_fields(
     elif len(moves) > 0:
         return _move_fields(fields, moves)
     return None if len(adds) == 0 else tuple(*fields, *adds)
+
+
+def _generate_snapshot_id() -> int:
+    """Generate a new Snapshot ID from a UUID.
+
+    The two 64-bit halves of a random UUID are XOR-ed together and the sign bit
+    is cleared, so the result always fits in a non-negative signed 64-bit long.
+
+    Returns:
+        A non-negative integer in the range [0, 2**63 - 1].
+    """
+    rnd_uuid = uuid.uuid4()
+    most_significant_bits = rnd_uuid.int >> 64
+    least_significant_bits = rnd_uuid.int & 0xFFFFFFFFFFFFFFFF
+    # Mask instead of negating: negating -2**63 yields 2**63, which overflows a signed long.
+    return (most_significant_bits ^ least_significant_bits) & 0x7FFFFFFFFFFFFFFF
diff --git a/tests/table/test_init.py b/tests/table/test_init.py
index 8fd5e2bcdb..369df4fa92 100644
--- a/tests/table/test_init.py
+++ b/tests/table/test_init.py
@@ -41,6 +41,7 @@
     StaticTable,
     Table,
     UpdateSchema,
+    _generate_snapshot_id,
     _match_deletes_to_datafile,
 )
 from pyiceberg.table.metadata import INITIAL_SEQUENCE_NUMBER
@@ -506,3 +507,13 @@ def test_add_nested_list_type_column(table: Table) -> None:
         element_required=False,
     )
     assert new_schema.highest_field_id == 7
+
+
+def test_generate_snapshot_id(table: Table) -> None:
+    snapshot_id = _generate_snapshot_id()
+    assert isinstance(snapshot_id, int)
+    assert 0 <= snapshot_id <= 2**63 - 1
+
+    new_snapshot_id = table.new_snapshot_id()
+    assert isinstance(new_snapshot_id, int)
+    assert table.snapshot_by_id(new_snapshot_id) is None