From 1372480a49caa07b6616d21d0f9105a31e45f120 Mon Sep 17 00:00:00 2001
From: amogh-jahagirdar
Date: Fri, 15 Sep 2023 21:03:32 -0700
Subject: [PATCH] Python: Add list-refs CLI command

Add a list-refs command that prints the snapshot refs (branches and tags)
of a table together with the retention settings of each ref. The optional
--type argument restricts the listing to either branches or tags.

For branches, a retention value that is not set on the ref falls back to
the table properties history.expire.min-snapshots-to-keep and
history.expire.max-snapshot-age-ms, and then to the built-in defaults.
Refs of any other type report "N/A" for snapshot retention. A ref without
a max ref age is reported as "forever".

The JSON output emits a single object per ref that carries the name, the
type and all retention values.

---
Reviewer notes (ignored by git am):
 * Retention values are compared against None rather than tested for
   truthiness, so an explicit 0 on a ref is no longer replaced by the
   table default.
 * JsonOutput.describe_refs used a nested loop over the retention
   properties and therefore printed one object per (ref, property) pair.
 * The blob ids on the "index" lines are carried over from the original
   revision of this patch and do not describe the revised post-images.
 * The --verbose option is accepted but not used by list_refs yet.

 python/pyiceberg/cli/console.py    | 59 ++++++++++++++++++++++++++++++
 python/pyiceberg/cli/output.py     | 32 +++++++++++++++-
 python/pyiceberg/table/__init__.py |  5 +++
 3 files changed, 95 insertions(+), 1 deletion(-)

diff --git a/python/pyiceberg/cli/console.py b/python/pyiceberg/cli/console.py
index 62f7a02fab1c..ae2e9124824a 100644
--- a/python/pyiceberg/cli/console.py
+++ b/python/pyiceberg/cli/console.py
@@ -19,6 +19,7 @@
 from typing import (
     Any,
     Callable,
+    Dict,
     Literal,
     Optional,
     Tuple,
@@ -31,6 +32,10 @@
 from pyiceberg.catalog import Catalog, load_catalog
 from pyiceberg.cli.output import ConsoleOutput, JsonOutput, Output
 from pyiceberg.exceptions import NoSuchNamespaceError, NoSuchPropertyException, NoSuchTableError
+from pyiceberg.table.refs import SnapshotRef
+
+DEFAULT_MIN_SNAPSHOTS_TO_KEEP = 1
+DEFAULT_MAX_SNAPSHOT_AGE_MS = 432000000
 
 
 def catch_exception() -> Callable:  # type: ignore
@@ -243,6 +248,60 @@ def rename(ctx: Context, from_identifier: str, to_identifier: str) -> None:
     output.text(f"Renamed table from {from_identifier} to {to_identifier}")
 
 
+@run.command()
+@click.argument("identifier")
+@click.option("--type", required=False)
+@click.option("--verbose", type=click.BOOL)
+@click.pass_context
+@catch_exception()
+def list_refs(ctx: Context, identifier: str, type: str, verbose: bool) -> None:
+    """List all the refs in the provided table."""
+    catalog, output = _catalog_and_output(ctx)
+    table = catalog.load_table(identifier)
+    refs = table.refs()
+    if type:
+        type = type.lower()
+        if type not in {"branch", "tag"}:
+            raise ValueError("Type must be either branch or tag")
+
+    relevant_refs = [
+        (ref_name, ref.snapshot_ref_type, _retention_properties(ref, table.properties))
+        for (ref_name, ref) in refs.items()
+        if not type or ref.snapshot_ref_type == type
+    ]
+
+    output.describe_refs(relevant_refs)
+
+
+def _retention_properties(ref: SnapshotRef, table_properties: Dict[str, str]) -> Dict[str, str]:
+    """Return the retention settings of a ref as display strings.
+
+    For a branch, a snapshot retention value that is not set on the ref falls back to the
+    matching table property and then to the module default. Any other ref type reports
+    "N/A" for snapshot retention. An unset max ref age is reported as "forever".
+    """
+    retention_properties = {}
+    if ref.snapshot_ref_type == "branch":
+        default_min_snapshots_to_keep = table_properties.get(
+            "history.expire.min-snapshots-to-keep", DEFAULT_MIN_SNAPSHOTS_TO_KEEP
+        )
+        # Compare against None so that an explicit 0 on the ref is not mistaken for "unset".
+        retention_properties["min_snapshots_to_keep"] = (
+            str(ref.min_snapshots_to_keep) if ref.min_snapshots_to_keep is not None else str(default_min_snapshots_to_keep)
+        )
+        default_max_snapshot_age_ms = table_properties.get("history.expire.max-snapshot-age-ms", DEFAULT_MAX_SNAPSHOT_AGE_MS)
+        retention_properties["max_snapshot_age_ms"] = (
+            str(ref.max_snapshot_age_ms) if ref.max_snapshot_age_ms is not None else str(default_max_snapshot_age_ms)
+        )
+    else:
+        retention_properties["min_snapshots_to_keep"] = "N/A"
+        retention_properties["max_snapshot_age_ms"] = "N/A"
+
+    retention_properties["max_ref_age_ms"] = str(ref.max_ref_age_ms) if ref.max_ref_age_ms is not None else "forever"
+
+    return retention_properties
+
+
 @run.group()
 def properties() -> None:
     """Properties on tables/namespaces."""
diff --git a/python/pyiceberg/cli/output.py b/python/pyiceberg/cli/output.py
index 299f84dafeb5..bfe9e44deff9 100644
--- a/python/pyiceberg/cli/output.py
+++ b/python/pyiceberg/cli/output.py
@@ -16,7 +16,13 @@
 # under the License.
 import json
 from abc import ABC, abstractmethod
-from typing import Any, List, Optional
+from typing import (
+    Any,
+    Dict,
+    List,
+    Optional,
+    Tuple,
+)
 from uuid import UUID
 
 from rich.console import Console
@@ -26,6 +32,7 @@
 from pyiceberg.partitioning import PartitionSpec
 from pyiceberg.schema import Schema
 from pyiceberg.table import Table, TableMetadata
+from pyiceberg.table.refs import SnapshotRefType
 from pyiceberg.typedef import IcebergBaseModel, Identifier, Properties
 
 
@@ -72,6 +79,11 @@ def uuid(self, uuid: Optional[UUID]) -> None:
     def version(self, version: str) -> None:
         ...
 
+    @abstractmethod
+    def describe_refs(self, refs: List[Tuple[str, SnapshotRefType, Dict[str, str]]]) -> None:
+        """Output the given refs, each passed as a (name, type, retention properties) tuple."""
+        ...
+
 
 class ConsoleOutput(Output):
     """Writes to the console."""
@@ -127,6 +139,20 @@ def describe_table(self, table: Table) -> None:
         output_table.add_row("Properties", table_properties)
         Console().print(output_table)
 
+    def describe_refs(self, ref_details: List[Tuple[str, SnapshotRefType, Dict[str, str]]]) -> None:
+        refs_table = RichTable(title="Snapshot Refs")
+        refs_table.add_column("Ref")
+        refs_table.add_column("Type")
+        refs_table.add_column("Max ref age ms")
+        refs_table.add_column("Min snapshots to keep")
+        refs_table.add_column("Max snapshot age ms")
+        for name, type, ref_detail in ref_details:
+            refs_table.add_row(
+                name, type, ref_detail["max_ref_age_ms"], ref_detail["min_snapshots_to_keep"], ref_detail["max_snapshot_age_ms"]
+            )
+
+        Console().print(refs_table)
+
     def files(self, table: Table, history: bool) -> None:
         if history:
             snapshots = table.metadata.snapshots
@@ -226,3 +252,7 @@ def uuid(self, uuid: Optional[UUID]) -> None:
 
     def version(self, version: str) -> None:
         self._out({"version": version})
+
+    def describe_refs(self, refs: List[Tuple[str, SnapshotRefType, Dict[str, str]]]) -> None:
+        # One object per ref: the retention properties are merged into the entry of the ref itself.
+        self._out([{"name": name, "type": type, **detail} for name, type, detail in refs])
diff --git a/python/pyiceberg/table/__init__.py b/python/pyiceberg/table/__init__.py
index b905c955c848..945d24c30f51 100644
--- a/python/pyiceberg/table/__init__.py
+++ b/python/pyiceberg/table/__init__.py
@@ -69,6 +69,7 @@
     visit,
 )
 from pyiceberg.table.metadata import INITIAL_SEQUENCE_NUMBER, TableMetadata
+from pyiceberg.table.refs import SnapshotRef
 from pyiceberg.table.snapshots import Snapshot, SnapshotLogEntry
 from pyiceberg.table.sorting import SortOrder
 from pyiceberg.typedef import (
@@ -517,6 +518,10 @@ def snapshot_by_name(self, name: str) -> Optional[Snapshot]:
             return self.snapshot_by_id(ref.snapshot_id)
         return None
 
+    def refs(self) -> Dict[str, SnapshotRef]:
+        """Return the snapshot references in the table."""
+        return self.metadata.refs
+
     def history(self) -> List[SnapshotLogEntry]:
         """Get the snapshot history of this table."""
         return self.metadata.snapshot_log