From 1338bbc45d8c95b33fc5b6a2129ebf6be92ba806 Mon Sep 17 00:00:00 2001 From: "R. Conner Howell" Date: Tue, 21 Apr 2026 12:41:05 -0700 Subject: [PATCH 1/2] fix: add iceberg_type column for SqlCatalog --- pyiceberg/catalog/sql.py | 13 +++++++ tests/catalog/test_sql.py | 82 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 94 insertions(+), 1 deletion(-) diff --git a/pyiceberg/catalog/sql.py b/pyiceberg/catalog/sql.py index e18a0598b9..4bcbf91ab4 100644 --- a/pyiceberg/catalog/sql.py +++ b/pyiceberg/catalog/sql.py @@ -27,6 +27,7 @@ delete, insert, select, + text, union, update, ) @@ -92,6 +93,7 @@ class IcebergTables(SqlCatalogBaseTable): table_name: Mapped[str] = mapped_column(String(255), nullable=False, primary_key=True) metadata_location: Mapped[str | None] = mapped_column(String(1000), nullable=True) previous_metadata_location: Mapped[str | None] = mapped_column(String(1000), nullable=True) + iceberg_type: Mapped[str | None] = mapped_column(String(5), nullable=True, default="TABLE") class IcebergNamespaceProperties(SqlCatalogBaseTable): @@ -133,6 +135,8 @@ def __init__(self, name: str, **properties: str): if init_catalog_tables: self._ensure_tables_exist() + self._update_tables_if_required() + def _ensure_tables_exist(self) -> None: with Session(self.engine) as session: for table in [IcebergTables, IcebergNamespaceProperties]: @@ -146,6 +150,15 @@ def _ensure_tables_exist(self) -> None: self.create_tables() return + def _update_tables_if_required(self) -> None: + with Session(self.engine) as session: + stmt = f"ALTER TABLE {IcebergTables.__tablename__} ADD COLUMN iceberg_type VARCHAR(5)" + try: + session.execute(text(stmt)) + session.commit() + except (OperationalError, ProgrammingError): + session.rollback() + def create_tables(self) -> None: SqlCatalogBaseTable.metadata.create_all(self.engine) diff --git a/tests/catalog/test_sql.py b/tests/catalog/test_sql.py index f6846195fe..59ae00f3b1 100644 --- a/tests/catalog/test_sql.py +++ b/tests/catalog/test_sql.py @@ -20,7 +20,7 @@ from typing import cast import pytest -from sqlalchemy import Engine, create_engine, inspect +from sqlalchemy import Engine, create_engine, inspect, text from sqlalchemy.exc import ArgumentError from pyiceberg.catalog import load_catalog @@ -261,3 +261,83 @@ def test_sql_catalog_multiple_close_calls(self, catalog_sqlite: SqlCatalog) -> N # Second close should not raise an exception catalog_sqlite.close() + + +def _create_pre_migration_schema_tables(engine: Engine) -> None: + with engine.connect() as conn: + conn.execute( + text( + "CREATE TABLE iceberg_tables (" + " catalog_name VARCHAR(255) NOT NULL," + " table_namespace VARCHAR(255) NOT NULL," + " table_name VARCHAR(255) NOT NULL," + " metadata_location VARCHAR(1000)," + " previous_metadata_location VARCHAR(1000)," + " PRIMARY KEY (catalog_name, table_namespace, table_name)" + ")" + ) + ) + conn.execute( + text( + "CREATE TABLE iceberg_namespace_properties (" + " catalog_name VARCHAR(255) NOT NULL," + " namespace VARCHAR(255) NOT NULL," + " property_key VARCHAR(255) NOT NULL," + " property_value VARCHAR(1000) NOT NULL," + " PRIMARY KEY (catalog_name, namespace, property_key)" + ")" + ) + ) + conn.commit() + + +def get_columns(engine: Engine) -> set[str]: + return {c["name"] for c in inspect(engine).get_columns("iceberg_tables")} + + +def test_adds_iceberg_type_column_to_old_schema(warehouse: Path) -> None: + # Create the old schema tables + uri = f"sqlite:////{warehouse}/test-migration-add-col" + engine = create_engine(uri) + with engine.connect() as conn: + conn.execute( + text( + "CREATE TABLE iceberg_tables (" + " catalog_name VARCHAR(255) NOT NULL," + " table_namespace VARCHAR(255) NOT NULL," + " table_name VARCHAR(255) NOT NULL," + " metadata_location VARCHAR(1000)," + " previous_metadata_location VARCHAR(1000)," + " PRIMARY KEY (catalog_name, table_namespace, table_name)" + ")" + ) + ) + conn.execute( + text( + "CREATE TABLE iceberg_namespace_properties (" + " catalog_name VARCHAR(255) NOT NULL," + " namespace VARCHAR(255) NOT NULL," + " property_key VARCHAR(255) NOT NULL," + " property_value VARCHAR(1000) NOT NULL," + " PRIMARY KEY (catalog_name, namespace, property_key)" + ")" + ) + ) + conn.commit() + + # Verify the column does not exist in the old schema + assert "iceberg_type" not in get_columns(engine) + + # Load the catalog and verify the column exists + catalog = SqlCatalog("test", uri=uri, warehouse=f"file://{warehouse}", init_catalog_tables="false") + assert "iceberg_type" in get_columns(catalog.engine) + + +def test_idempotent_when_column_already_exists(warehouse: Path) -> None: + # Verify the column was created by the init_tables call + catalog = SqlCatalog("test", uri="sqlite:///:memory:", warehouse=f"file://{warehouse}") + assert "iceberg_type" in get_columns(catalog.engine) + + # Verify the method is idempotent by calling it again + catalog._update_tables_if_required() + assert "iceberg_type" in get_columns(catalog.engine) From d048c5a5efc17ec1d9f314e8e58edd4aa6dd055d Mon Sep 17 00:00:00 2001 From: "R. Conner Howell" Date: Wed, 22 Apr 2026 16:43:46 -0700 Subject: [PATCH 2/2] filter on iceberg_type column --- pyiceberg/catalog/sql.py | 6 +++- tests/catalog/test_sql.py | 59 ++++++++++++++++++++------------------- 2 files changed, 36 insertions(+), 29 deletions(-) diff --git a/pyiceberg/catalog/sql.py b/pyiceberg/catalog/sql.py index 4bcbf91ab4..e401b4716e 100644 --- a/pyiceberg/catalog/sql.py +++ b/pyiceberg/catalog/sql.py @@ -613,7 +613,11 @@ def list_tables(self, namespace: str | Identifier) -> list[Identifier]: raise NoSuchNamespaceError(f"Namespace does not exist: {namespace}") namespace = Catalog.namespace_to_string(namespace) - stmt = select(IcebergTables).where(IcebergTables.catalog_name == self.name, IcebergTables.table_namespace == namespace) + stmt = select(IcebergTables).where( + IcebergTables.catalog_name == self.name, + IcebergTables.table_namespace == namespace, + (IcebergTables.iceberg_type == "TABLE") | (IcebergTables.iceberg_type.is_(None)), + ) with Session(self.engine) as session: result = session.scalars(stmt) return [(Catalog.identifier_to_tuple(table.table_namespace) + (table.table_name,)) for table in result] diff --git a/tests/catalog/test_sql.py b/tests/catalog/test_sql.py index 59ae00f3b1..b722be723b 100644 --- a/tests/catalog/test_sql.py +++ b/tests/catalog/test_sql.py @@ -263,34 +263,6 @@ def test_sql_catalog_multiple_close_calls(self, catalog_sqlite: SqlCatalog) -> N catalog_sqlite.close() -def _create_pre_migration_schema_tables(engine: Engine) -> None: - with engine.connect() as conn: - conn.execute( - text( - "CREATE TABLE iceberg_tables (" - " catalog_name VARCHAR(255) NOT NULL," - " table_namespace VARCHAR(255) NOT NULL," - " table_name VARCHAR(255) NOT NULL," - " metadata_location VARCHAR(1000)," - " previous_metadata_location VARCHAR(1000)," - " PRIMARY KEY (catalog_name, table_namespace, table_name)" - ")" - ) - ) - conn.execute( - text( - "CREATE TABLE iceberg_namespace_properties (" - " catalog_name VARCHAR(255) NOT NULL," - " namespace VARCHAR(255) NOT NULL," - " property_key VARCHAR(255) NOT NULL," - " property_value VARCHAR(1000) NOT NULL," - " PRIMARY KEY (catalog_name, namespace, property_key)" - ")" - ) - ) - conn.commit() - - def get_columns(engine: Engine) -> set[str]: return {c["name"] for c in inspect(engine).get_columns("iceberg_tables")} @@ -341,3 +313,34 @@ def test_idempotent_when_column_already_exists(warehouse: Path) -> None: # Verify the method is idempotent by calling it again catalog._update_tables_if_required() assert "iceberg_type" in get_columns(catalog.engine) + + +def test_list_tables_filters_by_iceberg_type(warehouse: Path) -> None: + catalog = SqlCatalog("test", uri="sqlite:///:memory:", warehouse=f"file://{warehouse}") + catalog.create_namespace("ns") + schema = Schema(NestedField(1, "id", StringType(), required=True)) + catalog.create_table(("ns", "table_V1"), schema) + + # Insert a legac-schema row (iceberg_type IS NULL), so itshould appear in list_tables + with catalog.engine.connect() as conn: + conn.execute( + text( + "INSERT INTO iceberg_tables " + "(catalog_name, table_namespace, table_name, metadata_location, previous_metadata_location, iceberg_type) " + "VALUES ('test', 'ns', 'table_V0', NULL, NULL, NULL)" + ) + ) + # Insert a non-TABLE row — should NOT appear in list_tables + conn.execute( + text( + "INSERT INTO iceberg_tables " + "(catalog_name, table_namespace, table_name, metadata_location, previous_metadata_location, iceberg_type) " + "VALUES ('test', 'ns', 'some_view', NULL, NULL, 'VIEW')" + ) + ) + conn.commit() + + tables = [t[-1] for t in catalog.list_tables("ns")] + assert "table_V1" in tables + assert "table_V0" in tables + assert "some_view" not in tables