From efde265e0539595c364700c534d0ad00f405f815 Mon Sep 17 00:00:00 2001 From: Mehul Batra Date: Wed, 8 May 2024 00:54:43 +0530 Subject: [PATCH 1/4] register table using iceberg metadata file --- pyiceberg/catalog/glue.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pyiceberg/catalog/glue.py b/pyiceberg/catalog/glue.py index 275cda7ed0..838d7b4115 100644 --- a/pyiceberg/catalog/glue.py +++ b/pyiceberg/catalog/glue.py @@ -417,7 +417,12 @@ def register_table(self, identifier: Union[str, Identifier], metadata_location: Raises: TableAlreadyExistsError: If the table already exists """ - raise NotImplementedError + database_name, table_name = self.identifier_to_database_and_table(identifier) + io = self._load_file_io(location=metadata_location) + table_input = FromInputFile.table_metadata(io.new_input(metadata_location)) + self._create_glue_table(database_name=database_name, table_name=table_name, table_input=table_input) + + return self.load_table(identifier=identifier) def _commit_table(self, table_request: CommitTableRequest) -> CommitTableResponse: """Update the table. From 46fa787df3b689b1899d0e3bd9ea573b2c4a25e1 Mon Sep 17 00:00:00 2001 From: Mehul Batra Date: Tue, 14 May 2024 00:18:23 +0530 Subject: [PATCH 2/4] register table procedure for glue with unit test --- pyiceberg/catalog/glue.py | 6 ++++-- tests/catalog/test_glue.py | 14 ++++++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/pyiceberg/catalog/glue.py b/pyiceberg/catalog/glue.py index 838d7b4115..8819c2e266 100644 --- a/pyiceberg/catalog/glue.py +++ b/pyiceberg/catalog/glue.py @@ -418,10 +418,12 @@ def register_table(self, identifier: Union[str, Identifier], metadata_location: TableAlreadyExistsError: If the table already exists """ database_name, table_name = self.identifier_to_database_and_table(identifier) + properties = EMPTY_DICT io = self._load_file_io(location=metadata_location) - table_input = FromInputFile.table_metadata(io.new_input(metadata_location)) + file = io.new_input(metadata_location) + metadata = FromInputFile.table_metadata(file) + table_input = _construct_table_input(table_name, metadata_location, properties, metadata) self._create_glue_table(database_name=database_name, table_name=table_name, table_input=table_input) - return self.load_table(identifier=identifier) def _commit_table(self, table_request: CommitTableRequest) -> CommitTableResponse: diff --git a/tests/catalog/test_glue.py b/tests/catalog/test_glue.py index 5b67b92c68..1aea46d6ef 100644 --- a/tests/catalog/test_glue.py +++ b/tests/catalog/test_glue.py @@ -848,3 +848,17 @@ def test_table_exists( assert test_catalog.table_exists(identifier) is True # Act and Assert for a non-existing table assert test_catalog.table_exists(('non', 'exist')) is False + + +@mock_aws +def test_register_table_with_given_location( + _bucket_initialize: None, moto_endpoint_url: str, metadata_location: str, database_name: str, table_name: str +) -> None: + catalog_name = "glue" + identifier = (database_name, table_name) + location = metadata_location + test_catalog = GlueCatalog(catalog_name, **{"s3.endpoint": moto_endpoint_url, "warehouse": f"s3://{BUCKET_NAME}"}) + test_catalog.create_namespace(namespace=database_name, properties={"location": f"s3://{BUCKET_NAME}/{database_name}.db"}) + table = test_catalog.register_table(identifier, location) + assert table.identifier == (catalog_name,) + identifier + assert test_catalog.table_exists(identifier) is True From 05e27ca894190c8ea0fed48f9908cdc7b124e7e3 Mon Sep 17 00:00:00 2001 From: Mehul Batra Date: Thu, 16 May 2024 17:25:04 +0530 Subject: [PATCH 3/4] integration test for register_table --- tests/catalog/integration_test_glue.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/catalog/integration_test_glue.py b/tests/catalog/integration_test_glue.py index a2c430de5f..99fa998b08 100644 --- a/tests/catalog/integration_test_glue.py +++ b/tests/catalog/integration_test_glue.py @@ -564,3 +564,13 @@ def test_table_exists(test_catalog: Catalog, table_schema_nested: Schema, table_ test_catalog.create_namespace(database_name) test_catalog.create_table((database_name, table_name), table_schema_nested) assert test_catalog.table_exists((database_name, table_name)) is True + + +def test_register_table(test_catalog: Catalog, metadata_location: str, table_name: str, database_name: str) -> None: + identifier = (database_name, table_name) + test_catalog.create_namespace(database_name) + location = metadata_location + table = test_catalog.register_table(identifier, location) + assert table.identifier == (CATALOG_NAME,) + identifier + assert table.metadata_location == metadata_location + assert test_catalog.table_exists((database_name, table_name)) is True From 99fdc89ae68ca1911cf555e025a81af2d5e6300a Mon Sep 17 00:00:00 2001 From: Mehul Batra Date: Wed, 22 May 2024 16:08:36 +0530 Subject: [PATCH 4/4] changes in integration test --- tests/catalog/integration_test_glue.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/tests/catalog/integration_test_glue.py b/tests/catalog/integration_test_glue.py index 99fa998b08..f6d9c9cc4b 100644 --- a/tests/catalog/integration_test_glue.py +++ b/tests/catalog/integration_test_glue.py @@ -566,11 +566,17 @@ def test_table_exists(test_catalog: Catalog, table_schema_nested: Schema, table_ assert test_catalog.table_exists((database_name, table_name)) is True -def test_register_table(test_catalog: Catalog, metadata_location: str, table_name: str, database_name: str) -> None: +def test_register_table_with_given_location( + test_catalog: Catalog, table_schema_nested: Schema, table_name: str, database_name: str +) -> None: identifier = (database_name, table_name) + new_identifier = (database_name, f"new_{table_name}") test_catalog.create_namespace(database_name) - location = metadata_location - table = test_catalog.register_table(identifier, location) - assert table.identifier == (CATALOG_NAME,) + identifier - assert table.metadata_location == metadata_location - assert test_catalog.table_exists((database_name, table_name)) is True + tbl = test_catalog.create_table(identifier, table_schema_nested) + location = tbl.metadata_location + test_catalog.drop_table(identifier) # drops the table but keeps the metadata file + assert not test_catalog.table_exists(identifier) + table = test_catalog.register_table(new_identifier, location) + assert table.identifier == (CATALOG_NAME,) + new_identifier + assert table.metadata_location == location + assert test_catalog.table_exists(new_identifier)