From aa5a1366ec5ba4ef27cf2547cb90b6cc1dddf4df Mon Sep 17 00:00:00 2001 From: Mehul Batra <66407733+MehulBatra@users.noreply.github.com> Date: Thu, 23 May 2024 02:54:13 +0530 Subject: [PATCH 01/68] [FEAT]register table using iceberg metadata file via pyiceberg (#711) --- pyiceberg/catalog/glue.py | 9 ++++++++- tests/catalog/integration_test_glue.py | 16 ++++++++++++++++ tests/catalog/test_glue.py | 14 ++++++++++++++ 3 files changed, 38 insertions(+), 1 deletion(-) diff --git a/pyiceberg/catalog/glue.py b/pyiceberg/catalog/glue.py index 275cda7ed0..8819c2e266 100644 --- a/pyiceberg/catalog/glue.py +++ b/pyiceberg/catalog/glue.py @@ -417,7 +417,14 @@ def register_table(self, identifier: Union[str, Identifier], metadata_location: Raises: TableAlreadyExistsError: If the table already exists """ - raise NotImplementedError + database_name, table_name = self.identifier_to_database_and_table(identifier) + properties = EMPTY_DICT + io = self._load_file_io(location=metadata_location) + file = io.new_input(metadata_location) + metadata = FromInputFile.table_metadata(file) + table_input = _construct_table_input(table_name, metadata_location, properties, metadata) + self._create_glue_table(database_name=database_name, table_name=table_name, table_input=table_input) + return self.load_table(identifier=identifier) def _commit_table(self, table_request: CommitTableRequest) -> CommitTableResponse: """Update the table. diff --git a/tests/catalog/integration_test_glue.py b/tests/catalog/integration_test_glue.py index 5b4aa58787..ee43779073 100644 --- a/tests/catalog/integration_test_glue.py +++ b/tests/catalog/integration_test_glue.py @@ -570,3 +570,19 @@ def test_table_exists(test_catalog: Catalog, table_schema_nested: Schema, table_ test_catalog.create_namespace(database_name) test_catalog.create_table((database_name, table_name), table_schema_nested) assert test_catalog.table_exists((database_name, table_name)) is True + + +def test_register_table_with_given_location( + test_catalog: Catalog, table_schema_nested: Schema, table_name: str, database_name: str +) -> None: + identifier = (database_name, table_name) + new_identifier = (database_name, f"new_{table_name}") + test_catalog.create_namespace(database_name) + tbl = test_catalog.create_table(identifier, table_schema_nested) + location = tbl.metadata_location + test_catalog.drop_table(identifier) # drops the table but keeps the metadata file + assert not test_catalog.table_exists(identifier) + table = test_catalog.register_table(new_identifier, location) + assert table.identifier == (CATALOG_NAME,) + new_identifier + assert table.metadata_location == location + assert test_catalog.table_exists(new_identifier) diff --git a/tests/catalog/test_glue.py b/tests/catalog/test_glue.py index 5b67b92c68..1aea46d6ef 100644 --- a/tests/catalog/test_glue.py +++ b/tests/catalog/test_glue.py @@ -848,3 +848,17 @@ def test_table_exists( assert test_catalog.table_exists(identifier) is True # Act and Assert for a non-existing table assert test_catalog.table_exists(('non', 'exist')) is False + + +@mock_aws +def test_register_table_with_given_location( + _bucket_initialize: None, moto_endpoint_url: str, metadata_location: str, database_name: str, table_name: str +) -> None: + catalog_name = "glue" + identifier = (database_name, table_name) + location = metadata_location + test_catalog = GlueCatalog(catalog_name, **{"s3.endpoint": moto_endpoint_url, "warehouse": f"s3://{BUCKET_NAME}"}) + test_catalog.create_namespace(namespace=database_name, properties={"location": 
f"s3://{BUCKET_NAME}/{database_name}.db"}) + table = test_catalog.register_table(identifier, location) + assert table.identifier == (catalog_name,) + identifier + assert test_catalog.table_exists(identifier) is True From 5537cb4394b580b0f8eb78c3c7c549fb863b0e99 Mon Sep 17 00:00:00 2001 From: SeungyeopShin <109323024+SeungyeopShin@users.noreply.github.com> Date: Thu, 23 May 2024 14:20:42 +0900 Subject: [PATCH 02/68] modify doc(backward compatibility) typo (#757) --- mkdocs/docs/configuration.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mkdocs/docs/configuration.md b/mkdocs/docs/configuration.md index 1ca071f009..c0879b1d28 100644 --- a/mkdocs/docs/configuration.md +++ b/mkdocs/docs/configuration.md @@ -298,4 +298,4 @@ PyIceberg uses multiple threads to parallelize operations. The number of workers # Backward Compatibility -Previous versions of Java (`<1.4.0`) implementations incorrectly assume the optional attribute `current-snapshot-id` to be a required attribute in TableMetadata. This means that if `current-snapshot-id` is missing in the metadata file (e.g. on table creation), the application will throw an exception without being able to load the table. This assumption has been corrected in more recent Iceberg versions. However, it is possible to force PyIceberg to create a table with a metadata file that will be compatible with previous versions. This can be configured by setting the `legacy-current-snapshot-id` entry as "True" in the configuration file, or by setting the `LEGACY_CURRENT_SNAPSHOT_ID` environment variable. Refer to the [PR discussion](https://github.com/apache/iceberg-python/pull/473) for more details on the issue +Previous versions of Java (`<1.4.0`) implementations incorrectly assume the optional attribute `current-snapshot-id` to be a required attribute in TableMetadata. This means that if `current-snapshot-id` is missing in the metadata file (e.g. on table creation), the application will throw an exception without being able to load the table. This assumption has been corrected in more recent Iceberg versions. However, it is possible to force PyIceberg to create a table with a metadata file that will be compatible with previous versions. This can be configured by setting the `legacy-current-snapshot-id` entry as "True" in the configuration file, or by setting the `PYICEBERG_LEGACY_CURRENT_SNAPSHOT_ID` environment variable. Refer to the [PR discussion](https://github.com/apache/iceberg-python/pull/473) for more details on the issue From e91766062cdff3ccbce068de115e6a301bae6730 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 23 May 2024 09:19:39 +0200 Subject: [PATCH 03/68] Bump requests from 2.32.1 to 2.32.2 (#759) updated-dependencies: - dependency-name: requests dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/poetry.lock b/poetry.lock index 3c075152af..c480c4a0d8 100644 --- a/poetry.lock +++ b/poetry.lock @@ -3584,13 +3584,13 @@ files = [ [[package]] name = "requests" -version = "2.32.1" +version = "2.32.2" description = "Python HTTP for Humans." 
optional = false python-versions = ">=3.8" files = [ - {file = "requests-2.32.1-py3-none-any.whl", hash = "sha256:21ac9465cdf8c1650fe1ecde8a71669a93d4e6f147550483a2967d08396a56a5"}, - {file = "requests-2.32.1.tar.gz", hash = "sha256:eb97e87e64c79e64e5b8ac75cee9dd1f97f49e289b083ee6be96268930725685"}, + {file = "requests-2.32.2-py3-none-any.whl", hash = "sha256:fc06670dd0ed212426dfeb94fc1b983d917c4f9847c863f313c9dfaaffb7c23c"}, + {file = "requests-2.32.2.tar.gz", hash = "sha256:dd951ff5ecf3e3b3aa26b40703ba77495dab41da839ae72ef3c8e5d8e2433289"}, ] [package.dependencies] From 7083b2e01d1259e75f77a8efd466d1291f5a352f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 23 May 2024 09:19:57 +0200 Subject: [PATCH 04/68] Bump griffe from 0.45.0 to 0.45.1 (#760) updated-dependencies: - dependency-name: griffe dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- mkdocs/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mkdocs/requirements.txt b/mkdocs/requirements.txt index ccf518ba9b..386f5b2301 100644 --- a/mkdocs/requirements.txt +++ b/mkdocs/requirements.txt @@ -16,7 +16,7 @@ # under the License. mkdocs==1.6.0 -griffe==0.45.0 +griffe==0.45.1 jinja2==3.1.4 mkdocstrings==0.25.1 mkdocstrings-python==1.10.2 From 03a0d65ac05d556d0815e61a016effc2b8993702 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 23 May 2024 09:20:14 +0200 Subject: [PATCH 05/68] Bump mypy-boto3-glue from 1.34.88 to 1.34.110 (#761) updated-dependencies: - dependency-name: mypy-boto3-glue dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/poetry.lock b/poetry.lock index c480c4a0d8..d19640868c 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2490,13 +2490,13 @@ files = [ [[package]] name = "mypy-boto3-glue" -version = "1.34.88" -description = "Type annotations for boto3.Glue 1.34.88 service generated with mypy-boto3-builder 7.23.2" +version = "1.34.110" +description = "Type annotations for boto3.Glue 1.34.110 service generated with mypy-boto3-builder 7.24.0" optional = true python-versions = ">=3.8" files = [ - {file = "mypy_boto3_glue-1.34.88-py3-none-any.whl", hash = "sha256:bb5c4ac3ac4806fb19ff3bebe2400635cf0d959e4a086a3de36b0eccbf04febc"}, - {file = "mypy_boto3_glue-1.34.88.tar.gz", hash = "sha256:7626368b66c92236f57008bf56303f3eda1ef2705ffe0d2cd845b1b877eb0596"}, + {file = "mypy_boto3_glue-1.34.110-py3-none-any.whl", hash = "sha256:795eca329426bf1ae3dc95090cccafcd7b3d91c4c594dac4db1fd9d6c72390c9"}, + {file = "mypy_boto3_glue-1.34.110.tar.gz", hash = "sha256:80d39849ac10ad9d57d85b94016fce8caba2cb70a3544b5b8b9bf0713ab3a041"}, ] [package.dependencies] From 996afd0c44717d6ac345b8419bf01b25be2d6051 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 23 May 2024 10:22:54 +0200 Subject: [PATCH 06/68] Bump mkdocstrings-python from 1.10.2 to 1.10.3 (#762) Bumps [mkdocstrings-python](https://github.com/mkdocstrings/python) from 1.10.2 to 1.10.3. 
- [Release notes](https://github.com/mkdocstrings/python/releases) - [Changelog](https://github.com/mkdocstrings/python/blob/main/CHANGELOG.md) - [Commits](https://github.com/mkdocstrings/python/compare/1.10.2...1.10.3) --- updated-dependencies: - dependency-name: mkdocstrings-python dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- mkdocs/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mkdocs/requirements.txt b/mkdocs/requirements.txt index 386f5b2301..5d375f50f3 100644 --- a/mkdocs/requirements.txt +++ b/mkdocs/requirements.txt @@ -19,7 +19,7 @@ mkdocs==1.6.0 griffe==0.45.1 jinja2==3.1.4 mkdocstrings==0.25.1 -mkdocstrings-python==1.10.2 +mkdocstrings-python==1.10.3 mkdocs-literate-nav==0.6.1 mkdocs-autorefs==1.0.1 mkdocs-gen-files==0.5.0 From eba4beeff046dd92d234fe7779fdbe76d61bd1bf Mon Sep 17 00:00:00 2001 From: Drew Gallardo Date: Thu, 23 May 2024 02:39:26 -0700 Subject: [PATCH 07/68] Initial implementation of the manifest table (#717) --- mkdocs/docs/api.md | 50 ++++++++++++++ pyiceberg/table/__init__.py | 89 +++++++++++++++++++++++++ tests/integration/test_inspect_table.py | 83 +++++++++++++++++++++++ 3 files changed, 222 insertions(+) diff --git a/mkdocs/docs/api.md b/mkdocs/docs/api.md index 0bc23fb0dc..70b5fd62eb 100644 --- a/mkdocs/docs/api.md +++ b/mkdocs/docs/api.md @@ -606,6 +606,56 @@ min_snapshots_to_keep: [[null,10]] max_snapshot_age_in_ms: [[null,604800000]] ``` +### Manifests + +To show a table's current file manifests: + +```python +table.inspect.manifests() +``` + +``` +pyarrow.Table +content: int8 not null +path: string not null +length: int64 not null +partition_spec_id: int32 not null +added_snapshot_id: int64 not null +added_data_files_count: int32 not null +existing_data_files_count: int32 not null +deleted_data_files_count: int32 not null +added_delete_files_count: int32 not null +existing_delete_files_count: int32 not null +deleted_delete_files_count: int32 not null +partition_summaries: list> not null + child 0, item: struct + child 0, contains_null: bool not null + child 1, contains_nan: bool + child 2, lower_bound: string + child 3, upper_bound: string +---- +content: [[0]] +path: [["s3://warehouse/default/table_metadata_manifests/metadata/3bf5b4c6-a7a4-4b43-a6ce-ca2b4887945a-m0.avro"]] +length: [[6886]] +partition_spec_id: [[0]] +added_snapshot_id: [[3815834705531553721]] +added_data_files_count: [[1]] +existing_data_files_count: [[0]] +deleted_data_files_count: [[0]] +added_delete_files_count: [[0]] +existing_delete_files_count: [[0]] +deleted_delete_files_count: [[0]] +partition_summaries: [[ -- is_valid: all not null + -- child 0 type: bool +[false] + -- child 1 type: bool +[false] + -- child 2 type: string +["test"] + -- child 3 type: string +["test"]]] +``` + ## Add Files Expert Iceberg users may choose to commit existing parquet files to the Iceberg table as data files, without rewriting them. 
diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py index c57f0d1297..74b0225dbe 100644 --- a/pyiceberg/table/__init__.py +++ b/pyiceberg/table/__init__.py @@ -71,6 +71,7 @@ ManifestEntry, ManifestEntryStatus, ManifestFile, + PartitionFieldSummary, write_manifest, write_manifest_list, ) @@ -3547,6 +3548,94 @@ def update_partitions_map( schema=table_schema, ) + def manifests(self) -> "pa.Table": + import pyarrow as pa + + from pyiceberg.conversions import from_bytes + + partition_summary_schema = pa.struct([ + pa.field("contains_null", pa.bool_(), nullable=False), + pa.field("contains_nan", pa.bool_(), nullable=True), + pa.field("lower_bound", pa.string(), nullable=True), + pa.field("upper_bound", pa.string(), nullable=True), + ]) + + manifest_schema = pa.schema([ + pa.field('content', pa.int8(), nullable=False), + pa.field('path', pa.string(), nullable=False), + pa.field('length', pa.int64(), nullable=False), + pa.field('partition_spec_id', pa.int32(), nullable=False), + pa.field('added_snapshot_id', pa.int64(), nullable=False), + pa.field('added_data_files_count', pa.int32(), nullable=False), + pa.field('existing_data_files_count', pa.int32(), nullable=False), + pa.field('deleted_data_files_count', pa.int32(), nullable=False), + pa.field('added_delete_files_count', pa.int32(), nullable=False), + pa.field('existing_delete_files_count', pa.int32(), nullable=False), + pa.field('deleted_delete_files_count', pa.int32(), nullable=False), + pa.field('partition_summaries', pa.list_(partition_summary_schema), nullable=False), + ]) + + def _partition_summaries_to_rows( + spec: PartitionSpec, partition_summaries: List[PartitionFieldSummary] + ) -> List[Dict[str, Any]]: + rows = [] + for i, field_summary in enumerate(partition_summaries): + field = spec.fields[i] + partition_field_type = spec.partition_type(self.tbl.schema()).fields[i].field_type + lower_bound = ( + ( + field.transform.to_human_string( + partition_field_type, from_bytes(partition_field_type, field_summary.lower_bound) + ) + ) + if field_summary.lower_bound + else None + ) + upper_bound = ( + ( + field.transform.to_human_string( + partition_field_type, from_bytes(partition_field_type, field_summary.upper_bound) + ) + ) + if field_summary.upper_bound + else None + ) + rows.append({ + 'contains_null': field_summary.contains_null, + 'contains_nan': field_summary.contains_nan, + 'lower_bound': lower_bound, + 'upper_bound': upper_bound, + }) + return rows + + specs = self.tbl.metadata.specs() + manifests = [] + if snapshot := self.tbl.metadata.current_snapshot(): + for manifest in snapshot.manifests(self.tbl.io): + is_data_file = manifest.content == ManifestContent.DATA + is_delete_file = manifest.content == ManifestContent.DELETES + manifests.append({ + 'content': manifest.content, + 'path': manifest.manifest_path, + 'length': manifest.manifest_length, + 'partition_spec_id': manifest.partition_spec_id, + 'added_snapshot_id': manifest.added_snapshot_id, + 'added_data_files_count': manifest.added_files_count if is_data_file else 0, + 'existing_data_files_count': manifest.existing_files_count if is_data_file else 0, + 'deleted_data_files_count': manifest.deleted_files_count if is_data_file else 0, + 'added_delete_files_count': manifest.added_files_count if is_delete_file else 0, + 'existing_delete_files_count': manifest.existing_files_count if is_delete_file else 0, + 'deleted_delete_files_count': manifest.deleted_files_count if is_delete_file else 0, + 'partition_summaries': 
_partition_summaries_to_rows(specs[manifest.partition_spec_id], manifest.partitions) + if manifest.partitions + else [], + }) + + return pa.Table.from_pylist( + manifests, + schema=manifest_schema, + ) + @dataclass(frozen=True) class TablePartition: diff --git a/tests/integration/test_inspect_table.py b/tests/integration/test_inspect_table.py index a884f9d4c0..8665435e43 100644 --- a/tests/integration/test_inspect_table.py +++ b/tests/integration/test_inspect_table.py @@ -445,3 +445,86 @@ def check_pyiceberg_df_equals_spark_df(df: pa.Table, spark_df: DataFrame) -> Non df = tbl.inspect.partitions(snapshot_id=snapshot.snapshot_id) spark_df = spark.sql(f"SELECT * FROM {identifier}.partitions VERSION AS OF {snapshot.snapshot_id}") check_pyiceberg_df_equals_spark_df(df, spark_df) + + +@pytest.mark.integration +@pytest.mark.parametrize("format_version", [1, 2]) +def test_inspect_manifests(spark: SparkSession, session_catalog: Catalog, format_version: int) -> None: + identifier = "default.table_metadata_manifests" + try: + session_catalog.drop_table(identifier=identifier) + except NoSuchTableError: + pass + + spark.sql( + f""" + CREATE TABLE {identifier} ( + id int, + data string + ) + PARTITIONED BY (data) + """ + ) + + spark.sql( + f""" + INSERT INTO {identifier} VALUES (1, "a") + """ + ) + + spark.sql( + f""" + INSERT INTO {identifier} VALUES (2, "b") + """ + ) + + df = session_catalog.load_table(identifier).inspect.manifests() + + assert df.column_names == [ + 'content', + 'path', + 'length', + 'partition_spec_id', + 'added_snapshot_id', + 'added_data_files_count', + 'existing_data_files_count', + 'deleted_data_files_count', + 'added_delete_files_count', + 'existing_delete_files_count', + 'deleted_delete_files_count', + 'partition_summaries', + ] + + int_cols = [ + 'content', + 'length', + 'partition_spec_id', + 'added_snapshot_id', + 'added_data_files_count', + 'existing_data_files_count', + 'deleted_data_files_count', + 'added_delete_files_count', + 'existing_delete_files_count', + 'deleted_delete_files_count', + ] + + for column in int_cols: + for value in df[column]: + assert isinstance(value.as_py(), int) + + for value in df["path"]: + assert isinstance(value.as_py(), str) + + for value in df["partition_summaries"]: + assert isinstance(value.as_py(), list) + for row in value: + assert isinstance(row["contains_null"].as_py(), bool) + assert isinstance(row["contains_nan"].as_py(), (bool, type(None))) + assert isinstance(row["lower_bound"].as_py(), (str, type(None))) + assert isinstance(row["upper_bound"].as_py(), (str, type(None))) + + lhs = spark.table(f"{identifier}.manifests").toPandas() + rhs = df.to_pandas() + for column in df.column_names: + for left, right in zip(lhs[column].to_list(), rhs[column].to_list()): + assert left == right, f"Difference in column {column}: {left} != {right}" From 42afc439d362ef1b3dcff03a1ffd959bc0a399ca Mon Sep 17 00:00:00 2001 From: Christian Date: Thu, 23 May 2024 11:41:10 +0200 Subject: [PATCH 08/68] Fix: Table-Exists if Server returns 204 (#739) * Fix: Table-Exists if Server returns 204 * Add test for table exist 204 return code --- pyiceberg/catalog/rest.py | 2 +- tests/catalog/test_rest.py | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/pyiceberg/catalog/rest.py b/pyiceberg/catalog/rest.py index 7259f9fa38..afd5818662 100644 --- a/pyiceberg/catalog/rest.py +++ b/pyiceberg/catalog/rest.py @@ -790,4 +790,4 @@ def table_exists(self, identifier: Union[str, Identifier]) -> bool: response = self._session.head( 
self.url(Endpoints.load_table, prefixed=True, **self._split_identifier_for_path(identifier_tuple)) ) - return response.status_code == 200 + return response.status_code in (200, 204) diff --git a/tests/catalog/test_rest.py b/tests/catalog/test_rest.py index ec5a6a22a4..b5c626d6f0 100644 --- a/tests/catalog/test_rest.py +++ b/tests/catalog/test_rest.py @@ -691,6 +691,16 @@ def test_table_exist_200(rest_mock: Mocker) -> None: assert catalog.table_exists(("fokko", "table")) +def test_table_exist_204(rest_mock: Mocker) -> None: + rest_mock.head( + f"{TEST_URI}v1/namespaces/fokko/tables/table", + status_code=204, + request_headers=TEST_HEADERS, + ) + catalog = RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN) + assert catalog.table_exists(("fokko", "table")) + + def test_table_exist_500(rest_mock: Mocker) -> None: rest_mock.head( f"{TEST_URI}v1/namespaces/fokko/tables/table", From 959718a5ede2bcfae5ccf7e54857bab5736f5aea Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 24 May 2024 23:24:23 -0700 Subject: [PATCH 09/68] Bump duckdb from 0.10.2 to 0.10.3 (#764) Bumps [duckdb](https://github.com/duckdb/duckdb) from 0.10.2 to 0.10.3. - [Release notes](https://github.com/duckdb/duckdb/releases) - [Changelog](https://github.com/duckdb/duckdb/blob/main/tools/release-pip.py) - [Commits](https://github.com/duckdb/duckdb/compare/v0.10.2...v0.10.3) --- updated-dependencies: - dependency-name: duckdb dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 96 ++++++++++++++++++++++++++--------------------------- 1 file changed, 48 insertions(+), 48 deletions(-) diff --git a/poetry.lock b/poetry.lock index d19640868c..218913620b 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1030,58 +1030,58 @@ files = [ [[package]] name = "duckdb" -version = "0.10.2" +version = "0.10.3" description = "DuckDB in-process database" optional = true python-versions = ">=3.7.0" files = [ - {file = "duckdb-0.10.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3891d3ac03e12a3e5c43afa3020fe701f64060f52d25f429a1ed7b5d914368d3"}, - {file = "duckdb-0.10.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4f63877651f1fb940e049dc53038eb763856616319acf4f892b1c3ed074f5ab0"}, - {file = "duckdb-0.10.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:06e3a36f04f4d98d2c0bbdd63e517cfbe114a795306e26ec855e62e076af5043"}, - {file = "duckdb-0.10.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cf5f95ad5b75c8e65c6508b4df02043dd0b9d97712b9a33236ad77c388ce7861"}, - {file = "duckdb-0.10.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ff62bc98278c98fecbd6eecec5d698ad41ebd654110feaadbf8ac8bb59b1ecf"}, - {file = "duckdb-0.10.2-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:cceede13fde095c23cf9a53adf7c414c7bfb21b9a7aa6a4836014fdbecbfca70"}, - {file = "duckdb-0.10.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:acdfff60b7efccd7f731213a9795851256249dfacf80367074b2b2e144f716dd"}, - {file = "duckdb-0.10.2-cp310-cp310-win_amd64.whl", hash = "sha256:4a5d5655cf0bdaf664a6f332afe465e02b08cef715548a0983bb7aef48da06a6"}, - {file = "duckdb-0.10.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:a9d15842876d18763e085648656cccc7660a215d16254906db5c4471be2c7732"}, - {file = "duckdb-0.10.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:c88cdcdc8452c910e4298223e7d9fca291534ff5aa36090aa49c9e6557550b13"}, - {file = "duckdb-0.10.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:364cd6f5dc8a1010d144d08c410ba9a74c521336ee5bda84fabc6616216a6d6a"}, - {file = "duckdb-0.10.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4c57c11d1060296f5e9ebfb5bb7e5521e0d77912e8f9ff43c90240c3311e9de9"}, - {file = "duckdb-0.10.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:186d86b8dda8e1076170eb770bb2bb73ea88ca907d92885c9695d6515207b205"}, - {file = "duckdb-0.10.2-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5f65b62f31c6bff21afc0261cfe28d238b8f34ec78f339546b12f4740c39552a"}, - {file = "duckdb-0.10.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:a860d7466a5c93714cdd94559ce9e1db2ab91914f0941c25e5e93d4ebe36a5fa"}, - {file = "duckdb-0.10.2-cp311-cp311-win_amd64.whl", hash = "sha256:33308190e9c7f05a3a0a2d46008a043effd4eae77011869d7c18fb37acdd9215"}, - {file = "duckdb-0.10.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:3a8b2f1229b4aecb79cd28ffdb99032b1497f0a805d0da1136a9b6115e1afc70"}, - {file = "duckdb-0.10.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:d23a6dea61963733a0f45a0d0bbb1361fb2a47410ed5ff308b4a1f869d4eeb6f"}, - {file = "duckdb-0.10.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:20ee0aa27e688aa52a40b434ec41a50431d0b06edeab88edc2feaca18d82c62c"}, - {file = "duckdb-0.10.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:80a6d43d9044f0997a15a92e0c0ff3afd21151a1e572a92f439cc4f56b7090e1"}, - {file = "duckdb-0.10.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6934758cacd06029a5c9f54556a43bd277a86757e22bf8d0dd11ca15c1813d1c"}, - {file = "duckdb-0.10.2-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7a11e2d68bd79044eea5486b1cddb5b915115f537e5c74eeb94c768ce30f9f4b"}, - {file = "duckdb-0.10.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:0bf58385c43b8e448a2fea7e8729054934bf73ea616d1d7ef8184eda07f975e2"}, - {file = "duckdb-0.10.2-cp312-cp312-win_amd64.whl", hash = "sha256:eae75c7014597ded6e7f6dc51e32d48362a31608acd73e9f795748ee94335a54"}, - {file = "duckdb-0.10.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:62e89deff778a7a86f651802b947a3466425f6cce41e9d7d412d39e492932943"}, - {file = "duckdb-0.10.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f87e555fd36ec6da316b727a39fb24c53124a797dfa9b451bdea87b2f20a351f"}, - {file = "duckdb-0.10.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:41e8b34b1a944590ebcf82f8cc59d67b084fe99479f048892d60da6c1402c386"}, - {file = "duckdb-0.10.2-cp37-cp37m-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2c68c6dde2773774cf2371522a3959ea2716fc2b3a4891d4066f0e426455fe19"}, - {file = "duckdb-0.10.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:ff6a8a0980d0f9398fa461deffa59465dac190d707468478011ea8a5fe1f2c81"}, - {file = "duckdb-0.10.2-cp37-cp37m-win_amd64.whl", hash = "sha256:728dd4ff0efda387a424754e5508d4f8c72a272c2d3ccb036a83286f60b46002"}, - {file = "duckdb-0.10.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:c461d6b4619e80170044a9eb999bbf4097e330d3a4974ced0a7eaeb79c7c39f6"}, - {file = "duckdb-0.10.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:909351ff72eb3b50b89761251148d8a186594d8a438e12dcf5494794caff6693"}, - {file = "duckdb-0.10.2-cp38-cp38-macosx_11_0_arm64.whl", hash = 
"sha256:d9eeb8393d69abafd355b869669957eb85b89e4df677e420b9ef0693b7aa6cb4"}, - {file = "duckdb-0.10.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3102bcf5011e8f82ea3c2bde43108774fe5a283a410d292c0843610ea13e2237"}, - {file = "duckdb-0.10.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d64d443613e5f16caf7d67102733538c90f7715867c1a98597efd3babca068e3"}, - {file = "duckdb-0.10.2-cp38-cp38-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:cb31398826d1b7473344e5ee8e0f826370c9752549469ba1327042ace9041f80"}, - {file = "duckdb-0.10.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:d09dcec467cd6127d5cc1fb0ce4efbd77e761882d9d772b0f64fc2f79a2a1cde"}, - {file = "duckdb-0.10.2-cp38-cp38-win_amd64.whl", hash = "sha256:82fab1a24faf7c33d8a7afed08b57ee36e8821a3a68a2f1574cd238ea440bba0"}, - {file = "duckdb-0.10.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:38607e6e6618e8ea28c8d9b67aa9e22cfd6d6d673f2e8ab328bd6e867b697f69"}, - {file = "duckdb-0.10.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:fb0c23bc8c09615bff38aebcf8e92e6ae74959c67b3c9e5b00edddc730bf22be"}, - {file = "duckdb-0.10.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:00576c11c78c83830ab483bad968e07cd9b5f730e7ffaf5aa5fadee5ac4f71e9"}, - {file = "duckdb-0.10.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:077db692cdda50c4684ef87dc2a68507665804caa90e539dbe819116bda722ad"}, - {file = "duckdb-0.10.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ca25984ad9f9a04e46e8359f852668c11569534e3bb8424b80be711303ad2314"}, - {file = "duckdb-0.10.2-cp39-cp39-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6a72cc40982c7b92cf555e574618fc711033b013bf258b611ba18d7654c89d8c"}, - {file = "duckdb-0.10.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d27b9efd6e788eb561535fdc0cbc7c74aca1ff39f748b7cfc27aa49b00e22da1"}, - {file = "duckdb-0.10.2-cp39-cp39-win_amd64.whl", hash = "sha256:4800469489bc262dda61a7f1d40acedf67cf2454874e9d8bbf07920dc2b147e6"}, - {file = "duckdb-0.10.2.tar.gz", hash = "sha256:0f609c9d5f941f1ecde810f010dd9321cd406a552c1df20318a13fa64247f67f"}, + {file = "duckdb-0.10.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cd25cc8d001c09a19340739ba59d33e12a81ab285b7a6bed37169655e1cefb31"}, + {file = "duckdb-0.10.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2f9259c637b917ca0f4c63887e8d9b35ec248f5d987c886dfc4229d66a791009"}, + {file = "duckdb-0.10.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b48f5f1542f1e4b184e6b4fc188f497be8b9c48127867e7d9a5f4a3e334f88b0"}, + {file = "duckdb-0.10.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e327f7a3951ea154bb56e3fef7da889e790bd9a67ca3c36afc1beb17d3feb6d6"}, + {file = "duckdb-0.10.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d8b20ed67da004b4481973f4254fd79a0e5af957d2382eac8624b5c527ec48c"}, + {file = "duckdb-0.10.3-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d37680b8d7be04e4709db3a66c8b3eb7ceba2a5276574903528632f2b2cc2e60"}, + {file = "duckdb-0.10.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3d34b86d6a2a6dfe8bb757f90bfe7101a3bd9e3022bf19dbddfa4b32680d26a9"}, + {file = "duckdb-0.10.3-cp310-cp310-win_amd64.whl", hash = "sha256:73b1cb283ca0f6576dc18183fd315b4e487a545667ffebbf50b08eb4e8cdc143"}, + {file = "duckdb-0.10.3-cp311-cp311-macosx_10_9_universal2.whl", hash = 
"sha256:d917dde19fcec8cadcbef1f23946e85dee626ddc133e1e3f6551f15a61a03c61"}, + {file = "duckdb-0.10.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:46757e0cf5f44b4cb820c48a34f339a9ccf83b43d525d44947273a585a4ed822"}, + {file = "duckdb-0.10.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:338c14d8ac53ac4aa9ec03b6f1325ecfe609ceeb72565124d489cb07f8a1e4eb"}, + {file = "duckdb-0.10.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:651fcb429602b79a3cf76b662a39e93e9c3e6650f7018258f4af344c816dab72"}, + {file = "duckdb-0.10.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d3ae3c73b98b6215dab93cc9bc936b94aed55b53c34ba01dec863c5cab9f8e25"}, + {file = "duckdb-0.10.3-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:56429b2cfe70e367fb818c2be19f59ce2f6b080c8382c4d10b4f90ba81f774e9"}, + {file = "duckdb-0.10.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b46c02c2e39e3676b1bb0dc7720b8aa953734de4fd1b762e6d7375fbeb1b63af"}, + {file = "duckdb-0.10.3-cp311-cp311-win_amd64.whl", hash = "sha256:bcd460feef56575af2c2443d7394d405a164c409e9794a4d94cb5fdaa24a0ba4"}, + {file = "duckdb-0.10.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:e229a7c6361afbb0d0ab29b1b398c10921263c52957aefe3ace99b0426fdb91e"}, + {file = "duckdb-0.10.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:732b1d3b6b17bf2f32ea696b9afc9e033493c5a3b783c292ca4b0ee7cc7b0e66"}, + {file = "duckdb-0.10.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f5380d4db11fec5021389fb85d614680dc12757ef7c5881262742250e0b58c75"}, + {file = "duckdb-0.10.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:468a4e0c0b13c55f84972b1110060d1b0f854ffeb5900a178a775259ec1562db"}, + {file = "duckdb-0.10.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0fa1e7ff8d18d71defa84e79f5c86aa25d3be80d7cb7bc259a322de6d7cc72da"}, + {file = "duckdb-0.10.3-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ed1063ed97c02e9cf2e7fd1d280de2d1e243d72268330f45344c69c7ce438a01"}, + {file = "duckdb-0.10.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:22f2aad5bb49c007f3bfcd3e81fdedbc16a2ae41f2915fc278724ca494128b0c"}, + {file = "duckdb-0.10.3-cp312-cp312-win_amd64.whl", hash = "sha256:8f9e2bb00a048eb70b73a494bdc868ce7549b342f7ffec88192a78e5a4e164bd"}, + {file = "duckdb-0.10.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:a6c2fc49875b4b54e882d68703083ca6f84b27536d57d623fc872e2f502b1078"}, + {file = "duckdb-0.10.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a66c125d0c30af210f7ee599e7821c3d1a7e09208196dafbf997d4e0cfcb81ab"}, + {file = "duckdb-0.10.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d99dd7a1d901149c7a276440d6e737b2777e17d2046f5efb0c06ad3b8cb066a6"}, + {file = "duckdb-0.10.3-cp37-cp37m-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5ec3bbdb209e6095d202202893763e26c17c88293b88ef986b619e6c8b6715bd"}, + {file = "duckdb-0.10.3-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:2b3dec4ef8ed355d7b7230b40950b30d0def2c387a2e8cd7efc80b9d14134ecf"}, + {file = "duckdb-0.10.3-cp37-cp37m-win_amd64.whl", hash = "sha256:04129f94fb49bba5eea22f941f0fb30337f069a04993048b59e2811f52d564bc"}, + {file = "duckdb-0.10.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:d75d67024fc22c8edfd47747c8550fb3c34fb1cbcbfd567e94939ffd9c9e3ca7"}, + {file = "duckdb-0.10.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = 
"sha256:f3796e9507c02d0ddbba2e84c994fae131da567ce3d9cbb4cbcd32fadc5fbb26"}, + {file = "duckdb-0.10.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:78e539d85ebd84e3e87ec44d28ad912ca4ca444fe705794e0de9be3dd5550c11"}, + {file = "duckdb-0.10.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7a99b67ac674b4de32073e9bc604b9c2273d399325181ff50b436c6da17bf00a"}, + {file = "duckdb-0.10.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1209a354a763758c4017a1f6a9f9b154a83bed4458287af9f71d84664ddb86b6"}, + {file = "duckdb-0.10.3-cp38-cp38-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3b735cea64aab39b67c136ab3a571dbf834067f8472ba2f8bf0341bc91bea820"}, + {file = "duckdb-0.10.3-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:816ffb9f758ed98eb02199d9321d592d7a32a6cb6aa31930f4337eb22cfc64e2"}, + {file = "duckdb-0.10.3-cp38-cp38-win_amd64.whl", hash = "sha256:1631184b94c3dc38b13bce4045bf3ae7e1b0ecbfbb8771eb8d751d8ffe1b59b3"}, + {file = "duckdb-0.10.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:fb98c35fc8dd65043bc08a2414dd9f59c680d7e8656295b8969f3f2061f26c52"}, + {file = "duckdb-0.10.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:7e75c9f5b6a92b2a6816605c001d30790f6d67ce627a2b848d4d6040686efdf9"}, + {file = "duckdb-0.10.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ae786eddf1c2fd003466e13393b9348a44b6061af6fe7bcb380a64cac24e7df7"}, + {file = "duckdb-0.10.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b9387da7b7973707b0dea2588749660dd5dd724273222680e985a2dd36787668"}, + {file = "duckdb-0.10.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:538f943bf9fa8a3a7c4fafa05f21a69539d2c8a68e557233cbe9d989ae232899"}, + {file = "duckdb-0.10.3-cp39-cp39-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6930608f35025a73eb94252964f9f19dd68cf2aaa471da3982cf6694866cfa63"}, + {file = "duckdb-0.10.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:03bc54a9cde5490918aad82d7d2a34290e3dfb78d5b889c6626625c0f141272a"}, + {file = "duckdb-0.10.3-cp39-cp39-win_amd64.whl", hash = "sha256:372b6e3901d85108cafe5df03c872dfb6f0dbff66165a0cf46c47246c1957aa0"}, + {file = "duckdb-0.10.3.tar.gz", hash = "sha256:c5bd84a92bc708d3a6adffe1f554b94c6e76c795826daaaf482afc3d9c636971"}, ] [[package]] From ed83e84edc860be59fb6663b3985afe47ca5340e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 24 May 2024 23:41:59 -0700 Subject: [PATCH 10/68] Bump griffe from 0.45.1 to 0.45.2 (#765) Bumps [griffe](https://github.com/mkdocstrings/griffe) from 0.45.1 to 0.45.2. - [Release notes](https://github.com/mkdocstrings/griffe/releases) - [Changelog](https://github.com/mkdocstrings/griffe/blob/main/CHANGELOG.md) - [Commits](https://github.com/mkdocstrings/griffe/compare/0.45.1...0.45.2) --- updated-dependencies: - dependency-name: griffe dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- mkdocs/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mkdocs/requirements.txt b/mkdocs/requirements.txt index 5d375f50f3..f1c7b0e06d 100644 --- a/mkdocs/requirements.txt +++ b/mkdocs/requirements.txt @@ -16,7 +16,7 @@ # under the License. 
mkdocs==1.6.0 -griffe==0.45.1 +griffe==0.45.2 jinja2==3.1.4 mkdocstrings==0.25.1 mkdocstrings-python==1.10.3 From b8023d294ac057abc15a19f05ee212517362d3d5 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 24 May 2024 23:42:12 -0700 Subject: [PATCH 11/68] Bump typing-extensions from 4.11.0 to 4.12.0 (#767) Bumps [typing-extensions](https://github.com/python/typing_extensions) from 4.11.0 to 4.12.0. - [Release notes](https://github.com/python/typing_extensions/releases) - [Changelog](https://github.com/python/typing_extensions/blob/main/CHANGELOG.md) - [Commits](https://github.com/python/typing_extensions/compare/4.11.0...4.12.0) --- updated-dependencies: - dependency-name: typing-extensions dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 8 ++++---- pyproject.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/poetry.lock b/poetry.lock index 218913620b..a0352f0434 100644 --- a/poetry.lock +++ b/poetry.lock @@ -4083,13 +4083,13 @@ telegram = ["requests"] [[package]] name = "typing-extensions" -version = "4.11.0" +version = "4.12.0" description = "Backported and Experimental Type Hints for Python 3.8+" optional = false python-versions = ">=3.8" files = [ - {file = "typing_extensions-4.11.0-py3-none-any.whl", hash = "sha256:c1f94d72897edaf4ce775bb7558d5b79d8126906a14ea5ed1635921406c0387a"}, - {file = "typing_extensions-4.11.0.tar.gz", hash = "sha256:83f085bd5ca59c80295fc2a82ab5dac679cbe02b9f33f7d83af68e241bea51b0"}, + {file = "typing_extensions-4.12.0-py3-none-any.whl", hash = "sha256:b349c66bea9016ac22978d800cfff206d5f9816951f12a7d0ec5578b0a819594"}, + {file = "typing_extensions-4.12.0.tar.gz", hash = "sha256:8cbcdc8606ebcb0d95453ad7dc5065e6237b6aa230a31e81d0f440c30fed5fd8"}, ] [[package]] @@ -4462,4 +4462,4 @@ zstandard = ["zstandard"] [metadata] lock-version = "2.0" python-versions = "^3.8" -content-hash = "2c019a99dfec370111ef19bae1ca7e00f434cec159296f5fcf4aee1b4552ba06" +content-hash = "8024e9ca0aa700346e902b232337c8bad69e5cd6e482db4999446f6177e7646d" diff --git a/pyproject.toml b/pyproject.toml index fafa5231a2..3a928ec47c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -82,7 +82,7 @@ fastavro = "1.9.4" coverage = { version = "^7.4.2", extras = ["toml"] } requests-mock = "1.12.1" moto = { version = "^5.0.2", extras = ["server"] } -typing-extensions = "4.11.0" +typing-extensions = "4.12.0" pytest-mock = "3.14.0" pyspark = "3.5.1" cython = "3.0.10" From a132be1fdb64f99ae4b3ddaf5dfe7991ba1917c4 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 28 May 2024 07:03:46 +0200 Subject: [PATCH 12/68] Bump mkdocs-material from 9.5.24 to 9.5.25 (#770) --- mkdocs/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mkdocs/requirements.txt b/mkdocs/requirements.txt index f1c7b0e06d..22ded02b4c 100644 --- a/mkdocs/requirements.txt +++ b/mkdocs/requirements.txt @@ -23,6 +23,6 @@ mkdocstrings-python==1.10.3 mkdocs-literate-nav==0.6.1 mkdocs-autorefs==1.0.1 mkdocs-gen-files==0.5.0 -mkdocs-material==9.5.24 +mkdocs-material==9.5.25 mkdocs-material-extensions==1.3.1 mkdocs-section-index==0.3.9 From 8968996fbde6e7892e0b576837acb335a52d9caf Mon Sep 17 00:00:00 2001 From: Kev Wang Date: Mon, 27 May 2024 22:47:46 -0700 Subject: [PATCH 13/68] Add azure configuration variables 
(#745) --- pyiceberg/io/__init__.py | 7 +++++++ pyiceberg/io/fsspec.py | 21 ++++++++++++++------- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/pyiceberg/io/__init__.py b/pyiceberg/io/__init__.py index 4b5e99d336..1a78f306c6 100644 --- a/pyiceberg/io/__init__.py +++ b/pyiceberg/io/__init__.py @@ -57,6 +57,13 @@ HDFS_PORT = "hdfs.port" HDFS_USER = "hdfs.user" HDFS_KERB_TICKET = "hdfs.kerberos_ticket" +ADLFS_CONNECTION_STRING = "adlfs.connection-string" +ADLFS_ACCOUNT_NAME = "adlfs.account-name" +ADLFS_ACCOUNT_KEY = "adlfs.account-key" +ADLFS_SAS_TOKEN = "adlfs.sas-token" +ADLFS_TENANT_ID = "adlfs.tenant-id" +ADLFS_CLIENT_ID = "adlfs.client-id" +ADLFS_ClIENT_SECRET = "adlfs.client-secret" GCS_TOKEN = "gcs.oauth2.token" GCS_TOKEN_EXPIRES_AT_MS = "gcs.oauth2.token-expires-at" GCS_PROJECT_ID = "gcs.project-id" diff --git a/pyiceberg/io/fsspec.py b/pyiceberg/io/fsspec.py index ee97829c2e..1089c9fe50 100644 --- a/pyiceberg/io/fsspec.py +++ b/pyiceberg/io/fsspec.py @@ -40,6 +40,12 @@ from pyiceberg.catalog import TOKEN from pyiceberg.exceptions import SignError from pyiceberg.io import ( + ADLFS_ACCOUNT_KEY, + ADLFS_ACCOUNT_NAME, + ADLFS_CLIENT_ID, + ADLFS_CONNECTION_STRING, + ADLFS_SAS_TOKEN, + ADLFS_TENANT_ID, GCS_ACCESS, GCS_CACHE_TIMEOUT, GCS_CONSISTENCY, @@ -57,6 +63,7 @@ S3_REGION, S3_SECRET_ACCESS_KEY, S3_SESSION_TOKEN, + ADLFS_ClIENT_SECRET, FileIO, InputFile, InputStream, @@ -163,13 +170,13 @@ def _adlfs(properties: Properties) -> AbstractFileSystem: from adlfs import AzureBlobFileSystem return AzureBlobFileSystem( - connection_string=properties.get("adlfs.connection-string"), - account_name=properties.get("adlfs.account-name"), - account_key=properties.get("adlfs.account-key"), - sas_token=properties.get("adlfs.sas-token"), - tenant_id=properties.get("adlfs.tenant-id"), - client_id=properties.get("adlfs.client-id"), - client_secret=properties.get("adlfs.client-secret"), + connection_string=properties.get(ADLFS_CONNECTION_STRING), + account_name=properties.get(ADLFS_ACCOUNT_NAME), + account_key=properties.get(ADLFS_ACCOUNT_KEY), + sas_token=properties.get(ADLFS_SAS_TOKEN), + tenant_id=properties.get(ADLFS_TENANT_ID), + client_id=properties.get(ADLFS_CLIENT_ID), + client_secret=properties.get(ADLFS_ClIENT_SECRET), ) From ee2a7c52b835c98dd08e74e9ef26c356692304a0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 27 May 2024 23:01:16 -0700 Subject: [PATCH 14/68] Bump moto from 5.0.7 to 5.0.8 (#771) Bumps [moto](https://github.com/getmoto/moto) from 5.0.7 to 5.0.8. - [Release notes](https://github.com/getmoto/moto/releases) - [Changelog](https://github.com/getmoto/moto/blob/master/CHANGELOG.md) - [Commits](https://github.com/getmoto/moto/compare/5.0.7...5.0.8) --- updated-dependencies: - dependency-name: moto dependency-type: direct:development update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/poetry.lock b/poetry.lock index a0352f0434..b56e2d89bd 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2212,13 +2212,13 @@ test = ["mypy (>=1.0)", "pytest (>=7.0.0)"] [[package]] name = "moto" -version = "5.0.7" +version = "5.0.8" description = "" optional = false python-versions = ">=3.8" files = [ - {file = "moto-5.0.7-py2.py3-none-any.whl", hash = "sha256:c0214c1361fb1dc85f587d9ce17cd988c6f69ff0ed54d43789654022e0e744f2"}, - {file = "moto-5.0.7.tar.gz", hash = "sha256:f2cde691dc4bc675e318a65f018902ac7f89d61bf2646052f7df215d212f069e"}, + {file = "moto-5.0.8-py2.py3-none-any.whl", hash = "sha256:7d1035e366434bfa9fcc0621f07d5aa724b6846408071d540137a0554c46f214"}, + {file = "moto-5.0.8.tar.gz", hash = "sha256:517fb808dc718bcbdda54c6ffeaca0adc34cf6e10821bfb01216ce420a31765c"}, ] [package.dependencies] From 54aacb41c57edce8ece9278acc7eb0a7e92ef03e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 27 May 2024 23:01:26 -0700 Subject: [PATCH 15/68] Bump coverage from 7.5.1 to 7.5.2 (#772) Bumps [coverage](https://github.com/nedbat/coveragepy) from 7.5.1 to 7.5.2. - [Release notes](https://github.com/nedbat/coveragepy/releases) - [Changelog](https://github.com/nedbat/coveragepy/blob/master/CHANGES.rst) - [Commits](https://github.com/nedbat/coveragepy/compare/7.5.1...7.5.2) --- updated-dependencies: - dependency-name: coverage dependency-type: direct:development update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 106 ++++++++++++++++++++++++++-------------------------- 1 file changed, 53 insertions(+), 53 deletions(-) diff --git a/poetry.lock b/poetry.lock index b56e2d89bd..4ef706c873 100644 --- a/poetry.lock +++ b/poetry.lock @@ -652,63 +652,63 @@ files = [ [[package]] name = "coverage" -version = "7.5.1" +version = "7.5.2" description = "Code coverage measurement for Python" optional = false python-versions = ">=3.8" files = [ - {file = "coverage-7.5.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c0884920835a033b78d1c73b6d3bbcda8161a900f38a488829a83982925f6c2e"}, - {file = "coverage-7.5.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:39afcd3d4339329c5f58de48a52f6e4e50f6578dd6099961cf22228feb25f38f"}, - {file = "coverage-7.5.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4a7b0ceee8147444347da6a66be737c9d78f3353b0681715b668b72e79203e4a"}, - {file = "coverage-7.5.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4a9ca3f2fae0088c3c71d743d85404cec8df9be818a005ea065495bedc33da35"}, - {file = "coverage-7.5.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5fd215c0c7d7aab005221608a3c2b46f58c0285a819565887ee0b718c052aa4e"}, - {file = "coverage-7.5.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:4bf0655ab60d754491004a5efd7f9cccefcc1081a74c9ef2da4735d6ee4a6223"}, - {file = "coverage-7.5.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:61c4bf1ba021817de12b813338c9be9f0ad5b1e781b9b340a6d29fc13e7c1b5e"}, - {file = "coverage-7.5.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = 
"sha256:db66fc317a046556a96b453a58eced5024af4582a8dbdc0c23ca4dbc0d5b3146"}, - {file = "coverage-7.5.1-cp310-cp310-win32.whl", hash = "sha256:b016ea6b959d3b9556cb401c55a37547135a587db0115635a443b2ce8f1c7228"}, - {file = "coverage-7.5.1-cp310-cp310-win_amd64.whl", hash = "sha256:df4e745a81c110e7446b1cc8131bf986157770fa405fe90e15e850aaf7619bc8"}, - {file = "coverage-7.5.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:796a79f63eca8814ca3317a1ea443645c9ff0d18b188de470ed7ccd45ae79428"}, - {file = "coverage-7.5.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4fc84a37bfd98db31beae3c2748811a3fa72bf2007ff7902f68746d9757f3746"}, - {file = "coverage-7.5.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6175d1a0559986c6ee3f7fccfc4a90ecd12ba0a383dcc2da30c2b9918d67d8a3"}, - {file = "coverage-7.5.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1fc81d5878cd6274ce971e0a3a18a8803c3fe25457165314271cf78e3aae3aa2"}, - {file = "coverage-7.5.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:556cf1a7cbc8028cb60e1ff0be806be2eded2daf8129b8811c63e2b9a6c43bca"}, - {file = "coverage-7.5.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:9981706d300c18d8b220995ad22627647be11a4276721c10911e0e9fa44c83e8"}, - {file = "coverage-7.5.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:d7fed867ee50edf1a0b4a11e8e5d0895150e572af1cd6d315d557758bfa9c057"}, - {file = "coverage-7.5.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:ef48e2707fb320c8f139424a596f5b69955a85b178f15af261bab871873bb987"}, - {file = "coverage-7.5.1-cp311-cp311-win32.whl", hash = "sha256:9314d5678dcc665330df5b69c1e726a0e49b27df0461c08ca12674bcc19ef136"}, - {file = "coverage-7.5.1-cp311-cp311-win_amd64.whl", hash = "sha256:5fa567e99765fe98f4e7d7394ce623e794d7cabb170f2ca2ac5a4174437e90dd"}, - {file = "coverage-7.5.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b6cf3764c030e5338e7f61f95bd21147963cf6aa16e09d2f74f1fa52013c1206"}, - {file = "coverage-7.5.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2ec92012fefebee89a6b9c79bc39051a6cb3891d562b9270ab10ecfdadbc0c34"}, - {file = "coverage-7.5.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:16db7f26000a07efcf6aea00316f6ac57e7d9a96501e990a36f40c965ec7a95d"}, - {file = "coverage-7.5.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:beccf7b8a10b09c4ae543582c1319c6df47d78fd732f854ac68d518ee1fb97fa"}, - {file = "coverage-7.5.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8748731ad392d736cc9ccac03c9845b13bb07d020a33423fa5b3a36521ac6e4e"}, - {file = "coverage-7.5.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7352b9161b33fd0b643ccd1f21f3a3908daaddf414f1c6cb9d3a2fd618bf2572"}, - {file = "coverage-7.5.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:7a588d39e0925f6a2bff87154752481273cdb1736270642aeb3635cb9b4cad07"}, - {file = "coverage-7.5.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:68f962d9b72ce69ea8621f57551b2fa9c70509af757ee3b8105d4f51b92b41a7"}, - {file = "coverage-7.5.1-cp312-cp312-win32.whl", hash = "sha256:f152cbf5b88aaeb836127d920dd0f5e7edff5a66f10c079157306c4343d86c19"}, - {file = "coverage-7.5.1-cp312-cp312-win_amd64.whl", hash = "sha256:5a5740d1fb60ddf268a3811bcd353de34eb56dc24e8f52a7f05ee513b2d4f596"}, - {file = 
"coverage-7.5.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:e2213def81a50519d7cc56ed643c9e93e0247f5bbe0d1247d15fa520814a7cd7"}, - {file = "coverage-7.5.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:5037f8fcc2a95b1f0e80585bd9d1ec31068a9bcb157d9750a172836e98bc7a90"}, - {file = "coverage-7.5.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c3721c2c9e4c4953a41a26c14f4cef64330392a6d2d675c8b1db3b645e31f0e"}, - {file = "coverage-7.5.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ca498687ca46a62ae590253fba634a1fe9836bc56f626852fb2720f334c9e4e5"}, - {file = "coverage-7.5.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0cdcbc320b14c3e5877ee79e649677cb7d89ef588852e9583e6b24c2e5072661"}, - {file = "coverage-7.5.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:57e0204b5b745594e5bc14b9b50006da722827f0b8c776949f1135677e88d0b8"}, - {file = "coverage-7.5.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:8fe7502616b67b234482c3ce276ff26f39ffe88adca2acf0261df4b8454668b4"}, - {file = "coverage-7.5.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:9e78295f4144f9dacfed4f92935fbe1780021247c2fabf73a819b17f0ccfff8d"}, - {file = "coverage-7.5.1-cp38-cp38-win32.whl", hash = "sha256:1434e088b41594baa71188a17533083eabf5609e8e72f16ce8c186001e6b8c41"}, - {file = "coverage-7.5.1-cp38-cp38-win_amd64.whl", hash = "sha256:0646599e9b139988b63704d704af8e8df7fa4cbc4a1f33df69d97f36cb0a38de"}, - {file = "coverage-7.5.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4cc37def103a2725bc672f84bd939a6fe4522310503207aae4d56351644682f1"}, - {file = "coverage-7.5.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:fc0b4d8bfeabd25ea75e94632f5b6e047eef8adaed0c2161ada1e922e7f7cece"}, - {file = "coverage-7.5.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d0a0f5e06881ecedfe6f3dd2f56dcb057b6dbeb3327fd32d4b12854df36bf26"}, - {file = "coverage-7.5.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9735317685ba6ec7e3754798c8871c2f49aa5e687cc794a0b1d284b2389d1bd5"}, - {file = "coverage-7.5.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d21918e9ef11edf36764b93101e2ae8cc82aa5efdc7c5a4e9c6c35a48496d601"}, - {file = "coverage-7.5.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:c3e757949f268364b96ca894b4c342b41dc6f8f8b66c37878aacef5930db61be"}, - {file = "coverage-7.5.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:79afb6197e2f7f60c4824dd4b2d4c2ec5801ceb6ba9ce5d2c3080e5660d51a4f"}, - {file = "coverage-7.5.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d1d0d98d95dd18fe29dc66808e1accf59f037d5716f86a501fc0256455219668"}, - {file = "coverage-7.5.1-cp39-cp39-win32.whl", hash = "sha256:1cc0fe9b0b3a8364093c53b0b4c0c2dd4bb23acbec4c9240b5f284095ccf7981"}, - {file = "coverage-7.5.1-cp39-cp39-win_amd64.whl", hash = "sha256:dde0070c40ea8bb3641e811c1cfbf18e265d024deff6de52c5950677a8fb1e0f"}, - {file = "coverage-7.5.1-pp38.pp39.pp310-none-any.whl", hash = "sha256:6537e7c10cc47c595828b8a8be04c72144725c383c4702703ff4e42e44577312"}, - {file = "coverage-7.5.1.tar.gz", hash = "sha256:54de9ef3a9da981f7af93eafde4ede199e0846cd819eb27c88e2b712aae9708c"}, + {file = "coverage-7.5.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:554c7327bf0fd688050348e22db7c8e163fb7219f3ecdd4732d7ed606b417263"}, + {file = 
"coverage-7.5.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d0305e02e40c7cfea5d08d6368576537a74c0eea62b77633179748d3519d6705"}, + {file = "coverage-7.5.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:829fb55ad437d757c70d5b1c51cfda9377f31506a0a3f3ac282bc6a387d6a5f1"}, + {file = "coverage-7.5.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:894b1acded706f1407a662d08e026bfd0ff1e59e9bd32062fea9d862564cfb65"}, + {file = "coverage-7.5.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fe76d6dee5e4febefa83998b17926df3a04e5089e3d2b1688c74a9157798d7a2"}, + {file = "coverage-7.5.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:c7ebf2a37e4f5fea3c1a11e1f47cea7d75d0f2d8ef69635ddbd5c927083211fc"}, + {file = "coverage-7.5.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:20e611fc36e1a0fc7bbf957ef9c635c8807d71fbe5643e51b2769b3cc0fb0b51"}, + {file = "coverage-7.5.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:7c5c5b7ae2763533152880d5b5b451acbc1089ade2336b710a24b2b0f5239d20"}, + {file = "coverage-7.5.2-cp310-cp310-win32.whl", hash = "sha256:1e4225990a87df898e40ca31c9e830c15c2c53b1d33df592bc8ef314d71f0281"}, + {file = "coverage-7.5.2-cp310-cp310-win_amd64.whl", hash = "sha256:976cd92d9420e6e2aa6ce6a9d61f2b490e07cb468968adf371546b33b829284b"}, + {file = "coverage-7.5.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5997d418c219dcd4dcba64e50671cca849aaf0dac3d7a2eeeb7d651a5bd735b8"}, + {file = "coverage-7.5.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ec27e93bbf5976f0465e8936f02eb5add99bbe4e4e7b233607e4d7622912d68d"}, + {file = "coverage-7.5.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f11f98753800eb1ec872562a398081f6695f91cd01ce39819e36621003ec52a"}, + {file = "coverage-7.5.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6e34680049eecb30b6498784c9637c1c74277dcb1db75649a152f8004fbd6646"}, + {file = "coverage-7.5.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3e12536446ad4527ac8ed91d8a607813085683bcce27af69e3b31cd72b3c5960"}, + {file = "coverage-7.5.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:3d3f7744b8a8079d69af69d512e5abed4fb473057625588ce126088e50d05493"}, + {file = "coverage-7.5.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:431a3917e32223fcdb90b79fe60185864a9109631ebc05f6c5aa03781a00b513"}, + {file = "coverage-7.5.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:a7c6574225f34ce45466f04751d957b5c5e6b69fca9351db017c9249786172ce"}, + {file = "coverage-7.5.2-cp311-cp311-win32.whl", hash = "sha256:2b144d142ec9987276aeff1326edbc0df8ba4afbd7232f0ca10ad57a115e95b6"}, + {file = "coverage-7.5.2-cp311-cp311-win_amd64.whl", hash = "sha256:900532713115ac58bc3491b9d2b52704a05ed408ba0918d57fd72c94bc47fba1"}, + {file = "coverage-7.5.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:9a42970ce74c88bdf144df11c52c5cf4ad610d860de87c0883385a1c9d9fa4ab"}, + {file = "coverage-7.5.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:26716a1118c6ce2188283b4b60a898c3be29b480acbd0a91446ced4fe4e780d8"}, + {file = "coverage-7.5.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:60b66b0363c5a2a79fba3d1cd7430c25bbd92c923d031cae906bdcb6e054d9a2"}, + {file = 
"coverage-7.5.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e5d22eba19273b2069e4efeff88c897a26bdc64633cbe0357a198f92dca94268"}, + {file = "coverage-7.5.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3bb5b92a0ab3d22dfdbfe845e2fef92717b067bdf41a5b68c7e3e857c0cff1a4"}, + {file = "coverage-7.5.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:1aef719b6559b521ae913ddeb38f5048c6d1a3d366865e8b320270b7bc4693c2"}, + {file = "coverage-7.5.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:8809c0ea0e8454f756e3bd5c36d04dddf222989216788a25bfd6724bfcee342c"}, + {file = "coverage-7.5.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:1acc2e2ef098a1d4bf535758085f508097316d738101a97c3f996bccba963ea5"}, + {file = "coverage-7.5.2-cp312-cp312-win32.whl", hash = "sha256:97de509043d3f0f2b2cd171bdccf408f175c7f7a99d36d566b1ae4dd84107985"}, + {file = "coverage-7.5.2-cp312-cp312-win_amd64.whl", hash = "sha256:8941e35a0e991a7a20a1fa3e3182f82abe357211f2c335a9e6007067c3392fcf"}, + {file = "coverage-7.5.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:5662bf0f6fb6757f5c2d6279c541a5af55a39772c2362ed0920b27e3ce0e21f7"}, + {file = "coverage-7.5.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:3d9c62cff2ffb4c2a95328488fd7aa96a7a4b34873150650fe76b19c08c9c792"}, + {file = "coverage-7.5.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:74eeaa13e8200ad72fca9c5f37395fb310915cec6f1682b21375e84fd9770e84"}, + {file = "coverage-7.5.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f29bf497d51a5077994b265e976d78b09d9d0dff6ca5763dbb4804534a5d380"}, + {file = "coverage-7.5.2-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f96aa94739593ae0707eda9813ce363a0a0374a810ae0eced383340fc4a1f73"}, + {file = "coverage-7.5.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:51b6cee539168a912b4b3b040e4042b9e2c9a7ad9c8546c09e4eaeff3eacba6b"}, + {file = "coverage-7.5.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:59a75e6aa5c25b50b5a1499f9718f2edff54257f545718c4fb100f48d570ead4"}, + {file = "coverage-7.5.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:29da75ce20cb0a26d60e22658dd3230713c6c05a3465dd8ad040ffc991aea318"}, + {file = "coverage-7.5.2-cp38-cp38-win32.whl", hash = "sha256:23f2f16958b16152b43a39a5ecf4705757ddd284b3b17a77da3a62aef9c057ef"}, + {file = "coverage-7.5.2-cp38-cp38-win_amd64.whl", hash = "sha256:9e41c94035e5cdb362beed681b58a707e8dc29ea446ea1713d92afeded9d1ddd"}, + {file = "coverage-7.5.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:06d96b9b19bbe7f049c2be3c4f9e06737ec6d8ef8933c7c3a4c557ef07936e46"}, + {file = "coverage-7.5.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:878243e1206828908a6b4a9ca7b1aa8bee9eb129bf7186fc381d2646f4524ce9"}, + {file = "coverage-7.5.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:482df956b055d3009d10fce81af6ffab28215d7ed6ad4a15e5c8e67cb7c5251c"}, + {file = "coverage-7.5.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a35c97af60a5492e9e89f8b7153fe24eadfd61cb3a2fb600df1a25b5dab34b7e"}, + {file = "coverage-7.5.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:24bb4c7859a3f757a116521d4d3a8a82befad56ea1bdacd17d6aafd113b0071e"}, + {file = 
"coverage-7.5.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:e1046aab24c48c694f0793f669ac49ea68acde6a0798ac5388abe0a5615b5ec8"}, + {file = "coverage-7.5.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:448ec61ea9ea7916d5579939362509145caaecf03161f6f13e366aebb692a631"}, + {file = "coverage-7.5.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:4a00bd5ba8f1a4114720bef283cf31583d6cb1c510ce890a6da6c4268f0070b7"}, + {file = "coverage-7.5.2-cp39-cp39-win32.whl", hash = "sha256:9f805481d5eff2a96bac4da1570ef662bf970f9a16580dc2c169c8c3183fa02b"}, + {file = "coverage-7.5.2-cp39-cp39-win_amd64.whl", hash = "sha256:2c79f058e7bec26b5295d53b8c39ecb623448c74ccc8378631f5cb5c16a7e02c"}, + {file = "coverage-7.5.2-pp38.pp39.pp310-none-any.whl", hash = "sha256:40dbb8e7727560fe8ab65efcddfec1ae25f30ef02e2f2e5d78cfb52a66781ec5"}, + {file = "coverage-7.5.2.tar.gz", hash = "sha256:13017a63b0e499c59b5ba94a8542fb62864ba3016127d1e4ef30d354fc2b00e9"}, ] [package.dependencies] From 756ae625a2ea0f9c12df78430512ce991f6a1976 Mon Sep 17 00:00:00 2001 From: "Eric L (CCCS)" Date: Tue, 28 May 2024 03:52:24 -0400 Subject: [PATCH 16/68] Introduce hierarchical namespaces into SqlCatalog (#591) * Introduce hierarchical namespaces into SqlCatalog * Fix SqlCatalog unit tests broken from code update. --- pyiceberg/catalog/__init__.py | 25 +- pyiceberg/catalog/sql.py | 159 ++++--- pyiceberg/cli/console.py | 8 +- tests/catalog/test_sql.py | 847 +++++++++++++++++++++++++--------- tests/conftest.py | 13 + 5 files changed, 758 insertions(+), 294 deletions(-) diff --git a/pyiceberg/catalog/__init__.py b/pyiceberg/catalog/__init__.py index 0b70fe32e1..ea2bc65760 100644 --- a/pyiceberg/catalog/__init__.py +++ b/pyiceberg/catalog/__init__.py @@ -588,7 +588,7 @@ def identifier_to_tuple(identifier: Union[str, Identifier]) -> Identifier: If the identifier is a string, it is split into a tuple on '.'. If it is a tuple, it is used as-is. Args: - identifier (str | Identifier: an identifier, either a string or tuple of strings. + identifier (str | Identifier): an identifier, either a string or tuple of strings. Returns: Identifier: a tuple of strings. @@ -619,6 +619,29 @@ def namespace_from(identifier: Union[str, Identifier]) -> Identifier: """ return Catalog.identifier_to_tuple(identifier)[:-1] + @staticmethod + def namespace_to_string( + identifier: Union[str, Identifier], err: Union[Type[ValueError], Type[NoSuchNamespaceError]] = ValueError + ) -> str: + """Transform a namespace identifier into a string. + + Args: + identifier (Union[str, Identifier]): a namespace identifier. + err (Union[Type[ValueError], Type[NoSuchNamespaceError]]): the error type to raise when identifier is empty. + + Returns: + Identifier: Namespace identifier. 
+ """ + tuple_identifier = Catalog.identifier_to_tuple(identifier) + if len(tuple_identifier) < 1: + raise err("Empty namespace identifier") + + # Check if any segment of the tuple is an empty string + if any(segment.strip() == "" for segment in tuple_identifier): + raise err("Namespace identifier contains an empty segment or a segment with only whitespace") + + return ".".join(segment.strip() for segment in tuple_identifier) + @staticmethod def identifier_to_database( identifier: Union[str, Identifier], err: Union[Type[ValueError], Type[NoSuchNamespaceError]] = ValueError diff --git a/pyiceberg/catalog/sql.py b/pyiceberg/catalog/sql.py index 978109b2a3..6c198767e7 100644 --- a/pyiceberg/catalog/sql.py +++ b/pyiceberg/catalog/sql.py @@ -43,6 +43,7 @@ from pyiceberg.catalog import ( METADATA_LOCATION, + Catalog, MetastoreCatalog, PropertiesUpdateSummary, ) @@ -94,6 +95,16 @@ class IcebergNamespaceProperties(SqlCatalogBaseTable): class SqlCatalog(MetastoreCatalog): + """Implementation of a SQL based catalog. + + In the `JDBCCatalog` implementation, a `Namespace` is composed of a list of strings separated by dots: `'ns1.ns2.ns3'`. + And you can have as many levels as you want, but you need at least one. The `SqlCatalog` honors the same convention. + + In the `JDBCCatalog` implementation, a `TableIdentifier` is composed of an optional `Namespace` and a table name. + When a `Namespace` is present, the full name will be `'ns1.ns2.ns3.table'`. A valid `TableIdentifier` could be `'name'` (no namespace). + The `SqlCatalog` has a different convention where a `TableIdentifier` requires a `Namespace`. + """ + def __init__(self, name: str, **properties: str): super().__init__(name, **properties) @@ -136,7 +147,7 @@ def _convert_orm_to_iceberg(self, orm_table: IcebergTables) -> Table: file = io.new_input(metadata_location) metadata = FromInputFile.table_metadata(file) return Table( - identifier=(self.name, table_namespace, table_name), + identifier=(self.name,) + Catalog.identifier_to_tuple(table_namespace) + (table_name,), metadata=metadata, metadata_location=metadata_location, io=self._load_file_io(metadata.properties, metadata_location), @@ -173,11 +184,14 @@ def create_table( """ schema: Schema = self._convert_schema_if_needed(schema) # type: ignore - database_name, table_name = self.identifier_to_database_and_table(identifier) - if not self._namespace_exists(database_name): - raise NoSuchNamespaceError(f"Namespace does not exist: {database_name}") + identifier_nocatalog = self.identifier_to_tuple_without_catalog(identifier) + namespace_identifier = Catalog.namespace_from(identifier_nocatalog) + table_name = Catalog.table_name_from(identifier_nocatalog) + if not self._namespace_exists(namespace_identifier): + raise NoSuchNamespaceError(f"Namespace does not exist: {namespace_identifier}") - location = self._resolve_table_location(location, database_name, table_name) + namespace = Catalog.namespace_to_string(namespace_identifier) + location = self._resolve_table_location(location, namespace, table_name) metadata_location = self._get_metadata_location(location=location) metadata = new_table_metadata( location=location, schema=schema, partition_spec=partition_spec, sort_order=sort_order, properties=properties @@ -190,7 +204,7 @@ def create_table( session.add( IcebergTables( catalog_name=self.name, - table_namespace=database_name, + table_namespace=namespace, table_name=table_name, metadata_location=metadata_location, previous_metadata_location=None, @@ -198,7 +212,7 @@ def create_table( ) 
session.commit() except IntegrityError as e: - raise TableAlreadyExistsError(f"Table {database_name}.{table_name} already exists") from e + raise TableAlreadyExistsError(f"Table {namespace}.{table_name} already exists") from e return self.load_table(identifier=identifier) @@ -216,16 +230,19 @@ def register_table(self, identifier: Union[str, Identifier], metadata_location: TableAlreadyExistsError: If the table already exists NoSuchNamespaceError: If namespace does not exist """ - database_name, table_name = self.identifier_to_database_and_table(identifier) - if not self._namespace_exists(database_name): - raise NoSuchNamespaceError(f"Namespace does not exist: {database_name}") + identifier_tuple = self.identifier_to_tuple_without_catalog(identifier) + namespace_tuple = Catalog.namespace_from(identifier_tuple) + namespace = Catalog.namespace_to_string(namespace_tuple) + table_name = Catalog.table_name_from(identifier_tuple) + if not self._namespace_exists(namespace): + raise NoSuchNamespaceError(f"Namespace does not exist: {namespace}") with Session(self.engine) as session: try: session.add( IcebergTables( catalog_name=self.name, - table_namespace=database_name, + table_namespace=namespace, table_name=table_name, metadata_location=metadata_location, previous_metadata_location=None, @@ -233,7 +250,7 @@ def register_table(self, identifier: Union[str, Identifier], metadata_location: ) session.commit() except IntegrityError as e: - raise TableAlreadyExistsError(f"Table {database_name}.{table_name} already exists") from e + raise TableAlreadyExistsError(f"Table {namespace}.{table_name} already exists") from e return self.load_table(identifier=identifier) @@ -253,17 +270,19 @@ def load_table(self, identifier: Union[str, Identifier]) -> Table: NoSuchTableError: If a table with the name does not exist. """ identifier_tuple = self.identifier_to_tuple_without_catalog(identifier) - database_name, table_name = self.identifier_to_database_and_table(identifier_tuple, NoSuchTableError) + namespace_tuple = Catalog.namespace_from(identifier_tuple) + namespace = Catalog.namespace_to_string(namespace_tuple) + table_name = Catalog.table_name_from(identifier_tuple) with Session(self.engine) as session: stmt = select(IcebergTables).where( IcebergTables.catalog_name == self.name, - IcebergTables.table_namespace == database_name, + IcebergTables.table_namespace == namespace, IcebergTables.table_name == table_name, ) result = session.scalar(stmt) if result: return self._convert_orm_to_iceberg(result) - raise NoSuchTableError(f"Table does not exist: {database_name}.{table_name}") + raise NoSuchTableError(f"Table does not exist: {namespace}.{table_name}") def drop_table(self, identifier: Union[str, Identifier]) -> None: """Drop a table. @@ -275,18 +294,20 @@ def drop_table(self, identifier: Union[str, Identifier]) -> None: NoSuchTableError: If a table with the name does not exist. 
""" identifier_tuple = self.identifier_to_tuple_without_catalog(identifier) - database_name, table_name = self.identifier_to_database_and_table(identifier_tuple, NoSuchTableError) + namespace_tuple = Catalog.namespace_from(identifier_tuple) + namespace = Catalog.namespace_to_string(namespace_tuple) + table_name = Catalog.table_name_from(identifier_tuple) with Session(self.engine) as session: if self.engine.dialect.supports_sane_rowcount: res = session.execute( delete(IcebergTables).where( IcebergTables.catalog_name == self.name, - IcebergTables.table_namespace == database_name, + IcebergTables.table_namespace == namespace, IcebergTables.table_name == table_name, ) ) if res.rowcount < 1: - raise NoSuchTableError(f"Table does not exist: {database_name}.{table_name}") + raise NoSuchTableError(f"Table does not exist: {namespace}.{table_name}") else: try: tbl = ( @@ -294,14 +315,14 @@ def drop_table(self, identifier: Union[str, Identifier]) -> None: .with_for_update(of=IcebergTables) .filter( IcebergTables.catalog_name == self.name, - IcebergTables.table_namespace == database_name, + IcebergTables.table_namespace == namespace, IcebergTables.table_name == table_name, ) .one() ) session.delete(tbl) except NoResultFound as e: - raise NoSuchTableError(f"Table does not exist: {database_name}.{table_name}") from e + raise NoSuchTableError(f"Table does not exist: {namespace}.{table_name}") from e session.commit() def rename_table(self, from_identifier: Union[str, Identifier], to_identifier: Union[str, Identifier]) -> Table: @@ -320,10 +341,15 @@ def rename_table(self, from_identifier: Union[str, Identifier], to_identifier: U NoSuchNamespaceError: If the target namespace does not exist. """ from_identifier_tuple = self.identifier_to_tuple_without_catalog(from_identifier) - from_database_name, from_table_name = self.identifier_to_database_and_table(from_identifier_tuple, NoSuchTableError) - to_database_name, to_table_name = self.identifier_to_database_and_table(to_identifier) - if not self._namespace_exists(to_database_name): - raise NoSuchNamespaceError(f"Namespace does not exist: {to_database_name}") + to_identifier_tuple = self.identifier_to_tuple_without_catalog(to_identifier) + from_namespace_tuple = Catalog.namespace_from(from_identifier_tuple) + from_namespace = Catalog.namespace_to_string(from_namespace_tuple) + from_table_name = Catalog.table_name_from(from_identifier_tuple) + to_namespace_tuple = Catalog.namespace_from(to_identifier_tuple) + to_namespace = Catalog.namespace_to_string(to_namespace_tuple) + to_table_name = Catalog.table_name_from(to_identifier_tuple) + if not self._namespace_exists(to_namespace): + raise NoSuchNamespaceError(f"Namespace does not exist: {to_namespace}") with Session(self.engine) as session: try: if self.engine.dialect.supports_sane_rowcount: @@ -331,10 +357,10 @@ def rename_table(self, from_identifier: Union[str, Identifier], to_identifier: U update(IcebergTables) .where( IcebergTables.catalog_name == self.name, - IcebergTables.table_namespace == from_database_name, + IcebergTables.table_namespace == from_namespace, IcebergTables.table_name == from_table_name, ) - .values(table_namespace=to_database_name, table_name=to_table_name) + .values(table_namespace=to_namespace, table_name=to_table_name) ) result = session.execute(stmt) if result.rowcount < 1: @@ -346,18 +372,18 @@ def rename_table(self, from_identifier: Union[str, Identifier], to_identifier: U .with_for_update(of=IcebergTables) .filter( IcebergTables.catalog_name == self.name, - 
IcebergTables.table_namespace == from_database_name, + IcebergTables.table_namespace == from_namespace, IcebergTables.table_name == from_table_name, ) .one() ) - tbl.table_namespace = to_database_name + tbl.table_namespace = to_namespace tbl.table_name = to_table_name except NoResultFound as e: raise NoSuchTableError(f"Table does not exist: {from_table_name}") from e session.commit() except IntegrityError as e: - raise TableAlreadyExistsError(f"Table {to_database_name}.{to_table_name} already exists") from e + raise TableAlreadyExistsError(f"Table {to_namespace}.{to_table_name} already exists") from e return self.load_table(to_identifier) def _commit_table(self, table_request: CommitTableRequest) -> CommitTableResponse: @@ -377,7 +403,9 @@ def _commit_table(self, table_request: CommitTableRequest) -> CommitTableRespons tuple(table_request.identifier.namespace.root + [table_request.identifier.name]) ) current_table = self.load_table(identifier_tuple) - database_name, table_name = self.identifier_to_database_and_table(identifier_tuple, NoSuchTableError) + namespace_tuple = Catalog.namespace_from(identifier_tuple) + namespace = Catalog.namespace_to_string(namespace_tuple) + table_name = Catalog.table_name_from(identifier_tuple) base_metadata = current_table.metadata for requirement in table_request.requirements: requirement.validate(base_metadata) @@ -398,7 +426,7 @@ def _commit_table(self, table_request: CommitTableRequest) -> CommitTableRespons update(IcebergTables) .where( IcebergTables.catalog_name == self.name, - IcebergTables.table_namespace == database_name, + IcebergTables.table_namespace == namespace, IcebergTables.table_name == table_name, IcebergTables.metadata_location == current_table.metadata_location, ) @@ -406,7 +434,7 @@ def _commit_table(self, table_request: CommitTableRequest) -> CommitTableRespons ) result = session.execute(stmt) if result.rowcount < 1: - raise CommitFailedException(f"Table has been updated by another process: {database_name}.{table_name}") + raise CommitFailedException(f"Table has been updated by another process: {namespace}.{table_name}") else: try: tbl = ( @@ -414,7 +442,7 @@ def _commit_table(self, table_request: CommitTableRequest) -> CommitTableRespons .with_for_update(of=IcebergTables) .filter( IcebergTables.catalog_name == self.name, - IcebergTables.table_namespace == database_name, + IcebergTables.table_namespace == namespace, IcebergTables.table_name == table_name, IcebergTables.metadata_location == current_table.metadata_location, ) @@ -423,13 +451,14 @@ def _commit_table(self, table_request: CommitTableRequest) -> CommitTableRespons tbl.metadata_location = new_metadata_location tbl.previous_metadata_location = current_table.metadata_location except NoResultFound as e: - raise CommitFailedException(f"Table has been updated by another process: {database_name}.{table_name}") from e + raise CommitFailedException(f"Table has been updated by another process: {namespace}.{table_name}") from e session.commit() return CommitTableResponse(metadata=updated_metadata, metadata_location=new_metadata_location) def _namespace_exists(self, identifier: Union[str, Identifier]) -> bool: - namespace = self.identifier_to_database(identifier) + namespace_tuple = Catalog.identifier_to_tuple(identifier) + namespace = Catalog.namespace_to_string(namespace_tuple, NoSuchNamespaceError) with Session(self.engine) as session: stmt = ( select(IcebergTables) @@ -462,18 +491,20 @@ def create_namespace(self, namespace: Union[str, Identifier], properties: Proper Raises: 
NamespaceAlreadyExistsError: If a namespace with the given name already exists. """ + if self._namespace_exists(namespace): + raise NamespaceAlreadyExistsError(f"Namespace {namespace} already exists") + if not properties: properties = IcebergNamespaceProperties.NAMESPACE_MINIMAL_PROPERTIES - database_name = self.identifier_to_database(namespace) - if self._namespace_exists(database_name): - raise NamespaceAlreadyExistsError(f"Database {database_name} already exists") - create_properties = properties if properties else IcebergNamespaceProperties.NAMESPACE_MINIMAL_PROPERTIES with Session(self.engine) as session: for key, value in create_properties.items(): session.add( IcebergNamespaceProperties( - catalog_name=self.name, namespace=database_name, property_key=key, property_value=value + catalog_name=self.name, + namespace=Catalog.namespace_to_string(namespace, NoSuchNamespaceError), + property_key=key, + property_value=value, ) ) session.commit() @@ -488,16 +519,16 @@ def drop_namespace(self, namespace: Union[str, Identifier]) -> None: NoSuchNamespaceError: If a namespace with the given name does not exist. NamespaceNotEmptyError: If the namespace is not empty. """ - database_name = self.identifier_to_database(namespace, NoSuchNamespaceError) - if self._namespace_exists(database_name): - if tables := self.list_tables(database_name): - raise NamespaceNotEmptyError(f"Database {database_name} is not empty. {len(tables)} tables exist.") + if self._namespace_exists(namespace): + namespace_str = Catalog.namespace_to_string(namespace) + if tables := self.list_tables(namespace): + raise NamespaceNotEmptyError(f"Namespace {namespace_str} is not empty. {len(tables)} tables exist.") with Session(self.engine) as session: session.execute( delete(IcebergNamespaceProperties).where( IcebergNamespaceProperties.catalog_name == self.name, - IcebergNamespaceProperties.namespace == database_name, + IcebergNamespaceProperties.namespace == namespace_str, ) ) session.commit() @@ -516,14 +547,14 @@ def list_tables(self, namespace: Union[str, Identifier]) -> List[Identifier]: Raises: NoSuchNamespaceError: If a namespace with the given name does not exist. """ - database_name = self.identifier_to_database(namespace, NoSuchNamespaceError) + if namespace and not self._namespace_exists(namespace): + raise NoSuchNamespaceError(f"Namespace does not exist: {namespace}") - stmt = select(IcebergTables).where( - IcebergTables.catalog_name == self.name, IcebergTables.table_namespace == database_name - ) + namespace = Catalog.namespace_to_string(namespace) + stmt = select(IcebergTables).where(IcebergTables.catalog_name == self.name, IcebergTables.table_namespace == namespace) with Session(self.engine) as session: result = session.scalars(stmt) - return [(table.table_namespace, table.table_name) for table in result] + return [(Catalog.identifier_to_tuple(table.table_namespace) + (table.table_name,)) for table in result] def list_namespaces(self, namespace: Union[str, Identifier] = ()) -> List[Identifier]: """List namespaces from the given namespace. If not given, list top-level namespaces from the catalog. 
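
Taken together, the SqlCatalog changes above allow a namespace to have multiple dot-separated levels, matching the JDBC catalog convention described in the class docstring, and list_tables / list_namespaces now return one tuple element per level. A minimal usage sketch under assumed values (the catalog name, SQLite path, warehouse location, and schema below are illustrative only and not part of this patch):

from pyiceberg.catalog.sql import SqlCatalog
from pyiceberg.schema import Schema
from pyiceberg.types import NestedField, StringType

# Assumed local warehouse and SQLite-backed catalog, for illustration only.
catalog = SqlCatalog(
    "example_catalog",
    uri="sqlite:////tmp/warehouse/sql-catalog.db",
    warehouse="file:///tmp/warehouse",
)
catalog.create_tables()  # create the catalog's backing tables

# Namespaces may now be hierarchical: 'ns1.ns2' has two levels.
catalog.create_namespace("ns1.ns2")

schema = Schema(NestedField(field_id=1, name="city", field_type=StringType(), required=False))
catalog.create_table("ns1.ns2.cities", schema)

# Identifiers come back with one tuple element per namespace level.
print(catalog.list_tables("ns1.ns2"))  # e.g. [('ns1', 'ns2', 'cities')]
print(catalog.list_namespaces())       # e.g. [('ns1', 'ns2')]
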
@@ -543,15 +574,15 @@ def list_namespaces(self, namespace: Union[str, Identifier] = ()) -> List[Identi table_stmt = select(IcebergTables.table_namespace).where(IcebergTables.catalog_name == self.name) namespace_stmt = select(IcebergNamespaceProperties.namespace).where(IcebergNamespaceProperties.catalog_name == self.name) if namespace: - database_name = self.identifier_to_database(namespace, NoSuchNamespaceError) - table_stmt = table_stmt.where(IcebergTables.table_namespace.like(database_name)) - namespace_stmt = namespace_stmt.where(IcebergNamespaceProperties.namespace.like(database_name)) + namespace_str = Catalog.namespace_to_string(namespace, NoSuchNamespaceError) + table_stmt = table_stmt.where(IcebergTables.table_namespace.like(namespace_str)) + namespace_stmt = namespace_stmt.where(IcebergNamespaceProperties.namespace.like(namespace_str)) stmt = union( table_stmt, namespace_stmt, ) with Session(self.engine) as session: - return [self.identifier_to_tuple(namespace_col) for namespace_col in session.execute(stmt).scalars()] + return [Catalog.identifier_to_tuple(namespace_col) for namespace_col in session.execute(stmt).scalars()] def load_namespace_properties(self, namespace: Union[str, Identifier]) -> Properties: """Get properties for a namespace. @@ -565,12 +596,12 @@ def load_namespace_properties(self, namespace: Union[str, Identifier]) -> Proper Raises: NoSuchNamespaceError: If a namespace with the given name does not exist. """ - database_name = self.identifier_to_database(namespace) - if not self._namespace_exists(database_name): - raise NoSuchNamespaceError(f"Database {database_name} does not exists") + namespace_str = Catalog.namespace_to_string(namespace) + if not self._namespace_exists(namespace): + raise NoSuchNamespaceError(f"Namespace {namespace_str} does not exists") stmt = select(IcebergNamespaceProperties).where( - IcebergNamespaceProperties.catalog_name == self.name, IcebergNamespaceProperties.namespace == database_name + IcebergNamespaceProperties.catalog_name == self.name, IcebergNamespaceProperties.namespace == namespace_str ) with Session(self.engine) as session: result = session.scalars(stmt) @@ -590,9 +621,9 @@ def update_namespace_properties( NoSuchNamespaceError: If a namespace with the given name does not exist. ValueError: If removals and updates have overlapping keys. 
""" - database_name = self.identifier_to_database(namespace) - if not self._namespace_exists(database_name): - raise NoSuchNamespaceError(f"Database {database_name} does not exists") + namespace_str = Catalog.namespace_to_string(namespace) + if not self._namespace_exists(namespace): + raise NoSuchNamespaceError(f"Namespace {namespace_str} does not exists") current_properties = self.load_namespace_properties(namespace=namespace) properties_update_summary = self._get_updated_props_and_update_summary( @@ -603,7 +634,7 @@ def update_namespace_properties( if removals: delete_stmt = delete(IcebergNamespaceProperties).where( IcebergNamespaceProperties.catalog_name == self.name, - IcebergNamespaceProperties.namespace == database_name, + IcebergNamespaceProperties.namespace == namespace_str, IcebergNamespaceProperties.property_key.in_(removals), ) session.execute(delete_stmt) @@ -614,14 +645,14 @@ def update_namespace_properties( # This is not a problem since it runs in a single transaction delete_stmt = delete(IcebergNamespaceProperties).where( IcebergNamespaceProperties.catalog_name == self.name, - IcebergNamespaceProperties.namespace == database_name, + IcebergNamespaceProperties.namespace == namespace_str, IcebergNamespaceProperties.property_key.in_(set(updates.keys())), ) session.execute(delete_stmt) insert_stmt = insert(IcebergNamespaceProperties) for property_key, property_value in updates.items(): insert_stmt = insert_stmt.values( - catalog_name=self.name, namespace=database_name, property_key=property_key, property_value=property_value + catalog_name=self.name, namespace=namespace_str, property_key=property_key, property_value=property_value ) session.execute(insert_stmt) session.commit() diff --git a/pyiceberg/cli/console.py b/pyiceberg/cli/console.py index 0fbda10960..d1833df081 100644 --- a/pyiceberg/cli/console.py +++ b/pyiceberg/cli/console.py @@ -112,9 +112,13 @@ def list(ctx: Context, parent: Optional[str]) -> None: # pylint: disable=redefi """List tables or namespaces.""" catalog, output = _catalog_and_output(ctx) - identifiers = catalog.list_namespaces(parent or ()) - if not identifiers and parent: + identifiers = [] + if parent: + # Do we have tables under parent namespace? identifiers = catalog.list_tables(parent) + if not identifiers: + # List hierarchical namespaces if parent, root namespaces otherwise. 
+ identifiers = catalog.list_namespaces(parent or ()) output.identifiers(identifiers) diff --git a/tests/catalog/test_sql.py b/tests/catalog/test_sql.py index efa7b746a9..285cfd9ab9 100644 --- a/tests/catalog/test_sql.py +++ b/tests/catalog/test_sql.py @@ -17,7 +17,7 @@ import os from pathlib import Path -from typing import Generator, List +from typing import Any, Generator, List import pyarrow as pa import pytest @@ -25,6 +25,9 @@ from pytest_lazyfixture import lazy_fixture from sqlalchemy.exc import ArgumentError, IntegrityError +from pyiceberg.catalog import ( + Catalog, +) from pyiceberg.catalog.sql import SqlCatalog from pyiceberg.exceptions import ( CommitFailedException, @@ -52,51 +55,90 @@ from pyiceberg.types import IntegerType -@pytest.fixture(name="random_identifier") -def fixture_random_identifier(warehouse: Path, database_name: str, table_name: str) -> Identifier: +@pytest.fixture(scope="module") +def catalog_name() -> str: + return "test_sql_catalog" + + +@pytest.fixture(name="random_table_identifier") +def fixture_random_table_identifier(warehouse: Path, database_name: str, table_name: str) -> Identifier: os.makedirs(f"{warehouse}/{database_name}.db/{table_name}/metadata/", exist_ok=True) return database_name, table_name -@pytest.fixture(name="another_random_identifier") -def fixture_another_random_identifier(warehouse: Path, database_name: str, table_name: str) -> Identifier: +@pytest.fixture(name="random_table_identifier_with_catalog") +def fixture_random_table_identifier_with_catalog( + warehouse: Path, catalog_name: str, database_name: str, table_name: str +) -> Identifier: + os.makedirs(f"{warehouse}/{database_name}.db/{table_name}/metadata/", exist_ok=True) + return catalog_name, database_name, table_name + + +@pytest.fixture(name="another_random_table_identifier") +def fixture_another_random_table_identifier(warehouse: Path, database_name: str, table_name: str) -> Identifier: database_name = database_name + "_new" table_name = table_name + "_new" os.makedirs(f"{warehouse}/{database_name}.db/{table_name}/metadata/", exist_ok=True) return database_name, table_name +@pytest.fixture(name="another_random_table_identifier_with_catalog") +def fixture_another_random_table_identifier_with_catalog( + warehouse: Path, catalog_name: str, database_name: str, table_name: str +) -> Identifier: + database_name = database_name + "_new" + table_name = table_name + "_new" + os.makedirs(f"{warehouse}/{database_name}.db/{table_name}/metadata/", exist_ok=True) + return catalog_name, database_name, table_name + + +@pytest.fixture(name="random_hierarchical_identifier") +def fixture_random_hierarchical_identifier(warehouse: Path, hierarchical_namespace_name: str, table_name: str) -> Identifier: + os.makedirs(f"{warehouse}/{hierarchical_namespace_name}.db/{table_name}/metadata/", exist_ok=True) + return Catalog.identifier_to_tuple(".".join((hierarchical_namespace_name, table_name))) + + +@pytest.fixture(name="another_random_hierarchical_identifier") +def fixture_another_random_hierarchical_identifier( + warehouse: Path, hierarchical_namespace_name: str, table_name: str +) -> Identifier: + hierarchical_namespace_name = hierarchical_namespace_name + "_new" + table_name = table_name + "_new" + os.makedirs(f"{warehouse}/{hierarchical_namespace_name}.db/{table_name}/metadata/", exist_ok=True) + return Catalog.identifier_to_tuple(".".join((hierarchical_namespace_name, table_name))) + + @pytest.fixture(scope="module") -def catalog_memory(warehouse: Path) -> Generator[SqlCatalog, None, None]: +def 
catalog_memory(catalog_name: str, warehouse: Path) -> Generator[SqlCatalog, None, None]: props = { "uri": "sqlite:///:memory:", "warehouse": f"file://{warehouse}", } - catalog = SqlCatalog("test_sql_catalog", **props) + catalog = SqlCatalog(catalog_name, **props) catalog.create_tables() yield catalog catalog.destroy_tables() @pytest.fixture(scope="module") -def catalog_sqlite(warehouse: Path) -> Generator[SqlCatalog, None, None]: +def catalog_sqlite(catalog_name: str, warehouse: Path) -> Generator[SqlCatalog, None, None]: props = { "uri": f"sqlite:////{warehouse}/sql-catalog.db", "warehouse": f"file://{warehouse}", } - catalog = SqlCatalog("test_sql_catalog", **props) + catalog = SqlCatalog(catalog_name, **props) catalog.create_tables() yield catalog catalog.destroy_tables() @pytest.fixture(scope="module") -def catalog_sqlite_without_rowcount(warehouse: Path) -> Generator[SqlCatalog, None, None]: +def catalog_sqlite_without_rowcount(catalog_name: str, warehouse: Path) -> Generator[SqlCatalog, None, None]: props = { "uri": f"sqlite:////{warehouse}/sql-catalog.db", "warehouse": f"file://{warehouse}", } - catalog = SqlCatalog("test_sql_catalog", **props) + catalog = SqlCatalog(catalog_name, **props) catalog.engine.dialect.supports_sane_rowcount = False catalog.create_tables() yield catalog @@ -104,26 +146,26 @@ def catalog_sqlite_without_rowcount(warehouse: Path) -> Generator[SqlCatalog, No @pytest.fixture(scope="module") -def catalog_sqlite_fsspec(warehouse: Path) -> Generator[SqlCatalog, None, None]: +def catalog_sqlite_fsspec(catalog_name: str, warehouse: Path) -> Generator[SqlCatalog, None, None]: props = { "uri": f"sqlite:////{warehouse}/sql-catalog.db", "warehouse": f"file://{warehouse}", PY_IO_IMPL: FSSPEC_FILE_IO, } - catalog = SqlCatalog("test_sql_catalog", **props) + catalog = SqlCatalog(catalog_name, **props) catalog.create_tables() yield catalog catalog.destroy_tables() -def test_creation_with_no_uri() -> None: +def test_creation_with_no_uri(catalog_name: str) -> None: with pytest.raises(NoSuchPropertyException): - SqlCatalog("test_ddb_catalog", not_uri="unused") + SqlCatalog(catalog_name, not_uri="unused") -def test_creation_with_unsupported_uri() -> None: +def test_creation_with_unsupported_uri(catalog_name: str) -> None: with pytest.raises(ArgumentError): - SqlCatalog("test_ddb_catalog", uri="unsupported:xxx") + SqlCatalog(catalog_name, uri="unsupported:xxx") @pytest.mark.parametrize( @@ -146,13 +188,22 @@ def test_create_tables_idempotency(catalog: SqlCatalog) -> None: lazy_fixture('catalog_sqlite'), ], ) -def test_create_table_default_sort_order(catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier) -> None: - database_name, _table_name = random_identifier - catalog.create_namespace(database_name) - table = catalog.create_table(random_identifier, table_schema_nested) +@pytest.mark.parametrize( + "table_identifier", + [ + lazy_fixture("random_table_identifier"), + lazy_fixture("random_hierarchical_identifier"), + lazy_fixture("random_table_identifier_with_catalog"), + ], +) +def test_create_table_default_sort_order(catalog: SqlCatalog, table_schema_nested: Schema, table_identifier: Identifier) -> None: + table_identifier_nocatalog = catalog.identifier_to_tuple_without_catalog(table_identifier) + namespace = Catalog.namespace_from(table_identifier_nocatalog) + catalog.create_namespace(namespace) + table = catalog.create_table(table_identifier, table_schema_nested) assert table.sort_order().order_id == 0, "Order ID must match" assert 
table.sort_order().is_unsorted is True, "Order must be unsorted" - catalog.drop_table(random_identifier) + catalog.drop_table(table_identifier) @pytest.mark.parametrize( @@ -162,15 +213,24 @@ def test_create_table_default_sort_order(catalog: SqlCatalog, table_schema_neste lazy_fixture('catalog_sqlite'), ], ) -def test_create_v1_table(catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier) -> None: - database_name, _table_name = random_identifier - catalog.create_namespace(database_name) - table = catalog.create_table(random_identifier, table_schema_nested, properties={"format-version": "1"}) +@pytest.mark.parametrize( + "table_identifier", + [ + lazy_fixture("random_table_identifier"), + lazy_fixture("random_hierarchical_identifier"), + lazy_fixture("random_table_identifier_with_catalog"), + ], +) +def test_create_v1_table(catalog: SqlCatalog, table_schema_nested: Schema, table_identifier: Identifier) -> None: + table_identifier_nocatalog = catalog.identifier_to_tuple_without_catalog(table_identifier) + namespace = Catalog.namespace_from(table_identifier_nocatalog) + catalog.create_namespace(namespace) + table = catalog.create_table(table_identifier, table_schema_nested, properties={"format-version": "1"}) assert table.sort_order().order_id == 0, "Order ID must match" assert table.sort_order().is_unsorted is True, "Order must be unsorted" assert table.format_version == 1 assert table.spec() == UNPARTITIONED_PARTITION_SPEC - catalog.drop_table(random_identifier) + catalog.drop_table(table_identifier) @pytest.mark.parametrize( @@ -180,17 +240,26 @@ def test_create_v1_table(catalog: SqlCatalog, table_schema_nested: Schema, rando lazy_fixture('catalog_sqlite'), ], ) +@pytest.mark.parametrize( + "table_identifier", + [ + lazy_fixture("random_table_identifier"), + lazy_fixture("random_hierarchical_identifier"), + lazy_fixture("random_table_identifier_with_catalog"), + ], +) def test_create_table_with_pyarrow_schema( catalog: SqlCatalog, pyarrow_schema_simple_without_ids: pa.Schema, iceberg_table_schema_simple: Schema, - random_identifier: Identifier, + table_identifier: Identifier, ) -> None: - database_name, _table_name = random_identifier - catalog.create_namespace(database_name) - table = catalog.create_table(random_identifier, pyarrow_schema_simple_without_ids) + table_identifier_nocatalog = catalog.identifier_to_tuple_without_catalog(table_identifier) + namespace = Catalog.namespace_from(table_identifier_nocatalog) + catalog.create_namespace(namespace) + table = catalog.create_table(table_identifier, pyarrow_schema_simple_without_ids) assert table.schema() == iceberg_table_schema_simple - catalog.drop_table(random_identifier) + catalog.drop_table(table_identifier) @pytest.mark.parametrize( @@ -200,7 +269,15 @@ def test_create_table_with_pyarrow_schema( lazy_fixture('catalog_sqlite'), ], ) -def test_write_pyarrow_schema(catalog: SqlCatalog, random_identifier: Identifier) -> None: +@pytest.mark.parametrize( + "table_identifier", + [ + lazy_fixture("random_table_identifier"), + lazy_fixture("random_hierarchical_identifier"), + lazy_fixture("random_table_identifier_with_catalog"), + ], +) +def test_write_pyarrow_schema(catalog: SqlCatalog, table_identifier: Identifier) -> None: import pyarrow as pa pyarrow_table = pa.Table.from_arrays( @@ -217,9 +294,10 @@ def test_write_pyarrow_schema(catalog: SqlCatalog, random_identifier: Identifier pa.field('large', pa.large_string(), nullable=True), ]), ) - database_name, _table_name = random_identifier - 
catalog.create_namespace(database_name) - table = catalog.create_table(random_identifier, pyarrow_table.schema) + table_identifier_nocatalog = catalog.identifier_to_tuple_without_catalog(table_identifier) + namespace = Catalog.namespace_from(table_identifier_nocatalog) + catalog.create_namespace(namespace) + table = catalog.create_table(table_identifier, pyarrow_table.schema) table.overwrite(pyarrow_table) @@ -230,18 +308,27 @@ def test_write_pyarrow_schema(catalog: SqlCatalog, random_identifier: Identifier lazy_fixture('catalog_sqlite'), ], ) -def test_create_table_custom_sort_order(catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier) -> None: - database_name, _table_name = random_identifier - catalog.create_namespace(database_name) +@pytest.mark.parametrize( + "table_identifier", + [ + lazy_fixture("random_table_identifier"), + lazy_fixture("random_hierarchical_identifier"), + lazy_fixture("random_table_identifier_with_catalog"), + ], +) +def test_create_table_custom_sort_order(catalog: SqlCatalog, table_schema_nested: Schema, table_identifier: Identifier) -> None: + table_identifier_nocatalog = catalog.identifier_to_tuple_without_catalog(table_identifier) + namespace = Catalog.namespace_from(table_identifier_nocatalog) + catalog.create_namespace(namespace) order = SortOrder(SortField(source_id=2, transform=IdentityTransform(), null_order=NullOrder.NULLS_FIRST)) - table = catalog.create_table(random_identifier, table_schema_nested, sort_order=order) + table = catalog.create_table(table_identifier, table_schema_nested, sort_order=order) given_sort_order = table.sort_order() assert given_sort_order.order_id == 1, "Order ID must match" assert len(given_sort_order.fields) == 1, "Order must have 1 field" assert given_sort_order.fields[0].direction == SortDirection.ASC, "Direction must match" assert given_sort_order.fields[0].null_order == NullOrder.NULLS_FIRST, "Null order must match" assert isinstance(given_sort_order.fields[0].transform, IdentityTransform), "Transform must match" - catalog.drop_table(random_identifier) + catalog.drop_table(table_identifier) @pytest.mark.parametrize( @@ -251,17 +338,26 @@ def test_create_table_custom_sort_order(catalog: SqlCatalog, table_schema_nested lazy_fixture('catalog_sqlite'), ], ) +@pytest.mark.parametrize( + "table_identifier", + [ + lazy_fixture("random_table_identifier"), + lazy_fixture("random_hierarchical_identifier"), + lazy_fixture("random_table_identifier_with_catalog"), + ], +) def test_create_table_with_default_warehouse_location( - warehouse: Path, catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier + warehouse: Path, catalog: SqlCatalog, table_schema_nested: Schema, table_identifier: Identifier ) -> None: - database_name, _table_name = random_identifier - catalog.create_namespace(database_name) - catalog.create_table(random_identifier, table_schema_nested) - table = catalog.load_table(random_identifier) - assert table.identifier == (catalog.name,) + random_identifier + table_identifier_nocatalog = catalog.identifier_to_tuple_without_catalog(table_identifier) + namespace = Catalog.namespace_from(table_identifier_nocatalog) + catalog.create_namespace(namespace) + catalog.create_table(table_identifier, table_schema_nested) + table = catalog.load_table(table_identifier) + assert table.identifier == (catalog.name,) + table_identifier_nocatalog assert table.metadata_location.startswith(f"file://{warehouse}") assert os.path.exists(table.metadata_location[len("file://") :]) - 
catalog.drop_table(random_identifier) + catalog.drop_table(table_identifier) @pytest.mark.parametrize( @@ -271,19 +367,29 @@ def test_create_table_with_default_warehouse_location( lazy_fixture('catalog_sqlite'), ], ) +@pytest.mark.parametrize( + "table_identifier", + [ + lazy_fixture("random_table_identifier"), + lazy_fixture("random_hierarchical_identifier"), + lazy_fixture("random_table_identifier_with_catalog"), + ], +) def test_create_table_with_given_location_removes_trailing_slash( - warehouse: Path, catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier + warehouse: Path, catalog: SqlCatalog, table_schema_nested: Schema, table_identifier: Identifier ) -> None: - database_name, table_name = random_identifier - location = f"file://{warehouse}/{database_name}.db/{table_name}-given" - catalog.create_namespace(database_name) - catalog.create_table(random_identifier, table_schema_nested, location=f"{location}/") - table = catalog.load_table(random_identifier) - assert table.identifier == (catalog.name,) + random_identifier + table_identifier_nocatalog = catalog.identifier_to_tuple_without_catalog(table_identifier) + namespace = Catalog.namespace_from(table_identifier_nocatalog) + table_name = Catalog.table_name_from(table_identifier_nocatalog) + location = f"file://{warehouse}/{catalog.name}.db/{table_name}-given" + catalog.create_namespace(namespace) + catalog.create_table(table_identifier, table_schema_nested, location=f"{location}/") + table = catalog.load_table(table_identifier) + assert table.identifier == (catalog.name,) + table_identifier_nocatalog assert table.metadata_location.startswith(f"file://{warehouse}") assert os.path.exists(table.metadata_location[len("file://") :]) assert table.location() == location - catalog.drop_table(random_identifier) + catalog.drop_table(table_identifier) @pytest.mark.parametrize( @@ -293,12 +399,21 @@ def test_create_table_with_given_location_removes_trailing_slash( lazy_fixture('catalog_sqlite'), ], ) -def test_create_duplicated_table(catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier) -> None: - database_name, _table_name = random_identifier - catalog.create_namespace(database_name) - catalog.create_table(random_identifier, table_schema_nested) +@pytest.mark.parametrize( + "table_identifier", + [ + lazy_fixture("random_table_identifier"), + lazy_fixture("random_hierarchical_identifier"), + lazy_fixture("random_table_identifier_with_catalog"), + ], +) +def test_create_duplicated_table(catalog: SqlCatalog, table_schema_nested: Schema, table_identifier: Identifier) -> None: + table_identifier_nocatalog = catalog.identifier_to_tuple_without_catalog(table_identifier) + namespace = Catalog.namespace_from(table_identifier_nocatalog) + catalog.create_namespace(namespace) + catalog.create_table(table_identifier, table_schema_nested) with pytest.raises(TableAlreadyExistsError): - catalog.create_table(random_identifier, table_schema_nested) + catalog.create_table(table_identifier, table_schema_nested) @pytest.mark.parametrize( @@ -308,13 +423,22 @@ def test_create_duplicated_table(catalog: SqlCatalog, table_schema_nested: Schem lazy_fixture('catalog_sqlite'), ], ) +@pytest.mark.parametrize( + "table_identifier", + [ + lazy_fixture("random_table_identifier"), + lazy_fixture("random_hierarchical_identifier"), + lazy_fixture("random_table_identifier_with_catalog"), + ], +) def test_create_table_if_not_exists_duplicated_table( - catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier 
+ catalog: SqlCatalog, table_schema_nested: Schema, table_identifier: Identifier ) -> None: - database_name, _table_name = random_identifier - catalog.create_namespace(database_name) - table1 = catalog.create_table(random_identifier, table_schema_nested) - table2 = catalog.create_table_if_not_exists(random_identifier, table_schema_nested) + table_identifier_nocatalog = catalog.identifier_to_tuple_without_catalog(table_identifier) + namespace = Catalog.namespace_from(table_identifier_nocatalog) + catalog.create_namespace(namespace) + table1 = catalog.create_table(table_identifier, table_schema_nested) + table2 = catalog.create_table_if_not_exists(table_identifier, table_schema_nested) assert table1.identifier == table2.identifier @@ -339,7 +463,7 @@ def test_create_table_with_non_existing_namespace(catalog: SqlCatalog, table_sch ], ) def test_create_table_without_namespace(catalog: SqlCatalog, table_schema_nested: Schema, table_name: str) -> None: - with pytest.raises(ValueError): + with pytest.raises(NoSuchNamespaceError): catalog.create_table(table_name, table_schema_nested) @@ -350,14 +474,23 @@ def test_create_table_without_namespace(catalog: SqlCatalog, table_schema_nested lazy_fixture('catalog_sqlite'), ], ) -def test_register_table(catalog: SqlCatalog, random_identifier: Identifier, metadata_location: str) -> None: - database_name, _table_name = random_identifier - catalog.create_namespace(database_name) - table = catalog.register_table(random_identifier, metadata_location) - assert table.identifier == (catalog.name,) + random_identifier +@pytest.mark.parametrize( + "table_identifier", + [ + lazy_fixture("random_table_identifier"), + lazy_fixture("random_hierarchical_identifier"), + lazy_fixture("random_table_identifier_with_catalog"), + ], +) +def test_register_table(catalog: SqlCatalog, table_identifier: Identifier, metadata_location: str) -> None: + table_identifier_nocatalog = catalog.identifier_to_tuple_without_catalog(table_identifier) + namespace = Catalog.namespace_from(table_identifier_nocatalog) + catalog.create_namespace(namespace) + table = catalog.register_table(table_identifier, metadata_location) + assert table.identifier == (catalog.name,) + table_identifier_nocatalog assert table.metadata_location == metadata_location assert os.path.exists(metadata_location) - catalog.drop_table(random_identifier) + catalog.drop_table(table_identifier) @pytest.mark.parametrize( @@ -367,12 +500,21 @@ def test_register_table(catalog: SqlCatalog, random_identifier: Identifier, meta lazy_fixture('catalog_sqlite'), ], ) -def test_register_existing_table(catalog: SqlCatalog, random_identifier: Identifier, metadata_location: str) -> None: - database_name, _table_name = random_identifier - catalog.create_namespace(database_name) - catalog.register_table(random_identifier, metadata_location) +@pytest.mark.parametrize( + "table_identifier", + [ + lazy_fixture("random_table_identifier"), + lazy_fixture("random_hierarchical_identifier"), + lazy_fixture("random_table_identifier_with_catalog"), + ], +) +def test_register_existing_table(catalog: SqlCatalog, table_identifier: Identifier, metadata_location: str) -> None: + table_identifier_nocatalog = catalog.identifier_to_tuple_without_catalog(table_identifier) + namespace = Catalog.namespace_from(table_identifier_nocatalog) + catalog.create_namespace(namespace) + catalog.register_table(table_identifier, metadata_location) with pytest.raises(TableAlreadyExistsError): - catalog.register_table(random_identifier, metadata_location) + 
catalog.register_table(table_identifier, metadata_location) @pytest.mark.parametrize( @@ -407,11 +549,20 @@ def test_register_table_without_namespace(catalog: SqlCatalog, metadata_location lazy_fixture('catalog_sqlite'), ], ) -def test_load_table(catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier) -> None: - database_name, _table_name = random_identifier - catalog.create_namespace(database_name) - table = catalog.create_table(random_identifier, table_schema_nested) - loaded_table = catalog.load_table(random_identifier) +@pytest.mark.parametrize( + "table_identifier", + [ + lazy_fixture("random_table_identifier"), + lazy_fixture("random_hierarchical_identifier"), + lazy_fixture("random_table_identifier_with_catalog"), + ], +) +def test_load_table(catalog: SqlCatalog, table_schema_nested: Schema, table_identifier: Identifier) -> None: + table_identifier_nocatalog = catalog.identifier_to_tuple_without_catalog(table_identifier) + namespace = Catalog.namespace_from(table_identifier_nocatalog) + catalog.create_namespace(namespace) + table = catalog.create_table(table_identifier, table_schema_nested) + loaded_table = catalog.load_table(table_identifier) assert table.identifier == loaded_table.identifier assert table.metadata_location == loaded_table.metadata_location assert table.metadata == loaded_table.metadata @@ -424,12 +575,21 @@ def test_load_table(catalog: SqlCatalog, table_schema_nested: Schema, random_ide lazy_fixture('catalog_sqlite'), ], ) -def test_load_table_from_self_identifier(catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier) -> None: - database_name, _table_name = random_identifier - catalog.create_namespace(database_name) - table = catalog.create_table(random_identifier, table_schema_nested) - intermediate = catalog.load_table(random_identifier) - assert intermediate.identifier == (catalog.name,) + random_identifier +@pytest.mark.parametrize( + "table_identifier", + [ + lazy_fixture("random_table_identifier"), + lazy_fixture("random_hierarchical_identifier"), + lazy_fixture("random_table_identifier_with_catalog"), + ], +) +def test_load_table_from_self_identifier(catalog: SqlCatalog, table_schema_nested: Schema, table_identifier: Identifier) -> None: + table_identifier_nocatalog = catalog.identifier_to_tuple_without_catalog(table_identifier) + namespace = Catalog.namespace_from(table_identifier_nocatalog) + catalog.create_namespace(namespace) + table = catalog.create_table(table_identifier, table_schema_nested) + intermediate = catalog.load_table(table_identifier) + assert intermediate.identifier == (catalog.name,) + table_identifier_nocatalog loaded_table = catalog.load_table(intermediate.identifier) assert table.identifier == loaded_table.identifier assert table.metadata_location == loaded_table.metadata_location @@ -444,14 +604,23 @@ def test_load_table_from_self_identifier(catalog: SqlCatalog, table_schema_neste lazy_fixture('catalog_sqlite_without_rowcount'), ], ) -def test_drop_table(catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier) -> None: - database_name, _table_name = random_identifier - catalog.create_namespace(database_name) - table = catalog.create_table(random_identifier, table_schema_nested) - assert table.identifier == (catalog.name,) + random_identifier - catalog.drop_table(random_identifier) +@pytest.mark.parametrize( + "table_identifier", + [ + lazy_fixture("random_table_identifier"), + lazy_fixture("random_hierarchical_identifier"), + 
lazy_fixture("random_table_identifier_with_catalog"), + ], +) +def test_drop_table(catalog: SqlCatalog, table_schema_nested: Schema, table_identifier: Identifier) -> None: + table_identifier_nocatalog = catalog.identifier_to_tuple_without_catalog(table_identifier) + namespace = Catalog.namespace_from(table_identifier_nocatalog) + catalog.create_namespace(namespace) + table = catalog.create_table(table_identifier, table_schema_nested) + assert table.identifier == (catalog.name,) + table_identifier_nocatalog + catalog.drop_table(table_identifier) with pytest.raises(NoSuchTableError): - catalog.load_table(random_identifier) + catalog.load_table(table_identifier) @pytest.mark.parametrize( @@ -462,16 +631,25 @@ def test_drop_table(catalog: SqlCatalog, table_schema_nested: Schema, random_ide lazy_fixture('catalog_sqlite_without_rowcount'), ], ) -def test_drop_table_from_self_identifier(catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier) -> None: - database_name, _table_name = random_identifier - catalog.create_namespace(database_name) - table = catalog.create_table(random_identifier, table_schema_nested) - assert table.identifier == (catalog.name,) + random_identifier +@pytest.mark.parametrize( + "table_identifier", + [ + lazy_fixture("random_table_identifier"), + lazy_fixture("random_hierarchical_identifier"), + lazy_fixture("random_table_identifier_with_catalog"), + ], +) +def test_drop_table_from_self_identifier(catalog: SqlCatalog, table_schema_nested: Schema, table_identifier: Identifier) -> None: + table_identifier_nocatalog = catalog.identifier_to_tuple_without_catalog(table_identifier) + namespace = Catalog.namespace_from(table_identifier_nocatalog) + catalog.create_namespace(namespace) + table = catalog.create_table(table_identifier, table_schema_nested) + assert table.identifier == (catalog.name,) + table_identifier_nocatalog catalog.drop_table(table.identifier) with pytest.raises(NoSuchTableError): catalog.load_table(table.identifier) with pytest.raises(NoSuchTableError): - catalog.load_table(random_identifier) + catalog.load_table(table_identifier) @pytest.mark.parametrize( @@ -482,9 +660,17 @@ def test_drop_table_from_self_identifier(catalog: SqlCatalog, table_schema_neste lazy_fixture('catalog_sqlite_without_rowcount'), ], ) -def test_drop_table_that_does_not_exist(catalog: SqlCatalog, random_identifier: Identifier) -> None: +@pytest.mark.parametrize( + "table_identifier", + [ + lazy_fixture("random_table_identifier"), + lazy_fixture("random_hierarchical_identifier"), + lazy_fixture("random_table_identifier_with_catalog"), + ], +) +def test_drop_table_that_does_not_exist(catalog: SqlCatalog, table_identifier: Identifier) -> None: with pytest.raises(NoSuchTableError): - catalog.drop_table(random_identifier) + catalog.drop_table(table_identifier) @pytest.mark.parametrize( @@ -495,21 +681,39 @@ def test_drop_table_that_does_not_exist(catalog: SqlCatalog, random_identifier: lazy_fixture('catalog_sqlite_without_rowcount'), ], ) +@pytest.mark.parametrize( + "from_table_identifier", + [ + lazy_fixture("random_table_identifier"), + lazy_fixture("random_hierarchical_identifier"), + lazy_fixture("random_table_identifier_with_catalog"), + ], +) +@pytest.mark.parametrize( + "to_table_identifier", + [ + lazy_fixture("another_random_table_identifier"), + lazy_fixture("another_random_hierarchical_identifier"), + lazy_fixture("another_random_table_identifier_with_catalog"), + ], +) def test_rename_table( - catalog: SqlCatalog, table_schema_nested: Schema, 
random_identifier: Identifier, another_random_identifier: Identifier + catalog: SqlCatalog, table_schema_nested: Schema, from_table_identifier: Identifier, to_table_identifier: Identifier ) -> None: - from_database_name, _from_table_name = random_identifier - to_database_name, _to_table_name = another_random_identifier - catalog.create_namespace(from_database_name) - catalog.create_namespace(to_database_name) - table = catalog.create_table(random_identifier, table_schema_nested) - assert table.identifier == (catalog.name,) + random_identifier - catalog.rename_table(random_identifier, another_random_identifier) - new_table = catalog.load_table(another_random_identifier) - assert new_table.identifier == (catalog.name,) + another_random_identifier + from_table_identifier_nocatalog = catalog.identifier_to_tuple_without_catalog(from_table_identifier) + to_table_identifier_nocatalog = catalog.identifier_to_tuple_without_catalog(to_table_identifier) + from_namespace = Catalog.namespace_from(from_table_identifier_nocatalog) + to_namespace = Catalog.namespace_from(to_table_identifier_nocatalog) + catalog.create_namespace(from_namespace) + catalog.create_namespace(to_namespace) + table = catalog.create_table(from_table_identifier, table_schema_nested) + assert table.identifier == (catalog.name,) + from_table_identifier_nocatalog + catalog.rename_table(from_table_identifier, to_table_identifier) + new_table = catalog.load_table(to_table_identifier) + assert new_table.identifier == (catalog.name,) + to_table_identifier_nocatalog assert new_table.metadata_location == table.metadata_location with pytest.raises(NoSuchTableError): - catalog.load_table(random_identifier) + catalog.load_table(from_table_identifier) @pytest.mark.parametrize( @@ -520,23 +724,41 @@ def test_rename_table( lazy_fixture('catalog_sqlite_without_rowcount'), ], ) +@pytest.mark.parametrize( + "from_table_identifier", + [ + lazy_fixture("random_table_identifier"), + lazy_fixture("random_hierarchical_identifier"), + lazy_fixture("random_table_identifier_with_catalog"), + ], +) +@pytest.mark.parametrize( + "to_table_identifier", + [ + lazy_fixture("another_random_table_identifier"), + lazy_fixture("another_random_hierarchical_identifier"), + lazy_fixture("another_random_table_identifier_with_catalog"), + ], +) def test_rename_table_from_self_identifier( - catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier, another_random_identifier: Identifier + catalog: SqlCatalog, table_schema_nested: Schema, from_table_identifier: Identifier, to_table_identifier: Identifier ) -> None: - from_database_name, _from_table_name = random_identifier - to_database_name, _to_table_name = another_random_identifier - catalog.create_namespace(from_database_name) - catalog.create_namespace(to_database_name) - table = catalog.create_table(random_identifier, table_schema_nested) - assert table.identifier == (catalog.name,) + random_identifier - catalog.rename_table(table.identifier, another_random_identifier) - new_table = catalog.load_table(another_random_identifier) - assert new_table.identifier == (catalog.name,) + another_random_identifier + from_table_identifier_nocatalog = catalog.identifier_to_tuple_without_catalog(from_table_identifier) + to_table_identifier_nocatalog = catalog.identifier_to_tuple_without_catalog(to_table_identifier) + from_namespace = Catalog.namespace_from(from_table_identifier_nocatalog) + to_namespace = Catalog.namespace_from(to_table_identifier_nocatalog) + catalog.create_namespace(from_namespace) + 
catalog.create_namespace(to_namespace) + table = catalog.create_table(from_table_identifier, table_schema_nested) + assert table.identifier == (catalog.name,) + from_table_identifier_nocatalog + catalog.rename_table(table.identifier, to_table_identifier) + new_table = catalog.load_table(to_table_identifier) + assert new_table.identifier == (catalog.name,) + to_table_identifier_nocatalog assert new_table.metadata_location == table.metadata_location with pytest.raises(NoSuchTableError): catalog.load_table(table.identifier) with pytest.raises(NoSuchTableError): - catalog.load_table(random_identifier) + catalog.load_table(from_table_identifier) @pytest.mark.parametrize( @@ -547,19 +769,37 @@ def test_rename_table_from_self_identifier( lazy_fixture('catalog_sqlite_without_rowcount'), ], ) +@pytest.mark.parametrize( + "from_table_identifier", + [ + lazy_fixture("random_table_identifier"), + lazy_fixture("random_hierarchical_identifier"), + lazy_fixture("random_table_identifier_with_catalog"), + ], +) +@pytest.mark.parametrize( + "to_table_identifier", + [ + lazy_fixture("another_random_table_identifier"), + lazy_fixture("another_random_hierarchical_identifier"), + lazy_fixture("another_random_table_identifier_with_catalog"), + ], +) def test_rename_table_to_existing_one( - catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier, another_random_identifier: Identifier + catalog: SqlCatalog, table_schema_nested: Schema, from_table_identifier: Identifier, to_table_identifier: Identifier ) -> None: - from_database_name, _from_table_name = random_identifier - to_database_name, _to_table_name = another_random_identifier - catalog.create_namespace(from_database_name) - catalog.create_namespace(to_database_name) - table = catalog.create_table(random_identifier, table_schema_nested) - assert table.identifier == (catalog.name,) + random_identifier - new_table = catalog.create_table(another_random_identifier, table_schema_nested) - assert new_table.identifier == (catalog.name,) + another_random_identifier + from_table_identifier_nocatalog = catalog.identifier_to_tuple_without_catalog(from_table_identifier) + to_table_identifier_nocatalog = catalog.identifier_to_tuple_without_catalog(to_table_identifier) + from_namespace = Catalog.namespace_from(from_table_identifier_nocatalog) + to_namespace = Catalog.namespace_from(to_table_identifier_nocatalog) + catalog.create_namespace(from_namespace) + catalog.create_namespace(to_namespace) + table = catalog.create_table(from_table_identifier, table_schema_nested) + assert table.identifier == (catalog.name,) + from_table_identifier_nocatalog + new_table = catalog.create_table(to_table_identifier, table_schema_nested) + assert new_table.identifier == (catalog.name,) + to_table_identifier_nocatalog with pytest.raises(TableAlreadyExistsError): - catalog.rename_table(random_identifier, another_random_identifier) + catalog.rename_table(from_table_identifier, to_table_identifier) @pytest.mark.parametrize( @@ -570,11 +810,28 @@ def test_rename_table_to_existing_one( lazy_fixture('catalog_sqlite_without_rowcount'), ], ) -def test_rename_missing_table(catalog: SqlCatalog, random_identifier: Identifier, another_random_identifier: Identifier) -> None: - to_database_name, _to_table_name = another_random_identifier - catalog.create_namespace(to_database_name) +@pytest.mark.parametrize( + "from_table_identifier", + [ + lazy_fixture("random_table_identifier"), + lazy_fixture("random_hierarchical_identifier"), + 
lazy_fixture("random_table_identifier_with_catalog"), + ], +) +@pytest.mark.parametrize( + "to_table_identifier", + [ + lazy_fixture("another_random_table_identifier"), + lazy_fixture("another_random_hierarchical_identifier"), + lazy_fixture("another_random_table_identifier_with_catalog"), + ], +) +def test_rename_missing_table(catalog: SqlCatalog, from_table_identifier: Identifier, to_table_identifier: Identifier) -> None: + to_table_identifier_nocatalog = catalog.identifier_to_tuple_without_catalog(to_table_identifier) + to_namespace = Catalog.namespace_from(to_table_identifier_nocatalog) + catalog.create_namespace(to_namespace) with pytest.raises(NoSuchTableError): - catalog.rename_table(random_identifier, another_random_identifier) + catalog.rename_table(from_table_identifier, to_table_identifier) @pytest.mark.parametrize( @@ -585,15 +842,32 @@ def test_rename_missing_table(catalog: SqlCatalog, random_identifier: Identifier lazy_fixture('catalog_sqlite_without_rowcount'), ], ) +@pytest.mark.parametrize( + "from_table_identifier", + [ + lazy_fixture("random_table_identifier"), + lazy_fixture("random_hierarchical_identifier"), + lazy_fixture("random_table_identifier_with_catalog"), + ], +) +@pytest.mark.parametrize( + "to_table_identifier", + [ + lazy_fixture("another_random_table_identifier"), + lazy_fixture("another_random_hierarchical_identifier"), + lazy_fixture("another_random_table_identifier_with_catalog"), + ], +) def test_rename_table_to_missing_namespace( - catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier, another_random_identifier: Identifier + catalog: SqlCatalog, table_schema_nested: Schema, from_table_identifier: Identifier, to_table_identifier: Identifier ) -> None: - from_database_name, _from_table_name = random_identifier - catalog.create_namespace(from_database_name) - table = catalog.create_table(random_identifier, table_schema_nested) - assert table.identifier == (catalog.name,) + random_identifier + from_table_identifier_nocatalog = catalog.identifier_to_tuple_without_catalog(from_table_identifier) + from_namespace = Catalog.namespace_from(from_table_identifier_nocatalog) + catalog.create_namespace(from_namespace) + table = catalog.create_table(from_table_identifier, table_schema_nested) + assert table.identifier == (catalog.name,) + from_table_identifier_nocatalog with pytest.raises(NoSuchNamespaceError): - catalog.rename_table(random_identifier, another_random_identifier) + catalog.rename_table(from_table_identifier, to_table_identifier) @pytest.mark.parametrize( @@ -603,22 +877,40 @@ def test_rename_table_to_missing_namespace( lazy_fixture('catalog_sqlite'), ], ) +@pytest.mark.parametrize( + "table_identifier_1", + [ + lazy_fixture("random_table_identifier"), + lazy_fixture("random_hierarchical_identifier"), + lazy_fixture("random_table_identifier_with_catalog"), + ], +) +@pytest.mark.parametrize( + "table_identifier_2", + [ + lazy_fixture("another_random_table_identifier"), + lazy_fixture("another_random_hierarchical_identifier"), + lazy_fixture("another_random_table_identifier_with_catalog"), + ], +) def test_list_tables( - catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier, another_random_identifier: Identifier + catalog: SqlCatalog, table_schema_nested: Schema, table_identifier_1: Identifier, table_identifier_2: Identifier ) -> None: - database_name_1, _table_name_1 = random_identifier - database_name_2, _table_name_2 = another_random_identifier - catalog.create_namespace(database_name_1) - 
catalog.create_namespace(database_name_2) - catalog.create_table(random_identifier, table_schema_nested) - catalog.create_table(another_random_identifier, table_schema_nested) - identifier_list = catalog.list_tables(database_name_1) + table_identifier_1_nocatalog = catalog.identifier_to_tuple_without_catalog(table_identifier_1) + table_identifier_2_nocatalog = catalog.identifier_to_tuple_without_catalog(table_identifier_2) + namespace_1 = Catalog.namespace_from(table_identifier_1_nocatalog) + namespace_2 = Catalog.namespace_from(table_identifier_2_nocatalog) + catalog.create_namespace(namespace_1) + catalog.create_namespace(namespace_2) + catalog.create_table(table_identifier_1, table_schema_nested) + catalog.create_table(table_identifier_2, table_schema_nested) + identifier_list = catalog.list_tables(namespace_1) assert len(identifier_list) == 1 - assert random_identifier in identifier_list + assert table_identifier_1_nocatalog in identifier_list - identifier_list = catalog.list_tables(database_name_2) + identifier_list = catalog.list_tables(namespace_2) assert len(identifier_list) == 1 - assert another_random_identifier in identifier_list + assert table_identifier_2_nocatalog in identifier_list @pytest.mark.parametrize( @@ -628,9 +920,10 @@ def test_list_tables( lazy_fixture('catalog_sqlite'), ], ) -def test_create_namespace(catalog: SqlCatalog, database_name: str) -> None: - catalog.create_namespace(database_name) - assert (database_name,) in catalog.list_namespaces() +@pytest.mark.parametrize("namespace", [lazy_fixture("database_name"), lazy_fixture("hierarchical_namespace_name")]) +def test_list_tables_when_missing_namespace(catalog: SqlCatalog, namespace: str) -> None: + with pytest.raises(NoSuchNamespaceError): + catalog.list_tables(namespace) @pytest.mark.parametrize( @@ -654,10 +947,24 @@ def test_create_namespace_if_not_exists(catalog: SqlCatalog, database_name: str) lazy_fixture('catalog_sqlite'), ], ) -def test_create_duplicate_namespace(catalog: SqlCatalog, database_name: str) -> None: - catalog.create_namespace(database_name) +@pytest.mark.parametrize("namespace", [lazy_fixture("database_name"), lazy_fixture("hierarchical_namespace_name")]) +def test_create_namespace(catalog: SqlCatalog, namespace: str) -> None: + catalog.create_namespace(namespace) + assert (Catalog.identifier_to_tuple(namespace)) in catalog.list_namespaces() + + +@pytest.mark.parametrize( + 'catalog', + [ + lazy_fixture('catalog_memory'), + lazy_fixture('catalog_sqlite'), + ], +) +@pytest.mark.parametrize("namespace", [lazy_fixture("database_name"), lazy_fixture("hierarchical_namespace_name")]) +def test_create_duplicate_namespace(catalog: SqlCatalog, namespace: str) -> None: + catalog.create_namespace(namespace) with pytest.raises(NamespaceAlreadyExistsError): - catalog.create_namespace(database_name) + catalog.create_namespace(namespace) @pytest.mark.parametrize( @@ -667,10 +974,11 @@ def test_create_duplicate_namespace(catalog: SqlCatalog, database_name: str) -> lazy_fixture('catalog_sqlite'), ], ) -def test_create_namespaces_sharing_same_prefix(catalog: SqlCatalog, database_name: str) -> None: - catalog.create_namespace(database_name + "_1") +@pytest.mark.parametrize("namespace", [lazy_fixture("database_name"), lazy_fixture("hierarchical_namespace_name")]) +def test_create_namespaces_sharing_same_prefix(catalog: SqlCatalog, namespace: str) -> None: + catalog.create_namespace(namespace + "_1") # Second namespace is a prefix of the first one, make sure it can be added. 
- catalog.create_namespace(database_name) + catalog.create_namespace(namespace) @pytest.mark.parametrize( @@ -680,16 +988,17 @@ def test_create_namespaces_sharing_same_prefix(catalog: SqlCatalog, database_nam lazy_fixture('catalog_sqlite'), ], ) -def test_create_namespace_with_comment_and_location(catalog: SqlCatalog, database_name: str) -> None: +@pytest.mark.parametrize("namespace", [lazy_fixture("database_name"), lazy_fixture("hierarchical_namespace_name")]) +def test_create_namespace_with_comment_and_location(catalog: SqlCatalog, namespace: str) -> None: test_location = "/test/location" test_properties = { "comment": "this is a test description", "location": test_location, } - catalog.create_namespace(namespace=database_name, properties=test_properties) + catalog.create_namespace(namespace=namespace, properties=test_properties) loaded_database_list = catalog.list_namespaces() - assert (database_name,) in loaded_database_list - properties = catalog.load_namespace_properties(database_name) + assert Catalog.identifier_to_tuple(namespace) in loaded_database_list + properties = catalog.load_namespace_properties(namespace) assert properties["comment"] == "this is a test description" assert properties["location"] == test_location @@ -701,13 +1010,27 @@ def test_create_namespace_with_comment_and_location(catalog: SqlCatalog, databas lazy_fixture('catalog_sqlite'), ], ) +@pytest.mark.parametrize("namespace", [lazy_fixture("database_name"), lazy_fixture("hierarchical_namespace_name")]) @pytest.mark.filterwarnings("ignore") -def test_create_namespace_with_null_properties(catalog: SqlCatalog, database_name: str) -> None: +def test_create_namespace_with_null_properties(catalog: SqlCatalog, namespace: str) -> None: with pytest.raises(IntegrityError): - catalog.create_namespace(namespace=database_name, properties={None: "value"}) # type: ignore + catalog.create_namespace(namespace=namespace, properties={None: "value"}) # type: ignore with pytest.raises(IntegrityError): - catalog.create_namespace(namespace=database_name, properties={"key": None}) + catalog.create_namespace(namespace=namespace, properties={"key": None}) + + +@pytest.mark.parametrize( + 'catalog', + [ + lazy_fixture('catalog_memory'), + lazy_fixture('catalog_sqlite'), + ], +) +@pytest.mark.parametrize("empty_namespace", ["", (), (""), ("", ""), " ", (" ")]) +def test_create_namespace_with_empty_identifier(catalog: SqlCatalog, empty_namespace: Any) -> None: + with pytest.raises(NoSuchNamespaceError): + catalog.create_namespace(empty_namespace) @pytest.mark.parametrize( @@ -717,13 +1040,17 @@ def test_create_namespace_with_null_properties(catalog: SqlCatalog, database_nam lazy_fixture('catalog_sqlite'), ], ) -def test_list_namespaces(catalog: SqlCatalog, database_list: List[str]) -> None: - for database_name in database_list: - catalog.create_namespace(database_name) - db_list = catalog.list_namespaces() - for database_name in database_list: - assert (database_name,) in db_list - assert len(catalog.list_namespaces(database_name)) == 1 +@pytest.mark.parametrize("namespace_list", [lazy_fixture("database_list"), lazy_fixture("hierarchical_namespace_list")]) +def test_list_namespaces(catalog: SqlCatalog, namespace_list: List[str]) -> None: + for namespace in namespace_list: + catalog.create_namespace(namespace) + # Test global list + ns_list = catalog.list_namespaces() + for namespace in namespace_list: + assert Catalog.identifier_to_tuple(namespace) in ns_list + # Test individual namespace list + assert len(one_namespace := 
catalog.list_namespaces(namespace)) == 1 + assert Catalog.identifier_to_tuple(namespace) == one_namespace[0] @pytest.mark.parametrize( @@ -745,16 +1072,25 @@ def test_list_non_existing_namespaces(catalog: SqlCatalog) -> None: lazy_fixture('catalog_sqlite'), ], ) -def test_drop_namespace(catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier) -> None: - database_name, table_name = random_identifier - catalog.create_namespace(database_name) - assert (database_name,) in catalog.list_namespaces() - catalog.create_table((database_name, table_name), table_schema_nested) +@pytest.mark.parametrize( + "table_identifier", + [ + lazy_fixture("random_table_identifier"), + lazy_fixture("random_hierarchical_identifier"), + lazy_fixture("random_table_identifier_with_catalog"), + ], +) +def test_drop_namespace(catalog: SqlCatalog, table_schema_nested: Schema, table_identifier: Identifier) -> None: + table_identifier_nocatalog = catalog.identifier_to_tuple_without_catalog(table_identifier) + namespace = Catalog.namespace_from(table_identifier_nocatalog) + catalog.create_namespace(namespace) + assert namespace in catalog.list_namespaces() + catalog.create_table(table_identifier, table_schema_nested) with pytest.raises(NamespaceNotEmptyError): - catalog.drop_namespace(database_name) - catalog.drop_table((database_name, table_name)) - catalog.drop_namespace(database_name) - assert (database_name,) not in catalog.list_namespaces() + catalog.drop_namespace(namespace) + catalog.drop_table(table_identifier) + catalog.drop_namespace(namespace) + assert namespace not in catalog.list_namespaces() @pytest.mark.parametrize( @@ -764,18 +1100,19 @@ def test_drop_namespace(catalog: SqlCatalog, table_schema_nested: Schema, random lazy_fixture('catalog_sqlite'), ], ) -def test_load_namespace_properties(catalog: SqlCatalog, database_name: str) -> None: +@pytest.mark.parametrize("namespace", [lazy_fixture("database_name"), lazy_fixture("hierarchical_namespace_name")]) +def test_load_namespace_properties(catalog: SqlCatalog, namespace: str) -> None: warehouse_location = "/test/location" test_properties = { "comment": "this is a test description", - "location": f"{warehouse_location}/{database_name}.db", + "location": f"{warehouse_location}/{namespace}.db", "test_property1": "1", "test_property2": "2", "test_property3": "3", } - catalog.create_namespace(database_name, test_properties) - listed_properties = catalog.load_namespace_properties(database_name) + catalog.create_namespace(namespace, test_properties) + listed_properties = catalog.load_namespace_properties(namespace) for k, v in listed_properties.items(): assert k in test_properties assert v == test_properties[k] @@ -788,9 +1125,10 @@ def test_load_namespace_properties(catalog: SqlCatalog, database_name: str) -> N lazy_fixture('catalog_sqlite'), ], ) -def test_load_empty_namespace_properties(catalog: SqlCatalog, database_name: str) -> None: - catalog.create_namespace(database_name) - listed_properties = catalog.load_namespace_properties(database_name) +@pytest.mark.parametrize("namespace", [lazy_fixture("database_name"), lazy_fixture("hierarchical_namespace_name")]) +def test_load_empty_namespace_properties(catalog: SqlCatalog, namespace: str) -> None: + catalog.create_namespace(namespace) + listed_properties = catalog.load_namespace_properties(namespace) assert listed_properties == {"exists": "true"} @@ -813,19 +1151,20 @@ def test_load_namespace_properties_non_existing_namespace(catalog: SqlCatalog) - lazy_fixture('catalog_sqlite'), ], ) -def 
test_update_namespace_properties(catalog: SqlCatalog, database_name: str) -> None: +@pytest.mark.parametrize("namespace", [lazy_fixture("database_name"), lazy_fixture("hierarchical_namespace_name")]) +def test_update_namespace_properties(catalog: SqlCatalog, namespace: str) -> None: warehouse_location = "/test/location" test_properties = { "comment": "this is a test description", - "location": f"{warehouse_location}/{database_name}.db", + "location": f"{warehouse_location}/{namespace}.db", "test_property1": "1", "test_property2": "2", "test_property3": "3", } removals = {"test_property1", "test_property2", "test_property3", "should_not_removed"} updates = {"test_property4": "4", "test_property5": "5", "comment": "updated test description"} - catalog.create_namespace(database_name, test_properties) - update_report = catalog.update_namespace_properties(database_name, removals, updates) + catalog.create_namespace(namespace, test_properties) + update_report = catalog.update_namespace_properties(namespace, removals, updates) for k in updates.keys(): assert k in update_report.updated for k in removals: @@ -833,7 +1172,7 @@ def test_update_namespace_properties(catalog: SqlCatalog, database_name: str) -> assert k in update_report.missing else: assert k in update_report.removed - assert "updated test description" == catalog.load_namespace_properties(database_name)["comment"] + assert "updated test description" == catalog.load_namespace_properties(namespace)["comment"] @pytest.mark.parametrize( @@ -844,10 +1183,19 @@ def test_update_namespace_properties(catalog: SqlCatalog, database_name: str) -> lazy_fixture('catalog_sqlite_without_rowcount'), ], ) -def test_commit_table(catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier) -> None: - database_name, _table_name = random_identifier - catalog.create_namespace(database_name) - table = catalog.create_table(random_identifier, table_schema_nested) +@pytest.mark.parametrize( + "table_identifier", + [ + lazy_fixture("random_table_identifier"), + lazy_fixture("random_hierarchical_identifier"), + lazy_fixture("random_table_identifier_with_catalog"), + ], +) +def test_commit_table(catalog: SqlCatalog, table_schema_nested: Schema, table_identifier: Identifier) -> None: + table_identifier_nocatalog = catalog.identifier_to_tuple_without_catalog(table_identifier) + namespace = Catalog.namespace_from(table_identifier_nocatalog) + catalog.create_namespace(namespace) + table = catalog.create_table(table_identifier, table_schema_nested) assert catalog._parse_metadata_version(table.metadata_location) == 0 assert table.metadata.current_schema_id == 0 @@ -878,10 +1226,19 @@ def test_commit_table(catalog: SqlCatalog, table_schema_nested: Schema, random_i lazy_fixture('catalog_sqlite_fsspec'), ], ) -def test_append_table(catalog: SqlCatalog, table_schema_simple: Schema, random_identifier: Identifier) -> None: - database_name, _table_name = random_identifier - catalog.create_namespace(database_name) - table = catalog.create_table(random_identifier, table_schema_simple) +@pytest.mark.parametrize( + "table_identifier", + [ + lazy_fixture("random_table_identifier"), + lazy_fixture("random_hierarchical_identifier"), + lazy_fixture("random_table_identifier_with_catalog"), + ], +) +def test_append_table(catalog: SqlCatalog, table_schema_simple: Schema, table_identifier: Identifier) -> None: + table_identifier_nocatalog = catalog.identifier_to_tuple_without_catalog(table_identifier) + namespace = Catalog.namespace_from(table_identifier_nocatalog) + 
catalog.create_namespace(namespace) + table = catalog.create_table(table_identifier, table_schema_simple) df = pa.Table.from_pydict( { @@ -918,11 +1275,20 @@ def test_append_table(catalog: SqlCatalog, table_schema_simple: Schema, random_i lazy_fixture('catalog_sqlite_without_rowcount'), ], ) -def test_concurrent_commit_table(catalog: SqlCatalog, table_schema_simple: Schema, random_identifier: Identifier) -> None: - database_name, _table_name = random_identifier - catalog.create_namespace(database_name) - table_a = catalog.create_table(random_identifier, table_schema_simple) - table_b = catalog.load_table(random_identifier) +@pytest.mark.parametrize( + "table_identifier", + [ + lazy_fixture("random_table_identifier"), + lazy_fixture("random_hierarchical_identifier"), + lazy_fixture("random_table_identifier_with_catalog"), + ], +) +def test_concurrent_commit_table(catalog: SqlCatalog, table_schema_simple: Schema, table_identifier: Identifier) -> None: + table_identifier_nocatalog = catalog.identifier_to_tuple_without_catalog(table_identifier) + namespace = Catalog.namespace_from(table_identifier_nocatalog) + catalog.create_namespace(namespace) + table_a = catalog.create_table(table_identifier, table_schema_simple) + table_b = catalog.load_table(table_identifier) with table_a.update_schema() as update: update.add_column(path="b", field_type=IntegerType()) @@ -992,12 +1358,21 @@ def test_write_and_evolve(catalog: SqlCatalog, format_version: int) -> None: lazy_fixture('catalog_sqlite_without_rowcount'), ], ) -def test_table_properties_int_value(catalog: SqlCatalog, table_schema_simple: Schema, random_identifier: Identifier) -> None: +@pytest.mark.parametrize( + "table_identifier", + [ + lazy_fixture("random_table_identifier"), + lazy_fixture("random_hierarchical_identifier"), + lazy_fixture("random_table_identifier_with_catalog"), + ], +) +def test_table_properties_int_value(catalog: SqlCatalog, table_schema_simple: Schema, table_identifier: Identifier) -> None: # table properties can be set to int, but still serialized to string - database_name, _table_name = random_identifier - catalog.create_namespace(database_name) + table_identifier_nocatalog = catalog.identifier_to_tuple_without_catalog(table_identifier) + namespace = Catalog.namespace_from(table_identifier_nocatalog) + catalog.create_namespace(namespace) property_with_int = {"property_name": 42} - table = catalog.create_table(random_identifier, table_schema_simple, properties=property_with_int) + table = catalog.create_table(table_identifier, table_schema_simple, properties=property_with_int) assert isinstance(table.properties["property_name"], str) @@ -1009,14 +1384,23 @@ def test_table_properties_int_value(catalog: SqlCatalog, table_schema_simple: Sc lazy_fixture('catalog_sqlite_without_rowcount'), ], ) +@pytest.mark.parametrize( + "table_identifier", + [ + lazy_fixture("random_table_identifier"), + lazy_fixture("random_hierarchical_identifier"), + lazy_fixture("random_table_identifier_with_catalog"), + ], +) def test_table_properties_raise_for_none_value( - catalog: SqlCatalog, table_schema_simple: Schema, random_identifier: Identifier + catalog: SqlCatalog, table_schema_simple: Schema, table_identifier: Identifier ) -> None: - database_name, _table_name = random_identifier - catalog.create_namespace(database_name) + table_identifier_nocatalog = catalog.identifier_to_tuple_without_catalog(table_identifier) + namespace = Catalog.namespace_from(table_identifier_nocatalog) + catalog.create_namespace(namespace) property_with_none = 
{"property_name": None} with pytest.raises(ValidationError) as exc_info: - _ = catalog.create_table(random_identifier, table_schema_simple, properties=property_with_none) + _ = catalog.create_table(table_identifier, table_schema_simple, properties=property_with_none) assert "None type is not a supported value in properties: property_name" in str(exc_info.value) @@ -1027,11 +1411,20 @@ def test_table_properties_raise_for_none_value( lazy_fixture('catalog_sqlite'), ], ) -def test_table_exists(catalog: SqlCatalog, table_schema_simple: Schema, random_identifier: Identifier) -> None: - database_name, _table_name = random_identifier - catalog.create_namespace(database_name) - catalog.create_table(random_identifier, table_schema_simple, properties={"format-version": "2"}) - existing_table = random_identifier +@pytest.mark.parametrize( + "table_identifier", + [ + lazy_fixture("random_table_identifier"), + lazy_fixture("random_hierarchical_identifier"), + lazy_fixture("random_table_identifier_with_catalog"), + ], +) +def test_table_exists(catalog: SqlCatalog, table_schema_simple: Schema, table_identifier: Identifier) -> None: + table_identifier_nocatalog = catalog.identifier_to_tuple_without_catalog(table_identifier) + namespace = Catalog.namespace_from(table_identifier_nocatalog) + catalog.create_namespace(namespace) + catalog.create_table(table_identifier, table_schema_simple, properties={"format-version": "2"}) + existing_table = table_identifier # Act and Assert for an existing table assert catalog.table_exists(existing_table) is True diff --git a/tests/conftest.py b/tests/conftest.py index 6679543694..4baefafef4 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1878,6 +1878,19 @@ def database_list(database_name: str) -> List[str]: return [f"{database_name}_{idx}" for idx in range(NUM_TABLES)] +@pytest.fixture() +def hierarchical_namespace_name() -> str: + prefix = "my_iceberg_ns-" + random_tag1 = "".join(choice(string.ascii_letters) for _ in range(RANDOM_LENGTH)) + random_tag2 = "".join(choice(string.ascii_letters) for _ in range(RANDOM_LENGTH)) + return ".".join([prefix + random_tag1, prefix + random_tag2]).lower() + + +@pytest.fixture() +def hierarchical_namespace_list(hierarchical_namespace_name: str) -> List[str]: + return [f"{hierarchical_namespace_name}_{idx}" for idx in range(NUM_TABLES)] + + BUCKET_NAME = "test_bucket" TABLE_METADATA_LOCATION_REGEX = re.compile( r"""s3://test_bucket/my_iceberg_database-[a-z]{20}.db/ From 4fb8ba24290692dee02ec39e4b7480d75105e220 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 29 May 2024 06:54:45 +0200 Subject: [PATCH 17/68] Bump coverage from 7.5.2 to 7.5.3 (#776) Bumps [coverage](https://github.com/nedbat/coveragepy) from 7.5.2 to 7.5.3. - [Release notes](https://github.com/nedbat/coveragepy/releases) - [Changelog](https://github.com/nedbat/coveragepy/blob/master/CHANGES.rst) - [Commits](https://github.com/nedbat/coveragepy/compare/7.5.2...7.5.3) --- updated-dependencies: - dependency-name: coverage dependency-type: direct:development update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 106 ++++++++++++++++++++++++++-------------------------- 1 file changed, 53 insertions(+), 53 deletions(-) diff --git a/poetry.lock b/poetry.lock index 4ef706c873..7e413b58df 100644 --- a/poetry.lock +++ b/poetry.lock @@ -652,63 +652,63 @@ files = [ [[package]] name = "coverage" -version = "7.5.2" +version = "7.5.3" description = "Code coverage measurement for Python" optional = false python-versions = ">=3.8" files = [ - {file = "coverage-7.5.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:554c7327bf0fd688050348e22db7c8e163fb7219f3ecdd4732d7ed606b417263"}, - {file = "coverage-7.5.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d0305e02e40c7cfea5d08d6368576537a74c0eea62b77633179748d3519d6705"}, - {file = "coverage-7.5.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:829fb55ad437d757c70d5b1c51cfda9377f31506a0a3f3ac282bc6a387d6a5f1"}, - {file = "coverage-7.5.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:894b1acded706f1407a662d08e026bfd0ff1e59e9bd32062fea9d862564cfb65"}, - {file = "coverage-7.5.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fe76d6dee5e4febefa83998b17926df3a04e5089e3d2b1688c74a9157798d7a2"}, - {file = "coverage-7.5.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:c7ebf2a37e4f5fea3c1a11e1f47cea7d75d0f2d8ef69635ddbd5c927083211fc"}, - {file = "coverage-7.5.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:20e611fc36e1a0fc7bbf957ef9c635c8807d71fbe5643e51b2769b3cc0fb0b51"}, - {file = "coverage-7.5.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:7c5c5b7ae2763533152880d5b5b451acbc1089ade2336b710a24b2b0f5239d20"}, - {file = "coverage-7.5.2-cp310-cp310-win32.whl", hash = "sha256:1e4225990a87df898e40ca31c9e830c15c2c53b1d33df592bc8ef314d71f0281"}, - {file = "coverage-7.5.2-cp310-cp310-win_amd64.whl", hash = "sha256:976cd92d9420e6e2aa6ce6a9d61f2b490e07cb468968adf371546b33b829284b"}, - {file = "coverage-7.5.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5997d418c219dcd4dcba64e50671cca849aaf0dac3d7a2eeeb7d651a5bd735b8"}, - {file = "coverage-7.5.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ec27e93bbf5976f0465e8936f02eb5add99bbe4e4e7b233607e4d7622912d68d"}, - {file = "coverage-7.5.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f11f98753800eb1ec872562a398081f6695f91cd01ce39819e36621003ec52a"}, - {file = "coverage-7.5.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6e34680049eecb30b6498784c9637c1c74277dcb1db75649a152f8004fbd6646"}, - {file = "coverage-7.5.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3e12536446ad4527ac8ed91d8a607813085683bcce27af69e3b31cd72b3c5960"}, - {file = "coverage-7.5.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:3d3f7744b8a8079d69af69d512e5abed4fb473057625588ce126088e50d05493"}, - {file = "coverage-7.5.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:431a3917e32223fcdb90b79fe60185864a9109631ebc05f6c5aa03781a00b513"}, - {file = "coverage-7.5.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:a7c6574225f34ce45466f04751d957b5c5e6b69fca9351db017c9249786172ce"}, - {file = "coverage-7.5.2-cp311-cp311-win32.whl", hash = 
"sha256:2b144d142ec9987276aeff1326edbc0df8ba4afbd7232f0ca10ad57a115e95b6"}, - {file = "coverage-7.5.2-cp311-cp311-win_amd64.whl", hash = "sha256:900532713115ac58bc3491b9d2b52704a05ed408ba0918d57fd72c94bc47fba1"}, - {file = "coverage-7.5.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:9a42970ce74c88bdf144df11c52c5cf4ad610d860de87c0883385a1c9d9fa4ab"}, - {file = "coverage-7.5.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:26716a1118c6ce2188283b4b60a898c3be29b480acbd0a91446ced4fe4e780d8"}, - {file = "coverage-7.5.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:60b66b0363c5a2a79fba3d1cd7430c25bbd92c923d031cae906bdcb6e054d9a2"}, - {file = "coverage-7.5.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e5d22eba19273b2069e4efeff88c897a26bdc64633cbe0357a198f92dca94268"}, - {file = "coverage-7.5.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3bb5b92a0ab3d22dfdbfe845e2fef92717b067bdf41a5b68c7e3e857c0cff1a4"}, - {file = "coverage-7.5.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:1aef719b6559b521ae913ddeb38f5048c6d1a3d366865e8b320270b7bc4693c2"}, - {file = "coverage-7.5.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:8809c0ea0e8454f756e3bd5c36d04dddf222989216788a25bfd6724bfcee342c"}, - {file = "coverage-7.5.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:1acc2e2ef098a1d4bf535758085f508097316d738101a97c3f996bccba963ea5"}, - {file = "coverage-7.5.2-cp312-cp312-win32.whl", hash = "sha256:97de509043d3f0f2b2cd171bdccf408f175c7f7a99d36d566b1ae4dd84107985"}, - {file = "coverage-7.5.2-cp312-cp312-win_amd64.whl", hash = "sha256:8941e35a0e991a7a20a1fa3e3182f82abe357211f2c335a9e6007067c3392fcf"}, - {file = "coverage-7.5.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:5662bf0f6fb6757f5c2d6279c541a5af55a39772c2362ed0920b27e3ce0e21f7"}, - {file = "coverage-7.5.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:3d9c62cff2ffb4c2a95328488fd7aa96a7a4b34873150650fe76b19c08c9c792"}, - {file = "coverage-7.5.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:74eeaa13e8200ad72fca9c5f37395fb310915cec6f1682b21375e84fd9770e84"}, - {file = "coverage-7.5.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f29bf497d51a5077994b265e976d78b09d9d0dff6ca5763dbb4804534a5d380"}, - {file = "coverage-7.5.2-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f96aa94739593ae0707eda9813ce363a0a0374a810ae0eced383340fc4a1f73"}, - {file = "coverage-7.5.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:51b6cee539168a912b4b3b040e4042b9e2c9a7ad9c8546c09e4eaeff3eacba6b"}, - {file = "coverage-7.5.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:59a75e6aa5c25b50b5a1499f9718f2edff54257f545718c4fb100f48d570ead4"}, - {file = "coverage-7.5.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:29da75ce20cb0a26d60e22658dd3230713c6c05a3465dd8ad040ffc991aea318"}, - {file = "coverage-7.5.2-cp38-cp38-win32.whl", hash = "sha256:23f2f16958b16152b43a39a5ecf4705757ddd284b3b17a77da3a62aef9c057ef"}, - {file = "coverage-7.5.2-cp38-cp38-win_amd64.whl", hash = "sha256:9e41c94035e5cdb362beed681b58a707e8dc29ea446ea1713d92afeded9d1ddd"}, - {file = "coverage-7.5.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:06d96b9b19bbe7f049c2be3c4f9e06737ec6d8ef8933c7c3a4c557ef07936e46"}, - {file = 
"coverage-7.5.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:878243e1206828908a6b4a9ca7b1aa8bee9eb129bf7186fc381d2646f4524ce9"}, - {file = "coverage-7.5.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:482df956b055d3009d10fce81af6ffab28215d7ed6ad4a15e5c8e67cb7c5251c"}, - {file = "coverage-7.5.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a35c97af60a5492e9e89f8b7153fe24eadfd61cb3a2fb600df1a25b5dab34b7e"}, - {file = "coverage-7.5.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:24bb4c7859a3f757a116521d4d3a8a82befad56ea1bdacd17d6aafd113b0071e"}, - {file = "coverage-7.5.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:e1046aab24c48c694f0793f669ac49ea68acde6a0798ac5388abe0a5615b5ec8"}, - {file = "coverage-7.5.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:448ec61ea9ea7916d5579939362509145caaecf03161f6f13e366aebb692a631"}, - {file = "coverage-7.5.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:4a00bd5ba8f1a4114720bef283cf31583d6cb1c510ce890a6da6c4268f0070b7"}, - {file = "coverage-7.5.2-cp39-cp39-win32.whl", hash = "sha256:9f805481d5eff2a96bac4da1570ef662bf970f9a16580dc2c169c8c3183fa02b"}, - {file = "coverage-7.5.2-cp39-cp39-win_amd64.whl", hash = "sha256:2c79f058e7bec26b5295d53b8c39ecb623448c74ccc8378631f5cb5c16a7e02c"}, - {file = "coverage-7.5.2-pp38.pp39.pp310-none-any.whl", hash = "sha256:40dbb8e7727560fe8ab65efcddfec1ae25f30ef02e2f2e5d78cfb52a66781ec5"}, - {file = "coverage-7.5.2.tar.gz", hash = "sha256:13017a63b0e499c59b5ba94a8542fb62864ba3016127d1e4ef30d354fc2b00e9"}, + {file = "coverage-7.5.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a6519d917abb15e12380406d721e37613e2a67d166f9fb7e5a8ce0375744cd45"}, + {file = "coverage-7.5.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:aea7da970f1feccf48be7335f8b2ca64baf9b589d79e05b9397a06696ce1a1ec"}, + {file = "coverage-7.5.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:923b7b1c717bd0f0f92d862d1ff51d9b2b55dbbd133e05680204465f454bb286"}, + {file = "coverage-7.5.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:62bda40da1e68898186f274f832ef3e759ce929da9a9fd9fcf265956de269dbc"}, + {file = "coverage-7.5.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d8b7339180d00de83e930358223c617cc343dd08e1aa5ec7b06c3a121aec4e1d"}, + {file = "coverage-7.5.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:25a5caf742c6195e08002d3b6c2dd6947e50efc5fc2c2205f61ecb47592d2d83"}, + {file = "coverage-7.5.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:05ac5f60faa0c704c0f7e6a5cbfd6f02101ed05e0aee4d2822637a9e672c998d"}, + {file = "coverage-7.5.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:239a4e75e09c2b12ea478d28815acf83334d32e722e7433471fbf641c606344c"}, + {file = "coverage-7.5.3-cp310-cp310-win32.whl", hash = "sha256:a5812840d1d00eafae6585aba38021f90a705a25b8216ec7f66aebe5b619fb84"}, + {file = "coverage-7.5.3-cp310-cp310-win_amd64.whl", hash = "sha256:33ca90a0eb29225f195e30684ba4a6db05dbef03c2ccd50b9077714c48153cac"}, + {file = "coverage-7.5.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f81bc26d609bf0fbc622c7122ba6307993c83c795d2d6f6f6fd8c000a770d974"}, + {file = "coverage-7.5.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7cec2af81f9e7569280822be68bd57e51b86d42e59ea30d10ebdbb22d2cb7232"}, + {file 
= "coverage-7.5.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:55f689f846661e3f26efa535071775d0483388a1ccfab899df72924805e9e7cd"}, + {file = "coverage-7.5.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:50084d3516aa263791198913a17354bd1dc627d3c1639209640b9cac3fef5807"}, + {file = "coverage-7.5.3-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:341dd8f61c26337c37988345ca5c8ccabeff33093a26953a1ac72e7d0103c4fb"}, + {file = "coverage-7.5.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ab0b028165eea880af12f66086694768f2c3139b2c31ad5e032c8edbafca6ffc"}, + {file = "coverage-7.5.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:5bc5a8c87714b0c67cfeb4c7caa82b2d71e8864d1a46aa990b5588fa953673b8"}, + {file = "coverage-7.5.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:38a3b98dae8a7c9057bd91fbf3415c05e700a5114c5f1b5b0ea5f8f429ba6614"}, + {file = "coverage-7.5.3-cp311-cp311-win32.whl", hash = "sha256:fcf7d1d6f5da887ca04302db8e0e0cf56ce9a5e05f202720e49b3e8157ddb9a9"}, + {file = "coverage-7.5.3-cp311-cp311-win_amd64.whl", hash = "sha256:8c836309931839cca658a78a888dab9676b5c988d0dd34ca247f5f3e679f4e7a"}, + {file = "coverage-7.5.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:296a7d9bbc598e8744c00f7a6cecf1da9b30ae9ad51c566291ff1314e6cbbed8"}, + {file = "coverage-7.5.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:34d6d21d8795a97b14d503dcaf74226ae51eb1f2bd41015d3ef332a24d0a17b3"}, + {file = "coverage-7.5.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e317953bb4c074c06c798a11dbdd2cf9979dbcaa8ccc0fa4701d80042d4ebf1"}, + {file = "coverage-7.5.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:705f3d7c2b098c40f5b81790a5fedb274113373d4d1a69e65f8b68b0cc26f6db"}, + {file = "coverage-7.5.3-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b1196e13c45e327d6cd0b6e471530a1882f1017eb83c6229fc613cd1a11b53cd"}, + {file = "coverage-7.5.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:015eddc5ccd5364dcb902eaecf9515636806fa1e0d5bef5769d06d0f31b54523"}, + {file = "coverage-7.5.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:fd27d8b49e574e50caa65196d908f80e4dff64d7e592d0c59788b45aad7e8b35"}, + {file = "coverage-7.5.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:33fc65740267222fc02975c061eb7167185fef4cc8f2770267ee8bf7d6a42f84"}, + {file = "coverage-7.5.3-cp312-cp312-win32.whl", hash = "sha256:7b2a19e13dfb5c8e145c7a6ea959485ee8e2204699903c88c7d25283584bfc08"}, + {file = "coverage-7.5.3-cp312-cp312-win_amd64.whl", hash = "sha256:0bbddc54bbacfc09b3edaec644d4ac90c08ee8ed4844b0f86227dcda2d428fcb"}, + {file = "coverage-7.5.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f78300789a708ac1f17e134593f577407d52d0417305435b134805c4fb135adb"}, + {file = "coverage-7.5.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b368e1aee1b9b75757942d44d7598dcd22a9dbb126affcbba82d15917f0cc155"}, + {file = "coverage-7.5.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f836c174c3a7f639bded48ec913f348c4761cbf49de4a20a956d3431a7c9cb24"}, + {file = "coverage-7.5.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:244f509f126dc71369393ce5fea17c0592c40ee44e607b6d855e9c4ac57aac98"}, + {file = 
"coverage-7.5.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c4c2872b3c91f9baa836147ca33650dc5c172e9273c808c3c3199c75490e709d"}, + {file = "coverage-7.5.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:dd4b3355b01273a56b20c219e74e7549e14370b31a4ffe42706a8cda91f19f6d"}, + {file = "coverage-7.5.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:f542287b1489c7a860d43a7d8883e27ca62ab84ca53c965d11dac1d3a1fab7ce"}, + {file = "coverage-7.5.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:75e3f4e86804023e991096b29e147e635f5e2568f77883a1e6eed74512659ab0"}, + {file = "coverage-7.5.3-cp38-cp38-win32.whl", hash = "sha256:c59d2ad092dc0551d9f79d9d44d005c945ba95832a6798f98f9216ede3d5f485"}, + {file = "coverage-7.5.3-cp38-cp38-win_amd64.whl", hash = "sha256:fa21a04112c59ad54f69d80e376f7f9d0f5f9123ab87ecd18fbb9ec3a2beed56"}, + {file = "coverage-7.5.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f5102a92855d518b0996eb197772f5ac2a527c0ec617124ad5242a3af5e25f85"}, + {file = "coverage-7.5.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d1da0a2e3b37b745a2b2a678a4c796462cf753aebf94edcc87dcc6b8641eae31"}, + {file = "coverage-7.5.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8383a6c8cefba1b7cecc0149415046b6fc38836295bc4c84e820872eb5478b3d"}, + {file = "coverage-7.5.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9aad68c3f2566dfae84bf46295a79e79d904e1c21ccfc66de88cd446f8686341"}, + {file = "coverage-7.5.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2e079c9ec772fedbade9d7ebc36202a1d9ef7291bc9b3a024ca395c4d52853d7"}, + {file = "coverage-7.5.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:bde997cac85fcac227b27d4fb2c7608a2c5f6558469b0eb704c5726ae49e1c52"}, + {file = "coverage-7.5.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:990fb20b32990b2ce2c5f974c3e738c9358b2735bc05075d50a6f36721b8f303"}, + {file = "coverage-7.5.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:3d5a67f0da401e105753d474369ab034c7bae51a4c31c77d94030d59e41df5bd"}, + {file = "coverage-7.5.3-cp39-cp39-win32.whl", hash = "sha256:e08c470c2eb01977d221fd87495b44867a56d4d594f43739a8028f8646a51e0d"}, + {file = "coverage-7.5.3-cp39-cp39-win_amd64.whl", hash = "sha256:1d2a830ade66d3563bb61d1e3c77c8def97b30ed91e166c67d0632c018f380f0"}, + {file = "coverage-7.5.3-pp38.pp39.pp310-none-any.whl", hash = "sha256:3538d8fb1ee9bdd2e2692b3b18c22bb1c19ffbefd06880f5ac496e42d7bb3884"}, + {file = "coverage-7.5.3.tar.gz", hash = "sha256:04aefca5190d1dc7a53a4c1a5a7f8568811306d7a8ee231c42fb69215571944f"}, ] [package.dependencies] From ec8d7dc2bc8fff87e8192c4eef377945bd0e4015 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 29 May 2024 08:33:25 +0200 Subject: [PATCH 18/68] Bump pydantic from 2.7.1 to 2.7.2 (#775) Bumps [pydantic](https://github.com/pydantic/pydantic) from 2.7.1 to 2.7.2. - [Release notes](https://github.com/pydantic/pydantic/releases) - [Changelog](https://github.com/pydantic/pydantic/blob/main/HISTORY.md) - [Commits](https://github.com/pydantic/pydantic/compare/v2.7.1...v2.7.2) --- updated-dependencies: - dependency-name: pydantic dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 168 ++++++++++++++++++++++++++-------------------------- 1 file changed, 84 insertions(+), 84 deletions(-) diff --git a/poetry.lock b/poetry.lock index 7e413b58df..35c0f9ee0d 100644 --- a/poetry.lock +++ b/poetry.lock @@ -3024,18 +3024,18 @@ files = [ [[package]] name = "pydantic" -version = "2.7.1" +version = "2.7.2" description = "Data validation using Python type hints" optional = false python-versions = ">=3.8" files = [ - {file = "pydantic-2.7.1-py3-none-any.whl", hash = "sha256:e029badca45266732a9a79898a15ae2e8b14840b1eabbb25844be28f0b33f3d5"}, - {file = "pydantic-2.7.1.tar.gz", hash = "sha256:e9dbb5eada8abe4d9ae5f46b9939aead650cd2b68f249bb3a8139dbe125803cc"}, + {file = "pydantic-2.7.2-py3-none-any.whl", hash = "sha256:834ab954175f94e6e68258537dc49402c4a5e9d0409b9f1b86b7e934a8372de7"}, + {file = "pydantic-2.7.2.tar.gz", hash = "sha256:71b2945998f9c9b7919a45bde9a50397b289937d215ae141c1d0903ba7149fd7"}, ] [package.dependencies] annotated-types = ">=0.4.0" -pydantic-core = "2.18.2" +pydantic-core = "2.18.3" typing-extensions = ">=4.6.1" [package.extras] @@ -3043,90 +3043,90 @@ email = ["email-validator (>=2.0.0)"] [[package]] name = "pydantic-core" -version = "2.18.2" +version = "2.18.3" description = "Core functionality for Pydantic validation and serialization" optional = false python-versions = ">=3.8" files = [ - {file = "pydantic_core-2.18.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:9e08e867b306f525802df7cd16c44ff5ebbe747ff0ca6cf3fde7f36c05a59a81"}, - {file = "pydantic_core-2.18.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f0a21cbaa69900cbe1a2e7cad2aa74ac3cf21b10c3efb0fa0b80305274c0e8a2"}, - {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0680b1f1f11fda801397de52c36ce38ef1c1dc841a0927a94f226dea29c3ae3d"}, - {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:95b9d5e72481d3780ba3442eac863eae92ae43a5f3adb5b4d0a1de89d42bb250"}, - {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c4fcf5cd9c4b655ad666ca332b9a081112cd7a58a8b5a6ca7a3104bc950f2038"}, - {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9b5155ff768083cb1d62f3e143b49a8a3432e6789a3abee8acd005c3c7af1c74"}, - {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:553ef617b6836fc7e4df130bb851e32fe357ce36336d897fd6646d6058d980af"}, - {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b89ed9eb7d616ef5714e5590e6cf7f23b02d0d539767d33561e3675d6f9e3857"}, - {file = "pydantic_core-2.18.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:75f7e9488238e920ab6204399ded280dc4c307d034f3924cd7f90a38b1829563"}, - {file = "pydantic_core-2.18.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:ef26c9e94a8c04a1b2924149a9cb081836913818e55681722d7f29af88fe7b38"}, - {file = "pydantic_core-2.18.2-cp310-none-win32.whl", hash = "sha256:182245ff6b0039e82b6bb585ed55a64d7c81c560715d1bad0cbad6dfa07b4027"}, - {file = "pydantic_core-2.18.2-cp310-none-win_amd64.whl", hash = "sha256:e23ec367a948b6d812301afc1b13f8094ab7b2c280af66ef450efc357d2ae543"}, - {file = "pydantic_core-2.18.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = 
"sha256:219da3f096d50a157f33645a1cf31c0ad1fe829a92181dd1311022f986e5fbe3"}, - {file = "pydantic_core-2.18.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:cc1cfd88a64e012b74e94cd00bbe0f9c6df57049c97f02bb07d39e9c852e19a4"}, - {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:05b7133a6e6aeb8df37d6f413f7705a37ab4031597f64ab56384c94d98fa0e90"}, - {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:224c421235f6102e8737032483f43c1a8cfb1d2f45740c44166219599358c2cd"}, - {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b14d82cdb934e99dda6d9d60dc84a24379820176cc4a0d123f88df319ae9c150"}, - {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2728b01246a3bba6de144f9e3115b532ee44bd6cf39795194fb75491824a1413"}, - {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:470b94480bb5ee929f5acba6995251ada5e059a5ef3e0dfc63cca287283ebfa6"}, - {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:997abc4df705d1295a42f95b4eec4950a37ad8ae46d913caeee117b6b198811c"}, - {file = "pydantic_core-2.18.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:75250dbc5290e3f1a0f4618db35e51a165186f9034eff158f3d490b3fed9f8a0"}, - {file = "pydantic_core-2.18.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:4456f2dca97c425231d7315737d45239b2b51a50dc2b6f0c2bb181fce6207664"}, - {file = "pydantic_core-2.18.2-cp311-none-win32.whl", hash = "sha256:269322dcc3d8bdb69f054681edff86276b2ff972447863cf34c8b860f5188e2e"}, - {file = "pydantic_core-2.18.2-cp311-none-win_amd64.whl", hash = "sha256:800d60565aec896f25bc3cfa56d2277d52d5182af08162f7954f938c06dc4ee3"}, - {file = "pydantic_core-2.18.2-cp311-none-win_arm64.whl", hash = "sha256:1404c69d6a676245199767ba4f633cce5f4ad4181f9d0ccb0577e1f66cf4c46d"}, - {file = "pydantic_core-2.18.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:fb2bd7be70c0fe4dfd32c951bc813d9fe6ebcbfdd15a07527796c8204bd36242"}, - {file = "pydantic_core-2.18.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6132dd3bd52838acddca05a72aafb6eab6536aa145e923bb50f45e78b7251043"}, - {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7d904828195733c183d20a54230c0df0eb46ec746ea1a666730787353e87182"}, - {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c9bd70772c720142be1020eac55f8143a34ec9f82d75a8e7a07852023e46617f"}, - {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2b8ed04b3582771764538f7ee7001b02e1170223cf9b75dff0bc698fadb00cf3"}, - {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e6dac87ddb34aaec85f873d737e9d06a3555a1cc1a8e0c44b7f8d5daeb89d86f"}, - {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ca4ae5a27ad7a4ee5170aebce1574b375de390bc01284f87b18d43a3984df72"}, - {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:886eec03591b7cf058467a70a87733b35f44707bd86cf64a615584fd72488b7c"}, - {file = "pydantic_core-2.18.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ca7b0c1f1c983e064caa85f3792dd2fe3526b3505378874afa84baf662e12241"}, - {file = 
"pydantic_core-2.18.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4b4356d3538c3649337df4074e81b85f0616b79731fe22dd11b99499b2ebbdf3"}, - {file = "pydantic_core-2.18.2-cp312-none-win32.whl", hash = "sha256:8b172601454f2d7701121bbec3425dd71efcb787a027edf49724c9cefc14c038"}, - {file = "pydantic_core-2.18.2-cp312-none-win_amd64.whl", hash = "sha256:b1bd7e47b1558ea872bd16c8502c414f9e90dcf12f1395129d7bb42a09a95438"}, - {file = "pydantic_core-2.18.2-cp312-none-win_arm64.whl", hash = "sha256:98758d627ff397e752bc339272c14c98199c613f922d4a384ddc07526c86a2ec"}, - {file = "pydantic_core-2.18.2-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:9fdad8e35f278b2c3eb77cbdc5c0a49dada440657bf738d6905ce106dc1de439"}, - {file = "pydantic_core-2.18.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:1d90c3265ae107f91a4f279f4d6f6f1d4907ac76c6868b27dc7fb33688cfb347"}, - {file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:390193c770399861d8df9670fb0d1874f330c79caaca4642332df7c682bf6b91"}, - {file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:82d5d4d78e4448683cb467897fe24e2b74bb7b973a541ea1dcfec1d3cbce39fb"}, - {file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4774f3184d2ef3e14e8693194f661dea5a4d6ca4e3dc8e39786d33a94865cefd"}, - {file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d4d938ec0adf5167cb335acb25a4ee69a8107e4984f8fbd2e897021d9e4ca21b"}, - {file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e0e8b1be28239fc64a88a8189d1df7fad8be8c1ae47fcc33e43d4be15f99cc70"}, - {file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:868649da93e5a3d5eacc2b5b3b9235c98ccdbfd443832f31e075f54419e1b96b"}, - {file = "pydantic_core-2.18.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:78363590ef93d5d226ba21a90a03ea89a20738ee5b7da83d771d283fd8a56761"}, - {file = "pydantic_core-2.18.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:852e966fbd035a6468fc0a3496589b45e2208ec7ca95c26470a54daed82a0788"}, - {file = "pydantic_core-2.18.2-cp38-none-win32.whl", hash = "sha256:6a46e22a707e7ad4484ac9ee9f290f9d501df45954184e23fc29408dfad61350"}, - {file = "pydantic_core-2.18.2-cp38-none-win_amd64.whl", hash = "sha256:d91cb5ea8b11607cc757675051f61b3d93f15eca3cefb3e6c704a5d6e8440f4e"}, - {file = "pydantic_core-2.18.2-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:ae0a8a797a5e56c053610fa7be147993fe50960fa43609ff2a9552b0e07013e8"}, - {file = "pydantic_core-2.18.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:042473b6280246b1dbf530559246f6842b56119c2926d1e52b631bdc46075f2a"}, - {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1a388a77e629b9ec814c1b1e6b3b595fe521d2cdc625fcca26fbc2d44c816804"}, - {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e25add29b8f3b233ae90ccef2d902d0ae0432eb0d45370fe315d1a5cf231004b"}, - {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f459a5ce8434614dfd39bbebf1041952ae01da6bed9855008cb33b875cb024c0"}, - {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:eff2de745698eb46eeb51193a9f41d67d834d50e424aef27df2fcdee1b153845"}, - {file = 
"pydantic_core-2.18.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a8309f67285bdfe65c372ea3722b7a5642680f3dba538566340a9d36e920b5f0"}, - {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f93a8a2e3938ff656a7c1bc57193b1319960ac015b6e87d76c76bf14fe0244b4"}, - {file = "pydantic_core-2.18.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:22057013c8c1e272eb8d0eebc796701167d8377441ec894a8fed1af64a0bf399"}, - {file = "pydantic_core-2.18.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:cfeecd1ac6cc1fb2692c3d5110781c965aabd4ec5d32799773ca7b1456ac636b"}, - {file = "pydantic_core-2.18.2-cp39-none-win32.whl", hash = "sha256:0d69b4c2f6bb3e130dba60d34c0845ba31b69babdd3f78f7c0c8fae5021a253e"}, - {file = "pydantic_core-2.18.2-cp39-none-win_amd64.whl", hash = "sha256:d9319e499827271b09b4e411905b24a426b8fb69464dfa1696258f53a3334641"}, - {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:a1874c6dd4113308bd0eb568418e6114b252afe44319ead2b4081e9b9521fe75"}, - {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:ccdd111c03bfd3666bd2472b674c6899550e09e9f298954cfc896ab92b5b0e6d"}, - {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e18609ceaa6eed63753037fc06ebb16041d17d28199ae5aba0052c51449650a9"}, - {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e5c584d357c4e2baf0ff7baf44f4994be121e16a2c88918a5817331fc7599d7"}, - {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:43f0f463cf89ace478de71a318b1b4f05ebc456a9b9300d027b4b57c1a2064fb"}, - {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:e1b395e58b10b73b07b7cf740d728dd4ff9365ac46c18751bf8b3d8cca8f625a"}, - {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:0098300eebb1c837271d3d1a2cd2911e7c11b396eac9661655ee524a7f10587b"}, - {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:36789b70d613fbac0a25bb07ab3d9dba4d2e38af609c020cf4d888d165ee0bf3"}, - {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:3f9a801e7c8f1ef8718da265bba008fa121243dfe37c1cea17840b0944dfd72c"}, - {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:3a6515ebc6e69d85502b4951d89131ca4e036078ea35533bb76327f8424531ce"}, - {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:20aca1e2298c56ececfd8ed159ae4dde2df0781988c97ef77d5c16ff4bd5b400"}, - {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:223ee893d77a310a0391dca6df00f70bbc2f36a71a895cecd9a0e762dc37b349"}, - {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2334ce8c673ee93a1d6a65bd90327588387ba073c17e61bf19b4fd97d688d63c"}, - {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:cbca948f2d14b09d20268cda7b0367723d79063f26c4ffc523af9042cad95592"}, - {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:b3ef08e20ec49e02d5c6717a91bb5af9b20f1805583cb0adfe9ba2c6b505b5ae"}, - {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-win_amd64.whl", hash = 
"sha256:c6fdc8627910eed0c01aed6a390a252fe3ea6d472ee70fdde56273f198938374"}, - {file = "pydantic_core-2.18.2.tar.gz", hash = "sha256:2e29d20810dfc3043ee13ac7d9e25105799817683348823f305ab3f349b9386e"}, + {file = "pydantic_core-2.18.3-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:744697428fcdec6be5670460b578161d1ffe34743a5c15656be7ea82b008197c"}, + {file = "pydantic_core-2.18.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:37b40c05ced1ba4218b14986fe6f283d22e1ae2ff4c8e28881a70fb81fbfcda7"}, + {file = "pydantic_core-2.18.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:544a9a75622357076efb6b311983ff190fbfb3c12fc3a853122b34d3d358126c"}, + {file = "pydantic_core-2.18.3-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e2e253af04ceaebde8eb201eb3f3e3e7e390f2d275a88300d6a1959d710539e2"}, + {file = "pydantic_core-2.18.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:855ec66589c68aa367d989da5c4755bb74ee92ccad4fdb6af942c3612c067e34"}, + {file = "pydantic_core-2.18.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3d3e42bb54e7e9d72c13ce112e02eb1b3b55681ee948d748842171201a03a98a"}, + {file = "pydantic_core-2.18.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c6ac9ffccc9d2e69d9fba841441d4259cb668ac180e51b30d3632cd7abca2b9b"}, + {file = "pydantic_core-2.18.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c56eca1686539fa0c9bda992e7bd6a37583f20083c37590413381acfc5f192d6"}, + {file = "pydantic_core-2.18.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:17954d784bf8abfc0ec2a633108207ebc4fa2df1a0e4c0c3ccbaa9bb01d2c426"}, + {file = "pydantic_core-2.18.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:98ed737567d8f2ecd54f7c8d4f8572ca7c7921ede93a2e52939416170d357812"}, + {file = "pydantic_core-2.18.3-cp310-none-win32.whl", hash = "sha256:9f9e04afebd3ed8c15d67a564ed0a34b54e52136c6d40d14c5547b238390e779"}, + {file = "pydantic_core-2.18.3-cp310-none-win_amd64.whl", hash = "sha256:45e4ffbae34f7ae30d0047697e724e534a7ec0a82ef9994b7913a412c21462a0"}, + {file = "pydantic_core-2.18.3-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:b9ebe8231726c49518b16b237b9fe0d7d361dd221302af511a83d4ada01183ab"}, + {file = "pydantic_core-2.18.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b8e20e15d18bf7dbb453be78a2d858f946f5cdf06c5072453dace00ab652e2b2"}, + {file = "pydantic_core-2.18.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c0d9ff283cd3459fa0bf9b0256a2b6f01ac1ff9ffb034e24457b9035f75587cb"}, + {file = "pydantic_core-2.18.3-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2f7ef5f0ebb77ba24c9970da18b771711edc5feaf00c10b18461e0f5f5949231"}, + {file = "pydantic_core-2.18.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:73038d66614d2e5cde30435b5afdced2b473b4c77d4ca3a8624dd3e41a9c19be"}, + {file = "pydantic_core-2.18.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6afd5c867a74c4d314c557b5ea9520183fadfbd1df4c2d6e09fd0d990ce412cd"}, + {file = "pydantic_core-2.18.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bd7df92f28d351bb9f12470f4c533cf03d1b52ec5a6e5c58c65b183055a60106"}, + {file = "pydantic_core-2.18.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:80aea0ffeb1049336043d07799eace1c9602519fb3192916ff525b0287b2b1e4"}, + {file = 
"pydantic_core-2.18.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:aaee40f25bba38132e655ffa3d1998a6d576ba7cf81deff8bfa189fb43fd2bbe"}, + {file = "pydantic_core-2.18.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:9128089da8f4fe73f7a91973895ebf2502539d627891a14034e45fb9e707e26d"}, + {file = "pydantic_core-2.18.3-cp311-none-win32.whl", hash = "sha256:fec02527e1e03257aa25b1a4dcbe697b40a22f1229f5d026503e8b7ff6d2eda7"}, + {file = "pydantic_core-2.18.3-cp311-none-win_amd64.whl", hash = "sha256:58ff8631dbab6c7c982e6425da8347108449321f61fe427c52ddfadd66642af7"}, + {file = "pydantic_core-2.18.3-cp311-none-win_arm64.whl", hash = "sha256:3fc1c7f67f34c6c2ef9c213e0f2a351797cda98249d9ca56a70ce4ebcaba45f4"}, + {file = "pydantic_core-2.18.3-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:f0928cde2ae416a2d1ebe6dee324709c6f73e93494d8c7aea92df99aab1fc40f"}, + {file = "pydantic_core-2.18.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0bee9bb305a562f8b9271855afb6ce00223f545de3d68560b3c1649c7c5295e9"}, + {file = "pydantic_core-2.18.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e862823be114387257dacbfa7d78547165a85d7add33b446ca4f4fae92c7ff5c"}, + {file = "pydantic_core-2.18.3-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6a36f78674cbddc165abab0df961b5f96b14461d05feec5e1f78da58808b97e7"}, + {file = "pydantic_core-2.18.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ba905d184f62e7ddbb7a5a751d8a5c805463511c7b08d1aca4a3e8c11f2e5048"}, + {file = "pydantic_core-2.18.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7fdd362f6a586e681ff86550b2379e532fee63c52def1c666887956748eaa326"}, + {file = "pydantic_core-2.18.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:24b214b7ee3bd3b865e963dbed0f8bc5375f49449d70e8d407b567af3222aae4"}, + {file = "pydantic_core-2.18.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:691018785779766127f531674fa82bb368df5b36b461622b12e176c18e119022"}, + {file = "pydantic_core-2.18.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:60e4c625e6f7155d7d0dcac151edf5858102bc61bf959d04469ca6ee4e8381bd"}, + {file = "pydantic_core-2.18.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a4e651e47d981c1b701dcc74ab8fec5a60a5b004650416b4abbef13db23bc7be"}, + {file = "pydantic_core-2.18.3-cp312-none-win32.whl", hash = "sha256:ffecbb5edb7f5ffae13599aec33b735e9e4c7676ca1633c60f2c606beb17efc5"}, + {file = "pydantic_core-2.18.3-cp312-none-win_amd64.whl", hash = "sha256:2c8333f6e934733483c7eddffdb094c143b9463d2af7e6bd85ebcb2d4a1b82c6"}, + {file = "pydantic_core-2.18.3-cp312-none-win_arm64.whl", hash = "sha256:7a20dded653e516a4655f4c98e97ccafb13753987434fe7cf044aa25f5b7d417"}, + {file = "pydantic_core-2.18.3-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:eecf63195be644b0396f972c82598cd15693550f0ff236dcf7ab92e2eb6d3522"}, + {file = "pydantic_core-2.18.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:2c44efdd3b6125419c28821590d7ec891c9cb0dff33a7a78d9d5c8b6f66b9702"}, + {file = "pydantic_core-2.18.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6e59fca51ffbdd1638b3856779342ed69bcecb8484c1d4b8bdb237d0eb5a45e2"}, + {file = "pydantic_core-2.18.3-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:70cf099197d6b98953468461d753563b28e73cf1eade2ffe069675d2657ed1d5"}, + {file = "pydantic_core-2.18.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash 
= "sha256:63081a49dddc6124754b32a3774331467bfc3d2bd5ff8f10df36a95602560361"}, + {file = "pydantic_core-2.18.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:370059b7883485c9edb9655355ff46d912f4b03b009d929220d9294c7fd9fd60"}, + {file = "pydantic_core-2.18.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5a64faeedfd8254f05f5cf6fc755023a7e1606af3959cfc1a9285744cc711044"}, + {file = "pydantic_core-2.18.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:19d2e725de0f90d8671f89e420d36c3dd97639b98145e42fcc0e1f6d492a46dc"}, + {file = "pydantic_core-2.18.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:67bc078025d70ec5aefe6200ef094576c9d86bd36982df1301c758a9fff7d7f4"}, + {file = "pydantic_core-2.18.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:adf952c3f4100e203cbaf8e0c907c835d3e28f9041474e52b651761dc248a3c0"}, + {file = "pydantic_core-2.18.3-cp38-none-win32.whl", hash = "sha256:9a46795b1f3beb167eaee91736d5d17ac3a994bf2215a996aed825a45f897558"}, + {file = "pydantic_core-2.18.3-cp38-none-win_amd64.whl", hash = "sha256:200ad4e3133cb99ed82342a101a5abf3d924722e71cd581cc113fe828f727fbc"}, + {file = "pydantic_core-2.18.3-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:304378b7bf92206036c8ddd83a2ba7b7d1a5b425acafff637172a3aa72ad7083"}, + {file = "pydantic_core-2.18.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c826870b277143e701c9ccf34ebc33ddb4d072612683a044e7cce2d52f6c3fef"}, + {file = "pydantic_core-2.18.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e201935d282707394f3668380e41ccf25b5794d1b131cdd96b07f615a33ca4b1"}, + {file = "pydantic_core-2.18.3-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5560dda746c44b48bf82b3d191d74fe8efc5686a9ef18e69bdabccbbb9ad9442"}, + {file = "pydantic_core-2.18.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6b32c2a1f8032570842257e4c19288eba9a2bba4712af542327de9a1204faff8"}, + {file = "pydantic_core-2.18.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:929c24e9dea3990bc8bcd27c5f2d3916c0c86f5511d2caa69e0d5290115344a9"}, + {file = "pydantic_core-2.18.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e1a8376fef60790152564b0eab376b3e23dd6e54f29d84aad46f7b264ecca943"}, + {file = "pydantic_core-2.18.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:dccf3ef1400390ddd1fb55bf0632209d39140552d068ee5ac45553b556780e06"}, + {file = "pydantic_core-2.18.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:41dbdcb0c7252b58fa931fec47937edb422c9cb22528f41cb8963665c372caf6"}, + {file = "pydantic_core-2.18.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:666e45cf071669fde468886654742fa10b0e74cd0fa0430a46ba6056b24fb0af"}, + {file = "pydantic_core-2.18.3-cp39-none-win32.whl", hash = "sha256:f9c08cabff68704a1b4667d33f534d544b8a07b8e5d039c37067fceb18789e78"}, + {file = "pydantic_core-2.18.3-cp39-none-win_amd64.whl", hash = "sha256:4afa5f5973e8572b5c0dcb4e2d4fda7890e7cd63329bd5cc3263a25c92ef0026"}, + {file = "pydantic_core-2.18.3-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:77319771a026f7c7d29c6ebc623de889e9563b7087911b46fd06c044a12aa5e9"}, + {file = "pydantic_core-2.18.3-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:df11fa992e9f576473038510d66dd305bcd51d7dd508c163a8c8fe148454e059"}, + {file = "pydantic_core-2.18.3-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:d531076bdfb65af593326ffd567e6ab3da145020dafb9187a1d131064a55f97c"}, + {file = "pydantic_core-2.18.3-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d33ce258e4e6e6038f2b9e8b8a631d17d017567db43483314993b3ca345dcbbb"}, + {file = "pydantic_core-2.18.3-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1f9cd7f5635b719939019be9bda47ecb56e165e51dd26c9a217a433e3d0d59a9"}, + {file = "pydantic_core-2.18.3-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:cd4a032bb65cc132cae1fe3e52877daecc2097965cd3914e44fbd12b00dae7c5"}, + {file = "pydantic_core-2.18.3-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:82f2718430098bcdf60402136c845e4126a189959d103900ebabb6774a5d9fdb"}, + {file = "pydantic_core-2.18.3-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:c0037a92cf0c580ed14e10953cdd26528e8796307bb8bb312dc65f71547df04d"}, + {file = "pydantic_core-2.18.3-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:b95a0972fac2b1ff3c94629fc9081b16371dad870959f1408cc33b2f78ad347a"}, + {file = "pydantic_core-2.18.3-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:a62e437d687cc148381bdd5f51e3e81f5b20a735c55f690c5be94e05da2b0d5c"}, + {file = "pydantic_core-2.18.3-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b367a73a414bbb08507da102dc2cde0fa7afe57d09b3240ce82a16d608a7679c"}, + {file = "pydantic_core-2.18.3-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ecce4b2360aa3f008da3327d652e74a0e743908eac306198b47e1c58b03dd2b"}, + {file = "pydantic_core-2.18.3-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:bd4435b8d83f0c9561a2a9585b1de78f1abb17cb0cef5f39bf6a4b47d19bafe3"}, + {file = "pydantic_core-2.18.3-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:616221a6d473c5b9aa83fa8982745441f6a4a62a66436be9445c65f241b86c94"}, + {file = "pydantic_core-2.18.3-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:7e6382ce89a92bc1d0c0c5edd51e931432202b9080dc921d8d003e616402efd1"}, + {file = "pydantic_core-2.18.3-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:ff58f379345603d940e461eae474b6bbb6dab66ed9a851ecd3cb3709bf4dcf6a"}, + {file = "pydantic_core-2.18.3.tar.gz", hash = "sha256:432e999088d85c8f36b9a3f769a8e2b57aabd817bbb729a90d1fe7f18f6f1f39"}, ] [package.dependencies] From 7552e03d77f057fc6e1b07104d7b8a06a0a21cd1 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 30 May 2024 06:14:22 +0200 Subject: [PATCH 19/68] Bump requests from 2.32.2 to 2.32.3 (#778) Bumps [requests](https://github.com/psf/requests) from 2.32.2 to 2.32.3. - [Release notes](https://github.com/psf/requests/releases) - [Changelog](https://github.com/psf/requests/blob/main/HISTORY.md) - [Commits](https://github.com/psf/requests/compare/v2.32.2...v2.32.3) --- updated-dependencies: - dependency-name: requests dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/poetry.lock b/poetry.lock index 35c0f9ee0d..95118015c6 100644 --- a/poetry.lock +++ b/poetry.lock @@ -3584,13 +3584,13 @@ files = [ [[package]] name = "requests" -version = "2.32.2" +version = "2.32.3" description = "Python HTTP for Humans." 
optional = false python-versions = ">=3.8" files = [ - {file = "requests-2.32.2-py3-none-any.whl", hash = "sha256:fc06670dd0ed212426dfeb94fc1b983d917c4f9847c863f313c9dfaaffb7c23c"}, - {file = "requests-2.32.2.tar.gz", hash = "sha256:dd951ff5ecf3e3b3aa26b40703ba77495dab41da839ae72ef3c8e5d8e2433289"}, + {file = "requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6"}, + {file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"}, ] [package.dependencies] From e08cc9dd704ae46149e0644f1c9cbf1509360613 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 30 May 2024 06:31:05 +0200 Subject: [PATCH 20/68] Bump getdaft from 0.2.24 to 0.2.25 (#779) Bumps [getdaft](https://github.com/Eventual-Inc/Daft) from 0.2.24 to 0.2.25. - [Release notes](https://github.com/Eventual-Inc/Daft/releases) - [Commits](https://github.com/Eventual-Inc/Daft/compare/v0.2.24...v0.2.25) --- updated-dependencies: - dependency-name: getdaft dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/poetry.lock b/poetry.lock index 95118015c6..7931ee0e38 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1344,17 +1344,17 @@ gcsfuse = ["fusepy"] [[package]] name = "getdaft" -version = "0.2.24" +version = "0.2.25" description = "Distributed Dataframes for Multimodal Data" optional = true python-versions = ">=3.8" files = [ - {file = "getdaft-0.2.24-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:6dbb2c25f14c008fe1323590dc86bbed9d0de8b470aa62c0844bb218864b42da"}, - {file = "getdaft-0.2.24-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:1c27ff4e3e00275db611c8fad5edefc1a24f8494093ce18f0b846b147b4d6cd6"}, - {file = "getdaft-0.2.24-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ae0d0ae1238fa5eb2ddfbefbc52e47aa6f9d00e9621dde0ecbee70be43cee8e8"}, - {file = "getdaft-0.2.24-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:473881f9406d166dace7f12a3cb74915f8901b628f6d9f0900fdf69cf05b0031"}, - {file = "getdaft-0.2.24-cp38-abi3-win_amd64.whl", hash = "sha256:c77266e55245c95a5c972dd49a47a764cde1b2007cc30ab08c2f25f7a36d6697"}, - {file = "getdaft-0.2.24.tar.gz", hash = "sha256:1fa4eae81ab101bed544ee64e3128e2df4f267a87640cd1473e00f944c32a216"}, + {file = "getdaft-0.2.25-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:7aab5bdf4af6b9bb0f7e0555cd36762d57da97ed026017f3a4b00f97bf5bf7f1"}, + {file = "getdaft-0.2.25-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:12a95f0ce9206c77a439ace0dc705d13acbe0e8278907ad2e57f62e0c01330ad"}, + {file = "getdaft-0.2.25-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2cfeef90e2f446f65e0e7292431e5354995fe693cf9bbbd434dafd4b8971ea83"}, + {file = "getdaft-0.2.25-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1b86a42e7310de613a0fb30d68a70ee0678e6605023e48a3c1dd28f8752d380e"}, + {file = "getdaft-0.2.25-cp38-abi3-win_amd64.whl", hash = "sha256:fbb3437e666478d06e661d961e5fd10b8cc33385bd2bafafcd22daf403fe6df1"}, + {file = "getdaft-0.2.25.tar.gz", hash = "sha256:60b2ca7d39447ba4b19eab6ccfd6fc706914ecf43d0080a13c832b013dda589b"}, ] [package.dependencies] From d3ad61c5d4cdbf908d667e2ed3ef5ad2d9f15fbe Mon Sep 17 
00:00:00 2001 From: Fokko Driesprong Date: Thu, 30 May 2024 09:04:42 +0200 Subject: [PATCH 21/68] Remove `record_fields` from the `Record` class (#580) First step towards https://github.com/apache/iceberg-python/issues/579 --- pyiceberg/manifest.py | 3 ++- pyiceberg/partitioning.py | 4 ++-- pyiceberg/table/snapshots.py | 4 ++-- pyiceberg/typedef.py | 7 +++---- tests/integration/test_rest_manifest.py | 5 +++-- 5 files changed, 12 insertions(+), 11 deletions(-) diff --git a/pyiceberg/manifest.py b/pyiceberg/manifest.py index 3b8138b61a..defe5958c5 100644 --- a/pyiceberg/manifest.py +++ b/pyiceberg/manifest.py @@ -18,6 +18,7 @@ import math from abc import ABC, abstractmethod +from copy import copy from enum import Enum from types import TracebackType from typing import ( @@ -909,7 +910,7 @@ def __init__(self, output_file: OutputFile, snapshot_id: int, parent_snapshot_id self._sequence_number = sequence_number def prepare_manifest(self, manifest_file: ManifestFile) -> ManifestFile: - wrapped_manifest_file = ManifestFile(*manifest_file.record_fields()) + wrapped_manifest_file = copy(manifest_file) if wrapped_manifest_file.sequence_number == UNASSIGNED_SEQ: # if the sequence number is being assigned here, then the manifest must be created by the current operation. diff --git a/pyiceberg/partitioning.py b/pyiceberg/partitioning.py index a3cf255341..f4e53a59a5 100644 --- a/pyiceberg/partitioning.py +++ b/pyiceberg/partitioning.py @@ -229,9 +229,9 @@ def partition_to_path(self, data: Record, schema: Schema) -> str: field_strs = [] value_strs = [] - for pos, value in enumerate(data.record_fields()): + for pos in range(len(self.fields)): partition_field = self.fields[pos] - value_str = partition_field.transform.to_human_string(field_types[pos].field_type, value=value) + value_str = partition_field.transform.to_human_string(field_types[pos].field_type, value=data[pos]) value_str = quote(value_str, safe='') value_strs.append(value_str) diff --git a/pyiceberg/table/snapshots.py b/pyiceberg/table/snapshots.py index f74ac4b7d4..79eb8b0b8a 100644 --- a/pyiceberg/table/snapshots.py +++ b/pyiceberg/table/snapshots.py @@ -274,14 +274,14 @@ def set_partition_summary_limit(self, limit: int) -> None: def add_file(self, data_file: DataFile, schema: Schema, partition_spec: PartitionSpec = UNPARTITIONED_PARTITION_SPEC) -> None: self.metrics.add_file(data_file) - if len(data_file.partition.record_fields()) != 0: + if len(data_file.partition) > 0: self.update_partition_metrics(partition_spec=partition_spec, file=data_file, is_add_file=True, schema=schema) def remove_file( self, data_file: DataFile, schema: Schema, partition_spec: PartitionSpec = UNPARTITIONED_PARTITION_SPEC ) -> None: self.metrics.remove_file(data_file) - if len(data_file.partition.record_fields()) != 0: + if len(data_file.partition) > 0: self.update_partition_metrics(partition_spec=partition_spec, file=data_file, is_add_file=False, schema=schema) def update_partition_metrics(self, partition_spec: PartitionSpec, file: DataFile, is_add_file: bool, schema: Schema) -> None: diff --git a/pyiceberg/typedef.py b/pyiceberg/typedef.py index 6ccf9526ba..26f4d4d5ac 100644 --- a/pyiceberg/typedef.py +++ b/pyiceberg/typedef.py @@ -25,7 +25,6 @@ Callable, Dict, Generic, - List, Literal, Optional, Protocol, @@ -198,9 +197,9 @@ def __repr__(self) -> str: """Return the string representation of the Record class.""" return f"{self.__class__.__name__}[{', '.join(f'{key}={repr(value)}' for key, value in self.__dict__.items() if not key.startswith('_'))}]" - def 
record_fields(self) -> List[str]: - """Return values of all the fields of the Record class except those specified in skip_fields.""" - return [self.__getattribute__(v) if hasattr(self, v) else None for v in self._position_to_field_name] + def __len__(self) -> int: + """Return the number of fields in the Record class.""" + return len(self._position_to_field_name) def __hash__(self) -> int: """Return hash value of the Record class.""" diff --git a/tests/integration/test_rest_manifest.py b/tests/integration/test_rest_manifest.py index 8191209ae6..0e768c6e68 100644 --- a/tests/integration/test_rest_manifest.py +++ b/tests/integration/test_rest_manifest.py @@ -17,6 +17,7 @@ # pylint:disable=redefined-outer-name import inspect +from copy import copy from enum import Enum from tempfile import TemporaryDirectory from typing import Any @@ -26,7 +27,7 @@ from pyiceberg.catalog import Catalog, load_catalog from pyiceberg.io.pyarrow import PyArrowFileIO -from pyiceberg.manifest import DataFile, ManifestEntry, write_manifest +from pyiceberg.manifest import DataFile, write_manifest from pyiceberg.table import Table from pyiceberg.utils.lazydict import LazyDict @@ -99,7 +100,7 @@ def test_write_sample_manifest(table_test_all_types: Table) -> None: sort_order_id=entry.data_file.sort_order_id, spec_id=entry.data_file.spec_id, ) - wrapped_entry_v2 = ManifestEntry(*entry.record_fields()) + wrapped_entry_v2 = copy(entry) wrapped_entry_v2.data_file = wrapped_data_file_v2_debug wrapped_entry_v2_dict = todict(wrapped_entry_v2) # This one should not be written From cf3bf8a977f80f986237bc62293666de327871b3 Mon Sep 17 00:00:00 2001 From: Honah J Date: Thu, 30 May 2024 06:44:59 -0700 Subject: [PATCH 22/68] Unify to double quotes using Ruff (#781) --- pyiceberg/catalog/hive.py | 2 +- pyiceberg/catalog/rest.py | 2 +- pyiceberg/expressions/parser.py | 10 +- pyiceberg/partitioning.py | 2 +- pyiceberg/schema.py | 10 +- pyiceberg/table/__init__.py | 238 ++++++------ pyiceberg/table/metadata.py | 6 +- pyiceberg/table/name_mapping.py | 14 +- pyiceberg/table/refs.py | 8 +- pyiceberg/table/snapshots.py | 66 ++-- pyiceberg/typedef.py | 2 +- pyiceberg/utils/config.py | 2 +- ruff.toml | 2 +- tests/avro/test_file.py | 10 +- tests/catalog/integration_test_glue.py | 2 +- tests/catalog/test_dynamodb.py | 6 +- tests/catalog/test_glue.py | 4 +- tests/catalog/test_hive.py | 160 ++++---- tests/catalog/test_sql.py | 348 +++++++++--------- tests/conftest.py | 56 +-- tests/expressions/test_expressions.py | 8 +- tests/integration/test_add_files.py | 8 +- tests/integration/test_inspect_table.py | 218 +++++------ tests/integration/test_partition_evolution.py | 78 ++-- tests/integration/test_partitioning_key.py | 16 +- tests/integration/test_reads.py | 60 +-- tests/integration/test_rest_manifest.py | 2 +- tests/integration/test_rest_schema.py | 22 +- .../test_writes/test_partitioned_writes.py | 60 +-- tests/integration/test_writes/test_writes.py | 232 ++++++------ tests/io/test_pyarrow.py | 8 +- tests/io/test_pyarrow_visitor.py | 78 ++-- tests/table/test_init.py | 48 +-- tests/table/test_metadata.py | 2 +- tests/table/test_name_mapping.py | 160 ++++---- tests/table/test_snapshots.py | 162 ++++---- tests/test_serializers.py | 2 +- tests/test_transforms.py | 6 +- tests/utils/test_config.py | 4 +- tests/utils/test_decimal.py | 4 +- 40 files changed, 1064 insertions(+), 1064 deletions(-) diff --git a/pyiceberg/catalog/hive.py b/pyiceberg/catalog/hive.py index 708ae8c9d4..13b57b6ea9 100644 --- a/pyiceberg/catalog/hive.py +++ 
b/pyiceberg/catalog/hive.py @@ -146,7 +146,7 @@ def __init__(self, uri: str, ugi: Optional[str] = None): protocol = TBinaryProtocol.TBinaryProtocol(transport) self._client = Client(protocol) - self._ugi = ugi.split(':') if ugi else None + self._ugi = ugi.split(":") if ugi else None def __enter__(self) -> Client: self._transport.open() diff --git a/pyiceberg/catalog/rest.py b/pyiceberg/catalog/rest.py index afd5818662..2474b89853 100644 --- a/pyiceberg/catalog/rest.py +++ b/pyiceberg/catalog/rest.py @@ -152,7 +152,7 @@ class CreateTableRequest(IcebergBaseModel): properties: Dict[str, str] = Field(default_factory=dict) # validators - @field_validator('properties', mode='before') + @field_validator("properties", mode="before") def transform_properties_dict_value_to_str(cls, properties: Properties) -> Dict[str, str]: return transform_dict_value_to_str(properties) diff --git a/pyiceberg/expressions/parser.py b/pyiceberg/expressions/parser.py index 8873907813..107d2349db 100644 --- a/pyiceberg/expressions/parser.py +++ b/pyiceberg/expressions/parser.py @@ -78,7 +78,7 @@ identifier = Word(alphas, alphanums + "_$").set_results_name("identifier") column = DelimitedList(identifier, delim=".", combine=False).set_results_name("column") -like_regex = r'(?P(?(?(?(? BooleanExpression: match = re.search(like_regex, literal_like.value) - if match and match.groupdict()['invalid_wildcard']: + if match and match.groupdict()["invalid_wildcard"]: raise ValueError("LIKE expressions only supports wildcard, '%', at the end of a string") - elif match and match.groupdict()['valid_wildcard']: - return StartsWith(result.column, StringLiteral(literal_like.value[:-1].replace('\\%', '%'))) + elif match and match.groupdict()["valid_wildcard"]: + return StartsWith(result.column, StringLiteral(literal_like.value[:-1].replace("\\%", "%"))) else: - return EqualTo(result.column, StringLiteral(literal_like.value.replace('\\%', '%'))) + return EqualTo(result.column, StringLiteral(literal_like.value.replace("\\%", "%"))) predicate = (comparison | in_check | null_check | nan_check | starts_check | boolean).set_results_name("predicate") diff --git a/pyiceberg/partitioning.py b/pyiceberg/partitioning.py index f4e53a59a5..481207db7a 100644 --- a/pyiceberg/partitioning.py +++ b/pyiceberg/partitioning.py @@ -233,7 +233,7 @@ def partition_to_path(self, data: Record, schema: Schema) -> str: partition_field = self.fields[pos] value_str = partition_field.transform.to_human_string(field_types[pos].field_type, value=data[pos]) - value_str = quote(value_str, safe='') + value_str = quote(value_str, safe="") value_strs.append(value_str) field_strs.append(partition_field.name) diff --git a/pyiceberg/schema.py b/pyiceberg/schema.py index b2739d8618..77f1addbf5 100644 --- a/pyiceberg/schema.py +++ b/pyiceberg/schema.py @@ -1311,11 +1311,11 @@ def _valid_avro_name(name: str) -> bool: length = len(name) assert length > 0, ValueError("Can not validate empty avro name") first = name[0] - if not (first.isalpha() or first == '_'): + if not (first.isalpha() or first == "_"): return False for character in name[1:]: - if not (character.isalnum() or character == '_'): + if not (character.isalnum() or character == "_"): return False return True @@ -1323,17 +1323,17 @@ def _valid_avro_name(name: str) -> bool: def _sanitize_name(name: str) -> str: sb = [] first = name[0] - if not (first.isalpha() or first == '_'): + if not (first.isalpha() or first == "_"): sb.append(_sanitize_char(first)) else: sb.append(first) for character in name[1:]: - if not 
(character.isalnum() or character == '_'): + if not (character.isalnum() or character == "_"): sb.append(_sanitize_char(character)) else: sb.append(character) - return ''.join(sb) + return "".join(sb) def _sanitize_char(character: str) -> str: diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py index 74b0225dbe..aa108de08b 100644 --- a/pyiceberg/table/__init__.py +++ b/pyiceberg/table/__init__.py @@ -568,17 +568,17 @@ def commit_transaction(self) -> Table: class AssignUUIDUpdate(IcebergBaseModel): - action: Literal['assign-uuid'] = Field(default="assign-uuid") + action: Literal["assign-uuid"] = Field(default="assign-uuid") uuid: uuid.UUID class UpgradeFormatVersionUpdate(IcebergBaseModel): - action: Literal['upgrade-format-version'] = Field(default="upgrade-format-version") + action: Literal["upgrade-format-version"] = Field(default="upgrade-format-version") format_version: int = Field(alias="format-version") class AddSchemaUpdate(IcebergBaseModel): - action: Literal['add-schema'] = Field(default="add-schema") + action: Literal["add-schema"] = Field(default="add-schema") schema_: Schema = Field(alias="schema") # This field is required: https://github.com/apache/iceberg/pull/7445 last_column_id: int = Field(alias="last-column-id") @@ -587,47 +587,47 @@ class AddSchemaUpdate(IcebergBaseModel): class SetCurrentSchemaUpdate(IcebergBaseModel): - action: Literal['set-current-schema'] = Field(default="set-current-schema") + action: Literal["set-current-schema"] = Field(default="set-current-schema") schema_id: int = Field( alias="schema-id", description="Schema ID to set as current, or -1 to set last added schema", default=-1 ) class AddPartitionSpecUpdate(IcebergBaseModel): - action: Literal['add-spec'] = Field(default="add-spec") + action: Literal["add-spec"] = Field(default="add-spec") spec: PartitionSpec initial_change: bool = Field(default=False, exclude=True) class SetDefaultSpecUpdate(IcebergBaseModel): - action: Literal['set-default-spec'] = Field(default="set-default-spec") + action: Literal["set-default-spec"] = Field(default="set-default-spec") spec_id: int = Field( alias="spec-id", description="Partition spec ID to set as the default, or -1 to set last added spec", default=-1 ) class AddSortOrderUpdate(IcebergBaseModel): - action: Literal['add-sort-order'] = Field(default="add-sort-order") + action: Literal["add-sort-order"] = Field(default="add-sort-order") sort_order: SortOrder = Field(alias="sort-order") initial_change: bool = Field(default=False, exclude=True) class SetDefaultSortOrderUpdate(IcebergBaseModel): - action: Literal['set-default-sort-order'] = Field(default="set-default-sort-order") + action: Literal["set-default-sort-order"] = Field(default="set-default-sort-order") sort_order_id: int = Field( alias="sort-order-id", description="Sort order ID to set as the default, or -1 to set last added sort order", default=-1 ) class AddSnapshotUpdate(IcebergBaseModel): - action: Literal['add-snapshot'] = Field(default="add-snapshot") + action: Literal["add-snapshot"] = Field(default="add-snapshot") snapshot: Snapshot class SetSnapshotRefUpdate(IcebergBaseModel): - action: Literal['set-snapshot-ref'] = Field(default="set-snapshot-ref") + action: Literal["set-snapshot-ref"] = Field(default="set-snapshot-ref") ref_name: str = Field(alias="ref-name") type: Literal["tag", "branch"] snapshot_id: int = Field(alias="snapshot-id") @@ -637,31 +637,31 @@ class SetSnapshotRefUpdate(IcebergBaseModel): class RemoveSnapshotsUpdate(IcebergBaseModel): - action: 
Literal['remove-snapshots'] = Field(default="remove-snapshots") + action: Literal["remove-snapshots"] = Field(default="remove-snapshots") snapshot_ids: List[int] = Field(alias="snapshot-ids") class RemoveSnapshotRefUpdate(IcebergBaseModel): - action: Literal['remove-snapshot-ref'] = Field(default="remove-snapshot-ref") + action: Literal["remove-snapshot-ref"] = Field(default="remove-snapshot-ref") ref_name: str = Field(alias="ref-name") class SetLocationUpdate(IcebergBaseModel): - action: Literal['set-location'] = Field(default="set-location") + action: Literal["set-location"] = Field(default="set-location") location: str class SetPropertiesUpdate(IcebergBaseModel): - action: Literal['set-properties'] = Field(default="set-properties") + action: Literal["set-properties"] = Field(default="set-properties") updates: Dict[str, str] - @field_validator('updates', mode='before') + @field_validator("updates", mode="before") def transform_properties_dict_value_to_str(cls, properties: Properties) -> Dict[str, str]: return transform_dict_value_to_str(properties) class RemovePropertiesUpdate(IcebergBaseModel): - action: Literal['remove-properties'] = Field(default="remove-properties") + action: Literal["remove-properties"] = Field(default="remove-properties") removals: List[str] @@ -683,7 +683,7 @@ class RemovePropertiesUpdate(IcebergBaseModel): SetPropertiesUpdate, RemovePropertiesUpdate, ], - Field(discriminator='action'), + Field(discriminator="action"), ] @@ -1142,7 +1142,7 @@ def validate(self, base_metadata: Optional[TableMetadata]) -> None: AssertDefaultSpecId, AssertDefaultSortOrderId, ], - Field(discriminator='type'), + Field(discriminator="type"), ] UpdatesAndRequirements = Tuple[Tuple[TableUpdate, ...], Tuple[TableRequirement, ...]] @@ -1153,7 +1153,7 @@ class Namespace(IcebergRootModel[List[str]]): root: List[str] = Field( ..., - description='Reference to one or more levels of a namespace', + description="Reference to one or more levels of a namespace", ) @@ -1793,7 +1793,7 @@ class Move: other_field_id: Optional[int] = None -U = TypeVar('U') +U = TypeVar("U") class UpdateTableMetadata(ABC, Generic[U]): @@ -2682,13 +2682,13 @@ class AddFileTask: def _new_manifest_path(location: str, num: int, commit_uuid: uuid.UUID) -> str: - return f'{location}/metadata/{commit_uuid}-m{num}.avro' + return f"{location}/metadata/{commit_uuid}-m{num}.avro" def _generate_manifest_list_path(location: str, snapshot_id: int, attempt: int, commit_uuid: uuid.UUID) -> str: # Mimics the behavior in Java: # https://github.com/apache/iceberg/blob/c862b9177af8e2d83122220764a056f3b96fd00c/core/src/main/java/org/apache/iceberg/SnapshotProducer.java#L491 - return f'{location}/metadata/snap-{snapshot_id}-{attempt}-{commit_uuid}.avro' + return f"{location}/metadata/snap-{snapshot_id}-{attempt}-{commit_uuid}.avro" def _dataframe_to_data_files( @@ -3242,7 +3242,7 @@ def _partition_field(self, transform_key: Tuple[int, Transform[Any, Any]], name: new_field_id = self._new_field_id() if name is None: - tmp_field = PartitionField(transform_key[0], new_field_id, transform_key[1], 'unassigned_field_name') + tmp_field = PartitionField(transform_key[0], new_field_id, transform_key[1], "unassigned_field_name") name = _visit_partition_field(self._transaction.table_metadata.schema(), tmp_field, _PartitionNameGenerator()) return PartitionField(transform_key[0], new_field_id, transform_key[1], name) @@ -3281,12 +3281,12 @@ def snapshots(self) -> "pa.Table": import pyarrow as pa snapshots_schema = pa.schema([ - pa.field('committed_at', 
pa.timestamp(unit='ms'), nullable=False), - pa.field('snapshot_id', pa.int64(), nullable=False), - pa.field('parent_id', pa.int64(), nullable=True), - pa.field('operation', pa.string(), nullable=True), - pa.field('manifest_list', pa.string(), nullable=False), - pa.field('summary', pa.map_(pa.string(), pa.string()), nullable=True), + pa.field("committed_at", pa.timestamp(unit="ms"), nullable=False), + pa.field("snapshot_id", pa.int64(), nullable=False), + pa.field("parent_id", pa.int64(), nullable=True), + pa.field("operation", pa.string(), nullable=True), + pa.field("manifest_list", pa.string(), nullable=False), + pa.field("summary", pa.map_(pa.string(), pa.string()), nullable=True), ]) snapshots = [] for snapshot in self.tbl.metadata.snapshots: @@ -3298,12 +3298,12 @@ def snapshots(self) -> "pa.Table": additional_properties = None snapshots.append({ - 'committed_at': datetime.utcfromtimestamp(snapshot.timestamp_ms / 1000.0), - 'snapshot_id': snapshot.snapshot_id, - 'parent_id': snapshot.parent_snapshot_id, - 'operation': str(operation), - 'manifest_list': snapshot.manifest_list, - 'summary': additional_properties, + "committed_at": datetime.utcfromtimestamp(snapshot.timestamp_ms / 1000.0), + "snapshot_id": snapshot.snapshot_id, + "parent_id": snapshot.parent_snapshot_id, + "operation": str(operation), + "manifest_list": snapshot.manifest_list, + "summary": additional_properties, }) return pa.Table.from_pylist( @@ -3340,33 +3340,33 @@ def _readable_metrics_struct(bound_type: PrimitiveType) -> pa.StructType: pa_record_struct = schema_to_pyarrow(partition_record) entries_schema = pa.schema([ - pa.field('status', pa.int8(), nullable=False), - pa.field('snapshot_id', pa.int64(), nullable=False), - pa.field('sequence_number', pa.int64(), nullable=False), - pa.field('file_sequence_number', pa.int64(), nullable=False), + pa.field("status", pa.int8(), nullable=False), + pa.field("snapshot_id", pa.int64(), nullable=False), + pa.field("sequence_number", pa.int64(), nullable=False), + pa.field("file_sequence_number", pa.int64(), nullable=False), pa.field( - 'data_file', + "data_file", pa.struct([ - pa.field('content', pa.int8(), nullable=False), - pa.field('file_path', pa.string(), nullable=False), - pa.field('file_format', pa.string(), nullable=False), - pa.field('partition', pa_record_struct, nullable=False), - pa.field('record_count', pa.int64(), nullable=False), - pa.field('file_size_in_bytes', pa.int64(), nullable=False), - pa.field('column_sizes', pa.map_(pa.int32(), pa.int64()), nullable=True), - pa.field('value_counts', pa.map_(pa.int32(), pa.int64()), nullable=True), - pa.field('null_value_counts', pa.map_(pa.int32(), pa.int64()), nullable=True), - pa.field('nan_value_counts', pa.map_(pa.int32(), pa.int64()), nullable=True), - pa.field('lower_bounds', pa.map_(pa.int32(), pa.binary()), nullable=True), - pa.field('upper_bounds', pa.map_(pa.int32(), pa.binary()), nullable=True), - pa.field('key_metadata', pa.binary(), nullable=True), - pa.field('split_offsets', pa.list_(pa.int64()), nullable=True), - pa.field('equality_ids', pa.list_(pa.int32()), nullable=True), - pa.field('sort_order_id', pa.int32(), nullable=True), + pa.field("content", pa.int8(), nullable=False), + pa.field("file_path", pa.string(), nullable=False), + pa.field("file_format", pa.string(), nullable=False), + pa.field("partition", pa_record_struct, nullable=False), + pa.field("record_count", pa.int64(), nullable=False), + pa.field("file_size_in_bytes", pa.int64(), nullable=False), + pa.field("column_sizes", pa.map_(pa.int32(), 
pa.int64()), nullable=True), + pa.field("value_counts", pa.map_(pa.int32(), pa.int64()), nullable=True), + pa.field("null_value_counts", pa.map_(pa.int32(), pa.int64()), nullable=True), + pa.field("nan_value_counts", pa.map_(pa.int32(), pa.int64()), nullable=True), + pa.field("lower_bounds", pa.map_(pa.int32(), pa.binary()), nullable=True), + pa.field("upper_bounds", pa.map_(pa.int32(), pa.binary()), nullable=True), + pa.field("key_metadata", pa.binary(), nullable=True), + pa.field("split_offsets", pa.list_(pa.int64()), nullable=True), + pa.field("equality_ids", pa.list_(pa.int32()), nullable=True), + pa.field("sort_order_id", pa.int32(), nullable=True), ]), nullable=False, ), - pa.field('readable_metrics', pa.struct(readable_metrics_struct), nullable=True), + pa.field("readable_metrics", pa.struct(readable_metrics_struct), nullable=True), ]) entries = [] @@ -3403,11 +3403,11 @@ def _readable_metrics_struct(bound_type: PrimitiveType) -> pa.StructType: } entries.append({ - 'status': entry.status.value, - 'snapshot_id': entry.snapshot_id, - 'sequence_number': entry.data_sequence_number, - 'file_sequence_number': entry.file_sequence_number, - 'data_file': { + "status": entry.status.value, + "snapshot_id": entry.snapshot_id, + "sequence_number": entry.data_sequence_number, + "file_sequence_number": entry.file_sequence_number, + "data_file": { "content": entry.data_file.content, "file_path": entry.data_file.file_path, "file_format": entry.data_file.file_format, @@ -3426,7 +3426,7 @@ def _readable_metrics_struct(bound_type: PrimitiveType) -> pa.StructType: "sort_order_id": entry.data_file.sort_order_id, "spec_id": entry.data_file.spec_id, }, - 'readable_metrics': readable_metrics, + "readable_metrics": readable_metrics, }) return pa.Table.from_pylist( @@ -3438,24 +3438,24 @@ def refs(self) -> "pa.Table": import pyarrow as pa ref_schema = pa.schema([ - pa.field('name', pa.string(), nullable=False), - pa.field('type', pa.dictionary(pa.int32(), pa.string()), nullable=False), - pa.field('snapshot_id', pa.int64(), nullable=False), - pa.field('max_reference_age_in_ms', pa.int64(), nullable=True), - pa.field('min_snapshots_to_keep', pa.int32(), nullable=True), - pa.field('max_snapshot_age_in_ms', pa.int64(), nullable=True), + pa.field("name", pa.string(), nullable=False), + pa.field("type", pa.dictionary(pa.int32(), pa.string()), nullable=False), + pa.field("snapshot_id", pa.int64(), nullable=False), + pa.field("max_reference_age_in_ms", pa.int64(), nullable=True), + pa.field("min_snapshots_to_keep", pa.int32(), nullable=True), + pa.field("max_snapshot_age_in_ms", pa.int64(), nullable=True), ]) ref_results = [] for ref in self.tbl.metadata.refs: if snapshot_ref := self.tbl.metadata.refs.get(ref): ref_results.append({ - 'name': ref, - 'type': snapshot_ref.snapshot_ref_type.upper(), - 'snapshot_id': snapshot_ref.snapshot_id, - 'max_reference_age_in_ms': snapshot_ref.max_ref_age_ms, - 'min_snapshots_to_keep': snapshot_ref.min_snapshots_to_keep, - 'max_snapshot_age_in_ms': snapshot_ref.max_snapshot_age_ms, + "name": ref, + "type": snapshot_ref.snapshot_ref_type.upper(), + "snapshot_id": snapshot_ref.snapshot_id, + "max_reference_age_in_ms": snapshot_ref.max_ref_age_ms, + "min_snapshots_to_keep": snapshot_ref.min_snapshots_to_keep, + "max_snapshot_age_in_ms": snapshot_ref.max_snapshot_age_ms, }) return pa.Table.from_pylist(ref_results, schema=ref_schema) @@ -3466,15 +3466,15 @@ def partitions(self, snapshot_id: Optional[int] = None) -> "pa.Table": from pyiceberg.io.pyarrow import schema_to_pyarrow 
table_schema = pa.schema([ - pa.field('record_count', pa.int64(), nullable=False), - pa.field('file_count', pa.int32(), nullable=False), - pa.field('total_data_file_size_in_bytes', pa.int64(), nullable=False), - pa.field('position_delete_record_count', pa.int64(), nullable=False), - pa.field('position_delete_file_count', pa.int32(), nullable=False), - pa.field('equality_delete_record_count', pa.int64(), nullable=False), - pa.field('equality_delete_file_count', pa.int32(), nullable=False), - pa.field('last_updated_at', pa.timestamp(unit='ms'), nullable=True), - pa.field('last_updated_snapshot_id', pa.int64(), nullable=True), + pa.field("record_count", pa.int64(), nullable=False), + pa.field("file_count", pa.int32(), nullable=False), + pa.field("total_data_file_size_in_bytes", pa.int64(), nullable=False), + pa.field("position_delete_record_count", pa.int64(), nullable=False), + pa.field("position_delete_file_count", pa.int32(), nullable=False), + pa.field("equality_delete_record_count", pa.int64(), nullable=False), + pa.field("equality_delete_file_count", pa.int32(), nullable=False), + pa.field("last_updated_at", pa.timestamp(unit="ms"), nullable=True), + pa.field("last_updated_snapshot_id", pa.int64(), nullable=True), ]) partition_record = self.tbl.metadata.specs_struct() @@ -3483,8 +3483,8 @@ def partitions(self, snapshot_id: Optional[int] = None) -> "pa.Table": if has_partitions: pa_record_struct = schema_to_pyarrow(partition_record) partitions_schema = pa.schema([ - pa.field('partition', pa_record_struct, nullable=False), - pa.field('spec_id', pa.int32(), nullable=False), + pa.field("partition", pa_record_struct, nullable=False), + pa.field("spec_id", pa.int32(), nullable=False), ]) table_schema = pa.unify_schemas([partitions_schema, table_schema]) @@ -3561,18 +3561,18 @@ def manifests(self) -> "pa.Table": ]) manifest_schema = pa.schema([ - pa.field('content', pa.int8(), nullable=False), - pa.field('path', pa.string(), nullable=False), - pa.field('length', pa.int64(), nullable=False), - pa.field('partition_spec_id', pa.int32(), nullable=False), - pa.field('added_snapshot_id', pa.int64(), nullable=False), - pa.field('added_data_files_count', pa.int32(), nullable=False), - pa.field('existing_data_files_count', pa.int32(), nullable=False), - pa.field('deleted_data_files_count', pa.int32(), nullable=False), - pa.field('added_delete_files_count', pa.int32(), nullable=False), - pa.field('existing_delete_files_count', pa.int32(), nullable=False), - pa.field('deleted_delete_files_count', pa.int32(), nullable=False), - pa.field('partition_summaries', pa.list_(partition_summary_schema), nullable=False), + pa.field("content", pa.int8(), nullable=False), + pa.field("path", pa.string(), nullable=False), + pa.field("length", pa.int64(), nullable=False), + pa.field("partition_spec_id", pa.int32(), nullable=False), + pa.field("added_snapshot_id", pa.int64(), nullable=False), + pa.field("added_data_files_count", pa.int32(), nullable=False), + pa.field("existing_data_files_count", pa.int32(), nullable=False), + pa.field("deleted_data_files_count", pa.int32(), nullable=False), + pa.field("added_delete_files_count", pa.int32(), nullable=False), + pa.field("existing_delete_files_count", pa.int32(), nullable=False), + pa.field("deleted_delete_files_count", pa.int32(), nullable=False), + pa.field("partition_summaries", pa.list_(partition_summary_schema), nullable=False), ]) def _partition_summaries_to_rows( @@ -3601,10 +3601,10 @@ def _partition_summaries_to_rows( else None ) rows.append({ - 'contains_null': 
field_summary.contains_null, - 'contains_nan': field_summary.contains_nan, - 'lower_bound': lower_bound, - 'upper_bound': upper_bound, + "contains_null": field_summary.contains_null, + "contains_nan": field_summary.contains_nan, + "lower_bound": lower_bound, + "upper_bound": upper_bound, }) return rows @@ -3615,18 +3615,18 @@ def _partition_summaries_to_rows( is_data_file = manifest.content == ManifestContent.DATA is_delete_file = manifest.content == ManifestContent.DELETES manifests.append({ - 'content': manifest.content, - 'path': manifest.manifest_path, - 'length': manifest.manifest_length, - 'partition_spec_id': manifest.partition_spec_id, - 'added_snapshot_id': manifest.added_snapshot_id, - 'added_data_files_count': manifest.added_files_count if is_data_file else 0, - 'existing_data_files_count': manifest.existing_files_count if is_data_file else 0, - 'deleted_data_files_count': manifest.deleted_files_count if is_data_file else 0, - 'added_delete_files_count': manifest.added_files_count if is_delete_file else 0, - 'existing_delete_files_count': manifest.existing_files_count if is_delete_file else 0, - 'deleted_delete_files_count': manifest.deleted_files_count if is_delete_file else 0, - 'partition_summaries': _partition_summaries_to_rows(specs[manifest.partition_spec_id], manifest.partitions) + "content": manifest.content, + "path": manifest.manifest_path, + "length": manifest.manifest_length, + "partition_spec_id": manifest.partition_spec_id, + "added_snapshot_id": manifest.added_snapshot_id, + "added_data_files_count": manifest.added_files_count if is_data_file else 0, + "existing_data_files_count": manifest.existing_files_count if is_data_file else 0, + "deleted_data_files_count": manifest.deleted_files_count if is_data_file else 0, + "added_delete_files_count": manifest.added_files_count if is_delete_file else 0, + "existing_delete_files_count": manifest.existing_files_count if is_delete_file else 0, + "deleted_delete_files_count": manifest.deleted_files_count if is_delete_file else 0, + "partition_summaries": _partition_summaries_to_rows(specs[manifest.partition_spec_id], manifest.partitions) if manifest.partitions else [], }) @@ -3644,16 +3644,16 @@ class TablePartition: def _get_partition_sort_order(partition_columns: list[str], reverse: bool = False) -> dict[str, Any]: - order = 'ascending' if not reverse else 'descending' - null_placement = 'at_start' if reverse else 'at_end' - return {'sort_keys': [(column_name, order) for column_name in partition_columns], 'null_placement': null_placement} + order = "ascending" if not reverse else "descending" + null_placement = "at_start" if reverse else "at_end" + return {"sort_keys": [(column_name, order) for column_name in partition_columns], "null_placement": null_placement} def group_by_partition_scheme(arrow_table: pa.Table, partition_columns: list[str]) -> pa.Table: """Given a table, sort it by current partition scheme.""" # only works for identity for now sort_options = _get_partition_sort_order(partition_columns, reverse=False) - sorted_arrow_table = arrow_table.sort_by(sorting=sort_options['sort_keys'], null_placement=sort_options['null_placement']) + sorted_arrow_table = arrow_table.sort_by(sorting=sort_options["sort_keys"], null_placement=sort_options["null_placement"]) return sorted_arrow_table @@ -3676,7 +3676,7 @@ def _get_table_partitions( schema: Schema, slice_instructions: list[dict[str, Any]], ) -> list[TablePartition]: - sorted_slice_instructions = sorted(slice_instructions, key=lambda x: x['offset']) + 
sorted_slice_instructions = sorted(slice_instructions, key=lambda x: x["offset"]) partition_fields = partition_spec.fields diff --git a/pyiceberg/table/metadata.py b/pyiceberg/table/metadata.py index ba0c885758..8c3c389318 100644 --- a/pyiceberg/table/metadata.py +++ b/pyiceberg/table/metadata.py @@ -222,7 +222,7 @@ class TableMetadataCommonFields(IcebergBaseModel): current-snapshot-id even if the refs map is null.""" # validators - @field_validator('properties', mode='before') + @field_validator("properties", mode="before") def transform_properties_dict_value_to_str(cls, properties: Properties) -> Dict[str, str]: return transform_dict_value_to_str(properties) @@ -305,7 +305,7 @@ def sort_order_by_id(self, sort_order_id: int) -> Optional[SortOrder]: """Get the sort order by sort_order_id.""" return next((sort_order for sort_order in self.sort_orders if sort_order.order_id == sort_order_id), None) - @field_serializer('current_snapshot_id') + @field_serializer("current_snapshot_id") def serialize_current_snapshot_id(self, current_snapshot_id: Optional[int]) -> Optional[int]: if current_snapshot_id is None and Config().get_bool("legacy-current-snapshot-id"): return -1 @@ -319,7 +319,7 @@ def _generate_snapshot_id() -> int: """ rnd_uuid = uuid.uuid4() snapshot_id = int.from_bytes( - bytes(lhs ^ rhs for lhs, rhs in zip(rnd_uuid.bytes[0:8], rnd_uuid.bytes[8:16])), byteorder='little', signed=True + bytes(lhs ^ rhs for lhs, rhs in zip(rnd_uuid.bytes[0:8], rnd_uuid.bytes[8:16])), byteorder="little", signed=True ) snapshot_id = snapshot_id if snapshot_id >= 0 else snapshot_id * -1 diff --git a/pyiceberg/table/name_mapping.py b/pyiceberg/table/name_mapping.py index baa15f168d..5a4e769003 100644 --- a/pyiceberg/table/name_mapping.py +++ b/pyiceberg/table/name_mapping.py @@ -40,12 +40,12 @@ class MappedField(IcebergBaseModel): names: List[str] = conlist(str, min_length=1) fields: List[MappedField] = Field(default_factory=list) - @field_validator('fields', mode='before') + @field_validator("fields", mode="before") @classmethod def convert_null_to_empty_List(cls, v: Any) -> Any: return v or [] - @field_validator('names', mode='after') + @field_validator("names", mode="after") @classmethod def check_at_least_one(cls, v: List[str]) -> Any: """ @@ -60,10 +60,10 @@ def check_at_least_one(cls, v: List[str]) -> Any: @model_serializer def ser_model(self) -> Dict[str, Any]: """Set custom serializer to leave out the field when it is empty.""" - fields = {'fields': self.fields} if len(self.fields) > 0 else {} + fields = {"fields": self.fields} if len(self.fields) > 0 else {} return { - 'field-id': self.field_id, - 'names': self.names, + "field-id": self.field_id, + "names": self.names, **fields, } @@ -87,7 +87,7 @@ def _field_by_name(self) -> Dict[str, MappedField]: return visit_name_mapping(self, _IndexByName()) def find(self, *names: str) -> MappedField: - name = '.'.join(names) + name = ".".join(names) try: return self._field_by_name[name] except KeyError as e: @@ -109,7 +109,7 @@ def __str__(self) -> str: return "[\n " + "\n ".join([str(e) for e in self.root]) + "\n]" -S = TypeVar('S') +S = TypeVar("S") T = TypeVar("T") diff --git a/pyiceberg/table/refs.py b/pyiceberg/table/refs.py index df18fadd31..d87a319a16 100644 --- a/pyiceberg/table/refs.py +++ b/pyiceberg/table/refs.py @@ -46,14 +46,14 @@ class SnapshotRef(IcebergBaseModel): max_snapshot_age_ms: Annotated[Optional[int], Field(alias="max-snapshot-age-ms", default=None, gt=0)] max_ref_age_ms: Annotated[Optional[int], Field(alias="max-ref-age-ms", 
default=None, gt=0)] - @model_validator(mode='after') - def check_min_snapshots_to_keep(self) -> 'SnapshotRef': + @model_validator(mode="after") + def check_min_snapshots_to_keep(self) -> "SnapshotRef": if self.min_snapshots_to_keep is not None and self.snapshot_ref_type == SnapshotRefType.TAG: raise ValidationError("Tags do not support setting minSnapshotsToKeep") return self - @model_validator(mode='after') - def check_max_snapshot_age_ms(self) -> 'SnapshotRef': + @model_validator(mode="after") + def check_max_snapshot_age_ms(self) -> "SnapshotRef": if self.max_snapshot_age_ms is not None and self.snapshot_ref_type == SnapshotRefType.TAG: raise ValidationError("Tags do not support setting maxSnapshotAgeMs") return self diff --git a/pyiceberg/table/snapshots.py b/pyiceberg/table/snapshots.py index 79eb8b0b8a..e2ce3fe4f1 100644 --- a/pyiceberg/table/snapshots.py +++ b/pyiceberg/table/snapshots.py @@ -27,29 +27,29 @@ from pyiceberg.schema import Schema from pyiceberg.typedef import IcebergBaseModel -ADDED_DATA_FILES = 'added-data-files' -ADDED_DELETE_FILES = 'added-delete-files' -ADDED_EQUALITY_DELETES = 'added-equality-deletes' -ADDED_FILE_SIZE = 'added-files-size' -ADDED_POSITION_DELETES = 'added-position-deletes' -ADDED_POSITION_DELETE_FILES = 'added-position-delete-files' -ADDED_RECORDS = 'added-records' -DELETED_DATA_FILES = 'deleted-data-files' -DELETED_RECORDS = 'deleted-records' -ADDED_EQUALITY_DELETE_FILES = 'added-equality-delete-files' -REMOVED_DELETE_FILES = 'removed-delete-files' -REMOVED_EQUALITY_DELETES = 'removed-equality-deletes' -REMOVED_EQUALITY_DELETE_FILES = 'removed-equality-delete-files' -REMOVED_FILE_SIZE = 'removed-files-size' -REMOVED_POSITION_DELETES = 'removed-position-deletes' -REMOVED_POSITION_DELETE_FILES = 'removed-position-delete-files' -TOTAL_EQUALITY_DELETES = 'total-equality-deletes' -TOTAL_POSITION_DELETES = 'total-position-deletes' -TOTAL_DATA_FILES = 'total-data-files' -TOTAL_DELETE_FILES = 'total-delete-files' -TOTAL_RECORDS = 'total-records' -TOTAL_FILE_SIZE = 'total-files-size' -CHANGED_PARTITION_COUNT_PROP = 'changed-partition-count' +ADDED_DATA_FILES = "added-data-files" +ADDED_DELETE_FILES = "added-delete-files" +ADDED_EQUALITY_DELETES = "added-equality-deletes" +ADDED_FILE_SIZE = "added-files-size" +ADDED_POSITION_DELETES = "added-position-deletes" +ADDED_POSITION_DELETE_FILES = "added-position-delete-files" +ADDED_RECORDS = "added-records" +DELETED_DATA_FILES = "deleted-data-files" +DELETED_RECORDS = "deleted-records" +ADDED_EQUALITY_DELETE_FILES = "added-equality-delete-files" +REMOVED_DELETE_FILES = "removed-delete-files" +REMOVED_EQUALITY_DELETES = "removed-equality-deletes" +REMOVED_EQUALITY_DELETE_FILES = "removed-equality-delete-files" +REMOVED_FILE_SIZE = "removed-files-size" +REMOVED_POSITION_DELETES = "removed-position-deletes" +REMOVED_POSITION_DELETE_FILES = "removed-position-delete-files" +TOTAL_EQUALITY_DELETES = "total-equality-deletes" +TOTAL_POSITION_DELETES = "total-position-deletes" +TOTAL_DATA_FILES = "total-data-files" +TOTAL_DELETE_FILES = "total-delete-files" +TOTAL_RECORDS = "total-records" +TOTAL_FILE_SIZE = "total-files-size" +CHANGED_PARTITION_COUNT_PROP = "changed-partition-count" CHANGED_PARTITION_PREFIX = "partitions." 
OPERATION = "operation" @@ -181,14 +181,14 @@ def __init__(self, operation: Operation, **data: Any) -> None: def __getitem__(self, __key: str) -> Optional[Any]: # type: ignore """Return a key as it is a map.""" - if __key.lower() == 'operation': + if __key.lower() == "operation": return self.operation else: return self._additional_properties.get(__key) def __setitem__(self, key: str, value: Any) -> None: """Set a key as it is a map.""" - if key.lower() == 'operation': + if key.lower() == "operation": self.operation = value else: self._additional_properties[key] = value @@ -317,10 +317,10 @@ def _truncate_table_summary(summary: Summary, previous_summary: Mapping[str, str TOTAL_POSITION_DELETES, TOTAL_EQUALITY_DELETES, }: - summary[prop] = '0' + summary[prop] = "0" def get_prop(prop: str) -> int: - value = previous_summary.get(prop) or '0' + value = previous_summary.get(prop) or "0" try: return int(value) except ValueError as e: @@ -353,12 +353,12 @@ def update_snapshot_summaries( if not previous_summary: previous_summary = { - TOTAL_DATA_FILES: '0', - TOTAL_DELETE_FILES: '0', - TOTAL_RECORDS: '0', - TOTAL_FILE_SIZE: '0', - TOTAL_POSITION_DELETES: '0', - TOTAL_EQUALITY_DELETES: '0', + TOTAL_DATA_FILES: "0", + TOTAL_DELETE_FILES: "0", + TOTAL_RECORDS: "0", + TOTAL_FILE_SIZE: "0", + TOTAL_POSITION_DELETES: "0", + TOTAL_EQUALITY_DELETES: "0", } def _update_totals(total_property: str, added_property: str, removed_property: str) -> None: diff --git a/pyiceberg/typedef.py b/pyiceberg/typedef.py index 26f4d4d5ac..2ff123148b 100644 --- a/pyiceberg/typedef.py +++ b/pyiceberg/typedef.py @@ -52,7 +52,7 @@ def update(self, *args: Any, **kwargs: Any) -> None: raise AttributeError("FrozenDict does not support .update()") -UTF8 = 'utf-8' +UTF8 = "utf-8" EMPTY_DICT = FrozenDict() diff --git a/pyiceberg/utils/config.py b/pyiceberg/utils/config.py index 8b1b81d3a7..5eb9cfaa66 100644 --- a/pyiceberg/utils/config.py +++ b/pyiceberg/utils/config.py @@ -127,7 +127,7 @@ def set_property(_config: RecursiveDict, path: List[str], config_value: str) -> if env_var_lower.startswith(PYICEBERG.lower()): key = env_var_lower[len(PYICEBERG) :] parts = key.split("__", maxsplit=2) - parts_normalized = [part.replace('__', '.').replace("_", "-") for part in parts] + parts_normalized = [part.replace("__", ".").replace("_", "-") for part in parts] set_property(config, parts_normalized, config_value) return config diff --git a/ruff.toml b/ruff.toml index 92fb9a9c80..caaa108c84 100644 --- a/ruff.toml +++ b/ruff.toml @@ -80,4 +80,4 @@ known-first-party = ["pyiceberg", "tests"] section-order = ["future", "standard-library", "third-party", "first-party", "local-folder"] [format] -quote-style = "preserve" +quote-style = "double" diff --git a/tests/avro/test_file.py b/tests/avro/test_file.py index 0809f56fea..4df132304c 100644 --- a/tests/avro/test_file.py +++ b/tests/avro/test_file.py @@ -173,13 +173,13 @@ def test_write_manifest_entry_with_iceberg_read_with_fastavro_v1() -> None: v2_entry = todict(entry) # These are not written in V1 - del v2_entry['data_sequence_number'] - del v2_entry['file_sequence_number'] - del v2_entry['data_file']['content'] - del v2_entry['data_file']['equality_ids'] + del v2_entry["data_sequence_number"] + del v2_entry["file_sequence_number"] + del v2_entry["data_file"]["content"] + del v2_entry["data_file"]["equality_ids"] # Required in V1 - v2_entry['data_file']['block_size_in_bytes'] = DEFAULT_BLOCK_SIZE + v2_entry["data_file"]["block_size_in_bytes"] = DEFAULT_BLOCK_SIZE assert v2_entry == fa_entry diff 
--git a/tests/catalog/integration_test_glue.py b/tests/catalog/integration_test_glue.py index ee43779073..21c415212a 100644 --- a/tests/catalog/integration_test_glue.py +++ b/tests/catalog/integration_test_glue.py @@ -484,7 +484,7 @@ def test_commit_table_properties( updated_table_metadata = table.metadata assert MetastoreCatalog._parse_metadata_version(table.metadata_location) == 1 - assert updated_table_metadata.properties == {'Description': 'test_description', "test_a": "test_aa", "test_c": "test_c"} + assert updated_table_metadata.properties == {"Description": "test_description", "test_a": "test_aa", "test_c": "test_c"} table_info = glue.get_table( DatabaseName=database_name, diff --git a/tests/catalog/test_dynamodb.py b/tests/catalog/test_dynamodb.py index f4b16d343b..7ad1301d9d 100644 --- a/tests/catalog/test_dynamodb.py +++ b/tests/catalog/test_dynamodb.py @@ -569,10 +569,10 @@ def test_passing_provided_profile() -> None: } props = {"py-io-impl": "pyiceberg.io.fsspec.FsspecFileIO"} props.update(session_props) # type: ignore - with mock.patch('boto3.Session', return_value=mock.Mock()) as mock_session: + with mock.patch("boto3.Session", return_value=mock.Mock()) as mock_session: mock_client = mock.Mock() mock_session.return_value.client.return_value = mock_client - mock_client.describe_table.return_value = {'Table': {'TableStatus': 'ACTIVE'}} + mock_client.describe_table.return_value = {"Table": {"TableStatus": "ACTIVE"}} test_catalog = DynamoDbCatalog(catalog_name, **props) assert test_catalog.dynamodb is mock_client mock_session.assert_called_with(**session_props) @@ -590,4 +590,4 @@ def test_table_exists( # Act and Assert for an existing table assert test_catalog.table_exists(identifier) is True # Act and Assert for an non-existing table - assert test_catalog.table_exists(('non', 'exist')) is False + assert test_catalog.table_exists(("non", "exist")) is False diff --git a/tests/catalog/test_glue.py b/tests/catalog/test_glue.py index 1aea46d6ef..6b57f1dfe6 100644 --- a/tests/catalog/test_glue.py +++ b/tests/catalog/test_glue.py @@ -715,7 +715,7 @@ def test_commit_table_properties( updated_table_metadata = table.metadata assert test_catalog._parse_metadata_version(table.metadata_location) == 1 - assert updated_table_metadata.properties == {'Description': 'test_description', "test_a": "test_aa", "test_c": "test_c"} + assert updated_table_metadata.properties == {"Description": "test_description", "test_a": "test_aa", "test_c": "test_c"} table_info = _glue.get_table( DatabaseName=database_name, @@ -847,7 +847,7 @@ def test_table_exists( # Act and Assert for an existing table assert test_catalog.table_exists(identifier) is True # Act and Assert for a non-existing table - assert test_catalog.table_exists(('non', 'exist')) is False + assert test_catalog.table_exists(("non", "exist")) is False @mock_aws diff --git a/tests/catalog/test_hive.py b/tests/catalog/test_hive.py index ef662b3aff..96e95815be 100644 --- a/tests/catalog/test_hive.py +++ b/tests/catalog/test_hive.py @@ -234,27 +234,27 @@ def test_create_table( retention=None, sd=StorageDescriptor( cols=[ - FieldSchema(name='boolean', type='boolean', comment=None), - FieldSchema(name='integer', type='int', comment=None), - FieldSchema(name='long', type='bigint', comment=None), - FieldSchema(name='float', type='float', comment=None), - FieldSchema(name='double', type='double', comment=None), - FieldSchema(name='decimal', type='decimal(32,3)', comment=None), - FieldSchema(name='date', type='date', comment=None), - FieldSchema(name='time', 
type='string', comment=None), - FieldSchema(name='timestamp', type='timestamp', comment=None), + FieldSchema(name="boolean", type="boolean", comment=None), + FieldSchema(name="integer", type="int", comment=None), + FieldSchema(name="long", type="bigint", comment=None), + FieldSchema(name="float", type="float", comment=None), + FieldSchema(name="double", type="double", comment=None), + FieldSchema(name="decimal", type="decimal(32,3)", comment=None), + FieldSchema(name="date", type="date", comment=None), + FieldSchema(name="time", type="string", comment=None), + FieldSchema(name="timestamp", type="timestamp", comment=None), FieldSchema( - name='timestamptz', - type='timestamp' if hive2_compatible else 'timestamp with local time zone', + name="timestamptz", + type="timestamp" if hive2_compatible else "timestamp with local time zone", comment=None, ), - FieldSchema(name='string', type='string', comment=None), - FieldSchema(name='uuid', type='string', comment=None), - FieldSchema(name='fixed', type='binary', comment=None), - FieldSchema(name='binary', type='binary', comment=None), - FieldSchema(name='list', type='array', comment=None), - FieldSchema(name='map', type='map', comment=None), - FieldSchema(name='struct', type='struct', comment=None), + FieldSchema(name="string", type="string", comment=None), + FieldSchema(name="uuid", type="string", comment=None), + FieldSchema(name="fixed", type="binary", comment=None), + FieldSchema(name="binary", type="binary", comment=None), + FieldSchema(name="list", type="array", comment=None), + FieldSchema(name="map", type="map", comment=None), + FieldSchema(name="struct", type="struct", comment=None), ], location=f"{hive_database.locationUri}/table", inputFormat="org.apache.hadoop.mapred.FileInputFormat", @@ -314,40 +314,40 @@ def test_create_table( last_column_id=22, schemas=[ Schema( - NestedField(field_id=1, name='boolean', field_type=BooleanType(), required=True), - NestedField(field_id=2, name='integer', field_type=IntegerType(), required=True), - NestedField(field_id=3, name='long', field_type=LongType(), required=True), - NestedField(field_id=4, name='float', field_type=FloatType(), required=True), - NestedField(field_id=5, name='double', field_type=DoubleType(), required=True), - NestedField(field_id=6, name='decimal', field_type=DecimalType(precision=32, scale=3), required=True), - NestedField(field_id=7, name='date', field_type=DateType(), required=True), - NestedField(field_id=8, name='time', field_type=TimeType(), required=True), - NestedField(field_id=9, name='timestamp', field_type=TimestampType(), required=True), - NestedField(field_id=10, name='timestamptz', field_type=TimestamptzType(), required=True), - NestedField(field_id=11, name='string', field_type=StringType(), required=True), - NestedField(field_id=12, name='uuid', field_type=UUIDType(), required=True), - NestedField(field_id=13, name='fixed', field_type=FixedType(length=12), required=True), - NestedField(field_id=14, name='binary', field_type=BinaryType(), required=True), + NestedField(field_id=1, name="boolean", field_type=BooleanType(), required=True), + NestedField(field_id=2, name="integer", field_type=IntegerType(), required=True), + NestedField(field_id=3, name="long", field_type=LongType(), required=True), + NestedField(field_id=4, name="float", field_type=FloatType(), required=True), + NestedField(field_id=5, name="double", field_type=DoubleType(), required=True), + NestedField(field_id=6, name="decimal", field_type=DecimalType(precision=32, scale=3), required=True), + 
NestedField(field_id=7, name="date", field_type=DateType(), required=True), + NestedField(field_id=8, name="time", field_type=TimeType(), required=True), + NestedField(field_id=9, name="timestamp", field_type=TimestampType(), required=True), + NestedField(field_id=10, name="timestamptz", field_type=TimestamptzType(), required=True), + NestedField(field_id=11, name="string", field_type=StringType(), required=True), + NestedField(field_id=12, name="uuid", field_type=UUIDType(), required=True), + NestedField(field_id=13, name="fixed", field_type=FixedType(length=12), required=True), + NestedField(field_id=14, name="binary", field_type=BinaryType(), required=True), NestedField( field_id=15, - name='list', - field_type=ListType(type='list', element_id=18, element_type=StringType(), element_required=True), + name="list", + field_type=ListType(type="list", element_id=18, element_type=StringType(), element_required=True), required=True, ), NestedField( field_id=16, - name='map', + name="map", field_type=MapType( - type='map', key_id=19, key_type=StringType(), value_id=20, value_type=IntegerType(), value_required=True + type="map", key_id=19, key_type=StringType(), value_id=20, value_type=IntegerType(), value_required=True ), required=True, ), NestedField( field_id=17, - name='struct', + name="struct", field_type=StructType( - NestedField(field_id=21, name='inner_string', field_type=StringType(), required=False), - NestedField(field_id=22, name='inner_int', field_type=IntegerType(), required=True), + NestedField(field_id=21, name="inner_string", field_type=StringType(), required=False), + NestedField(field_id=22, name="inner_int", field_type=IntegerType(), required=True), ), required=False, ), @@ -357,7 +357,7 @@ def test_create_table( ], current_schema_id=0, last_partition_id=999, - properties={"owner": "javaberg", 'write.parquet.compression-codec': 'zstd'}, + properties={"owner": "javaberg", "write.parquet.compression-codec": "zstd"}, partition_specs=[PartitionSpec()], default_spec_id=0, current_snapshot_id=None, @@ -409,27 +409,27 @@ def test_create_table_with_given_location_removes_trailing_slash( retention=None, sd=StorageDescriptor( cols=[ - FieldSchema(name='boolean', type='boolean', comment=None), - FieldSchema(name='integer', type='int', comment=None), - FieldSchema(name='long', type='bigint', comment=None), - FieldSchema(name='float', type='float', comment=None), - FieldSchema(name='double', type='double', comment=None), - FieldSchema(name='decimal', type='decimal(32,3)', comment=None), - FieldSchema(name='date', type='date', comment=None), - FieldSchema(name='time', type='string', comment=None), - FieldSchema(name='timestamp', type='timestamp', comment=None), + FieldSchema(name="boolean", type="boolean", comment=None), + FieldSchema(name="integer", type="int", comment=None), + FieldSchema(name="long", type="bigint", comment=None), + FieldSchema(name="float", type="float", comment=None), + FieldSchema(name="double", type="double", comment=None), + FieldSchema(name="decimal", type="decimal(32,3)", comment=None), + FieldSchema(name="date", type="date", comment=None), + FieldSchema(name="time", type="string", comment=None), + FieldSchema(name="timestamp", type="timestamp", comment=None), FieldSchema( - name='timestamptz', - type='timestamp' if hive2_compatible else 'timestamp with local time zone', + name="timestamptz", + type="timestamp" if hive2_compatible else "timestamp with local time zone", comment=None, ), - FieldSchema(name='string', type='string', comment=None), - 
FieldSchema(name='uuid', type='string', comment=None), - FieldSchema(name='fixed', type='binary', comment=None), - FieldSchema(name='binary', type='binary', comment=None), - FieldSchema(name='list', type='array', comment=None), - FieldSchema(name='map', type='map', comment=None), - FieldSchema(name='struct', type='struct', comment=None), + FieldSchema(name="string", type="string", comment=None), + FieldSchema(name="uuid", type="string", comment=None), + FieldSchema(name="fixed", type="binary", comment=None), + FieldSchema(name="binary", type="binary", comment=None), + FieldSchema(name="list", type="array", comment=None), + FieldSchema(name="map", type="map", comment=None), + FieldSchema(name="struct", type="struct", comment=None), ], location=f"{hive_database.locationUri}/table-given-location", inputFormat="org.apache.hadoop.mapred.FileInputFormat", @@ -489,40 +489,40 @@ def test_create_table_with_given_location_removes_trailing_slash( last_column_id=22, schemas=[ Schema( - NestedField(field_id=1, name='boolean', field_type=BooleanType(), required=True), - NestedField(field_id=2, name='integer', field_type=IntegerType(), required=True), - NestedField(field_id=3, name='long', field_type=LongType(), required=True), - NestedField(field_id=4, name='float', field_type=FloatType(), required=True), - NestedField(field_id=5, name='double', field_type=DoubleType(), required=True), - NestedField(field_id=6, name='decimal', field_type=DecimalType(precision=32, scale=3), required=True), - NestedField(field_id=7, name='date', field_type=DateType(), required=True), - NestedField(field_id=8, name='time', field_type=TimeType(), required=True), - NestedField(field_id=9, name='timestamp', field_type=TimestampType(), required=True), - NestedField(field_id=10, name='timestamptz', field_type=TimestamptzType(), required=True), - NestedField(field_id=11, name='string', field_type=StringType(), required=True), - NestedField(field_id=12, name='uuid', field_type=UUIDType(), required=True), - NestedField(field_id=13, name='fixed', field_type=FixedType(length=12), required=True), - NestedField(field_id=14, name='binary', field_type=BinaryType(), required=True), + NestedField(field_id=1, name="boolean", field_type=BooleanType(), required=True), + NestedField(field_id=2, name="integer", field_type=IntegerType(), required=True), + NestedField(field_id=3, name="long", field_type=LongType(), required=True), + NestedField(field_id=4, name="float", field_type=FloatType(), required=True), + NestedField(field_id=5, name="double", field_type=DoubleType(), required=True), + NestedField(field_id=6, name="decimal", field_type=DecimalType(precision=32, scale=3), required=True), + NestedField(field_id=7, name="date", field_type=DateType(), required=True), + NestedField(field_id=8, name="time", field_type=TimeType(), required=True), + NestedField(field_id=9, name="timestamp", field_type=TimestampType(), required=True), + NestedField(field_id=10, name="timestamptz", field_type=TimestamptzType(), required=True), + NestedField(field_id=11, name="string", field_type=StringType(), required=True), + NestedField(field_id=12, name="uuid", field_type=UUIDType(), required=True), + NestedField(field_id=13, name="fixed", field_type=FixedType(length=12), required=True), + NestedField(field_id=14, name="binary", field_type=BinaryType(), required=True), NestedField( field_id=15, - name='list', - field_type=ListType(type='list', element_id=18, element_type=StringType(), element_required=True), + name="list", + field_type=ListType(type="list", 
element_id=18, element_type=StringType(), element_required=True), required=True, ), NestedField( field_id=16, - name='map', + name="map", field_type=MapType( - type='map', key_id=19, key_type=StringType(), value_id=20, value_type=IntegerType(), value_required=True + type="map", key_id=19, key_type=StringType(), value_id=20, value_type=IntegerType(), value_required=True ), required=True, ), NestedField( field_id=17, - name='struct', + name="struct", field_type=StructType( - NestedField(field_id=21, name='inner_string', field_type=StringType(), required=False), - NestedField(field_id=22, name='inner_int', field_type=IntegerType(), required=True), + NestedField(field_id=21, name="inner_string", field_type=StringType(), required=False), + NestedField(field_id=22, name="inner_int", field_type=IntegerType(), required=True), ), required=False, ), @@ -532,7 +532,7 @@ def test_create_table_with_given_location_removes_trailing_slash( ], current_schema_id=0, last_partition_id=999, - properties={"owner": "javaberg", 'write.parquet.compression-codec': 'zstd'}, + properties={"owner": "javaberg", "write.parquet.compression-codec": "zstd"}, partition_specs=[PartitionSpec()], default_spec_id=0, current_snapshot_id=None, diff --git a/tests/catalog/test_sql.py b/tests/catalog/test_sql.py index 285cfd9ab9..6dc498233e 100644 --- a/tests/catalog/test_sql.py +++ b/tests/catalog/test_sql.py @@ -169,10 +169,10 @@ def test_creation_with_unsupported_uri(catalog_name: str) -> None: @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) def test_create_tables_idempotency(catalog: SqlCatalog) -> None: @@ -182,10 +182,10 @@ def test_create_tables_idempotency(catalog: SqlCatalog) -> None: @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) @pytest.mark.parametrize( @@ -207,10 +207,10 @@ def test_create_table_default_sort_order(catalog: SqlCatalog, table_schema_neste @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) @pytest.mark.parametrize( @@ -234,10 +234,10 @@ def test_create_v1_table(catalog: SqlCatalog, table_schema_nested: Schema, table @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) @pytest.mark.parametrize( @@ -263,10 +263,10 @@ def test_create_table_with_pyarrow_schema( @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) @pytest.mark.parametrize( @@ -288,10 +288,10 @@ def test_write_pyarrow_schema(catalog: SqlCatalog, table_identifier: Identifier) pa.array([None, "A", "B", "C"]), # 'large' column ], schema=pa.schema([ - pa.field('foo', pa.string(), nullable=True), - pa.field('bar', pa.int32(), nullable=False), - pa.field('baz', pa.bool_(), nullable=True), - pa.field('large', pa.large_string(), nullable=True), + pa.field("foo", pa.string(), nullable=True), + pa.field("bar", pa.int32(), nullable=False), + pa.field("baz", pa.bool_(), nullable=True), + pa.field("large", pa.large_string(), nullable=True), ]), ) 
table_identifier_nocatalog = catalog.identifier_to_tuple_without_catalog(table_identifier) @@ -302,10 +302,10 @@ def test_write_pyarrow_schema(catalog: SqlCatalog, table_identifier: Identifier) @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) @pytest.mark.parametrize( @@ -332,10 +332,10 @@ def test_create_table_custom_sort_order(catalog: SqlCatalog, table_schema_nested @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) @pytest.mark.parametrize( @@ -361,10 +361,10 @@ def test_create_table_with_default_warehouse_location( @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) @pytest.mark.parametrize( @@ -393,10 +393,10 @@ def test_create_table_with_given_location_removes_trailing_slash( @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) @pytest.mark.parametrize( @@ -417,10 +417,10 @@ def test_create_duplicated_table(catalog: SqlCatalog, table_schema_nested: Schem @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) @pytest.mark.parametrize( @@ -443,10 +443,10 @@ def test_create_table_if_not_exists_duplicated_table( @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) def test_create_table_with_non_existing_namespace(catalog: SqlCatalog, table_schema_nested: Schema, table_name: str) -> None: @@ -456,10 +456,10 @@ def test_create_table_with_non_existing_namespace(catalog: SqlCatalog, table_sch @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) def test_create_table_without_namespace(catalog: SqlCatalog, table_schema_nested: Schema, table_name: str) -> None: @@ -468,10 +468,10 @@ def test_create_table_without_namespace(catalog: SqlCatalog, table_schema_nested @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) @pytest.mark.parametrize( @@ -494,10 +494,10 @@ def test_register_table(catalog: SqlCatalog, table_identifier: Identifier, metad @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) @pytest.mark.parametrize( @@ -518,10 +518,10 @@ def test_register_existing_table(catalog: SqlCatalog, table_identifier: Identifi @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) def test_register_table_with_non_existing_namespace(catalog: SqlCatalog, metadata_location: str, table_name: str) -> None: @@ 
-531,10 +531,10 @@ def test_register_table_with_non_existing_namespace(catalog: SqlCatalog, metadat @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) def test_register_table_without_namespace(catalog: SqlCatalog, metadata_location: str, table_name: str) -> None: @@ -543,10 +543,10 @@ def test_register_table_without_namespace(catalog: SqlCatalog, metadata_location @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) @pytest.mark.parametrize( @@ -569,10 +569,10 @@ def test_load_table(catalog: SqlCatalog, table_schema_nested: Schema, table_iden @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) @pytest.mark.parametrize( @@ -597,11 +597,11 @@ def test_load_table_from_self_identifier(catalog: SqlCatalog, table_schema_neste @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), - lazy_fixture('catalog_sqlite_without_rowcount'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), + lazy_fixture("catalog_sqlite_without_rowcount"), ], ) @pytest.mark.parametrize( @@ -624,11 +624,11 @@ def test_drop_table(catalog: SqlCatalog, table_schema_nested: Schema, table_iden @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), - lazy_fixture('catalog_sqlite_without_rowcount'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), + lazy_fixture("catalog_sqlite_without_rowcount"), ], ) @pytest.mark.parametrize( @@ -653,11 +653,11 @@ def test_drop_table_from_self_identifier(catalog: SqlCatalog, table_schema_neste @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), - lazy_fixture('catalog_sqlite_without_rowcount'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), + lazy_fixture("catalog_sqlite_without_rowcount"), ], ) @pytest.mark.parametrize( @@ -674,11 +674,11 @@ def test_drop_table_that_does_not_exist(catalog: SqlCatalog, table_identifier: I @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), - lazy_fixture('catalog_sqlite_without_rowcount'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), + lazy_fixture("catalog_sqlite_without_rowcount"), ], ) @pytest.mark.parametrize( @@ -717,11 +717,11 @@ def test_rename_table( @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), - lazy_fixture('catalog_sqlite_without_rowcount'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), + lazy_fixture("catalog_sqlite_without_rowcount"), ], ) @pytest.mark.parametrize( @@ -762,11 +762,11 @@ def test_rename_table_from_self_identifier( @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), - lazy_fixture('catalog_sqlite_without_rowcount'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), + lazy_fixture("catalog_sqlite_without_rowcount"), ], ) @pytest.mark.parametrize( @@ -803,11 +803,11 @@ def 
test_rename_table_to_existing_one( @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), - lazy_fixture('catalog_sqlite_without_rowcount'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), + lazy_fixture("catalog_sqlite_without_rowcount"), ], ) @pytest.mark.parametrize( @@ -835,11 +835,11 @@ def test_rename_missing_table(catalog: SqlCatalog, from_table_identifier: Identi @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), - lazy_fixture('catalog_sqlite_without_rowcount'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), + lazy_fixture("catalog_sqlite_without_rowcount"), ], ) @pytest.mark.parametrize( @@ -871,10 +871,10 @@ def test_rename_table_to_missing_namespace( @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) @pytest.mark.parametrize( @@ -914,10 +914,10 @@ def test_list_tables( @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) @pytest.mark.parametrize("namespace", [lazy_fixture("database_name"), lazy_fixture("hierarchical_namespace_name")]) @@ -927,10 +927,10 @@ def test_list_tables_when_missing_namespace(catalog: SqlCatalog, namespace: str) @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) def test_create_namespace_if_not_exists(catalog: SqlCatalog, database_name: str) -> None: @@ -941,10 +941,10 @@ def test_create_namespace_if_not_exists(catalog: SqlCatalog, database_name: str) @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) @pytest.mark.parametrize("namespace", [lazy_fixture("database_name"), lazy_fixture("hierarchical_namespace_name")]) @@ -954,10 +954,10 @@ def test_create_namespace(catalog: SqlCatalog, namespace: str) -> None: @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) @pytest.mark.parametrize("namespace", [lazy_fixture("database_name"), lazy_fixture("hierarchical_namespace_name")]) @@ -968,10 +968,10 @@ def test_create_duplicate_namespace(catalog: SqlCatalog, namespace: str) -> None @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) @pytest.mark.parametrize("namespace", [lazy_fixture("database_name"), lazy_fixture("hierarchical_namespace_name")]) @@ -982,10 +982,10 @@ def test_create_namespaces_sharing_same_prefix(catalog: SqlCatalog, namespace: s @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) @pytest.mark.parametrize("namespace", [lazy_fixture("database_name"), lazy_fixture("hierarchical_namespace_name")]) @@ -1004,10 +1004,10 @@ def test_create_namespace_with_comment_and_location(catalog: 
SqlCatalog, namespa @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) @pytest.mark.parametrize("namespace", [lazy_fixture("database_name"), lazy_fixture("hierarchical_namespace_name")]) @@ -1021,10 +1021,10 @@ def test_create_namespace_with_null_properties(catalog: SqlCatalog, namespace: s @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) @pytest.mark.parametrize("empty_namespace", ["", (), (""), ("", ""), " ", (" ")]) @@ -1034,10 +1034,10 @@ def test_create_namespace_with_empty_identifier(catalog: SqlCatalog, empty_names @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) @pytest.mark.parametrize("namespace_list", [lazy_fixture("database_list"), lazy_fixture("hierarchical_namespace_list")]) @@ -1054,10 +1054,10 @@ def test_list_namespaces(catalog: SqlCatalog, namespace_list: List[str]) -> None @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) def test_list_non_existing_namespaces(catalog: SqlCatalog) -> None: @@ -1066,10 +1066,10 @@ def test_list_non_existing_namespaces(catalog: SqlCatalog) -> None: @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) @pytest.mark.parametrize( @@ -1094,10 +1094,10 @@ def test_drop_namespace(catalog: SqlCatalog, table_schema_nested: Schema, table_ @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) @pytest.mark.parametrize("namespace", [lazy_fixture("database_name"), lazy_fixture("hierarchical_namespace_name")]) @@ -1119,10 +1119,10 @@ def test_load_namespace_properties(catalog: SqlCatalog, namespace: str) -> None: @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) @pytest.mark.parametrize("namespace", [lazy_fixture("database_name"), lazy_fixture("hierarchical_namespace_name")]) @@ -1133,10 +1133,10 @@ def test_load_empty_namespace_properties(catalog: SqlCatalog, namespace: str) -> @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) def test_load_namespace_properties_non_existing_namespace(catalog: SqlCatalog) -> None: @@ -1145,10 +1145,10 @@ def test_load_namespace_properties_non_existing_namespace(catalog: SqlCatalog) - @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) @pytest.mark.parametrize("namespace", [lazy_fixture("database_name"), lazy_fixture("hierarchical_namespace_name")]) @@ -1176,11 +1176,11 @@ def test_update_namespace_properties(catalog: SqlCatalog, namespace: str) -> 
Non @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), - lazy_fixture('catalog_sqlite_without_rowcount'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), + lazy_fixture("catalog_sqlite_without_rowcount"), ], ) @pytest.mark.parametrize( @@ -1218,12 +1218,12 @@ def test_commit_table(catalog: SqlCatalog, table_schema_nested: Schema, table_id @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), - lazy_fixture('catalog_sqlite_without_rowcount'), - lazy_fixture('catalog_sqlite_fsspec'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), + lazy_fixture("catalog_sqlite_without_rowcount"), + lazy_fixture("catalog_sqlite_fsspec"), ], ) @pytest.mark.parametrize( @@ -1258,21 +1258,21 @@ def test_append_table(catalog: SqlCatalog, table_schema_simple: Schema, table_id assert table.metadata.snapshots[0].sequence_number == 1 assert table.metadata.snapshots[0].summary is not None assert table.metadata.snapshots[0].summary.operation == Operation.APPEND - assert table.metadata.snapshots[0].summary['added-data-files'] == '1' - assert table.metadata.snapshots[0].summary['added-records'] == '1' - assert table.metadata.snapshots[0].summary['total-data-files'] == '1' - assert table.metadata.snapshots[0].summary['total-records'] == '1' + assert table.metadata.snapshots[0].summary["added-data-files"] == "1" + assert table.metadata.snapshots[0].summary["added-records"] == "1" + assert table.metadata.snapshots[0].summary["total-data-files"] == "1" + assert table.metadata.snapshots[0].summary["total-records"] == "1" # read back the data assert df == table.scan().to_arrow() @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), - lazy_fixture('catalog_sqlite_without_rowcount'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), + lazy_fixture("catalog_sqlite_without_rowcount"), ], ) @pytest.mark.parametrize( @@ -1300,11 +1300,11 @@ def test_concurrent_commit_table(catalog: SqlCatalog, table_schema_simple: Schem @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), - lazy_fixture('catalog_sqlite_without_rowcount'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), + lazy_fixture("catalog_sqlite_without_rowcount"), ], ) @pytest.mark.parametrize("format_version", [1, 2]) @@ -1323,7 +1323,7 @@ def test_write_and_evolve(catalog: SqlCatalog, format_version: int) -> None: pa_table = pa.Table.from_pydict( { - 'foo': ['a', None, 'z'], + "foo": ["a", None, "z"], }, schema=pa.schema([pa.field("foo", pa.string(), nullable=True)]), ) @@ -1332,8 +1332,8 @@ def test_write_and_evolve(catalog: SqlCatalog, format_version: int) -> None: pa_table_with_column = pa.Table.from_pydict( { - 'foo': ['a', None, 'z'], - 'bar': [19, None, 25], + "foo": ["a", None, "z"], + "bar": [19, None, 25], }, schema=pa.schema([ pa.field("foo", pa.string(), nullable=True), @@ -1351,11 +1351,11 @@ def test_write_and_evolve(catalog: SqlCatalog, format_version: int) -> None: @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), - lazy_fixture('catalog_sqlite_without_rowcount'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), + lazy_fixture("catalog_sqlite_without_rowcount"), ], ) @pytest.mark.parametrize( @@ -1377,11 +1377,11 @@ def 
test_table_properties_int_value(catalog: SqlCatalog, table_schema_simple: Sc @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), - lazy_fixture('catalog_sqlite_without_rowcount'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), + lazy_fixture("catalog_sqlite_without_rowcount"), ], ) @pytest.mark.parametrize( @@ -1405,10 +1405,10 @@ def test_table_properties_raise_for_none_value( @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) @pytest.mark.parametrize( @@ -1429,4 +1429,4 @@ def test_table_exists(catalog: SqlCatalog, table_schema_simple: Schema, table_id assert catalog.table_exists(existing_table) is True # Act and Assert for a non-existing table - assert catalog.table_exists(('non', 'exist')) is False + assert catalog.table_exists(("non", "exist")) is False diff --git a/tests/conftest.py b/tests/conftest.py index 4baefafef4..01915b7d82 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -324,9 +324,9 @@ def pyarrow_schema_simple_without_ids() -> "pa.Schema": import pyarrow as pa return pa.schema([ - pa.field('foo', pa.string(), nullable=True), - pa.field('bar', pa.int32(), nullable=False), - pa.field('baz', pa.bool_(), nullable=True), + pa.field("foo", pa.string(), nullable=True), + pa.field("bar", pa.int32(), nullable=False), + pa.field("baz", pa.bool_(), nullable=True), ]) @@ -335,12 +335,12 @@ def pyarrow_schema_nested_without_ids() -> "pa.Schema": import pyarrow as pa return pa.schema([ - pa.field('foo', pa.string(), nullable=False), - pa.field('bar', pa.int32(), nullable=False), - pa.field('baz', pa.bool_(), nullable=True), - pa.field('qux', pa.list_(pa.string()), nullable=False), + pa.field("foo", pa.string(), nullable=False), + pa.field("bar", pa.int32(), nullable=False), + pa.field("baz", pa.bool_(), nullable=True), + pa.field("qux", pa.list_(pa.string()), nullable=False), pa.field( - 'quux', + "quux", pa.map_( pa.string(), pa.map_(pa.string(), pa.int32()), @@ -348,20 +348,20 @@ def pyarrow_schema_nested_without_ids() -> "pa.Schema": nullable=False, ), pa.field( - 'location', + "location", pa.list_( pa.struct([ - pa.field('latitude', pa.float32(), nullable=False), - pa.field('longitude', pa.float32(), nullable=False), + pa.field("latitude", pa.float32(), nullable=False), + pa.field("longitude", pa.float32(), nullable=False), ]), ), nullable=False, ), pa.field( - 'person', + "person", pa.struct([ - pa.field('name', pa.string(), nullable=True), - pa.field('age', pa.int32(), nullable=False), + pa.field("name", pa.string(), nullable=True), + pa.field("age", pa.int32(), nullable=False), ]), nullable=True, ), @@ -2081,31 +2081,31 @@ def spark() -> "SparkSession": TEST_DATA_WITH_NULL = { - 'bool': [False, None, True], - 'string': ['a', None, 'z'], + "bool": [False, None, True], + "string": ["a", None, "z"], # Go over the 16 bytes to kick in truncation - 'string_long': ['a' * 22, None, 'z' * 22], - 'int': [1, None, 9], - 'long': [1, None, 9], - 'float': [0.0, None, 0.9], - 'double': [0.0, None, 0.9], + "string_long": ["a" * 22, None, "z" * 22], + "int": [1, None, 9], + "long": [1, None, 9], + "float": [0.0, None, 0.9], + "double": [0.0, None, 0.9], # 'time': [1_000_000, None, 3_000_000], # Example times: 1s, none, and 3s past midnight #Spark does not support time fields - 'timestamp': [datetime(2023, 1, 1, 19, 25, 00), None, datetime(2023, 3, 1, 19, 
25, 00)], - 'timestamptz': [ + "timestamp": [datetime(2023, 1, 1, 19, 25, 00), None, datetime(2023, 3, 1, 19, 25, 00)], + "timestamptz": [ datetime(2023, 1, 1, 19, 25, 00, tzinfo=timezone.utc), None, datetime(2023, 3, 1, 19, 25, 00, tzinfo=timezone.utc), ], - 'date': [date(2023, 1, 1), None, date(2023, 3, 1)], + "date": [date(2023, 1, 1), None, date(2023, 3, 1)], # Not supported by Spark # 'time': [time(1, 22, 0), None, time(19, 25, 0)], # Not natively supported by Arrow # 'uuid': [uuid.UUID('00000000-0000-0000-0000-000000000000').bytes, None, uuid.UUID('11111111-1111-1111-1111-111111111111').bytes], - 'binary': [b'\01', None, b'\22'], - 'fixed': [ - uuid.UUID('00000000-0000-0000-0000-000000000000').bytes, + "binary": [b"\01", None, b"\22"], + "fixed": [ + uuid.UUID("00000000-0000-0000-0000-000000000000").bytes, None, - uuid.UUID('11111111-1111-1111-1111-111111111111').bytes, + uuid.UUID("11111111-1111-1111-1111-111111111111").bytes, ], } diff --git a/tests/expressions/test_expressions.py b/tests/expressions/test_expressions.py index f277672d87..87856a04f6 100644 --- a/tests/expressions/test_expressions.py +++ b/tests/expressions/test_expressions.py @@ -1152,11 +1152,11 @@ def test_above_long_bounds_greater_than_or_equal( def test_eq_bound_expression(bound_reference_str: BoundReference[str]) -> None: - assert BoundEqualTo(term=bound_reference_str, literal=literal('a')) != BoundGreaterThanOrEqual( - term=bound_reference_str, literal=literal('a') + assert BoundEqualTo(term=bound_reference_str, literal=literal("a")) != BoundGreaterThanOrEqual( + term=bound_reference_str, literal=literal("a") ) - assert BoundEqualTo(term=bound_reference_str, literal=literal('a')) == BoundEqualTo( - term=bound_reference_str, literal=literal('a') + assert BoundEqualTo(term=bound_reference_str, literal=literal("a")) == BoundEqualTo( + term=bound_reference_str, literal=literal("a") ) diff --git a/tests/integration/test_add_files.py b/tests/integration/test_add_files.py index 94c73918c8..84729fcca4 100644 --- a/tests/integration/test_add_files.py +++ b/tests/integration/test_add_files.py @@ -65,10 +65,10 @@ ) ARROW_SCHEMA_WITH_IDS = pa.schema([ - pa.field('foo', pa.bool_(), nullable=False, metadata={"PARQUET:field_id": "1"}), - pa.field('bar', pa.string(), nullable=False, metadata={"PARQUET:field_id": "2"}), - pa.field('baz', pa.int32(), nullable=False, metadata={"PARQUET:field_id": "3"}), - pa.field('qux', pa.date32(), nullable=False, metadata={"PARQUET:field_id": "4"}), + pa.field("foo", pa.bool_(), nullable=False, metadata={"PARQUET:field_id": "1"}), + pa.field("bar", pa.string(), nullable=False, metadata={"PARQUET:field_id": "2"}), + pa.field("baz", pa.int32(), nullable=False, metadata={"PARQUET:field_id": "3"}), + pa.field("qux", pa.date32(), nullable=False, metadata={"PARQUET:field_id": "4"}), ]) diff --git a/tests/integration/test_inspect_table.py b/tests/integration/test_inspect_table.py index 8665435e43..1f2b9a3ead 100644 --- a/tests/integration/test_inspect_table.py +++ b/tests/integration/test_inspect_table.py @@ -88,45 +88,45 @@ def test_inspect_snapshots( df = tbl.inspect.snapshots() assert df.column_names == [ - 'committed_at', - 'snapshot_id', - 'parent_id', - 'operation', - 'manifest_list', - 'summary', + "committed_at", + "snapshot_id", + "parent_id", + "operation", + "manifest_list", + "summary", ] - for committed_at in df['committed_at']: + for committed_at in df["committed_at"]: assert isinstance(committed_at.as_py(), datetime) - for snapshot_id in df['snapshot_id']: + for snapshot_id in 
df["snapshot_id"]: assert isinstance(snapshot_id.as_py(), int) - assert df['parent_id'][0].as_py() is None - assert df['parent_id'][1:] == df['snapshot_id'][:2] + assert df["parent_id"][0].as_py() is None + assert df["parent_id"][1:] == df["snapshot_id"][:2] - assert [operation.as_py() for operation in df['operation']] == ['append', 'overwrite', 'append'] + assert [operation.as_py() for operation in df["operation"]] == ["append", "overwrite", "append"] - for manifest_list in df['manifest_list']: + for manifest_list in df["manifest_list"]: assert manifest_list.as_py().startswith("s3://") - assert df['summary'][0].as_py() == [ - ('added-files-size', '5459'), - ('added-data-files', '1'), - ('added-records', '3'), - ('total-data-files', '1'), - ('total-delete-files', '0'), - ('total-records', '3'), - ('total-files-size', '5459'), - ('total-position-deletes', '0'), - ('total-equality-deletes', '0'), + assert df["summary"][0].as_py() == [ + ("added-files-size", "5459"), + ("added-data-files", "1"), + ("added-records", "3"), + ("total-data-files", "1"), + ("total-delete-files", "0"), + ("total-records", "3"), + ("total-files-size", "5459"), + ("total-position-deletes", "0"), + ("total-equality-deletes", "0"), ] lhs = spark.table(f"{identifier}.snapshots").toPandas() rhs = df.to_pandas() for column in df.column_names: for left, right in zip(lhs[column].to_list(), rhs[column].to_list()): - if column == 'summary': + if column == "summary": # Arrow returns a list of tuples, instead of a dict right = dict(right) @@ -150,29 +150,29 @@ def test_inspect_entries( def check_pyiceberg_df_equals_spark_df(df: pa.Table, spark_df: DataFrame) -> None: assert df.column_names == [ - 'status', - 'snapshot_id', - 'sequence_number', - 'file_sequence_number', - 'data_file', - 'readable_metrics', + "status", + "snapshot_id", + "sequence_number", + "file_sequence_number", + "data_file", + "readable_metrics", ] # Make sure that they are filled properly - for int_column in ['status', 'snapshot_id', 'sequence_number', 'file_sequence_number']: + for int_column in ["status", "snapshot_id", "sequence_number", "file_sequence_number"]: for value in df[int_column]: assert isinstance(value.as_py(), int) - for snapshot_id in df['snapshot_id']: + for snapshot_id in df["snapshot_id"]: assert isinstance(snapshot_id.as_py(), int) lhs = df.to_pandas() rhs = spark_df.toPandas() for column in df.column_names: for left, right in zip(lhs[column].to_list(), rhs[column].to_list()): - if column == 'data_file': + if column == "data_file": for df_column in left.keys(): - if df_column == 'partition': + if df_column == "partition": # Spark leaves out the partition if the table is unpartitioned continue @@ -183,20 +183,20 @@ def check_pyiceberg_df_equals_spark_df(df: pa.Table, spark_df: DataFrame) -> Non df_lhs = dict(df_lhs) assert df_lhs == df_rhs, f"Difference in data_file column {df_column}: {df_lhs} != {df_rhs}" - elif column == 'readable_metrics': + elif column == "readable_metrics": assert list(left.keys()) == [ - 'bool', - 'string', - 'string_long', - 'int', - 'long', - 'float', - 'double', - 'timestamp', - 'timestamptz', - 'date', - 'binary', - 'fixed', + "bool", + "string", + "string_long", + "int", + "long", + "float", + "double", + "timestamp", + "timestamptz", + "date", + "binary", + "fixed", ] assert left.keys() == right.keys() @@ -205,18 +205,18 @@ def check_pyiceberg_df_equals_spark_df(df: pa.Table, spark_df: DataFrame) -> Non rm_lhs = left[rm_column] rm_rhs = right[rm_column] - assert rm_lhs['column_size'] == rm_rhs['column_size'] 
- assert rm_lhs['value_count'] == rm_rhs['value_count'] - assert rm_lhs['null_value_count'] == rm_rhs['null_value_count'] - assert rm_lhs['nan_value_count'] == rm_rhs['nan_value_count'] + assert rm_lhs["column_size"] == rm_rhs["column_size"] + assert rm_lhs["value_count"] == rm_rhs["value_count"] + assert rm_lhs["null_value_count"] == rm_rhs["null_value_count"] + assert rm_lhs["nan_value_count"] == rm_rhs["nan_value_count"] - if rm_column == 'timestamptz': + if rm_column == "timestamptz": # PySpark does not correctly set the timstamptz - rm_rhs['lower_bound'] = rm_rhs['lower_bound'].replace(tzinfo=pytz.utc) - rm_rhs['upper_bound'] = rm_rhs['upper_bound'].replace(tzinfo=pytz.utc) + rm_rhs["lower_bound"] = rm_rhs["lower_bound"].replace(tzinfo=pytz.utc) + rm_rhs["upper_bound"] = rm_rhs["upper_bound"].replace(tzinfo=pytz.utc) - assert rm_lhs['lower_bound'] == rm_rhs['lower_bound'] - assert rm_lhs['upper_bound'] == rm_rhs['upper_bound'] + assert rm_lhs["lower_bound"] == rm_rhs["lower_bound"] + assert rm_lhs["upper_bound"] == rm_rhs["upper_bound"] else: assert left == right, f"Difference in column {column}: {left} != {right}" @@ -265,8 +265,8 @@ def test_inspect_entries_partitioned(spark: SparkSession, session_catalog: Catal df = session_catalog.load_table(identifier).inspect.entries() - assert df.to_pydict()['data_file'][0]['partition'] == {'dt_day': date(2021, 2, 1), 'dt_month': None} - assert df.to_pydict()['data_file'][1]['partition'] == {'dt_day': None, 'dt_month': 612} + assert df.to_pydict()["data_file"][0]["partition"] == {"dt_day": date(2021, 2, 1), "dt_month": None} + assert df.to_pydict()["data_file"][1]["partition"] == {"dt_day": None, "dt_month": 612} @pytest.mark.integration @@ -301,21 +301,21 @@ def test_inspect_refs( df = tbl.refresh().inspect.refs() assert df.column_names == [ - 'name', - 'type', - 'snapshot_id', - 'max_reference_age_in_ms', - 'min_snapshots_to_keep', - 'max_snapshot_age_in_ms', + "name", + "type", + "snapshot_id", + "max_reference_age_in_ms", + "min_snapshots_to_keep", + "max_snapshot_age_in_ms", ] - assert [name.as_py() for name in df['name']] == ['testBranch', 'main', 'testTag'] - assert [ref_type.as_py() for ref_type in df['type']] == ['BRANCH', 'BRANCH', 'TAG'] + assert [name.as_py() for name in df["name"]] == ["testBranch", "main", "testTag"] + assert [ref_type.as_py() for ref_type in df["type"]] == ["BRANCH", "BRANCH", "TAG"] - for snapshot_id in df['snapshot_id']: + for snapshot_id in df["snapshot_id"]: assert isinstance(snapshot_id.as_py(), int) - for int_column in ['max_reference_age_in_ms', 'min_snapshots_to_keep', 'max_snapshot_age_in_ms']: + for int_column in ["max_reference_age_in_ms", "min_snapshots_to_keep", "max_snapshot_age_in_ms"]: for value in df[int_column]: assert isinstance(value.as_py(), int) or not value.as_py() @@ -343,28 +343,28 @@ def test_inspect_partitions_unpartitioned( df = tbl.inspect.partitions() assert df.column_names == [ - 'record_count', - 'file_count', - 'total_data_file_size_in_bytes', - 'position_delete_record_count', - 'position_delete_file_count', - 'equality_delete_record_count', - 'equality_delete_file_count', - 'last_updated_at', - 'last_updated_snapshot_id', + "record_count", + "file_count", + "total_data_file_size_in_bytes", + "position_delete_record_count", + "position_delete_file_count", + "equality_delete_record_count", + "equality_delete_file_count", + "last_updated_at", + "last_updated_snapshot_id", ] - for last_updated_at in df['last_updated_at']: + for last_updated_at in df["last_updated_at"]: assert 
isinstance(last_updated_at.as_py(), datetime) int_cols = [ - 'record_count', - 'file_count', - 'total_data_file_size_in_bytes', - 'position_delete_record_count', - 'position_delete_file_count', - 'equality_delete_record_count', - 'equality_delete_file_count', - 'last_updated_snapshot_id', + "record_count", + "file_count", + "total_data_file_size_in_bytes", + "position_delete_record_count", + "position_delete_file_count", + "equality_delete_record_count", + "equality_delete_file_count", + "last_updated_snapshot_id", ] for column in int_cols: for value in df[column]: @@ -434,8 +434,8 @@ def test_inspect_partitions_partitioned(spark: SparkSession, session_catalog: Ca ) def check_pyiceberg_df_equals_spark_df(df: pa.Table, spark_df: DataFrame) -> None: - lhs = df.to_pandas().sort_values('spec_id') - rhs = spark_df.toPandas().sort_values('spec_id') + lhs = df.to_pandas().sort_values("spec_id") + rhs = spark_df.toPandas().sort_values("spec_id") for column in df.column_names: for left, right in zip(lhs[column].to_list(), rhs[column].to_list()): assert left == right, f"Difference in column {column}: {left} != {right}" @@ -481,31 +481,31 @@ def test_inspect_manifests(spark: SparkSession, session_catalog: Catalog, format df = session_catalog.load_table(identifier).inspect.manifests() assert df.column_names == [ - 'content', - 'path', - 'length', - 'partition_spec_id', - 'added_snapshot_id', - 'added_data_files_count', - 'existing_data_files_count', - 'deleted_data_files_count', - 'added_delete_files_count', - 'existing_delete_files_count', - 'deleted_delete_files_count', - 'partition_summaries', + "content", + "path", + "length", + "partition_spec_id", + "added_snapshot_id", + "added_data_files_count", + "existing_data_files_count", + "deleted_data_files_count", + "added_delete_files_count", + "existing_delete_files_count", + "deleted_delete_files_count", + "partition_summaries", ] int_cols = [ - 'content', - 'length', - 'partition_spec_id', - 'added_snapshot_id', - 'added_data_files_count', - 'existing_data_files_count', - 'deleted_data_files_count', - 'added_delete_files_count', - 'existing_delete_files_count', - 'deleted_delete_files_count', + "content", + "length", + "partition_spec_id", + "added_snapshot_id", + "added_data_files_count", + "existing_data_files_count", + "deleted_data_files_count", + "added_delete_files_count", + "existing_delete_files_count", + "deleted_delete_files_count", ] for column in int_cols: diff --git a/tests/integration/test_partition_evolution.py b/tests/integration/test_partition_evolution.py index 785b34b82c..5cc7512f4a 100644 --- a/tests/integration/test_partition_evolution.py +++ b/tests/integration/test_partition_evolution.py @@ -73,7 +73,7 @@ def _create_table_with_schema(catalog: Catalog, schema: Schema, format_version: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_add_identity_partition(catalog: Catalog, table_schema_simple: Schema) -> None: simple_table = _simple_table(catalog, table_schema_simple) simple_table.update_spec().add_identity("foo").commit() @@ -85,7 +85,7 @@ def test_add_identity_partition(catalog: Catalog, table_schema_simple: Schema) - @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) 
+@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_add_year(catalog: Catalog) -> None: table = _table(catalog) table.update_spec().add_field("event_ts", YearTransform(), "year_transform").commit() @@ -93,7 +93,7 @@ def test_add_year(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_add_month(catalog: Catalog) -> None: table = _table(catalog) table.update_spec().add_field("event_ts", MonthTransform(), "month_transform").commit() @@ -101,7 +101,7 @@ def test_add_month(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_add_day(catalog: Catalog) -> None: table = _table(catalog) table.update_spec().add_field("event_ts", DayTransform(), "day_transform").commit() @@ -109,7 +109,7 @@ def test_add_day(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_add_hour(catalog: Catalog) -> None: table = _table(catalog) table.update_spec().add_field("event_ts", HourTransform(), "hour_transform").commit() @@ -117,7 +117,7 @@ def test_add_hour(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_add_bucket(catalog: Catalog, table_schema_simple: Schema) -> None: simple_table = _create_table_with_schema(catalog, table_schema_simple, "1") simple_table.update_spec().add_field("foo", BucketTransform(12), "bucket_transform").commit() @@ -125,7 +125,7 @@ def test_add_bucket(catalog: Catalog, table_schema_simple: Schema) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_add_truncate(catalog: Catalog, table_schema_simple: Schema) -> None: simple_table = _create_table_with_schema(catalog, table_schema_simple, "1") simple_table.update_spec().add_field("foo", TruncateTransform(1), "truncate_transform").commit() @@ -135,7 +135,7 @@ def test_add_truncate(catalog: Catalog, table_schema_simple: Schema) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_multiple_adds(catalog: Catalog) -> None: table = _table(catalog) table.update_spec().add_identity("id").add_field("event_ts", HourTransform(), "hourly_partitioned").add_field( @@ -153,7 +153,7 @@ def 
test_multiple_adds(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_add_hour_to_day(catalog: Catalog) -> None: table = _table(catalog) table.update_spec().add_field("event_ts", DayTransform(), "daily_partitioned").commit() @@ -169,7 +169,7 @@ def test_add_hour_to_day(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_add_multiple_buckets(catalog: Catalog) -> None: table = _table(catalog) table.update_spec().add_field("id", BucketTransform(16)).add_field("id", BucketTransform(4)).commit() @@ -184,7 +184,7 @@ def test_add_multiple_buckets(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_remove_identity(catalog: Catalog) -> None: table = _table(catalog) table.update_spec().add_identity("id").commit() @@ -192,12 +192,12 @@ def test_remove_identity(catalog: Catalog) -> None: assert len(table.specs()) == 3 assert table.spec().spec_id == 2 assert table.spec() == PartitionSpec( - PartitionField(source_id=1, field_id=1000, transform=VoidTransform(), name='id'), spec_id=2 + PartitionField(source_id=1, field_id=1000, transform=VoidTransform(), name="id"), spec_id=2 ) @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_remove_identity_v2(catalog: Catalog) -> None: table_v2 = _table_v2(catalog) table_v2.update_spec().add_identity("id").commit() @@ -208,7 +208,7 @@ def test_remove_identity_v2(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_remove_bucket(catalog: Catalog) -> None: table = _table(catalog) with table.update_spec() as update: @@ -223,13 +223,13 @@ def test_remove_bucket(catalog: Catalog) -> None: 1001, 2, 1001, - PartitionField(source_id=1, field_id=1000, transform=VoidTransform(), name='bucketed_id'), - PartitionField(source_id=2, field_id=1001, transform=DayTransform(), name='day_ts'), + PartitionField(source_id=1, field_id=1000, transform=VoidTransform(), name="bucketed_id"), + PartitionField(source_id=2, field_id=1001, transform=DayTransform(), name="day_ts"), ) @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_remove_bucket_v2(catalog: Catalog) -> None: table_v2 = _table_v2(catalog) with table_v2.update_spec() as 
update: @@ -239,12 +239,12 @@ def test_remove_bucket_v2(catalog: Catalog) -> None: remove.remove_field("bucketed_id") assert len(table_v2.specs()) == 3 _validate_new_partition_fields( - table_v2, 1001, 2, 1001, PartitionField(source_id=2, field_id=1001, transform=DayTransform(), name='day_ts') + table_v2, 1001, 2, 1001, PartitionField(source_id=2, field_id=1001, transform=DayTransform(), name="day_ts") ) @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_remove_day(catalog: Catalog) -> None: table = _table(catalog) with table.update_spec() as update: @@ -259,13 +259,13 @@ def test_remove_day(catalog: Catalog) -> None: 1001, 2, 1001, - PartitionField(source_id=1, field_id=1000, transform=BucketTransform(16), name='bucketed_id'), - PartitionField(source_id=2, field_id=1001, transform=VoidTransform(), name='day_ts'), + PartitionField(source_id=1, field_id=1000, transform=BucketTransform(16), name="bucketed_id"), + PartitionField(source_id=2, field_id=1001, transform=VoidTransform(), name="day_ts"), ) @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_remove_day_v2(catalog: Catalog) -> None: table_v2 = _table_v2(catalog) with table_v2.update_spec() as update: @@ -275,12 +275,12 @@ def test_remove_day_v2(catalog: Catalog) -> None: remove.remove_field("day_ts") assert len(table_v2.specs()) == 3 _validate_new_partition_fields( - table_v2, 1000, 2, 1001, PartitionField(source_id=1, field_id=1000, transform=BucketTransform(16), name='bucketed_id') + table_v2, 1000, 2, 1001, PartitionField(source_id=1, field_id=1000, transform=BucketTransform(16), name="bucketed_id") ) @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_rename(catalog: Catalog) -> None: table = _table(catalog) table.update_spec().add_identity("id").commit() @@ -291,7 +291,7 @@ def test_rename(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_cannot_add_and_remove(catalog: Catalog) -> None: table = _table(catalog) with pytest.raises(ValueError) as exc_info: @@ -300,7 +300,7 @@ def test_cannot_add_and_remove(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_cannot_add_redundant_time_partition(catalog: Catalog) -> None: table = _table(catalog) with pytest.raises(ValueError) as exc_info: @@ -311,7 +311,7 @@ def test_cannot_add_redundant_time_partition(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', 
[pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_cannot_delete_and_rename(catalog: Catalog) -> None: table = _table(catalog) with pytest.raises(ValueError) as exc_info: @@ -321,7 +321,7 @@ def test_cannot_delete_and_rename(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_cannot_rename_and_delete(catalog: Catalog) -> None: table = _table(catalog) with pytest.raises(ValueError) as exc_info: @@ -331,7 +331,7 @@ def test_cannot_rename_and_delete(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_cannot_add_same_tranform_for_same_field(catalog: Catalog) -> None: table = _table(catalog) with pytest.raises(ValueError) as exc_info: @@ -342,7 +342,7 @@ def test_cannot_add_same_tranform_for_same_field(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_cannot_add_same_field_multiple_times(catalog: Catalog) -> None: table = _table(catalog) with pytest.raises(ValueError) as exc_info: @@ -353,7 +353,7 @@ def test_cannot_add_same_field_multiple_times(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_cannot_add_multiple_specs_same_name(catalog: Catalog) -> None: table = _table(catalog) with pytest.raises(ValueError) as exc_info: @@ -364,7 +364,7 @@ def test_cannot_add_multiple_specs_same_name(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_change_specs_and_schema_transaction(catalog: Catalog) -> None: table = _table(catalog) with table.transaction() as transaction: @@ -387,17 +387,17 @@ def test_change_specs_and_schema_transaction(catalog: Catalog) -> None: ) assert table.schema() == Schema( - NestedField(field_id=1, name='id', field_type=LongType(), required=False), - NestedField(field_id=2, name='event_ts', field_type=TimestampType(), required=False), - NestedField(field_id=3, name='str', field_type=StringType(), required=False), - NestedField(field_id=4, name='col_string', field_type=StringType(), required=False), + NestedField(field_id=1, name="id", field_type=LongType(), required=False), + NestedField(field_id=2, name="event_ts", field_type=TimestampType(), required=False), + NestedField(field_id=3, name="str", field_type=StringType(), required=False), + 
NestedField(field_id=4, name="col_string", field_type=StringType(), required=False), identifier_field_ids=[], ) assert table.schema().schema_id == 1 @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_multiple_adds_and_remove_v1(catalog: Catalog) -> None: table = _table(catalog) with table.update_spec() as update: @@ -419,7 +419,7 @@ def test_multiple_adds_and_remove_v1(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_multiple_adds_and_remove_v2(catalog: Catalog) -> None: table_v2 = _table_v2(catalog) with table_v2.update_spec() as update: @@ -433,7 +433,7 @@ def test_multiple_adds_and_remove_v2(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_multiple_remove_and_add_reuses_v2(catalog: Catalog) -> None: table_v2 = _table_v2(catalog) with table_v2.update_spec() as update: diff --git a/tests/integration/test_partitioning_key.py b/tests/integration/test_partitioning_key.py index d89ecaf202..29f664909c 100644 --- a/tests/integration/test_partitioning_key.py +++ b/tests/integration/test_partitioning_key.py @@ -328,8 +328,8 @@ ), ( [PartitionField(source_id=11, field_id=1001, transform=IdentityTransform(), name="binary_field")], - [b'example'], - Record(binary_field=b'example'), + [b"example"], + Record(binary_field=b"example"), "binary_field=ZXhhbXBsZQ%3D%3D", f"""CREATE TABLE {identifier} ( binary_field binary, @@ -347,8 +347,8 @@ ), ( [PartitionField(source_id=13, field_id=1001, transform=IdentityTransform(), name="decimal_field")], - [Decimal('123.45')], - Record(decimal_field=Decimal('123.45')), + [Decimal("123.45")], + Record(decimal_field=Decimal("123.45")), "decimal_field=123.45", f"""CREATE TABLE {identifier} ( decimal_field decimal(5,2), @@ -638,8 +638,8 @@ ), ( [PartitionField(source_id=13, field_id=1001, transform=TruncateTransform(width=5), name="decimal_field_trunc")], - [Decimal('678.93')], - Record(decimal_field_trunc=Decimal('678.90')), + [Decimal("678.93")], + Record(decimal_field_trunc=Decimal("678.90")), "decimal_field_trunc=678.90", # Assuming truncation width of 1 leads to truncating to 670 f"""CREATE TABLE {identifier} ( decimal_field decimal(5,2), @@ -657,8 +657,8 @@ ), ( [PartitionField(source_id=11, field_id=1001, transform=TruncateTransform(10), name="binary_field_trunc")], - [b'HELLOICEBERG'], - Record(binary_field_trunc=b'HELLOICEBE'), + [b"HELLOICEBERG"], + Record(binary_field_trunc=b"HELLOICEBE"), "binary_field_trunc=SEVMTE9JQ0VCRQ%3D%3D", f"""CREATE TABLE {identifier} ( binary_field binary, diff --git a/tests/integration/test_reads.py b/tests/integration/test_reads.py index 2a10e37ba9..80a6f18632 100644 --- a/tests/integration/test_reads.py +++ b/tests/integration/test_reads.py @@ -51,7 +51,7 @@ ) from pyiceberg.utils.concurrent import ExecutorFactory -DEFAULT_PROPERTIES = {'write.parquet.compression-codec': 'zstd'} 
+DEFAULT_PROPERTIES = {"write.parquet.compression-codec": "zstd"} TABLE_NAME = ("default", "t1") @@ -74,7 +74,7 @@ def create_table(catalog: Catalog) -> Table: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_table_properties(catalog: Catalog) -> None: table = create_table(catalog) @@ -104,7 +104,7 @@ def test_table_properties(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_table_properties_dict(catalog: Catalog) -> None: table = create_table(catalog) @@ -134,7 +134,7 @@ def test_table_properties_dict(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_table_properties_error(catalog: Catalog) -> None: table = create_table(catalog) properties = {"abc": "def"} @@ -144,7 +144,7 @@ def test_table_properties_error(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_pyarrow_nan(catalog: Catalog) -> None: table_test_null_nan = catalog.load_table("default.test_null_nan") arrow_table = table_test_null_nan.scan(row_filter=IsNaN("col_numeric"), selected_fields=("idx", "col_numeric")).to_arrow() @@ -154,7 +154,7 @@ def test_pyarrow_nan(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_pyarrow_nan_rewritten(catalog: Catalog) -> None: table_test_null_nan_rewritten = catalog.load_table("default.test_null_nan_rewritten") arrow_table = table_test_null_nan_rewritten.scan( @@ -166,7 +166,7 @@ def test_pyarrow_nan_rewritten(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) @pytest.mark.skip(reason="Fixing issues with NaN's: https://github.com/apache/arrow/issues/34162") def test_pyarrow_not_nan_count(catalog: Catalog) -> None: table_test_null_nan = catalog.load_table("default.test_null_nan") @@ -175,7 +175,7 @@ def test_pyarrow_not_nan_count(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_duckdb_nan(catalog: Catalog) -> None: table_test_null_nan_rewritten = 
catalog.load_table("default.test_null_nan_rewritten") con = table_test_null_nan_rewritten.scan().to_duckdb("table_test_null_nan") @@ -185,7 +185,7 @@ def test_duckdb_nan(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_pyarrow_limit(catalog: Catalog) -> None: table_test_limit = catalog.load_table("default.test_limit") limited_result = table_test_limit.scan(selected_fields=("idx",), limit=1).to_arrow() @@ -200,7 +200,7 @@ def test_pyarrow_limit(catalog: Catalog) -> None: @pytest.mark.integration @pytest.mark.filterwarnings("ignore") -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_daft_nan(catalog: Catalog) -> None: table_test_null_nan_rewritten = catalog.load_table("default.test_null_nan_rewritten") df = table_test_null_nan_rewritten.to_daft() @@ -209,7 +209,7 @@ def test_daft_nan(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_daft_nan_rewritten(catalog: Catalog) -> None: table_test_null_nan_rewritten = catalog.load_table("default.test_null_nan_rewritten") df = table_test_null_nan_rewritten.to_daft() @@ -222,7 +222,7 @@ def test_daft_nan_rewritten(catalog: Catalog) -> None: @pytest.mark.integration @pytest.mark.filterwarnings("ignore") -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_ray_nan(catalog: Catalog) -> None: table_test_null_nan_rewritten = catalog.load_table("default.test_null_nan_rewritten") ray_dataset = table_test_null_nan_rewritten.scan().to_ray() @@ -231,7 +231,7 @@ def test_ray_nan(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_ray_nan_rewritten(catalog: Catalog) -> None: table_test_null_nan_rewritten = catalog.load_table("default.test_null_nan_rewritten") ray_dataset = table_test_null_nan_rewritten.scan( @@ -243,7 +243,7 @@ def test_ray_nan_rewritten(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) @pytest.mark.skip(reason="Fixing issues with NaN's: https://github.com/apache/arrow/issues/34162") def test_ray_not_nan_count(catalog: Catalog) -> None: table_test_null_nan_rewritten = catalog.load_table("default.test_null_nan_rewritten") @@ -252,7 +252,7 @@ def test_ray_not_nan_count(catalog: Catalog) -> None: @pytest.mark.integration 
-@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_ray_all_types(catalog: Catalog) -> None: table_test_all_types = catalog.load_table("default.test_all_types") ray_dataset = table_test_all_types.scan().to_ray() @@ -262,7 +262,7 @@ def test_ray_all_types(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_pyarrow_to_iceberg_all_types(catalog: Catalog) -> None: table_test_all_types = catalog.load_table("default.test_all_types") fs = S3FileSystem( @@ -281,7 +281,7 @@ def test_pyarrow_to_iceberg_all_types(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_pyarrow_deletes(catalog: Catalog) -> None: # number, letter # (1, 'a'), @@ -318,7 +318,7 @@ def test_pyarrow_deletes(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_pyarrow_deletes_double(catalog: Catalog) -> None: # number, letter # (1, 'a'), @@ -355,7 +355,7 @@ def test_pyarrow_deletes_double(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_partitioned_tables(catalog: Catalog) -> None: for table_name, predicate in [ ("test_partitioned_by_identity", "ts >= '2023-03-05T00:00:00+00:00'"), @@ -372,7 +372,7 @@ def test_partitioned_tables(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_unpartitioned_uuid_table(catalog: Catalog) -> None: unpartitioned_uuid = catalog.load_table("default.test_uuid_and_fixed_unpartitioned") arrow_table_eq = unpartitioned_uuid.scan(row_filter="uuid_col == '102cb62f-e6f8-4eb0-9973-d9b012ff0967'").to_arrow() @@ -389,7 +389,7 @@ def test_unpartitioned_uuid_table(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_unpartitioned_fixed_table(catalog: Catalog) -> None: fixed_table = catalog.load_table("default.test_uuid_and_fixed_unpartitioned") arrow_table_eq = fixed_table.scan(row_filter=EqualTo("fixed_col", b"1234567890123456789012345")).to_arrow() @@ -408,7 +408,7 @@ def 
test_unpartitioned_fixed_table(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_scan_tag(catalog: Catalog) -> None: test_positional_mor_deletes = catalog.load_table("default.test_positional_mor_deletes") arrow_table = test_positional_mor_deletes.scan().use_ref("tag_12").to_arrow() @@ -416,7 +416,7 @@ def test_scan_tag(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_scan_branch(catalog: Catalog) -> None: test_positional_mor_deletes = catalog.load_table("default.test_positional_mor_deletes") arrow_table = test_positional_mor_deletes.scan().use_ref("without_5").to_arrow() @@ -424,21 +424,21 @@ def test_scan_branch(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_filter_on_new_column(catalog: Catalog) -> None: test_table_add_column = catalog.load_table("default.test_table_add_column") arrow_table = test_table_add_column.scan(row_filter="b == '2'").to_arrow() - assert arrow_table["b"].to_pylist() == ['2'] + assert arrow_table["b"].to_pylist() == ["2"] arrow_table = test_table_add_column.scan(row_filter="b is not null").to_arrow() - assert arrow_table["b"].to_pylist() == ['2'] + assert arrow_table["b"].to_pylist() == ["2"] arrow_table = test_table_add_column.scan(row_filter="b is null").to_arrow() assert arrow_table["b"].to_pylist() == [None] @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_upgrade_table_version(catalog: Catalog) -> None: table_test_table_version = catalog.load_table("default.test_table_version") @@ -466,7 +466,7 @@ def test_upgrade_table_version(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_sanitize_character(catalog: Catalog) -> None: table_test_table_sanitized_character = catalog.load_table("default.test_table_sanitized_character") arrow_table = table_test_table_sanitized_character.scan().to_arrow() @@ -476,7 +476,7 @@ def test_sanitize_character(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_null_list_and_map(catalog: Catalog) -> None: table_test_empty_list_and_map = catalog.load_table("default.test_table_empty_list_and_map") arrow_table = 
table_test_empty_list_and_map.scan().to_arrow() @@ -485,7 +485,7 @@ def test_null_list_and_map(catalog: Catalog) -> None: # This should be: # assert arrow_table["col_list_with_struct"].to_pylist() == [None, [{'test': 1}]] # Once https://github.com/apache/arrow/issues/38809 has been fixed - assert arrow_table["col_list_with_struct"].to_pylist() == [[], [{'test': 1}]] + assert arrow_table["col_list_with_struct"].to_pylist() == [[], [{"test": 1}]] @pytest.mark.integration diff --git a/tests/integration/test_rest_manifest.py b/tests/integration/test_rest_manifest.py index 0e768c6e68..82c41cfd93 100644 --- a/tests/integration/test_rest_manifest.py +++ b/tests/integration/test_rest_manifest.py @@ -104,7 +104,7 @@ def test_write_sample_manifest(table_test_all_types: Table) -> None: wrapped_entry_v2.data_file = wrapped_data_file_v2_debug wrapped_entry_v2_dict = todict(wrapped_entry_v2) # This one should not be written - del wrapped_entry_v2_dict['data_file']['spec_id'] + del wrapped_entry_v2_dict["data_file"]["spec_id"] with TemporaryDirectory() as tmpdir: tmp_avro_file = tmpdir + "/test_write_manifest.avro" diff --git a/tests/integration/test_rest_schema.py b/tests/integration/test_rest_schema.py index ac5d1ce050..f4ab98a883 100644 --- a/tests/integration/test_rest_schema.py +++ b/tests/integration/test_rest_schema.py @@ -358,16 +358,16 @@ def test_revert_changes(simple_table: Table, table_schema_simple: Schema) -> Non assert simple_table.schemas() == { 0: Schema( - NestedField(field_id=1, name='foo', field_type=StringType(), required=False), - NestedField(field_id=2, name='bar', field_type=IntegerType(), required=True), - NestedField(field_id=3, name='baz', field_type=BooleanType(), required=False), + NestedField(field_id=1, name="foo", field_type=StringType(), required=False), + NestedField(field_id=2, name="bar", field_type=IntegerType(), required=True), + NestedField(field_id=3, name="baz", field_type=BooleanType(), required=False), identifier_field_ids=[2], ), 1: Schema( - NestedField(field_id=1, name='foo', field_type=StringType(), required=False), - NestedField(field_id=2, name='bar', field_type=IntegerType(), required=True), - NestedField(field_id=3, name='baz', field_type=BooleanType(), required=False), - NestedField(field_id=4, name='data', field_type=IntegerType(), required=False), + NestedField(field_id=1, name="foo", field_type=StringType(), required=False), + NestedField(field_id=2, name="bar", field_type=IntegerType(), required=True), + NestedField(field_id=3, name="baz", field_type=BooleanType(), required=False), + NestedField(field_id=4, name="data", field_type=IntegerType(), required=False), identifier_field_ids=[2], ), } @@ -685,9 +685,9 @@ def test_rename_simple(simple_table: Table) -> None: # Check that the name mapping gets updated assert simple_table.name_mapping() == NameMapping([ - MappedField(field_id=1, names=['foo', 'vo']), - MappedField(field_id=2, names=['bar', 'var']), - MappedField(field_id=3, names=['baz']), + MappedField(field_id=1, names=["foo", "vo"]), + MappedField(field_id=2, names=["bar", "var"]), + MappedField(field_id=3, names=["baz"]), ]) @@ -719,7 +719,7 @@ def test_rename_simple_nested(catalog: Catalog) -> None: # Check that the name mapping gets updated assert tbl.name_mapping() == NameMapping([ - MappedField(field_id=1, names=['foo'], fields=[MappedField(field_id=2, names=['bar', 'vo'])]), + MappedField(field_id=1, names=["foo"], fields=[MappedField(field_id=2, names=["bar", "vo"])]), ]) diff --git 
a/tests/integration/test_writes/test_partitioned_writes.py b/tests/integration/test_writes/test_partitioned_writes.py index d84b9745a7..5cb03e59d8 100644 --- a/tests/integration/test_writes/test_partitioned_writes.py +++ b/tests/integration/test_writes/test_partitioned_writes.py @@ -38,7 +38,7 @@ @pytest.mark.integration @pytest.mark.parametrize( - "part_col", ['int', 'bool', 'string', "string_long", "long", "float", "double", "date", 'timestamp', 'timestamptz', 'binary'] + "part_col", ["int", "bool", "string", "string_long", "long", "float", "double", "date", "timestamp", "timestamptz", "binary"] ) @pytest.mark.parametrize("format_version", [1, 2]) def test_query_filter_null_partitioned( @@ -71,7 +71,7 @@ def test_query_filter_null_partitioned( @pytest.mark.integration @pytest.mark.parametrize( - "part_col", ['int', 'bool', 'string', "string_long", "long", "float", "double", "date", 'timestamp', 'timestamptz', 'binary'] + "part_col", ["int", "bool", "string", "string_long", "long", "float", "double", "date", "timestamp", "timestamptz", "binary"] ) @pytest.mark.parametrize("format_version", [1, 2]) def test_query_filter_without_data_partitioned( @@ -103,7 +103,7 @@ def test_query_filter_without_data_partitioned( @pytest.mark.integration @pytest.mark.parametrize( - "part_col", ['int', 'bool', 'string', "string_long", "long", "float", "double", "date", 'timestamp', 'timestamptz', 'binary'] + "part_col", ["int", "bool", "string", "string_long", "long", "float", "double", "date", "timestamp", "timestamptz", "binary"] ) @pytest.mark.parametrize("format_version", [1, 2]) def test_query_filter_only_nulls_partitioned( @@ -135,7 +135,7 @@ def test_query_filter_only_nulls_partitioned( @pytest.mark.integration @pytest.mark.parametrize( - "part_col", ['int', 'bool', 'string', "string_long", "long", "float", "double", "date", "timestamptz", "timestamp", "binary"] + "part_col", ["int", "bool", "string", "string_long", "long", "float", "double", "date", "timestamptz", "timestamp", "binary"] ) @pytest.mark.parametrize("format_version", [1, 2]) def test_query_filter_appended_null_partitioned( @@ -174,7 +174,7 @@ def test_query_filter_appended_null_partitioned( @pytest.mark.integration @pytest.mark.parametrize( - "part_col", ['int', 'bool', 'string', "string_long", "long", "float", "double", "date", "timestamptz", "timestamp", "binary"] + "part_col", ["int", "bool", "string", "string_long", "long", "float", "double", "date", "timestamptz", "timestamp", "binary"] ) def test_query_filter_v1_v2_append_null( session_catalog: Catalog, spark: SparkSession, arrow_table_with_null: pa.Table, part_col: str @@ -225,7 +225,7 @@ def test_summaries_with_null(spark: SparkSession, session_catalog: Catalog, arro identifier=identifier, schema=TABLE_SCHEMA, partition_spec=PartitionSpec(PartitionField(source_id=4, field_id=1001, transform=IdentityTransform(), name="int")), - properties={'format-version': '2'}, + properties={"format-version": "2"}, ) tbl.append(arrow_table_with_null) @@ -240,33 +240,33 @@ def test_summaries_with_null(spark: SparkSession, session_catalog: Catalog, arro ).collect() operations = [row.operation for row in rows] - assert operations == ['append', 'append'] + assert operations == ["append", "append"] summaries = [row.summary for row in rows] assert summaries[0] == { - 'changed-partition-count': '3', - 'added-data-files': '3', - 'added-files-size': '15029', - 'added-records': '3', - 'total-data-files': '3', - 'total-delete-files': '0', - 'total-equality-deletes': '0', - 'total-files-size': '15029', - 
'total-position-deletes': '0', - 'total-records': '3', + "changed-partition-count": "3", + "added-data-files": "3", + "added-files-size": "15029", + "added-records": "3", + "total-data-files": "3", + "total-delete-files": "0", + "total-equality-deletes": "0", + "total-files-size": "15029", + "total-position-deletes": "0", + "total-records": "3", } assert summaries[1] == { - 'changed-partition-count': '3', - 'added-data-files': '3', - 'added-files-size': '15029', - 'added-records': '3', - 'total-data-files': '6', - 'total-delete-files': '0', - 'total-equality-deletes': '0', - 'total-files-size': '30058', - 'total-position-deletes': '0', - 'total-records': '6', + "changed-partition-count": "3", + "added-data-files": "3", + "added-files-size": "15029", + "added-records": "3", + "total-data-files": "6", + "total-delete-files": "0", + "total-equality-deletes": "0", + "total-files-size": "30058", + "total-position-deletes": "0", + "total-records": "6", } @@ -284,7 +284,7 @@ def test_data_files_with_table_partitioned_with_null( identifier=identifier, schema=TABLE_SCHEMA, partition_spec=PartitionSpec(PartitionField(source_id=4, field_id=1001, transform=IdentityTransform(), name="int")), - properties={'format-version': '1'}, + properties={"format-version": "1"}, ) tbl.append(arrow_table_with_null) @@ -320,7 +320,7 @@ def test_invalid_arguments(spark: SparkSession, session_catalog: Catalog) -> Non identifier=identifier, schema=TABLE_SCHEMA, partition_spec=PartitionSpec(PartitionField(source_id=4, field_id=1001, transform=IdentityTransform(), name="int")), - properties={'format-version': '1'}, + properties={"format-version": "1"}, ) with pytest.raises(ValueError, match="Expected PyArrow table, got: not a df"): @@ -379,7 +379,7 @@ def test_unsupported_transform( identifier=identifier, schema=TABLE_SCHEMA, partition_spec=spec, - properties={'format-version': '1'}, + properties={"format-version": "1"}, ) with pytest.raises(ValueError, match="All transforms are not supported.*"): diff --git a/tests/integration/test_writes/test_writes.py b/tests/integration/test_writes/test_writes.py index 74b6857dce..0941b35850 100644 --- a/tests/integration/test_writes/test_writes.py +++ b/tests/integration/test_writes/test_writes.py @@ -186,47 +186,47 @@ def test_summaries(spark: SparkSession, session_catalog: Catalog, arrow_table_wi ).collect() operations = [row.operation for row in rows] - assert operations == ['append', 'append', 'overwrite'] + assert operations == ["append", "append", "overwrite"] summaries = [row.summary for row in rows] assert summaries[0] == { - 'added-data-files': '1', - 'added-files-size': '5459', - 'added-records': '3', - 'total-data-files': '1', - 'total-delete-files': '0', - 'total-equality-deletes': '0', - 'total-files-size': '5459', - 'total-position-deletes': '0', - 'total-records': '3', + "added-data-files": "1", + "added-files-size": "5459", + "added-records": "3", + "total-data-files": "1", + "total-delete-files": "0", + "total-equality-deletes": "0", + "total-files-size": "5459", + "total-position-deletes": "0", + "total-records": "3", } assert summaries[1] == { - 'added-data-files': '1', - 'added-files-size': '5459', - 'added-records': '3', - 'total-data-files': '2', - 'total-delete-files': '0', - 'total-equality-deletes': '0', - 'total-files-size': '10918', - 'total-position-deletes': '0', - 'total-records': '6', + "added-data-files": "1", + "added-files-size": "5459", + "added-records": "3", + "total-data-files": "2", + "total-delete-files": "0", + "total-equality-deletes": "0", + 
"total-files-size": "10918", + "total-position-deletes": "0", + "total-records": "6", } assert summaries[2] == { - 'added-data-files': '1', - 'added-files-size': '5459', - 'added-records': '3', - 'deleted-data-files': '2', - 'deleted-records': '6', - 'removed-files-size': '10918', - 'total-data-files': '1', - 'total-delete-files': '0', - 'total-equality-deletes': '0', - 'total-files-size': '5459', - 'total-position-deletes': '0', - 'total-records': '3', + "added-data-files": "1", + "added-files-size": "5459", + "added-records": "3", + "deleted-data-files": "2", + "deleted-records": "6", + "removed-files-size": "10918", + "total-data-files": "1", + "total-delete-files": "0", + "total-equality-deletes": "0", + "total-files-size": "5459", + "total-position-deletes": "0", + "total-records": "3", } @@ -283,25 +283,25 @@ def test_python_writes_special_character_column_with_spark_reads( identifier = "default.python_writes_special_character_column_with_spark_reads" column_name_with_special_character = "letter/abc" TEST_DATA_WITH_SPECIAL_CHARACTER_COLUMN = { - column_name_with_special_character: ['a', None, 'z'], - 'id': [1, 2, 3], - 'name': ['AB', 'CD', 'EF'], - 'address': [ - {'street': '123', 'city': 'SFO', 'zip': 12345, column_name_with_special_character: 'a'}, - {'street': '456', 'city': 'SW', 'zip': 67890, column_name_with_special_character: 'b'}, - {'street': '789', 'city': 'Random', 'zip': 10112, column_name_with_special_character: 'c'}, + column_name_with_special_character: ["a", None, "z"], + "id": [1, 2, 3], + "name": ["AB", "CD", "EF"], + "address": [ + {"street": "123", "city": "SFO", "zip": 12345, column_name_with_special_character: "a"}, + {"street": "456", "city": "SW", "zip": 67890, column_name_with_special_character: "b"}, + {"street": "789", "city": "Random", "zip": 10112, column_name_with_special_character: "c"}, ], } pa_schema = pa.schema([ pa.field(column_name_with_special_character, pa.string()), - pa.field('id', pa.int32()), - pa.field('name', pa.string()), + pa.field("id", pa.int32()), + pa.field("name", pa.string()), pa.field( - 'address', + "address", pa.struct([ - pa.field('street', pa.string()), - pa.field('city', pa.string()), - pa.field('zip', pa.int32()), + pa.field("street", pa.string()), + pa.field("city", pa.string()), + pa.field("zip", pa.int32()), pa.field(column_name_with_special_character, pa.string()), ]), ), @@ -322,12 +322,12 @@ def test_python_writes_dictionary_encoded_column_with_spark_reads( ) -> None: identifier = "default.python_writes_dictionary_encoded_column_with_spark_reads" TEST_DATA = { - 'id': [1, 2, 3, 1, 1], - 'name': ['AB', 'CD', 'EF', 'CD', 'EF'], + "id": [1, 2, 3, 1, 1], + "name": ["AB", "CD", "EF", "CD", "EF"], } pa_schema = pa.schema([ - pa.field('id', pa.dictionary(pa.int32(), pa.int32(), False)), - pa.field('name', pa.dictionary(pa.int32(), pa.string(), False)), + pa.field("id", pa.dictionary(pa.int32(), pa.int32(), False)), + pa.field("name", pa.dictionary(pa.int32(), pa.string(), False)), ]) arrow_table = pa.Table.from_pydict(TEST_DATA, schema=pa_schema) @@ -473,7 +473,7 @@ def test_write_parquet_unsupported_properties( @pytest.mark.integration def test_invalid_arguments(spark: SparkSession, session_catalog: Catalog, arrow_table_with_null: pa.Table) -> None: identifier = "default.arrow_data_files" - tbl = _create_table(session_catalog, identifier, {'format-version': '1'}, []) + tbl = _create_table(session_catalog, identifier, {"format-version": "1"}, []) with pytest.raises(ValueError, match="Expected PyArrow table, got: not a df"): 
tbl.overwrite("not a df") @@ -488,7 +488,7 @@ def test_summaries_with_only_nulls( ) -> None: identifier = "default.arrow_table_summaries_with_only_nulls" tbl = _create_table( - session_catalog, identifier, {'format-version': '1'}, [arrow_table_without_data, arrow_table_with_only_nulls] + session_catalog, identifier, {"format-version": "1"}, [arrow_table_without_data, arrow_table_with_only_nulls] ) tbl.overwrite(arrow_table_without_data) @@ -501,49 +501,49 @@ def test_summaries_with_only_nulls( ).collect() operations = [row.operation for row in rows] - assert operations == ['append', 'append', 'overwrite'] + assert operations == ["append", "append", "overwrite"] summaries = [row.summary for row in rows] assert summaries[0] == { - 'total-data-files': '0', - 'total-delete-files': '0', - 'total-equality-deletes': '0', - 'total-files-size': '0', - 'total-position-deletes': '0', - 'total-records': '0', + "total-data-files": "0", + "total-delete-files": "0", + "total-equality-deletes": "0", + "total-files-size": "0", + "total-position-deletes": "0", + "total-records": "0", } assert summaries[1] == { - 'added-data-files': '1', - 'added-files-size': '4239', - 'added-records': '2', - 'total-data-files': '1', - 'total-delete-files': '0', - 'total-equality-deletes': '0', - 'total-files-size': '4239', - 'total-position-deletes': '0', - 'total-records': '2', + "added-data-files": "1", + "added-files-size": "4239", + "added-records": "2", + "total-data-files": "1", + "total-delete-files": "0", + "total-equality-deletes": "0", + "total-files-size": "4239", + "total-position-deletes": "0", + "total-records": "2", } assert summaries[2] == { - 'removed-files-size': '4239', - 'total-equality-deletes': '0', - 'total-position-deletes': '0', - 'deleted-data-files': '1', - 'total-delete-files': '0', - 'total-files-size': '0', - 'deleted-records': '2', - 'total-data-files': '0', - 'total-records': '0', + "removed-files-size": "4239", + "total-equality-deletes": "0", + "total-position-deletes": "0", + "deleted-data-files": "1", + "total-delete-files": "0", + "total-files-size": "0", + "deleted-records": "2", + "total-data-files": "0", + "total-records": "0", } @pytest.mark.integration def test_duckdb_url_import(warehouse: Path, arrow_table_with_null: pa.Table) -> None: - os.environ['TZ'] = 'Etc/UTC' + os.environ["TZ"] = "Etc/UTC" time.tzset() - tz = pytz.timezone(os.environ['TZ']) + tz = pytz.timezone(os.environ["TZ"]) catalog = SqlCatalog("test_sql_catalog", uri="sqlite:///:memory:", warehouse=f"/{warehouse}") catalog.create_namespace("default") @@ -554,7 +554,7 @@ def test_duckdb_url_import(warehouse: Path, arrow_table_with_null: pa.Table) -> import duckdb - duckdb.sql('INSTALL iceberg; LOAD iceberg;') + duckdb.sql("INSTALL iceberg; LOAD iceberg;") result = duckdb.sql( f""" SELECT * @@ -565,8 +565,8 @@ def test_duckdb_url_import(warehouse: Path, arrow_table_with_null: pa.Table) -> assert result == [ ( False, - 'a', - 'aaaaaaaaaaaaaaaaaaaaaa', + "a", + "aaaaaaaaaaaaaaaaaaaaaa", 1, 1, 0.0, @@ -574,14 +574,14 @@ def test_duckdb_url_import(warehouse: Path, arrow_table_with_null: pa.Table) -> datetime(2023, 1, 1, 19, 25), datetime(2023, 1, 1, 19, 25, tzinfo=tz), date(2023, 1, 1), - b'\x01', - b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + b"\x01", + b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", ), (None, None, None, None, None, None, None, None, None, None, None, None), ( True, - 'z', - 'zzzzzzzzzzzzzzzzzzzzzz', + "z", + "zzzzzzzzzzzzzzzzzzzzzz", 9, 9, 0.8999999761581421, 
@@ -589,8 +589,8 @@ def test_duckdb_url_import(warehouse: Path, arrow_table_with_null: pa.Table) -> datetime(2023, 3, 1, 19, 25), datetime(2023, 3, 1, 19, 25, tzinfo=tz), date(2023, 3, 1), - b'\x12', - b'\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11', + b"\x12", + b"\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11", ), ] @@ -607,7 +607,7 @@ def test_write_and_evolve(session_catalog: Catalog, format_version: int) -> None pa_table = pa.Table.from_pydict( { - 'foo': ['a', None, 'z'], + "foo": ["a", None, "z"], }, schema=pa.schema([pa.field("foo", pa.string(), nullable=True)]), ) @@ -618,8 +618,8 @@ def test_write_and_evolve(session_catalog: Catalog, format_version: int) -> None pa_table_with_column = pa.Table.from_pydict( { - 'foo': ['a', None, 'z'], - 'bar': [19, None, 25], + "foo": ["a", None, "z"], + "bar": [19, None, 25], }, schema=pa.schema([ pa.field("foo", pa.string(), nullable=True), @@ -653,15 +653,15 @@ def test_create_table_transaction(session_catalog: Catalog, format_version: int) pa_table = pa.Table.from_pydict( { - 'foo': ['a', None, 'z'], + "foo": ["a", None, "z"], }, schema=pa.schema([pa.field("foo", pa.string(), nullable=True)]), ) pa_table_with_column = pa.Table.from_pydict( { - 'foo': ['a', None, 'z'], - 'bar': [19, None, 25], + "foo": ["a", None, "z"], + "bar": [19, None, 25], }, schema=pa.schema([ pa.field("foo", pa.string(), nullable=True), @@ -741,45 +741,45 @@ def test_inspect_snapshots( df = tbl.inspect.snapshots() assert df.column_names == [ - 'committed_at', - 'snapshot_id', - 'parent_id', - 'operation', - 'manifest_list', - 'summary', + "committed_at", + "snapshot_id", + "parent_id", + "operation", + "manifest_list", + "summary", ] - for committed_at in df['committed_at']: + for committed_at in df["committed_at"]: assert isinstance(committed_at.as_py(), datetime) - for snapshot_id in df['snapshot_id']: + for snapshot_id in df["snapshot_id"]: assert isinstance(snapshot_id.as_py(), int) - assert df['parent_id'][0].as_py() is None - assert df['parent_id'][1:] == df['snapshot_id'][:2] + assert df["parent_id"][0].as_py() is None + assert df["parent_id"][1:] == df["snapshot_id"][:2] - assert [operation.as_py() for operation in df['operation']] == ['append', 'overwrite', 'append'] + assert [operation.as_py() for operation in df["operation"]] == ["append", "overwrite", "append"] - for manifest_list in df['manifest_list']: + for manifest_list in df["manifest_list"]: assert manifest_list.as_py().startswith("s3://") - assert df['summary'][0].as_py() == [ - ('added-files-size', '5459'), - ('added-data-files', '1'), - ('added-records', '3'), - ('total-data-files', '1'), - ('total-delete-files', '0'), - ('total-records', '3'), - ('total-files-size', '5459'), - ('total-position-deletes', '0'), - ('total-equality-deletes', '0'), + assert df["summary"][0].as_py() == [ + ("added-files-size", "5459"), + ("added-data-files", "1"), + ("added-records", "3"), + ("total-data-files", "1"), + ("total-delete-files", "0"), + ("total-records", "3"), + ("total-files-size", "5459"), + ("total-position-deletes", "0"), + ("total-equality-deletes", "0"), ] lhs = spark.table(f"{identifier}.snapshots").toPandas() rhs = df.to_pandas() for column in df.column_names: for left, right in zip(lhs[column].to_list(), rhs[column].to_list()): - if column == 'summary': + if column == "summary": # Arrow returns a list of tuples, instead of a dict right = dict(right) @@ -838,7 +838,7 @@ def test_hive_catalog_storage_descriptor( @pytest.mark.integration 
-@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_sanitize_character_partitioned(catalog: Catalog) -> None: table_name = "default.test_table_partitioned_sanitized_character" try: diff --git a/tests/io/test_pyarrow.py b/tests/io/test_pyarrow.py index 90f5b08bf0..ec511f959d 100644 --- a/tests/io/test_pyarrow.py +++ b/tests/io/test_pyarrow.py @@ -1215,13 +1215,13 @@ def test_projection_list_of_structs(schema_list_of_structs: Schema, file_list_of results = [row.as_py() for row in result_table.columns[0]] assert results == [ [ - {'latitude': 52.371807, 'longitude': 4.896029, 'altitude': None}, - {'latitude': 52.387386, 'longitude': 4.646219, 'altitude': None}, + {"latitude": 52.371807, "longitude": 4.896029, "altitude": None}, + {"latitude": 52.387386, "longitude": 4.646219, "altitude": None}, ], [], [ - {'latitude': 52.078663, 'longitude': 4.288788, 'altitude': None}, - {'latitude': 52.387386, 'longitude': 4.646219, 'altitude': None}, + {"latitude": 52.078663, "longitude": 4.288788, "altitude": None}, + {"latitude": 52.387386, "longitude": 4.646219, "altitude": None}, ], ] assert ( diff --git a/tests/io/test_pyarrow_visitor.py b/tests/io/test_pyarrow_visitor.py index 46ad331aa0..c8571dacf1 100644 --- a/tests/io/test_pyarrow_visitor.py +++ b/tests/io/test_pyarrow_visitor.py @@ -329,7 +329,7 @@ def test_round_schema_large_string() -> None: def test_simple_schema_has_missing_ids() -> None: schema = pa.schema([ - pa.field('foo', pa.string(), nullable=False), + pa.field("foo", pa.string(), nullable=False), ]) visitor = _HasIds() has_ids = visit_pyarrow(schema, visitor) @@ -338,8 +338,8 @@ def test_simple_schema_has_missing_ids() -> None: def test_simple_schema_has_missing_ids_partial() -> None: schema = pa.schema([ - pa.field('foo', pa.string(), nullable=False, metadata={"PARQUET:field_id": "1", "doc": "foo doc"}), - pa.field('bar', pa.int32(), nullable=False), + pa.field("foo", pa.string(), nullable=False, metadata={"PARQUET:field_id": "1", "doc": "foo doc"}), + pa.field("bar", pa.int32(), nullable=False), ]) visitor = _HasIds() has_ids = visit_pyarrow(schema, visitor) @@ -348,9 +348,9 @@ def test_simple_schema_has_missing_ids_partial() -> None: def test_nested_schema_has_missing_ids() -> None: schema = pa.schema([ - pa.field('foo', pa.string(), nullable=False), + pa.field("foo", pa.string(), nullable=False), pa.field( - 'quux', + "quux", pa.map_( pa.string(), pa.map_(pa.string(), pa.int32()), @@ -365,16 +365,16 @@ def test_nested_schema_has_missing_ids() -> None: def test_nested_schema_has_ids() -> None: schema = pa.schema([ - pa.field('foo', pa.string(), nullable=False, metadata={"PARQUET:field_id": "1", "doc": "foo doc"}), + pa.field("foo", pa.string(), nullable=False, metadata={"PARQUET:field_id": "1", "doc": "foo doc"}), pa.field( - 'quux', + "quux", pa.map_( pa.field("key", pa.string(), nullable=False, metadata={"PARQUET:field_id": "7"}), pa.field( "value", pa.map_( - pa.field('key', pa.string(), nullable=False, metadata={"PARQUET:field_id": "9"}), - pa.field('value', pa.int32(), metadata={"PARQUET:field_id": "10"}), + pa.field("key", pa.string(), nullable=False, metadata={"PARQUET:field_id": "9"}), + pa.field("value", pa.int32(), metadata={"PARQUET:field_id": "10"}), ), nullable=False, metadata={"PARQUET:field_id": "8"}, @@ -391,14 +391,14 @@ def test_nested_schema_has_ids() -> None: def 
test_nested_schema_has_partial_missing_ids() -> None: schema = pa.schema([ - pa.field('foo', pa.string(), nullable=False, metadata={"PARQUET:field_id": "1", "doc": "foo doc"}), + pa.field("foo", pa.string(), nullable=False, metadata={"PARQUET:field_id": "1", "doc": "foo doc"}), pa.field( - 'quux', + "quux", pa.map_( pa.field("key", pa.string(), nullable=False, metadata={"PARQUET:field_id": "7"}), pa.field( "value", - pa.map_(pa.field('key', pa.string(), nullable=False), pa.field('value', pa.int32())), + pa.map_(pa.field("key", pa.string(), nullable=False), pa.field("value", pa.int32())), nullable=False, ), ), @@ -426,9 +426,9 @@ def test_simple_pyarrow_schema_to_schema_missing_ids_using_name_mapping( ) -> None: schema = pyarrow_schema_simple_without_ids name_mapping = NameMapping([ - MappedField(field_id=1, names=['foo']), - MappedField(field_id=2, names=['bar']), - MappedField(field_id=3, names=['baz']), + MappedField(field_id=1, names=["foo"]), + MappedField(field_id=2, names=["bar"]), + MappedField(field_id=3, names=["baz"]), ]) assert pyarrow_to_schema(schema, name_mapping) == iceberg_schema_simple @@ -439,7 +439,7 @@ def test_simple_pyarrow_schema_to_schema_missing_ids_using_name_mapping_partial_ ) -> None: schema = pyarrow_schema_simple_without_ids name_mapping = NameMapping([ - MappedField(field_id=1, names=['foo']), + MappedField(field_id=1, names=["foo"]), ]) with pytest.raises(ValueError) as exc_info: _ = pyarrow_to_schema(schema, name_mapping) @@ -452,45 +452,45 @@ def test_nested_pyarrow_schema_to_schema_missing_ids_using_name_mapping( schema = pyarrow_schema_nested_without_ids name_mapping = NameMapping([ - MappedField(field_id=1, names=['foo']), - MappedField(field_id=2, names=['bar']), - MappedField(field_id=3, names=['baz']), - MappedField(field_id=4, names=['qux'], fields=[MappedField(field_id=5, names=['element'])]), + MappedField(field_id=1, names=["foo"]), + MappedField(field_id=2, names=["bar"]), + MappedField(field_id=3, names=["baz"]), + MappedField(field_id=4, names=["qux"], fields=[MappedField(field_id=5, names=["element"])]), MappedField( field_id=6, - names=['quux'], + names=["quux"], fields=[ - MappedField(field_id=7, names=['key']), + MappedField(field_id=7, names=["key"]), MappedField( field_id=8, - names=['value'], + names=["value"], fields=[ - MappedField(field_id=9, names=['key']), - MappedField(field_id=10, names=['value']), + MappedField(field_id=9, names=["key"]), + MappedField(field_id=10, names=["value"]), ], ), ], ), MappedField( field_id=11, - names=['location'], + names=["location"], fields=[ MappedField( field_id=12, - names=['element'], + names=["element"], fields=[ - MappedField(field_id=13, names=['latitude']), - MappedField(field_id=14, names=['longitude']), + MappedField(field_id=13, names=["latitude"]), + MappedField(field_id=14, names=["longitude"]), ], ) ], ), MappedField( field_id=15, - names=['person'], + names=["person"], fields=[ - MappedField(field_id=16, names=['name']), - MappedField(field_id=17, names=['age']), + MappedField(field_id=16, names=["name"]), + MappedField(field_id=17, names=["age"]), ], ), ]) @@ -500,9 +500,9 @@ def test_nested_pyarrow_schema_to_schema_missing_ids_using_name_mapping( def test_pyarrow_schema_to_schema_missing_ids_using_name_mapping_nested_missing_id() -> None: schema = pa.schema([ - pa.field('foo', pa.string(), nullable=False), + pa.field("foo", pa.string(), nullable=False), pa.field( - 'quux', + "quux", pa.map_( pa.string(), pa.map_(pa.string(), pa.int32()), @@ -512,17 +512,17 @@ def 
test_pyarrow_schema_to_schema_missing_ids_using_name_mapping_nested_missing_ ]) name_mapping = NameMapping([ - MappedField(field_id=1, names=['foo']), + MappedField(field_id=1, names=["foo"]), MappedField( field_id=6, - names=['quux'], + names=["quux"], fields=[ - MappedField(field_id=7, names=['key']), + MappedField(field_id=7, names=["key"]), MappedField( field_id=8, - names=['value'], + names=["value"], fields=[ - MappedField(field_id=10, names=['value']), + MappedField(field_id=10, names=["value"]), ], ), ], diff --git a/tests/table/test_init.py b/tests/table/test_init.py index 2bc78f3197..11d50db8a5 100644 --- a/tests/table/test_init.py +++ b/tests/table/test_init.py @@ -995,9 +995,9 @@ def test_correct_schema() -> None: # Should use the current schema, instead the one from the snapshot projection_schema = t.scan().projection() assert projection_schema == Schema( - NestedField(field_id=1, name='x', field_type=LongType(), required=True), - NestedField(field_id=2, name='y', field_type=LongType(), required=True), - NestedField(field_id=3, name='z', field_type=LongType(), required=True), + NestedField(field_id=1, name="x", field_type=LongType(), required=True), + NestedField(field_id=2, name="y", field_type=LongType(), required=True), + NestedField(field_id=3, name="z", field_type=LongType(), required=True), identifier_field_ids=[1, 2], ) assert projection_schema.schema_id == 1 @@ -1005,7 +1005,7 @@ def test_correct_schema() -> None: # When we explicitly filter on the commit, we want to have the schema that's linked to the snapshot projection_schema = t.scan(snapshot_id=123).projection() assert projection_schema == Schema( - NestedField(field_id=1, name='x', field_type=LongType(), required=True), + NestedField(field_id=1, name="x", field_type=LongType(), required=True), identifier_field_ids=[], ) assert projection_schema.schema_id == 0 @@ -1138,8 +1138,8 @@ def test_table_properties_raise_for_none_value(example_table_metadata_v2: Dict[s def test_serialize_commit_table_request() -> None: request = CommitTableRequest( - requirements=(AssertTableUUID(uuid='4bfd18a3-74c6-478e-98b1-71c4c32f4163'),), - identifier=TableIdentifier(namespace=['a'], name='b'), + requirements=(AssertTableUUID(uuid="4bfd18a3-74c6-478e-98b1-71c4c32f4163"),), + identifier=TableIdentifier(namespace=["a"], name="b"), ) deserialized_request = CommitTableRequest.model_validate_json(request.model_dump_json()) @@ -1149,17 +1149,17 @@ def test_serialize_commit_table_request() -> None: def test_partition_for_demo() -> None: import pyarrow as pa - test_pa_schema = pa.schema([('year', pa.int64()), ("n_legs", pa.int64()), ("animal", pa.string())]) + test_pa_schema = pa.schema([("year", pa.int64()), ("n_legs", pa.int64()), ("animal", pa.string())]) test_schema = Schema( - NestedField(field_id=1, name='year', field_type=StringType(), required=False), - NestedField(field_id=2, name='n_legs', field_type=IntegerType(), required=True), - NestedField(field_id=3, name='animal', field_type=StringType(), required=False), + NestedField(field_id=1, name="year", field_type=StringType(), required=False), + NestedField(field_id=2, name="n_legs", field_type=IntegerType(), required=True), + NestedField(field_id=3, name="animal", field_type=StringType(), required=False), schema_id=1, ) test_data = { - 'year': [2020, 2022, 2022, 2022, 2021, 2022, 2022, 2019, 2021], - 'n_legs': [2, 2, 2, 4, 4, 4, 4, 5, 100], - 'animal': ["Flamingo", "Parrot", "Parrot", "Horse", "Dog", "Horse", "Horse", "Brittle stars", "Centipede"], + "year": [2020, 2022, 2022, 
2022, 2021, 2022, 2022, 2019, 2021], + "n_legs": [2, 2, 2, 4, 4, 4, 4, 5, 100], + "animal": ["Flamingo", "Parrot", "Parrot", "Horse", "Dog", "Horse", "Horse", "Brittle stars", "Centipede"], } arrow_table = pa.Table.from_pydict(test_data, schema=test_pa_schema) partition_spec = PartitionSpec( @@ -1183,11 +1183,11 @@ def test_partition_for_demo() -> None: def test_identity_partition_on_multi_columns() -> None: import pyarrow as pa - test_pa_schema = pa.schema([('born_year', pa.int64()), ("n_legs", pa.int64()), ("animal", pa.string())]) + test_pa_schema = pa.schema([("born_year", pa.int64()), ("n_legs", pa.int64()), ("animal", pa.string())]) test_schema = Schema( - NestedField(field_id=1, name='born_year', field_type=StringType(), required=False), - NestedField(field_id=2, name='n_legs', field_type=IntegerType(), required=True), - NestedField(field_id=3, name='animal', field_type=StringType(), required=False), + NestedField(field_id=1, name="born_year", field_type=StringType(), required=False), + NestedField(field_id=2, name="n_legs", field_type=IntegerType(), required=True), + NestedField(field_id=3, name="animal", field_type=StringType(), required=False), schema_id=1, ) # 5 partitions, 6 unique row values, 12 rows @@ -1210,9 +1210,9 @@ def test_identity_partition_on_multi_columns() -> None: for _ in range(1000): random.shuffle(test_rows) test_data = { - 'born_year': [row[0] for row in test_rows], - 'n_legs': [row[1] for row in test_rows], - 'animal': [row[2] for row in test_rows], + "born_year": [row[0] for row in test_rows], + "n_legs": [row[1] for row in test_rows], + "animal": [row[2] for row in test_rows], } arrow_table = pa.Table.from_pydict(test_data, schema=test_pa_schema) @@ -1222,7 +1222,7 @@ def test_identity_partition_on_multi_columns() -> None: concatenated_arrow_table = pa.concat_tables([table_partition.arrow_table_partition for table_partition in result]) assert concatenated_arrow_table.num_rows == arrow_table.num_rows assert concatenated_arrow_table.sort_by([ - ('born_year', 'ascending'), - ('n_legs', 'ascending'), - ('animal', 'ascending'), - ]) == arrow_table.sort_by([('born_year', 'ascending'), ('n_legs', 'ascending'), ('animal', 'ascending')]) + ("born_year", "ascending"), + ("n_legs", "ascending"), + ("animal", "ascending"), + ]) == arrow_table.sort_by([("born_year", "ascending"), ("n_legs", "ascending"), ("animal", "ascending")]) diff --git a/tests/table/test_metadata.py b/tests/table/test_metadata.py index b4e30a6b84..0e2b91f24b 100644 --- a/tests/table/test_metadata.py +++ b/tests/table/test_metadata.py @@ -220,7 +220,7 @@ def test_new_table_metadata_with_explicit_v1_format() -> None: partition_spec=partition_spec, sort_order=sort_order, location="s3://some_v1_location/", - properties={'format-version': "1"}, + properties={"format-version": "1"}, ) expected_schema = Schema( diff --git a/tests/table/test_name_mapping.py b/tests/table/test_name_mapping.py index e039415ce3..d4a2bf6c41 100644 --- a/tests/table/test_name_mapping.py +++ b/tests/table/test_name_mapping.py @@ -30,45 +30,45 @@ @pytest.fixture(scope="session") def table_name_mapping_nested() -> NameMapping: return NameMapping([ - MappedField(field_id=1, names=['foo']), - MappedField(field_id=2, names=['bar']), - MappedField(field_id=3, names=['baz']), - MappedField(field_id=4, names=['qux'], fields=[MappedField(field_id=5, names=['element'])]), + MappedField(field_id=1, names=["foo"]), + MappedField(field_id=2, names=["bar"]), + MappedField(field_id=3, names=["baz"]), + MappedField(field_id=4, names=["qux"], 
fields=[MappedField(field_id=5, names=["element"])]), MappedField( field_id=6, - names=['quux'], + names=["quux"], fields=[ - MappedField(field_id=7, names=['key']), + MappedField(field_id=7, names=["key"]), MappedField( field_id=8, - names=['value'], + names=["value"], fields=[ - MappedField(field_id=9, names=['key']), - MappedField(field_id=10, names=['value']), + MappedField(field_id=9, names=["key"]), + MappedField(field_id=10, names=["value"]), ], ), ], ), MappedField( field_id=11, - names=['location'], + names=["location"], fields=[ MappedField( field_id=12, - names=['element'], + names=["element"], fields=[ - MappedField(field_id=13, names=['latitude']), - MappedField(field_id=14, names=['longitude']), + MappedField(field_id=13, names=["latitude"]), + MappedField(field_id=14, names=["longitude"]), ], ) ], ), MappedField( field_id=15, - names=['person'], + names=["person"], fields=[ - MappedField(field_id=16, names=['name']), - MappedField(field_id=17, names=['age']), + MappedField(field_id=16, names=["name"]), + MappedField(field_id=17, names=["age"]), ], ), ]) @@ -80,7 +80,7 @@ def test_json_mapped_field_deserialization() -> None: "names": ["id", "record_id"] } """ - assert MappedField(field_id=1, names=['id', 'record_id']) == MappedField.model_validate_json(mapped_field) + assert MappedField(field_id=1, names=["id", "record_id"]) == MappedField.model_validate_json(mapped_field) mapped_field_with_null_fields = """{ "field-id": 1, @@ -88,7 +88,7 @@ def test_json_mapped_field_deserialization() -> None: "fields": null } """ - assert MappedField(field_id=1, names=['id', 'record_id']) == MappedField.model_validate_json(mapped_field_with_null_fields) + assert MappedField(field_id=1, names=["id", "record_id"]) == MappedField.model_validate_json(mapped_field_with_null_fields) def test_json_name_mapping_deserialization() -> None: @@ -133,14 +133,14 @@ def test_json_name_mapping_deserialization() -> None: """ assert parse_mapping_from_json(name_mapping) == NameMapping([ - MappedField(field_id=1, names=['id', 'record_id']), - MappedField(field_id=2, names=['data']), + MappedField(field_id=1, names=["id", "record_id"]), + MappedField(field_id=2, names=["data"]), MappedField( - names=['location'], + names=["location"], field_id=3, fields=[ - MappedField(field_id=4, names=['latitude', 'lat']), - MappedField(field_id=5, names=['longitude', 'long']), + MappedField(field_id=4, names=["latitude", "lat"]), + MappedField(field_id=5, names=["longitude", "long"]), ], ), ]) @@ -155,14 +155,14 @@ def test_json_serialization(table_name_mapping_nested: NameMapping) -> None: def test_name_mapping_to_string() -> None: nm = NameMapping([ - MappedField(field_id=1, names=['id', 'record_id']), - MappedField(field_id=2, names=['data']), + MappedField(field_id=1, names=["id", "record_id"]), + MappedField(field_id=2, names=["data"]), MappedField( - names=['location'], + names=["location"], field_id=3, fields=[ - MappedField(field_id=4, names=['lat', 'latitude']), - MappedField(field_id=5, names=['long', 'longitude']), + MappedField(field_id=4, names=["lat", "latitude"]), + MappedField(field_id=5, names=["long", "longitude"]), ], ), ]) @@ -184,64 +184,64 @@ def test_mapping_from_schema(table_schema_nested: Schema, table_name_mapping_nes def test_mapping_by_name(table_name_mapping_nested: NameMapping) -> None: assert table_name_mapping_nested._field_by_name == { - 'person.age': MappedField(field_id=17, names=['age']), - 'person.name': MappedField(field_id=16, names=['name']), - 'person': MappedField( + "person.age": 
MappedField(field_id=17, names=["age"]), + "person.name": MappedField(field_id=16, names=["name"]), + "person": MappedField( field_id=15, - names=['person'], - fields=[MappedField(field_id=16, names=['name']), MappedField(field_id=17, names=['age'])], + names=["person"], + fields=[MappedField(field_id=16, names=["name"]), MappedField(field_id=17, names=["age"])], ), - 'location.element.longitude': MappedField(field_id=14, names=['longitude']), - 'location.element.latitude': MappedField(field_id=13, names=['latitude']), - 'location.element': MappedField( + "location.element.longitude": MappedField(field_id=14, names=["longitude"]), + "location.element.latitude": MappedField(field_id=13, names=["latitude"]), + "location.element": MappedField( field_id=12, - names=['element'], - fields=[MappedField(field_id=13, names=['latitude']), MappedField(field_id=14, names=['longitude'])], + names=["element"], + fields=[MappedField(field_id=13, names=["latitude"]), MappedField(field_id=14, names=["longitude"])], ), - 'location': MappedField( + "location": MappedField( field_id=11, - names=['location'], + names=["location"], fields=[ MappedField( field_id=12, - names=['element'], - fields=[MappedField(field_id=13, names=['latitude']), MappedField(field_id=14, names=['longitude'])], + names=["element"], + fields=[MappedField(field_id=13, names=["latitude"]), MappedField(field_id=14, names=["longitude"])], ) ], ), - 'quux.value.value': MappedField(field_id=10, names=['value']), - 'quux.value.key': MappedField(field_id=9, names=['key']), - 'quux.value': MappedField( + "quux.value.value": MappedField(field_id=10, names=["value"]), + "quux.value.key": MappedField(field_id=9, names=["key"]), + "quux.value": MappedField( field_id=8, - names=['value'], - fields=[MappedField(field_id=9, names=['key']), MappedField(field_id=10, names=['value'])], + names=["value"], + fields=[MappedField(field_id=9, names=["key"]), MappedField(field_id=10, names=["value"])], ), - 'quux.key': MappedField(field_id=7, names=['key']), - 'quux': MappedField( + "quux.key": MappedField(field_id=7, names=["key"]), + "quux": MappedField( field_id=6, - names=['quux'], + names=["quux"], fields=[ - MappedField(field_id=7, names=['key']), + MappedField(field_id=7, names=["key"]), MappedField( field_id=8, - names=['value'], - fields=[MappedField(field_id=9, names=['key']), MappedField(field_id=10, names=['value'])], + names=["value"], + fields=[MappedField(field_id=9, names=["key"]), MappedField(field_id=10, names=["value"])], ), ], ), - 'qux.element': MappedField(field_id=5, names=['element']), - 'qux': MappedField(field_id=4, names=['qux'], fields=[MappedField(field_id=5, names=['element'])]), - 'baz': MappedField(field_id=3, names=['baz']), - 'bar': MappedField(field_id=2, names=['bar']), - 'foo': MappedField(field_id=1, names=['foo']), + "qux.element": MappedField(field_id=5, names=["element"]), + "qux": MappedField(field_id=4, names=["qux"], fields=[MappedField(field_id=5, names=["element"])]), + "baz": MappedField(field_id=3, names=["baz"]), + "bar": MappedField(field_id=2, names=["bar"]), + "foo": MappedField(field_id=1, names=["foo"]), } def test_mapping_lookup_by_name(table_name_mapping_nested: NameMapping) -> None: - assert table_name_mapping_nested.find("foo") == MappedField(field_id=1, names=['foo']) - assert table_name_mapping_nested.find("location.element.latitude") == MappedField(field_id=13, names=['latitude']) - assert table_name_mapping_nested.find("location", "element", "latitude") == MappedField(field_id=13, 
names=['latitude']) - assert table_name_mapping_nested.find(*["location", "element", "latitude"]) == MappedField(field_id=13, names=['latitude']) + assert table_name_mapping_nested.find("foo") == MappedField(field_id=1, names=["foo"]) + assert table_name_mapping_nested.find("location.element.latitude") == MappedField(field_id=13, names=["latitude"]) + assert table_name_mapping_nested.find("location", "element", "latitude") == MappedField(field_id=13, names=["latitude"]) + assert table_name_mapping_nested.find(*["location", "element", "latitude"]) == MappedField(field_id=13, names=["latitude"]) with pytest.raises(ValueError, match="Could not find field with name: boom"): table_name_mapping_nested.find("boom") @@ -264,48 +264,48 @@ def test_update_mapping(table_name_mapping_nested: NameMapping) -> None: } expected = NameMapping([ - MappedField(field_id=1, names=['foo', 'foo_update']), - MappedField(field_id=2, names=['bar']), - MappedField(field_id=3, names=['baz']), - MappedField(field_id=4, names=['qux'], fields=[MappedField(field_id=5, names=['element'])]), + MappedField(field_id=1, names=["foo", "foo_update"]), + MappedField(field_id=2, names=["bar"]), + MappedField(field_id=3, names=["baz"]), + MappedField(field_id=4, names=["qux"], fields=[MappedField(field_id=5, names=["element"])]), MappedField( field_id=6, - names=['quux'], + names=["quux"], fields=[ - MappedField(field_id=7, names=['key']), + MappedField(field_id=7, names=["key"]), MappedField( field_id=8, - names=['value'], + names=["value"], fields=[ - MappedField(field_id=9, names=['key']), - MappedField(field_id=10, names=['value']), + MappedField(field_id=9, names=["key"]), + MappedField(field_id=10, names=["value"]), ], ), ], ), MappedField( field_id=11, - names=['location'], + names=["location"], fields=[ MappedField( field_id=12, - names=['element'], + names=["element"], fields=[ - MappedField(field_id=13, names=['latitude']), - MappedField(field_id=14, names=['longitude']), + MappedField(field_id=13, names=["latitude"]), + MappedField(field_id=14, names=["longitude"]), ], ) ], ), MappedField( field_id=15, - names=['person'], + names=["person"], fields=[ - MappedField(field_id=17, names=['age']), - MappedField(field_id=19, names=['name']), - MappedField(field_id=20, names=['add_20']), + MappedField(field_id=17, names=["age"]), + MappedField(field_id=19, names=["name"]), + MappedField(field_id=20, names=["add_20"]), ], ), - MappedField(field_id=18, names=['add_18']), + MappedField(field_id=18, names=["add_18"]), ]) assert update_mapping(table_name_mapping_nested, updates, adds) == expected diff --git a/tests/table/test_snapshots.py b/tests/table/test_snapshots.py index e85ecce506..2569a11dc2 100644 --- a/tests/table/test_snapshots.py +++ b/tests/table/test_snapshots.py @@ -156,9 +156,9 @@ def test_snapshot_summary_collector(table_schema_simple: Schema) -> None: ssc.add_file(data_file, schema=table_schema_simple) assert ssc.build() == { - 'added-data-files': '1', - 'added-files-size': '1234', - 'added-records': '100', + "added-data-files": "1", + "added-files-size": "1234", + "added-records": "100", } @@ -174,7 +174,7 @@ def test_snapshot_summary_collector_with_partition() -> None: NestedField(field_id=2, name="string_field", field_type=StringType(), required=False), NestedField(field_id=3, name="int_field", field_type=IntegerType(), required=False), ) - spec = PartitionSpec(PartitionField(source_id=3, field_id=1001, transform=IdentityTransform(), name='int_field')) + spec = PartitionSpec(PartitionField(source_id=3, 
field_id=1001, transform=IdentityTransform(), name="int_field")) data_file_1 = DataFile(content=DataFileContent.DATA, record_count=100, file_size_in_bytes=1234, partition=Record(int_field=1)) data_file_2 = DataFile(content=DataFileContent.DATA, record_count=200, file_size_in_bytes=4321, partition=Record(int_field=2)) # When @@ -184,13 +184,13 @@ def test_snapshot_summary_collector_with_partition() -> None: # Then assert ssc.build() == { - 'added-files-size': '1234', - 'removed-files-size': '5555', - 'added-data-files': '1', - 'deleted-data-files': '2', - 'added-records': '100', - 'deleted-records': '300', - 'changed-partition-count': '2', + "added-files-size": "1234", + "removed-files-size": "5555", + "added-data-files": "1", + "deleted-data-files": "2", + "added-records": "100", + "deleted-records": "300", + "changed-partition-count": "2", } # When @@ -198,15 +198,15 @@ def test_snapshot_summary_collector_with_partition() -> None: # Then assert ssc.build() == { - 'added-files-size': '1234', - 'removed-files-size': '5555', - 'added-data-files': '1', - 'deleted-data-files': '2', - 'added-records': '100', - 'deleted-records': '300', - 'changed-partition-count': '2', - 'partitions.int_field=1': 'added-files-size=1234,removed-files-size=1234,added-data-files=1,deleted-data-files=1,added-records=100,deleted-records=100', - 'partitions.int_field=2': 'removed-files-size=4321,deleted-data-files=1,deleted-records=200', + "added-files-size": "1234", + "removed-files-size": "5555", + "added-data-files": "1", + "deleted-data-files": "2", + "added-records": "100", + "deleted-records": "300", + "changed-partition-count": "2", + "partitions.int_field=1": "added-files-size=1234,removed-files-size=1234,added-data-files=1,deleted-data-files=1,added-records=100,deleted-records=100", + "partitions.int_field=2": "removed-files-size=4321,deleted-data-files=1,deleted-records=200", } @@ -214,12 +214,12 @@ def test_merge_snapshot_summaries_empty() -> None: assert update_snapshot_summaries(Summary(Operation.APPEND)) == Summary( operation=Operation.APPEND, **{ - 'total-data-files': '0', - 'total-delete-files': '0', - 'total-records': '0', - 'total-files-size': '0', - 'total-position-deletes': '0', - 'total-equality-deletes': '0', + "total-data-files": "0", + "total-delete-files": "0", + "total-records": "0", + "total-files-size": "0", + "total-position-deletes": "0", + "total-equality-deletes": "0", }, ) @@ -229,12 +229,12 @@ def test_merge_snapshot_summaries_new_summary() -> None: summary=Summary( operation=Operation.APPEND, **{ - 'added-data-files': '1', - 'added-delete-files': '2', - 'added-equality-deletes': '3', - 'added-files-size': '4', - 'added-position-deletes': '5', - 'added-records': '6', + "added-data-files": "1", + "added-delete-files": "2", + "added-equality-deletes": "3", + "added-files-size": "4", + "added-position-deletes": "5", + "added-records": "6", }, ) ) @@ -242,18 +242,18 @@ def test_merge_snapshot_summaries_new_summary() -> None: expected = Summary( operation=Operation.APPEND, **{ - 'added-data-files': '1', - 'added-delete-files': '2', - 'added-equality-deletes': '3', - 'added-files-size': '4', - 'added-position-deletes': '5', - 'added-records': '6', - 'total-data-files': '1', - 'total-delete-files': '2', - 'total-records': '6', - 'total-files-size': '4', - 'total-position-deletes': '5', - 'total-equality-deletes': '3', + "added-data-files": "1", + "added-delete-files": "2", + "added-equality-deletes": "3", + "added-files-size": "4", + "added-position-deletes": "5", + "added-records": "6", + 
"total-data-files": "1", + "total-delete-files": "2", + "total-records": "6", + "total-files-size": "4", + "total-position-deletes": "5", + "total-equality-deletes": "3", }, ) @@ -265,44 +265,44 @@ def test_merge_snapshot_summaries_overwrite_summary() -> None: summary=Summary( operation=Operation.OVERWRITE, **{ - 'added-data-files': '1', - 'added-delete-files': '2', - 'added-equality-deletes': '3', - 'added-files-size': '4', - 'added-position-deletes': '5', - 'added-records': '6', + "added-data-files": "1", + "added-delete-files": "2", + "added-equality-deletes": "3", + "added-files-size": "4", + "added-position-deletes": "5", + "added-records": "6", }, ), previous_summary={ - 'total-data-files': '1', - 'total-delete-files': '1', - 'total-equality-deletes': '1', - 'total-files-size': '1', - 'total-position-deletes': '1', - 'total-records': '1', + "total-data-files": "1", + "total-delete-files": "1", + "total-equality-deletes": "1", + "total-files-size": "1", + "total-position-deletes": "1", + "total-records": "1", }, truncate_full_table=True, ) expected = { - 'added-data-files': '1', - 'added-delete-files': '2', - 'added-equality-deletes': '3', - 'added-files-size': '4', - 'added-position-deletes': '5', - 'added-records': '6', - 'total-data-files': '1', - 'total-records': '6', - 'total-delete-files': '2', - 'total-equality-deletes': '3', - 'total-files-size': '4', - 'total-position-deletes': '5', - 'deleted-data-files': '1', - 'removed-delete-files': '1', - 'deleted-records': '1', - 'removed-files-size': '1', - 'removed-position-deletes': '1', - 'removed-equality-deletes': '1', + "added-data-files": "1", + "added-delete-files": "2", + "added-equality-deletes": "3", + "added-files-size": "4", + "added-position-deletes": "5", + "added-records": "6", + "total-data-files": "1", + "total-records": "6", + "total-delete-files": "2", + "total-equality-deletes": "3", + "total-files-size": "4", + "total-position-deletes": "5", + "deleted-data-files": "1", + "removed-delete-files": "1", + "deleted-records": "1", + "removed-files-size": "1", + "removed-position-deletes": "1", + "removed-equality-deletes": "1", } assert actual.additional_properties == expected @@ -324,15 +324,15 @@ def test_invalid_type() -> None: summary=Summary( operation=Operation.OVERWRITE, **{ - 'added-data-files': '1', - 'added-delete-files': '2', - 'added-equality-deletes': '3', - 'added-files-size': '4', - 'added-position-deletes': '5', - 'added-records': '6', + "added-data-files": "1", + "added-delete-files": "2", + "added-equality-deletes": "3", + "added-files-size": "4", + "added-position-deletes": "5", + "added-records": "6", }, ), - previous_summary={'total-data-files': 'abc'}, # should be a number + previous_summary={"total-data-files": "abc"}, # should be a number truncate_full_table=True, ) diff --git a/tests/test_serializers.py b/tests/test_serializers.py index 140db02700..ad40ea08e0 100644 --- a/tests/test_serializers.py +++ b/tests/test_serializers.py @@ -44,7 +44,7 @@ def test_legacy_current_snapshot_id( ToOutputFile.table_metadata(metadata, PyArrowFileIO().new_output(location=metadata_location), overwrite=True) with PyArrowFileIO().new_input(location=metadata_location).open() as input_stream: metadata_json_bytes = input_stream.read() - assert json.loads(metadata_json_bytes)['current-snapshot-id'] == -1 + assert json.loads(metadata_json_bytes)["current-snapshot-id"] == -1 backwards_compatible_static_table = StaticTable.from_metadata(metadata_location) assert 
backwards_compatible_static_table.metadata.current_snapshot_id is None assert backwards_compatible_static_table.metadata == static_table.metadata diff --git a/tests/test_transforms.py b/tests/test_transforms.py index 4dc3d9819f..b8bef4b998 100644 --- a/tests/test_transforms.py +++ b/tests/test_transforms.py @@ -1550,7 +1550,7 @@ def test_strict_bucket_bytes(bound_reference_binary: BoundReference[int]) -> Non def test_strict_bucket_uuid(bound_reference_uuid: BoundReference[int]) -> None: - value = literal(UUID('12345678123456781234567812345678')) + value = literal(UUID("12345678123456781234567812345678")) transform: Transform[Any, int] = BucketTransform(num_buckets=10) _test_projection( lhs=transform.strict_project(name="name", pred=BoundNotEqualTo(term=bound_reference_uuid, literal=value)), @@ -1575,14 +1575,14 @@ def test_strict_bucket_uuid(bound_reference_uuid: BoundReference[int]) -> None: _test_projection( lhs=transform.strict_project( name="name", - pred=BoundNotIn(term=bound_reference_uuid, literals={value, literal(UUID('12345678123456781234567812345679'))}), + pred=BoundNotIn(term=bound_reference_uuid, literals={value, literal(UUID("12345678123456781234567812345679"))}), ), rhs=NotIn(term=Reference("name"), literals={1, 4}), ) _test_projection( lhs=transform.strict_project( name="name", - pred=BoundIn(term=bound_reference_uuid, literals={value, literal(UUID('12345678123456781234567812345679'))}), + pred=BoundIn(term=bound_reference_uuid, literals={value, literal(UUID("12345678123456781234567812345679"))}), ), rhs=None, ) diff --git a/tests/utils/test_config.py b/tests/utils/test_config.py index 2f15bb56d8..066e7d7cc0 100644 --- a/tests/utils/test_config.py +++ b/tests/utils/test_config.py @@ -50,8 +50,8 @@ def test_from_environment_variables_uppercase() -> None: ) def test_fix_nested_objects_from_environment_variables() -> None: assert Config().get_catalog_config("PRODUCTION") == { - 's3.region': 'eu-north-1', - 's3.access-key-id': 'username', + "s3.region": "eu-north-1", + "s3.access-key-id": "username", } diff --git a/tests/utils/test_decimal.py b/tests/utils/test_decimal.py index 419cf05916..3e67bf691a 100644 --- a/tests/utils/test_decimal.py +++ b/tests/utils/test_decimal.py @@ -45,5 +45,5 @@ def test_decimal_required_bytes() -> None: def test_decimal_to_bytes() -> None: # Check the boundary between 2 and 3 bytes. # 2 bytes has a minimum of -32,768 and a maximum value of 32,767 (inclusive). - assert decimal_to_bytes(Decimal('32767.')) == b'\x7f\xff' - assert decimal_to_bytes(Decimal('32768.')) == b'\x00\x80\x00' + assert decimal_to_bytes(Decimal("32767.")) == b"\x7f\xff" + assert decimal_to_bytes(Decimal("32768.")) == b"\x00\x80\x00" From 91973f25a240f4e9c35b917fb7124e8ac12a6ecb Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 31 May 2024 06:44:02 +0200 Subject: [PATCH 23/68] Bump moto from 5.0.8 to 5.0.9 (#783) Bumps [moto](https://github.com/getmoto/moto) from 5.0.8 to 5.0.9. - [Release notes](https://github.com/getmoto/moto/releases) - [Changelog](https://github.com/getmoto/moto/blob/master/CHANGELOG.md) - [Commits](https://github.com/getmoto/moto/compare/5.0.8...5.0.9) --- updated-dependencies: - dependency-name: moto dependency-type: direct:development update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/poetry.lock b/poetry.lock index 7931ee0e38..adacbf7179 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2212,13 +2212,13 @@ test = ["mypy (>=1.0)", "pytest (>=7.0.0)"] [[package]] name = "moto" -version = "5.0.8" +version = "5.0.9" description = "" optional = false python-versions = ">=3.8" files = [ - {file = "moto-5.0.8-py2.py3-none-any.whl", hash = "sha256:7d1035e366434bfa9fcc0621f07d5aa724b6846408071d540137a0554c46f214"}, - {file = "moto-5.0.8.tar.gz", hash = "sha256:517fb808dc718bcbdda54c6ffeaca0adc34cf6e10821bfb01216ce420a31765c"}, + {file = "moto-5.0.9-py2.py3-none-any.whl", hash = "sha256:21a13e02f83d6a18cfcd99949c96abb2e889f4bd51c4c6a3ecc8b78765cb854e"}, + {file = "moto-5.0.9.tar.gz", hash = "sha256:eb71f1cba01c70fff1f16086acb24d6d9aeb32830d646d8989f98a29aeae24ba"}, ] [package.dependencies] From 0339e7fc82b71adc7d2a3de025b1b689ca9a2770 Mon Sep 17 00:00:00 2001 From: Honah J Date: Fri, 31 May 2024 00:27:13 -0700 Subject: [PATCH 24/68] Support CreateTableTransaction for SqlCatalog (#684) --- pyiceberg/catalog/sql.py | 104 +++++++++++++++++++++++--------------- tests/catalog/test_sql.py | 60 ++++++++++++++++++++++ 2 files changed, 124 insertions(+), 40 deletions(-) diff --git a/pyiceberg/catalog/sql.py b/pyiceberg/catalog/sql.py index 6c198767e7..ff7831d77f 100644 --- a/pyiceberg/catalog/sql.py +++ b/pyiceberg/catalog/sql.py @@ -60,7 +60,7 @@ from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionSpec from pyiceberg.schema import Schema from pyiceberg.serializers import FromInputFile -from pyiceberg.table import CommitTableRequest, CommitTableResponse, Table, update_table_metadata +from pyiceberg.table import CommitTableRequest, CommitTableResponse, Table from pyiceberg.table.metadata import new_table_metadata from pyiceberg.table.sorting import UNSORTED_SORT_ORDER, SortOrder from pyiceberg.typedef import EMPTY_DICT, Identifier, Properties @@ -402,59 +402,83 @@ def _commit_table(self, table_request: CommitTableRequest) -> CommitTableRespons identifier_tuple = self.identifier_to_tuple_without_catalog( tuple(table_request.identifier.namespace.root + [table_request.identifier.name]) ) - current_table = self.load_table(identifier_tuple) namespace_tuple = Catalog.namespace_from(identifier_tuple) namespace = Catalog.namespace_to_string(namespace_tuple) table_name = Catalog.table_name_from(identifier_tuple) - base_metadata = current_table.metadata - for requirement in table_request.requirements: - requirement.validate(base_metadata) - updated_metadata = update_table_metadata(base_metadata, table_request.updates) - if updated_metadata == base_metadata: - # no changes, do nothing - return CommitTableResponse(metadata=base_metadata, metadata_location=current_table.metadata_location) + current_table: Optional[Table] + try: + current_table = self.load_table(identifier_tuple) + except NoSuchTableError: + current_table = None - # write new metadata - new_metadata_version = self._parse_metadata_version(current_table.metadata_location) + 1 - new_metadata_location = self._get_metadata_location(current_table.metadata.location, new_metadata_version) - self._write_metadata(updated_metadata, current_table.io, new_metadata_location) + updated_staged_table = self._update_and_stage_table(current_table, table_request) + if current_table and updated_staged_table.metadata == 
current_table.metadata: + # no changes, do nothing + return CommitTableResponse(metadata=current_table.metadata, metadata_location=current_table.metadata_location) + self._write_metadata( + metadata=updated_staged_table.metadata, + io=updated_staged_table.io, + metadata_path=updated_staged_table.metadata_location, + ) with Session(self.engine) as session: - if self.engine.dialect.supports_sane_rowcount: - stmt = ( - update(IcebergTables) - .where( - IcebergTables.catalog_name == self.name, - IcebergTables.table_namespace == namespace, - IcebergTables.table_name == table_name, - IcebergTables.metadata_location == current_table.metadata_location, - ) - .values(metadata_location=new_metadata_location, previous_metadata_location=current_table.metadata_location) - ) - result = session.execute(stmt) - if result.rowcount < 1: - raise CommitFailedException(f"Table has been updated by another process: {namespace}.{table_name}") - else: - try: - tbl = ( - session.query(IcebergTables) - .with_for_update(of=IcebergTables) - .filter( + if current_table: + # table exists, update it + if self.engine.dialect.supports_sane_rowcount: + stmt = ( + update(IcebergTables) + .where( IcebergTables.catalog_name == self.name, IcebergTables.table_namespace == namespace, IcebergTables.table_name == table_name, IcebergTables.metadata_location == current_table.metadata_location, ) - .one() + .values( + metadata_location=updated_staged_table.metadata_location, + previous_metadata_location=current_table.metadata_location, + ) ) - tbl.metadata_location = new_metadata_location - tbl.previous_metadata_location = current_table.metadata_location - except NoResultFound as e: - raise CommitFailedException(f"Table has been updated by another process: {namespace}.{table_name}") from e - session.commit() + result = session.execute(stmt) + if result.rowcount < 1: + raise CommitFailedException(f"Table has been updated by another process: {namespace}.{table_name}") + else: + try: + tbl = ( + session.query(IcebergTables) + .with_for_update(of=IcebergTables) + .filter( + IcebergTables.catalog_name == self.name, + IcebergTables.table_namespace == namespace, + IcebergTables.table_name == table_name, + IcebergTables.metadata_location == current_table.metadata_location, + ) + .one() + ) + tbl.metadata_location = updated_staged_table.metadata_location + tbl.previous_metadata_location = current_table.metadata_location + except NoResultFound as e: + raise CommitFailedException(f"Table has been updated by another process: {namespace}.{table_name}") from e + session.commit() + else: + # table does not exist, create it + try: + session.add( + IcebergTables( + catalog_name=self.name, + table_namespace=namespace, + table_name=table_name, + metadata_location=updated_staged_table.metadata_location, + previous_metadata_location=None, + ) + ) + session.commit() + except IntegrityError as e: + raise TableAlreadyExistsError(f"Table {namespace}.{table_name} already exists") from e - return CommitTableResponse(metadata=updated_metadata, metadata_location=new_metadata_location) + return CommitTableResponse( + metadata=updated_staged_table.metadata, metadata_location=updated_staged_table.metadata_location + ) def _namespace_exists(self, identifier: Union[str, Identifier]) -> bool: namespace_tuple = Catalog.identifier_to_tuple(identifier) diff --git a/tests/catalog/test_sql.py b/tests/catalog/test_sql.py index 6dc498233e..545916223a 100644 --- a/tests/catalog/test_sql.py +++ b/tests/catalog/test_sql.py @@ -1350,6 +1350,66 @@ def 
test_write_and_evolve(catalog: SqlCatalog, format_version: int) -> None: snapshot_update.append_data_file(data_file) +@pytest.mark.parametrize( + "catalog", + [ + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), + lazy_fixture("catalog_sqlite_without_rowcount"), + ], +) +@pytest.mark.parametrize("format_version", [1, 2]) +def test_create_table_transaction(catalog: SqlCatalog, format_version: int) -> None: + identifier = f"default.arrow_create_table_transaction_{catalog.name}_{format_version}" + try: + catalog.create_namespace("default") + except NamespaceAlreadyExistsError: + pass + + try: + catalog.drop_table(identifier=identifier) + except NoSuchTableError: + pass + + pa_table = pa.Table.from_pydict( + { + "foo": ["a", None, "z"], + }, + schema=pa.schema([pa.field("foo", pa.string(), nullable=True)]), + ) + + pa_table_with_column = pa.Table.from_pydict( + { + "foo": ["a", None, "z"], + "bar": [19, None, 25], + }, + schema=pa.schema([ + pa.field("foo", pa.string(), nullable=True), + pa.field("bar", pa.int32(), nullable=True), + ]), + ) + + with catalog.create_table_transaction( + identifier=identifier, schema=pa_table.schema, properties={"format-version": str(format_version)} + ) as txn: + with txn.update_snapshot().fast_append() as snapshot_update: + for data_file in _dataframe_to_data_files(table_metadata=txn.table_metadata, df=pa_table, io=txn._table.io): + snapshot_update.append_data_file(data_file) + + with txn.update_schema() as schema_txn: + schema_txn.union_by_name(pa_table_with_column.schema) + + with txn.update_snapshot().fast_append() as snapshot_update: + for data_file in _dataframe_to_data_files( + table_metadata=txn.table_metadata, df=pa_table_with_column, io=txn._table.io + ): + snapshot_update.append_data_file(data_file) + + tbl = catalog.load_table(identifier=identifier) + assert tbl.format_version == format_version + assert len(tbl.scan().to_arrow()) == 6 + + @pytest.mark.parametrize( "catalog", [ From 84a2c043870111937e2802132486d8eb5979570e Mon Sep 17 00:00:00 2001 From: Honah J Date: Fri, 31 May 2024 00:33:03 -0700 Subject: [PATCH 25/68] Support CreateTableTransaction for HiveCatalog (#683) --- pyiceberg/catalog/__init__.py | 2 +- pyiceberg/catalog/hive.py | 161 +++++++++++-------- tests/integration/test_writes/test_writes.py | 16 +- 3 files changed, 104 insertions(+), 75 deletions(-) diff --git a/pyiceberg/catalog/__init__.py b/pyiceberg/catalog/__init__.py index ea2bc65760..9a951b5c8e 100644 --- a/pyiceberg/catalog/__init__.py +++ b/pyiceberg/catalog/__init__.py @@ -761,7 +761,7 @@ def _create_staged_table( metadata = new_table_metadata( location=location, schema=schema, partition_spec=partition_spec, sort_order=sort_order, properties=properties ) - io = load_file_io(properties=self.properties, location=metadata_location) + io = self._load_file_io(properties=properties, location=metadata_location) return StagedTable( identifier=(self.name, database_name, table_name), metadata=metadata, diff --git a/pyiceberg/catalog/hive.py b/pyiceberg/catalog/hive.py index 13b57b6ea9..83bbd50779 100644 --- a/pyiceberg/catalog/hive.py +++ b/pyiceberg/catalog/hive.py @@ -70,11 +70,11 @@ NamespaceNotEmptyError, NoSuchIcebergTableError, NoSuchNamespaceError, + NoSuchPropertyException, NoSuchTableError, TableAlreadyExistsError, WaitingForLockException, ) -from pyiceberg.io import FileIO, load_file_io from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionSpec from pyiceberg.schema import Schema, SchemaVisitor, visit from pyiceberg.serializers import 
FromInputFile @@ -82,11 +82,10 @@ CommitTableRequest, CommitTableResponse, PropertyUtil, + StagedTable, Table, TableProperties, - update_table_metadata, ) -from pyiceberg.table.metadata import new_table_metadata from pyiceberg.table.sorting import UNSORTED_SORT_ORDER, SortOrder from pyiceberg.typedef import EMPTY_DICT, Identifier, Properties from pyiceberg.types import ( @@ -272,10 +271,12 @@ def __init__(self, name: str, **properties: str): DEFAULT_LOCK_CHECK_RETRIES, ) - def _convert_hive_into_iceberg(self, table: HiveTable, io: FileIO) -> Table: + def _convert_hive_into_iceberg(self, table: HiveTable) -> Table: properties: Dict[str, str] = table.parameters if TABLE_TYPE not in properties: - raise NoSuchTableError(f"Property table_type missing, could not determine type: {table.dbName}.{table.tableName}") + raise NoSuchPropertyException( + f"Property table_type missing, could not determine type: {table.dbName}.{table.tableName}" + ) table_type = properties[TABLE_TYPE] if table_type.lower() != ICEBERG: @@ -286,8 +287,9 @@ def _convert_hive_into_iceberg(self, table: HiveTable, io: FileIO) -> Table: if prop_metadata_location := properties.get(METADATA_LOCATION): metadata_location = prop_metadata_location else: - raise NoSuchTableError(f"Table property {METADATA_LOCATION} is missing") + raise NoSuchPropertyException(f"Table property {METADATA_LOCATION} is missing") + io = self._load_file_io(location=metadata_location) file = io.new_input(metadata_location) metadata = FromInputFile.table_metadata(file) return Table( @@ -298,6 +300,38 @@ def _convert_hive_into_iceberg(self, table: HiveTable, io: FileIO) -> Table: catalog=self, ) + def _convert_iceberg_into_hive(self, table: Table) -> HiveTable: + identifier_tuple = self.identifier_to_tuple_without_catalog(table.identifier) + database_name, table_name = self.identifier_to_database_and_table(identifier_tuple, NoSuchTableError) + current_time_millis = int(time.time() * 1000) + + return HiveTable( + dbName=database_name, + tableName=table_name, + owner=table.properties[OWNER] if table.properties and OWNER in table.properties else getpass.getuser(), + createTime=current_time_millis // 1000, + lastAccessTime=current_time_millis // 1000, + sd=_construct_hive_storage_descriptor( + table.schema(), + table.location(), + PropertyUtil.property_as_bool(self.properties, HIVE2_COMPATIBLE, HIVE2_COMPATIBLE_DEFAULT), + ), + tableType=EXTERNAL_TABLE, + parameters=_construct_parameters(table.metadata_location), + ) + + def _create_hive_table(self, open_client: Client, hive_table: HiveTable) -> None: + try: + open_client.create_table(hive_table) + except AlreadyExistsException as e: + raise TableAlreadyExistsError(f"Table {hive_table.dbName}.{hive_table.tableName} already exists") from e + + def _get_hive_table(self, open_client: Client, database_name: str, table_name: str) -> HiveTable: + try: + return open_client.get_table(dbname=database_name, tbl_name=table_name) + except NoSuchObjectException as e: + raise NoSuchTableError(f"Table does not exists: {table_name}") from e + def create_table( self, identifier: Union[str, Identifier], @@ -324,45 +358,25 @@ def create_table( AlreadyExistsError: If a table with the name already exists. ValueError: If the identifier is invalid. 
""" - schema: Schema = self._convert_schema_if_needed(schema) # type: ignore - properties = {**DEFAULT_PROPERTIES, **properties} - database_name, table_name = self.identifier_to_database_and_table(identifier) - current_time_millis = int(time.time() * 1000) - - location = self._resolve_table_location(location, database_name, table_name) - - metadata_location = self._get_metadata_location(location=location) - metadata = new_table_metadata( - location=location, + staged_table = self._create_staged_table( + identifier=identifier, schema=schema, + location=location, partition_spec=partition_spec, sort_order=sort_order, properties=properties, ) - io = load_file_io({**self.properties, **properties}, location=location) - self._write_metadata(metadata, io, metadata_location) + database_name, table_name = self.identifier_to_database_and_table(identifier) - tbl = HiveTable( - dbName=database_name, - tableName=table_name, - owner=properties[OWNER] if properties and OWNER in properties else getpass.getuser(), - createTime=current_time_millis // 1000, - lastAccessTime=current_time_millis // 1000, - sd=_construct_hive_storage_descriptor( - schema, location, PropertyUtil.property_as_bool(self.properties, HIVE2_COMPATIBLE, HIVE2_COMPATIBLE_DEFAULT) - ), - tableType=EXTERNAL_TABLE, - parameters=_construct_parameters(metadata_location), - ) - try: - with self._client as open_client: - open_client.create_table(tbl) - hive_table = open_client.get_table(dbname=database_name, tbl_name=table_name) - except AlreadyExistsException as e: - raise TableAlreadyExistsError(f"Table {database_name}.{table_name} already exists") from e + self._write_metadata(staged_table.metadata, staged_table.io, staged_table.metadata_location) + tbl = self._convert_iceberg_into_hive(staged_table) + + with self._client as open_client: + self._create_hive_table(open_client, tbl) + hive_table = open_client.get_table(dbname=database_name, tbl_name=table_name) - return self._convert_hive_into_iceberg(hive_table, io) + return self._convert_hive_into_iceberg(hive_table) def register_table(self, identifier: Union[str, Identifier], metadata_location: str) -> Table: """Register a new table using existing metadata. 
@@ -437,36 +451,52 @@ def _commit_table(self, table_request: CommitTableRequest) -> CommitTableRespons else: raise CommitFailedException(f"Failed to acquire lock for {table_request.identifier}, state: {lock.state}") - hive_table = open_client.get_table(dbname=database_name, tbl_name=table_name) - io = load_file_io({**self.properties, **hive_table.parameters}, hive_table.sd.location) - current_table = self._convert_hive_into_iceberg(hive_table, io) - - base_metadata = current_table.metadata - for requirement in table_request.requirements: - requirement.validate(base_metadata) - - updated_metadata = update_table_metadata(base_metadata, table_request.updates) - if updated_metadata == base_metadata: + hive_table: Optional[HiveTable] + current_table: Optional[Table] + try: + hive_table = self._get_hive_table(open_client, database_name, table_name) + current_table = self._convert_hive_into_iceberg(hive_table) + except NoSuchTableError: + hive_table = None + current_table = None + + updated_staged_table = self._update_and_stage_table(current_table, table_request) + if current_table and updated_staged_table.metadata == current_table.metadata: # no changes, do nothing - return CommitTableResponse(metadata=base_metadata, metadata_location=current_table.metadata_location) - - # write new metadata - new_metadata_version = self._parse_metadata_version(current_table.metadata_location) + 1 - new_metadata_location = self._get_metadata_location(current_table.metadata.location, new_metadata_version) - self._write_metadata(updated_metadata, current_table.io, new_metadata_location) - - hive_table.parameters = _construct_parameters( - metadata_location=new_metadata_location, previous_metadata_location=current_table.metadata_location + return CommitTableResponse(metadata=current_table.metadata, metadata_location=current_table.metadata_location) + self._write_metadata( + metadata=updated_staged_table.metadata, + io=updated_staged_table.io, + metadata_path=updated_staged_table.metadata_location, ) - open_client.alter_table(dbname=database_name, tbl_name=table_name, new_tbl=hive_table) - except NoSuchObjectException as e: - raise NoSuchTableError(f"Table does not exist: {table_name}") from e + + if hive_table and current_table: + # Table exists, update it. + hive_table.parameters = _construct_parameters( + metadata_location=updated_staged_table.metadata_location, + previous_metadata_location=current_table.metadata_location, + ) + open_client.alter_table(dbname=database_name, tbl_name=table_name, new_tbl=hive_table) + else: + # Table does not exist, create it. + hive_table = self._convert_iceberg_into_hive( + StagedTable( + identifier=(self.name, database_name, table_name), + metadata=updated_staged_table.metadata, + metadata_location=updated_staged_table.metadata_location, + io=updated_staged_table.io, + catalog=self, + ) + ) + self._create_hive_table(open_client, hive_table) except WaitingForLockException as e: raise CommitFailedException(f"Failed to acquire lock for {table_request.identifier}, state: {lock.state}") from e finally: open_client.unlock(UnlockRequest(lockid=lock.lockid)) - return CommitTableResponse(metadata=updated_metadata, metadata_location=new_metadata_location) + return CommitTableResponse( + metadata=updated_staged_table.metadata, metadata_location=updated_staged_table.metadata_location + ) def load_table(self, identifier: Union[str, Identifier]) -> Table: """Load the table's metadata and return the table instance. 
@@ -485,14 +515,11 @@ def load_table(self, identifier: Union[str, Identifier]) -> Table: """ identifier_tuple = self.identifier_to_tuple_without_catalog(identifier) database_name, table_name = self.identifier_to_database_and_table(identifier_tuple, NoSuchTableError) - try: - with self._client as open_client: - hive_table = open_client.get_table(dbname=database_name, tbl_name=table_name) - except NoSuchObjectException as e: - raise NoSuchTableError(f"Table does not exists: {table_name}") from e - io = load_file_io({**self.properties, **hive_table.parameters}, hive_table.sd.location) - return self._convert_hive_into_iceberg(hive_table, io) + with self._client as open_client: + hive_table = self._get_hive_table(open_client, database_name, table_name) + + return self._convert_hive_into_iceberg(hive_table) def drop_table(self, identifier: Union[str, Identifier]) -> None: """Drop a table. diff --git a/tests/integration/test_writes/test_writes.py b/tests/integration/test_writes/test_writes.py index 0941b35850..e329adcd5c 100644 --- a/tests/integration/test_writes/test_writes.py +++ b/tests/integration/test_writes/test_writes.py @@ -34,6 +34,7 @@ from pyiceberg.catalog import Catalog from pyiceberg.catalog.hive import HiveCatalog +from pyiceberg.catalog.rest import RestCatalog from pyiceberg.catalog.sql import SqlCatalog from pyiceberg.exceptions import NoSuchTableError from pyiceberg.partitioning import PartitionField, PartitionSpec @@ -637,17 +638,18 @@ def test_write_and_evolve(session_catalog: Catalog, format_version: int) -> None @pytest.mark.integration -@pytest.mark.parametrize("format_version", [2]) -def test_create_table_transaction(session_catalog: Catalog, format_version: int) -> None: - if format_version == 1: +@pytest.mark.parametrize("format_version", [1, 2]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) +def test_create_table_transaction(catalog: Catalog, format_version: int) -> None: + if format_version == 1 and isinstance(catalog, RestCatalog): pytest.skip( "There is a bug in the REST catalog (maybe server side) that prevents create and commit a staged version 1 table" ) - identifier = f"default.arrow_create_table_transaction{format_version}" + identifier = f"default.arrow_create_table_transaction_{catalog.name}_{format_version}" try: - session_catalog.drop_table(identifier=identifier) + catalog.drop_table(identifier=identifier) except NoSuchTableError: pass @@ -669,7 +671,7 @@ def test_create_table_transaction(session_catalog: Catalog, format_version: int) ]), ) - with session_catalog.create_table_transaction( + with catalog.create_table_transaction( identifier=identifier, schema=pa_table.schema, properties={"format-version": str(format_version)} ) as txn: with txn.update_snapshot().fast_append() as snapshot_update: @@ -685,7 +687,7 @@ def test_create_table_transaction(session_catalog: Catalog, format_version: int) ): snapshot_update.append_data_file(data_file) - tbl = session_catalog.load_table(identifier=identifier) + tbl = catalog.load_table(identifier=identifier) assert tbl.format_version == format_version assert len(tbl.scan().to_arrow()) == 6 From 8d79664d3a6010a92468bfbee1a55283591d7800 Mon Sep 17 00:00:00 2001 From: Yothin M <689679+yothinix@users.noreply.github.com> Date: Fri, 31 May 2024 15:00:53 +0700 Subject: [PATCH 26/68] Support viewfs scheme along side with hdfs (#777) --- pyiceberg/io/__init__.py | 1 + pyiceberg/io/pyarrow.py | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) 
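Note (illustrative, not part of the patch): the hunks below simply treat the `viewfs` scheme the same way as `hdfs` when resolving a table location to a PyArrow filesystem. A minimal sketch of the resulting behaviour, assuming `parse_location` remains a static method on `PyArrowFileIO`; the cluster name and metadata path in the URI are made-up examples.

    from pyiceberg.io.pyarrow import PyArrowFileIO

    # A Hadoop federation (ViewFs) location now parses like an hdfs:// one:
    # the scheme and authority are kept, and the path is passed through unchanged.
    scheme, netloc, path = PyArrowFileIO.parse_location(
        "viewfs://cluster-a/warehouse/db/tbl/metadata/00000-abc.metadata.json"
    )
    assert (scheme, netloc, path) == (
        "viewfs",
        "cluster-a",
        "/warehouse/db/tbl/metadata/00000-abc.metadata.json",
    )
    # _initialize_fs then builds the filesystem from "viewfs://cluster-a",
    # mirroring the pre-existing hdfs:// code path.
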
diff --git a/pyiceberg/io/__init__.py b/pyiceberg/io/__init__.py index 1a78f306c6..9143cf6650 100644 --- a/pyiceberg/io/__init__.py +++ b/pyiceberg/io/__init__.py @@ -284,6 +284,7 @@ def delete(self, location: Union[str, InputFile, OutputFile]) -> None: "gs": [ARROW_FILE_IO], "file": [ARROW_FILE_IO, FSSPEC_FILE_IO], "hdfs": [ARROW_FILE_IO], + "viewfs": [ARROW_FILE_IO], "abfs": [FSSPEC_FILE_IO], "abfss": [FSSPEC_FILE_IO], } diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py index 9216c37f15..04f30ec63e 100644 --- a/pyiceberg/io/pyarrow.py +++ b/pyiceberg/io/pyarrow.py @@ -332,7 +332,7 @@ def parse_location(location: str) -> Tuple[str, str, str]: uri = urlparse(location) if not uri.scheme: return "file", uri.netloc, os.path.abspath(location) - elif uri.scheme == "hdfs": + elif uri.scheme in ("hdfs", "viewfs"): return uri.scheme, uri.netloc, uri.path else: return uri.scheme, uri.netloc, f"{uri.netloc}{uri.path}" @@ -356,12 +356,12 @@ def _initialize_fs(self, scheme: str, netloc: Optional[str] = None) -> FileSyste client_kwargs["connect_timeout"] = float(connect_timeout) return S3FileSystem(**client_kwargs) - elif scheme == "hdfs": + elif scheme in ("hdfs", "viewfs"): from pyarrow.fs import HadoopFileSystem hdfs_kwargs: Dict[str, Any] = {} if netloc: - return HadoopFileSystem.from_uri(f"hdfs://{netloc}") + return HadoopFileSystem.from_uri(f"{scheme}://{netloc}") if host := self.properties.get(HDFS_HOST): hdfs_kwargs["host"] = host if port := self.properties.get(HDFS_PORT): From 20f6afdf5f000ea5b167e804012f2000aa5b8573 Mon Sep 17 00:00:00 2001 From: Christian Date: Fri, 31 May 2024 17:19:40 +0200 Subject: [PATCH 27/68] Update `fsspec.py`to respect `s3.signer.uri property` (#741) * Update fsspec.py to respect s3.signer.uri property * Add S3_SIGNER_URI constant, add docs --------- Co-authored-by: Fokko Driesprong --- mkdocs/docs/configuration.md | 1 + pyiceberg/io/__init__.py | 1 + pyiceberg/io/fsspec.py | 3 ++- 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/mkdocs/docs/configuration.md b/mkdocs/docs/configuration.md index c0879b1d28..f8a69119c8 100644 --- a/mkdocs/docs/configuration.md +++ b/mkdocs/docs/configuration.md @@ -89,6 +89,7 @@ For the FileIO there are several configuration options available: | s3.access-key-id | admin | Configure the static secret access key used to access the FileIO. | | s3.secret-access-key | password | Configure the static session token used to access the FileIO. | | s3.signer | bearer | Configure the signature version of the FileIO. | +| s3.signer.uri | http://my.signer:8080/s3 | Configure the remote signing uri if it differs from the catalog uri. Remote signing is only implemented for `FsspecFileIO`. The final request is sent to `/v1/aws/s3/sign`. | | s3.region | us-west-2 | Sets the region of the bucket | | s3.proxy-uri | http://my.proxy.com:8080 | Configure the proxy server to be used by the FileIO. | | s3.connect-timeout | 60.0 | Configure socket connection timeout, in seconds. 
| diff --git a/pyiceberg/io/__init__.py b/pyiceberg/io/__init__.py index 9143cf6650..36c3e625c8 100644 --- a/pyiceberg/io/__init__.py +++ b/pyiceberg/io/__init__.py @@ -53,6 +53,7 @@ S3_REGION = "s3.region" S3_PROXY_URI = "s3.proxy-uri" S3_CONNECT_TIMEOUT = "s3.connect-timeout" +S3_SIGNER_URI = "s3.signer.uri" HDFS_HOST = "hdfs.host" HDFS_PORT = "hdfs.port" HDFS_USER = "hdfs.user" diff --git a/pyiceberg/io/fsspec.py b/pyiceberg/io/fsspec.py index 1089c9fe50..bb76f043c9 100644 --- a/pyiceberg/io/fsspec.py +++ b/pyiceberg/io/fsspec.py @@ -63,6 +63,7 @@ S3_REGION, S3_SECRET_ACCESS_KEY, S3_SESSION_TOKEN, + S3_SIGNER_URI, ADLFS_ClIENT_SECRET, FileIO, InputFile, @@ -79,7 +80,7 @@ def s3v4_rest_signer(properties: Properties, request: AWSRequest, **_: Any) -> A if TOKEN not in properties: raise SignError("Signer set, but token is not available") - signer_url = properties["uri"].rstrip("/") + signer_url = properties.get(S3_SIGNER_URI, properties["uri"]).rstrip("/") signer_headers = {"Authorization": f"Bearer {properties[TOKEN]}"} signer_body = { "method": request.method, From 65a03d2667ac073778b03d99d6580149a2abb326 Mon Sep 17 00:00:00 2001 From: Sung Yun <107272191+syun64@users.noreply.github.com> Date: Fri, 31 May 2024 16:11:35 -0400 Subject: [PATCH 28/68] Support Appends with TimeTransform Partitions (#784) * checkpoint * checkpoint2 * todo: sort with pyarrow_transform vals * checkpoint * checkpoint * fix * tests * more tests * adopt review feedback * comment * checkpoint * checkpoint2 * todo: sort with pyarrow_transform vals * checkpoint * checkpoint * fix * tests * more tests * adopt review feedback * comment * rebase --- pyiceberg/partitioning.py | 2 +- pyiceberg/table/__init__.py | 67 +++--- pyiceberg/transforms.py | 99 ++++++++- tests/conftest.py | 43 ++++ .../test_writes/test_partitioned_writes.py | 201 ++++++++++++++++-- tests/test_transforms.py | 34 ++- 6 files changed, 392 insertions(+), 54 deletions(-) diff --git a/pyiceberg/partitioning.py b/pyiceberg/partitioning.py index 481207db7a..da52d5df8e 100644 --- a/pyiceberg/partitioning.py +++ b/pyiceberg/partitioning.py @@ -387,7 +387,7 @@ def partition(self) -> Record: # partition key transformed with iceberg interna for raw_partition_field_value in self.raw_partition_field_values: partition_fields = self.partition_spec.source_id_to_fields_map[raw_partition_field_value.field.source_id] if len(partition_fields) != 1: - raise ValueError("partition_fields must contain exactly one field.") + raise ValueError(f"Cannot have redundant partitions: {partition_fields}") partition_field = partition_fields[0] iceberg_typed_key_values[partition_field.name] = partition_record_value( partition_field=partition_field, diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py index aa108de08b..f160ab2441 100644 --- a/pyiceberg/table/__init__.py +++ b/pyiceberg/table/__init__.py @@ -392,10 +392,11 @@ def append(self, df: pa.Table, snapshot_properties: Dict[str, str] = EMPTY_DICT) if not isinstance(df, pa.Table): raise ValueError(f"Expected PyArrow table, got: {df}") - supported_transforms = {IdentityTransform} - if not all(type(field.transform) in supported_transforms for field in self.table_metadata.spec().fields): + if unsupported_partitions := [ + field for field in self.table_metadata.spec().fields if not field.transform.supports_pyarrow_transform + ]: raise ValueError( - f"All transforms are not supported, expected: {supported_transforms}, but get: {[str(field) for field in self.table_metadata.spec().fields if field.transform not in 
supported_transforms]}." + f"Not all partition types are supported for writes. Following partitions cannot be written using pyarrow: {unsupported_partitions}." ) _check_schema_compatible(self._table.schema(), other_schema=df.schema) @@ -3643,33 +3644,6 @@ class TablePartition: arrow_table_partition: pa.Table -def _get_partition_sort_order(partition_columns: list[str], reverse: bool = False) -> dict[str, Any]: - order = "ascending" if not reverse else "descending" - null_placement = "at_start" if reverse else "at_end" - return {"sort_keys": [(column_name, order) for column_name in partition_columns], "null_placement": null_placement} - - -def group_by_partition_scheme(arrow_table: pa.Table, partition_columns: list[str]) -> pa.Table: - """Given a table, sort it by current partition scheme.""" - # only works for identity for now - sort_options = _get_partition_sort_order(partition_columns, reverse=False) - sorted_arrow_table = arrow_table.sort_by(sorting=sort_options["sort_keys"], null_placement=sort_options["null_placement"]) - return sorted_arrow_table - - -def get_partition_columns( - spec: PartitionSpec, - schema: Schema, -) -> list[str]: - partition_cols = [] - for partition_field in spec.fields: - column_name = schema.find_column_name(partition_field.source_id) - if not column_name: - raise ValueError(f"{partition_field=} could not be found in {schema}.") - partition_cols.append(column_name) - return partition_cols - - def _get_table_partitions( arrow_table: pa.Table, partition_spec: PartitionSpec, @@ -3724,13 +3698,30 @@ def _determine_partitions(spec: PartitionSpec, schema: Schema, arrow_table: pa.T """ import pyarrow as pa - partition_columns = get_partition_columns(spec=spec, schema=schema) - arrow_table = group_by_partition_scheme(arrow_table, partition_columns) - - reversing_sort_order_options = _get_partition_sort_order(partition_columns, reverse=True) - reversed_indices = pa.compute.sort_indices(arrow_table, **reversing_sort_order_options).to_pylist() - - slice_instructions: list[dict[str, Any]] = [] + partition_columns: List[Tuple[PartitionField, NestedField]] = [ + (partition_field, schema.find_field(partition_field.source_id)) for partition_field in spec.fields + ] + partition_values_table = pa.table({ + str(partition.field_id): partition.transform.pyarrow_transform(field.field_type)(arrow_table[field.name]) + for partition, field in partition_columns + }) + + # Sort by partitions + sort_indices = pa.compute.sort_indices( + partition_values_table, + sort_keys=[(col, "ascending") for col in partition_values_table.column_names], + null_placement="at_end", + ).to_pylist() + arrow_table = arrow_table.take(sort_indices) + + # Get slice_instructions to group by partitions + partition_values_table = partition_values_table.take(sort_indices) + reversed_indices = pa.compute.sort_indices( + partition_values_table, + sort_keys=[(col, "descending") for col in partition_values_table.column_names], + null_placement="at_start", + ).to_pylist() + slice_instructions: List[Dict[str, Any]] = [] last = len(reversed_indices) reversed_indices_size = len(reversed_indices) ptr = 0 @@ -3741,6 +3732,6 @@ def _determine_partitions(spec: PartitionSpec, schema: Schema, arrow_table: pa.T last = reversed_indices[ptr] ptr = ptr + group_size - table_partitions: list[TablePartition] = _get_table_partitions(arrow_table, spec, schema, slice_instructions) + table_partitions: List[TablePartition] = _get_table_partitions(arrow_table, spec, schema, slice_instructions) return table_partitions diff --git 
a/pyiceberg/transforms.py b/pyiceberg/transforms.py index 6dcae59e49..38cc6221a2 100644 --- a/pyiceberg/transforms.py +++ b/pyiceberg/transforms.py @@ -20,7 +20,7 @@ from abc import ABC, abstractmethod from enum import IntEnum from functools import singledispatch -from typing import Any, Callable, Generic, Optional, TypeVar +from typing import TYPE_CHECKING, Any, Callable, Generic, Optional, TypeVar from typing import Literal as LiteralType from uuid import UUID @@ -82,6 +82,9 @@ from pyiceberg.utils.parsing import ParseNumberFromBrackets from pyiceberg.utils.singleton import Singleton +if TYPE_CHECKING: + import pyarrow as pa + S = TypeVar("S") T = TypeVar("T") @@ -175,6 +178,13 @@ def __eq__(self, other: Any) -> bool: return self.root == other.root return False + @property + def supports_pyarrow_transform(self) -> bool: + return False + + @abstractmethod + def pyarrow_transform(self, source: IcebergType) -> "Callable[[pa.Array], pa.Array]": ... + class BucketTransform(Transform[S, int]): """Base Transform class to transform a value into a bucket partition value. @@ -290,6 +300,9 @@ def __repr__(self) -> str: """Return the string representation of the BucketTransform class.""" return f"BucketTransform(num_buckets={self._num_buckets})" + def pyarrow_transform(self, source: IcebergType) -> "Callable[[pa.Array], pa.Array]": + raise NotImplementedError() + class TimeResolution(IntEnum): YEAR = 6 @@ -349,6 +362,10 @@ def dedup_name(self) -> str: def preserves_order(self) -> bool: return True + @property + def supports_pyarrow_transform(self) -> bool: + return True + class YearTransform(TimeTransform[S]): """Transforms a datetime value into a year value. @@ -391,6 +408,21 @@ def __repr__(self) -> str: """Return the string representation of the YearTransform class.""" return "YearTransform()" + def pyarrow_transform(self, source: IcebergType) -> "Callable[[pa.Array], pa.Array]": + import pyarrow as pa + import pyarrow.compute as pc + + if isinstance(source, DateType): + epoch = datetime.EPOCH_DATE + elif isinstance(source, TimestampType): + epoch = datetime.EPOCH_TIMESTAMP + elif isinstance(source, TimestamptzType): + epoch = datetime.EPOCH_TIMESTAMPTZ + else: + raise ValueError(f"Cannot apply year transform for type: {source}") + + return lambda v: pc.years_between(pa.scalar(epoch), v) if v is not None else None + class MonthTransform(TimeTransform[S]): """Transforms a datetime value into a month value. @@ -433,6 +465,27 @@ def __repr__(self) -> str: """Return the string representation of the MonthTransform class.""" return "MonthTransform()" + def pyarrow_transform(self, source: IcebergType) -> "Callable[[pa.Array], pa.Array]": + import pyarrow as pa + import pyarrow.compute as pc + + if isinstance(source, DateType): + epoch = datetime.EPOCH_DATE + elif isinstance(source, TimestampType): + epoch = datetime.EPOCH_TIMESTAMP + elif isinstance(source, TimestamptzType): + epoch = datetime.EPOCH_TIMESTAMPTZ + else: + raise ValueError(f"Cannot apply month transform for type: {source}") + + def month_func(v: pa.Array) -> pa.Array: + return pc.add( + pc.multiply(pc.years_between(pa.scalar(epoch), v), pa.scalar(12)), + pc.add(pc.month(v), pa.scalar(-1)), + ) + + return lambda v: month_func(v) if v is not None else None + class DayTransform(TimeTransform[S]): """Transforms a datetime value into a day value. 
@@ -478,6 +531,21 @@ def __repr__(self) -> str: """Return the string representation of the DayTransform class.""" return "DayTransform()" + def pyarrow_transform(self, source: IcebergType) -> "Callable[[pa.Array], pa.Array]": + import pyarrow as pa + import pyarrow.compute as pc + + if isinstance(source, DateType): + epoch = datetime.EPOCH_DATE + elif isinstance(source, TimestampType): + epoch = datetime.EPOCH_TIMESTAMP + elif isinstance(source, TimestamptzType): + epoch = datetime.EPOCH_TIMESTAMPTZ + else: + raise ValueError(f"Cannot apply day transform for type: {source}") + + return lambda v: pc.days_between(pa.scalar(epoch), v) if v is not None else None + class HourTransform(TimeTransform[S]): """Transforms a datetime value into a hour value. @@ -515,6 +583,19 @@ def __repr__(self) -> str: """Return the string representation of the HourTransform class.""" return "HourTransform()" + def pyarrow_transform(self, source: IcebergType) -> "Callable[[pa.Array], pa.Array]": + import pyarrow as pa + import pyarrow.compute as pc + + if isinstance(source, TimestampType): + epoch = datetime.EPOCH_TIMESTAMP + elif isinstance(source, TimestamptzType): + epoch = datetime.EPOCH_TIMESTAMPTZ + else: + raise ValueError(f"Cannot apply hour transform for type: {source}") + + return lambda v: pc.hours_between(pa.scalar(epoch), v) if v is not None else None + def _base64encode(buffer: bytes) -> str: """Convert bytes to base64 string.""" @@ -585,6 +666,13 @@ def __repr__(self) -> str: """Return the string representation of the IdentityTransform class.""" return "IdentityTransform()" + def pyarrow_transform(self, source: IcebergType) -> "Callable[[pa.Array], pa.Array]": + return lambda v: v + + @property + def supports_pyarrow_transform(self) -> bool: + return True + class TruncateTransform(Transform[S, S]): """A transform for truncating a value to a specified width. 
@@ -725,6 +813,9 @@ def __repr__(self) -> str: """Return the string representation of the TruncateTransform class.""" return f"TruncateTransform(width={self._width})" + def pyarrow_transform(self, source: IcebergType) -> "Callable[[pa.Array], pa.Array]": + raise NotImplementedError() + @singledispatch def _human_string(value: Any, _type: IcebergType) -> str: @@ -807,6 +898,9 @@ def __repr__(self) -> str: """Return the string representation of the UnknownTransform class.""" return f"UnknownTransform(transform={repr(self._transform)})" + def pyarrow_transform(self, source: IcebergType) -> "Callable[[pa.Array], pa.Array]": + raise NotImplementedError() + class VoidTransform(Transform[S, None], Singleton): """A transform that always returns None.""" @@ -835,6 +929,9 @@ def __repr__(self) -> str: """Return the string representation of the VoidTransform class.""" return "VoidTransform()" + def pyarrow_transform(self, source: IcebergType) -> "Callable[[pa.Array], pa.Array]": + raise NotImplementedError() + def _truncate_number( name: str, pred: BoundLiteralPredicate[L], transform: Callable[[Optional[L]], Optional[L]] diff --git a/tests/conftest.py b/tests/conftest.py index 01915b7d82..d3f23689a2 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2158,3 +2158,46 @@ def arrow_table_with_only_nulls(pa_schema: "pa.Schema") -> "pa.Table": import pyarrow as pa return pa.Table.from_pylist([{}, {}], schema=pa_schema) + + +@pytest.fixture(scope="session") +def arrow_table_date_timestamps() -> "pa.Table": + """Pyarrow table with only date, timestamp and timestamptz values.""" + import pyarrow as pa + + return pa.Table.from_pydict( + { + "date": [date(2023, 12, 31), date(2024, 1, 1), date(2024, 1, 31), date(2024, 2, 1), date(2024, 2, 1), None], + "timestamp": [ + datetime(2023, 12, 31, 0, 0, 0), + datetime(2024, 1, 1, 0, 0, 0), + datetime(2024, 1, 31, 0, 0, 0), + datetime(2024, 2, 1, 0, 0, 0), + datetime(2024, 2, 1, 6, 0, 0), + None, + ], + "timestamptz": [ + datetime(2023, 12, 31, 0, 0, 0, tzinfo=timezone.utc), + datetime(2024, 1, 1, 0, 0, 0, tzinfo=timezone.utc), + datetime(2024, 1, 31, 0, 0, 0, tzinfo=timezone.utc), + datetime(2024, 2, 1, 0, 0, 0, tzinfo=timezone.utc), + datetime(2024, 2, 1, 6, 0, 0, tzinfo=timezone.utc), + None, + ], + }, + schema=pa.schema([ + ("date", pa.date32()), + ("timestamp", pa.timestamp(unit="us")), + ("timestamptz", pa.timestamp(unit="us", tz="UTC")), + ]), + ) + + +@pytest.fixture(scope="session") +def arrow_table_date_timestamps_schema() -> Schema: + """Pyarrow table Schema with only date, timestamp and timestamptz values.""" + return Schema( + NestedField(field_id=1, name="date", field_type=DateType(), required=False), + NestedField(field_id=2, name="timestamp", field_type=TimestampType(), required=False), + NestedField(field_id=3, name="timestamptz", field_type=TimestamptzType(), required=False), + ) diff --git a/tests/integration/test_writes/test_partitioned_writes.py b/tests/integration/test_writes/test_partitioned_writes.py index 5cb03e59d8..76d559ca57 100644 --- a/tests/integration/test_writes/test_partitioned_writes.py +++ b/tests/integration/test_writes/test_partitioned_writes.py @@ -16,6 +16,10 @@ # under the License. 
# pylint:disable=redefined-outer-name + +from datetime import date +from typing import Any, Set + import pyarrow as pa import pytest from pyspark.sql import SparkSession @@ -23,12 +27,14 @@ from pyiceberg.catalog import Catalog from pyiceberg.exceptions import NoSuchTableError from pyiceberg.partitioning import PartitionField, PartitionSpec +from pyiceberg.schema import Schema from pyiceberg.transforms import ( BucketTransform, DayTransform, HourTransform, IdentityTransform, MonthTransform, + Transform, TruncateTransform, YearTransform, ) @@ -351,18 +357,6 @@ def test_invalid_arguments(spark: SparkSession, session_catalog: Catalog) -> Non (PartitionSpec(PartitionField(source_id=5, field_id=1001, transform=TruncateTransform(2), name="long_trunc"))), (PartitionSpec(PartitionField(source_id=2, field_id=1001, transform=TruncateTransform(2), name="string_trunc"))), (PartitionSpec(PartitionField(source_id=11, field_id=1001, transform=TruncateTransform(2), name="binary_trunc"))), - (PartitionSpec(PartitionField(source_id=8, field_id=1001, transform=YearTransform(), name="timestamp_year"))), - (PartitionSpec(PartitionField(source_id=9, field_id=1001, transform=YearTransform(), name="timestamptz_year"))), - (PartitionSpec(PartitionField(source_id=10, field_id=1001, transform=YearTransform(), name="date_year"))), - (PartitionSpec(PartitionField(source_id=8, field_id=1001, transform=MonthTransform(), name="timestamp_month"))), - (PartitionSpec(PartitionField(source_id=9, field_id=1001, transform=MonthTransform(), name="timestamptz_month"))), - (PartitionSpec(PartitionField(source_id=10, field_id=1001, transform=MonthTransform(), name="date_month"))), - (PartitionSpec(PartitionField(source_id=8, field_id=1001, transform=DayTransform(), name="timestamp_day"))), - (PartitionSpec(PartitionField(source_id=9, field_id=1001, transform=DayTransform(), name="timestamptz_day"))), - (PartitionSpec(PartitionField(source_id=10, field_id=1001, transform=DayTransform(), name="date_day"))), - (PartitionSpec(PartitionField(source_id=8, field_id=1001, transform=HourTransform(), name="timestamp_hour"))), - (PartitionSpec(PartitionField(source_id=9, field_id=1001, transform=HourTransform(), name="timestamptz_hour"))), - (PartitionSpec(PartitionField(source_id=10, field_id=1001, transform=HourTransform(), name="date_hour"))), ], ) def test_unsupported_transform( @@ -382,5 +376,186 @@ def test_unsupported_transform( properties={"format-version": "1"}, ) - with pytest.raises(ValueError, match="All transforms are not supported.*"): + with pytest.raises( + ValueError, + match="Not all partition types are supported for writes. 
Following partitions cannot be written using pyarrow: *", + ): tbl.append(arrow_table_with_null) + + +@pytest.mark.integration +@pytest.mark.parametrize( + "transform,expected_rows", + [ + pytest.param(YearTransform(), 2, id="year_transform"), + pytest.param(MonthTransform(), 3, id="month_transform"), + pytest.param(DayTransform(), 3, id="day_transform"), + ], +) +@pytest.mark.parametrize("part_col", ["date", "timestamp", "timestamptz"]) +@pytest.mark.parametrize("format_version", [1, 2]) +def test_append_ymd_transform_partitioned( + session_catalog: Catalog, + spark: SparkSession, + arrow_table_with_null: pa.Table, + transform: Transform[Any, Any], + expected_rows: int, + part_col: str, + format_version: int, +) -> None: + # Given + identifier = f"default.arrow_table_v{format_version}_with_{str(transform)}_partition_on_col_{part_col}" + nested_field = TABLE_SCHEMA.find_field(part_col) + partition_spec = PartitionSpec( + PartitionField(source_id=nested_field.field_id, field_id=1001, transform=transform, name=part_col) + ) + + # When + tbl = _create_table( + session_catalog=session_catalog, + identifier=identifier, + properties={"format-version": str(format_version)}, + data=[arrow_table_with_null], + partition_spec=partition_spec, + ) + + # Then + assert tbl.format_version == format_version, f"Expected v{format_version}, got: v{tbl.format_version}" + df = spark.table(identifier) + assert df.count() == 3, f"Expected 3 total rows for {identifier}" + for col in TEST_DATA_WITH_NULL.keys(): + assert df.where(f"{col} is not null").count() == 2, f"Expected 2 non-null rows for {col}" + assert df.where(f"{col} is null").count() == 1, f"Expected 1 null row for {col} is null" + + assert tbl.inspect.partitions().num_rows == expected_rows + files_df = spark.sql( + f""" + SELECT * + FROM {identifier}.files + """ + ) + assert files_df.count() == expected_rows + + +@pytest.mark.integration +@pytest.mark.parametrize( + "transform,expected_partitions", + [ + pytest.param(YearTransform(), {53, 54, None}, id="year_transform"), + pytest.param(MonthTransform(), {647, 648, 649, None}, id="month_transform"), + pytest.param( + DayTransform(), {date(2023, 12, 31), date(2024, 1, 1), date(2024, 1, 31), date(2024, 2, 1), None}, id="day_transform" + ), + pytest.param(HourTransform(), {473328, 473352, 474072, 474096, 474102, None}, id="hour_transform"), + ], +) +@pytest.mark.parametrize("format_version", [1, 2]) +def test_append_transform_partition_verify_partitions_count( + session_catalog: Catalog, + spark: SparkSession, + arrow_table_date_timestamps: pa.Table, + arrow_table_date_timestamps_schema: Schema, + transform: Transform[Any, Any], + expected_partitions: Set[Any], + format_version: int, +) -> None: + # Given + part_col = "timestamptz" + identifier = f"default.arrow_table_v{format_version}_with_{str(transform)}_transform_partitioned_on_col_{part_col}" + nested_field = arrow_table_date_timestamps_schema.find_field(part_col) + partition_spec = PartitionSpec( + PartitionField(source_id=nested_field.field_id, field_id=1001, transform=transform, name=part_col), + ) + + # When + tbl = _create_table( + session_catalog=session_catalog, + identifier=identifier, + properties={"format-version": str(format_version)}, + data=[arrow_table_date_timestamps], + partition_spec=partition_spec, + schema=arrow_table_date_timestamps_schema, + ) + + # Then + assert tbl.format_version == format_version, f"Expected v{format_version}, got: v{tbl.format_version}" + df = spark.table(identifier) + assert df.count() == 6, f"Expected 6 total 
rows for {identifier}" + for col in arrow_table_date_timestamps.column_names: + assert df.where(f"{col} is not null").count() == 5, f"Expected 2 non-null rows for {col}" + assert df.where(f"{col} is null").count() == 1, f"Expected 1 null row for {col} is null" + + partitions_table = tbl.inspect.partitions() + assert partitions_table.num_rows == len(expected_partitions) + assert {part[part_col] for part in partitions_table["partition"].to_pylist()} == expected_partitions + files_df = spark.sql( + f""" + SELECT * + FROM {identifier}.files + """ + ) + assert files_df.count() == len(expected_partitions) + + +@pytest.mark.integration +@pytest.mark.parametrize("format_version", [1, 2]) +def test_append_multiple_partitions( + session_catalog: Catalog, + spark: SparkSession, + arrow_table_date_timestamps: pa.Table, + arrow_table_date_timestamps_schema: Schema, + format_version: int, +) -> None: + # Given + identifier = f"default.arrow_table_v{format_version}_with_multiple_partitions" + partition_spec = PartitionSpec( + PartitionField( + source_id=arrow_table_date_timestamps_schema.find_field("date").field_id, + field_id=1001, + transform=YearTransform(), + name="date_year", + ), + PartitionField( + source_id=arrow_table_date_timestamps_schema.find_field("timestamptz").field_id, + field_id=1000, + transform=HourTransform(), + name="timestamptz_hour", + ), + ) + + # When + tbl = _create_table( + session_catalog=session_catalog, + identifier=identifier, + properties={"format-version": str(format_version)}, + data=[arrow_table_date_timestamps], + partition_spec=partition_spec, + schema=arrow_table_date_timestamps_schema, + ) + + # Then + assert tbl.format_version == format_version, f"Expected v{format_version}, got: v{tbl.format_version}" + df = spark.table(identifier) + assert df.count() == 6, f"Expected 6 total rows for {identifier}" + for col in arrow_table_date_timestamps.column_names: + assert df.where(f"{col} is not null").count() == 5, f"Expected 2 non-null rows for {col}" + assert df.where(f"{col} is null").count() == 1, f"Expected 1 null row for {col} is null" + + partitions_table = tbl.inspect.partitions() + assert partitions_table.num_rows == 6 + partitions = partitions_table["partition"].to_pylist() + assert {(part["date_year"], part["timestamptz_hour"]) for part in partitions} == { + (53, 473328), + (54, 473352), + (54, 474072), + (54, 474096), + (54, 474102), + (None, None), + } + files_df = spark.sql( + f""" + SELECT * + FROM {identifier}.files + """ + ) + assert files_df.count() == 6 diff --git a/tests/test_transforms.py b/tests/test_transforms.py index b8bef4b998..3a9ffd6009 100644 --- a/tests/test_transforms.py +++ b/tests/test_transforms.py @@ -17,7 +17,7 @@ # pylint: disable=eval-used,protected-access,redefined-outer-name from datetime import date from decimal import Decimal -from typing import Any, Callable, Optional +from typing import TYPE_CHECKING, Any, Callable, Optional from uuid import UUID import mmh3 as mmh3 @@ -69,6 +69,7 @@ TimestampLiteral, literal, ) +from pyiceberg.partitioning import _to_partition_representation from pyiceberg.schema import Accessor from pyiceberg.transforms import ( BucketTransform, @@ -111,6 +112,9 @@ timestamptz_to_micros, ) +if TYPE_CHECKING: + import pyarrow as pa + @pytest.mark.parametrize( "test_input,test_type,expected", @@ -1808,3 +1812,31 @@ def test_strict_binary(bound_reference_binary: BoundReference[str]) -> None: _test_projection( lhs=transform.strict_project(name="name", pred=BoundIn(term=bound_reference_binary, 
literals=set_of_literals)), rhs=None ) + + +@pytest.mark.parametrize( + "transform", + [ + pytest.param(YearTransform(), id="year_transform"), + pytest.param(MonthTransform(), id="month_transform"), + pytest.param(DayTransform(), id="day_transform"), + pytest.param(HourTransform(), id="hour_transform"), + ], +) +@pytest.mark.parametrize( + "source_col, source_type", [("date", DateType()), ("timestamp", TimestampType()), ("timestamptz", TimestamptzType())] +) +def test_ymd_pyarrow_transforms( + arrow_table_date_timestamps: "pa.Table", + source_col: str, + source_type: PrimitiveType, + transform: Transform[Any, Any], +) -> None: + if transform.can_transform(source_type): + assert transform.pyarrow_transform(source_type)(arrow_table_date_timestamps[source_col]).to_pylist() == [ + transform.transform(source_type)(_to_partition_representation(source_type, v)) + for v in arrow_table_date_timestamps[source_col].to_pylist() + ] + else: + with pytest.raises(ValueError): + transform.pyarrow_transform(DateType())(arrow_table_date_timestamps[source_col]) From 31c6c23d428a3237589ebada2b4cd64bf37b1aef Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 2 Jun 2024 20:30:51 +0200 Subject: [PATCH 29/68] Bump mypy-boto3-glue from 1.34.110 to 1.34.115 (#780) Bumps [mypy-boto3-glue](https://github.com/youtype/mypy_boto3_builder) from 1.34.110 to 1.34.115. - [Release notes](https://github.com/youtype/mypy_boto3_builder/releases) - [Commits](https://github.com/youtype/mypy_boto3_builder/commits) --- updated-dependencies: - dependency-name: mypy-boto3-glue dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/poetry.lock b/poetry.lock index adacbf7179..f31671936d 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2490,13 +2490,13 @@ files = [ [[package]] name = "mypy-boto3-glue" -version = "1.34.110" -description = "Type annotations for boto3.Glue 1.34.110 service generated with mypy-boto3-builder 7.24.0" +version = "1.34.115" +description = "Type annotations for boto3.Glue 1.34.115 service generated with mypy-boto3-builder 7.24.0" optional = true python-versions = ">=3.8" files = [ - {file = "mypy_boto3_glue-1.34.110-py3-none-any.whl", hash = "sha256:795eca329426bf1ae3dc95090cccafcd7b3d91c4c594dac4db1fd9d6c72390c9"}, - {file = "mypy_boto3_glue-1.34.110.tar.gz", hash = "sha256:80d39849ac10ad9d57d85b94016fce8caba2cb70a3544b5b8b9bf0713ab3a041"}, + {file = "mypy_boto3_glue-1.34.115-py3-none-any.whl", hash = "sha256:d1c4633e81381f0e048e65a6f9fd5dcc221e05d18f928d12b74c6ec981ff72a7"}, + {file = "mypy_boto3_glue-1.34.115.tar.gz", hash = "sha256:dae8c44e3237801f80debf424f17b3d9dfd30aa3425fbe270dd87c572d539d9f"}, ] [package.dependencies] From e61ef5770b4d73e683e2c78bebdd6c2165102a6b Mon Sep 17 00:00:00 2001 From: Sung Yun <107272191+syun64@users.noreply.github.com> Date: Mon, 3 Jun 2024 12:26:56 -0400 Subject: [PATCH 30/68] Add `include_field_ids` flag in `schema_to_pyarrow` (#789) * include_field_ids flag * include_field_ids flag --- pyiceberg/io/pyarrow.py | 25 +++++++++++------- tests/io/test_pyarrow.py | 57 ++++++++++++++++++++-------------------- 2 files changed, 45 insertions(+), 37 deletions(-) diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py index 04f30ec63e..71925c27cd 100644 --- a/pyiceberg/io/pyarrow.py +++ 
b/pyiceberg/io/pyarrow.py @@ -469,15 +469,18 @@ def __setstate__(self, state: Dict[str, Any]) -> None: self.fs_by_scheme = lru_cache(self._initialize_fs) -def schema_to_pyarrow(schema: Union[Schema, IcebergType], metadata: Dict[bytes, bytes] = EMPTY_DICT) -> pa.schema: - return visit(schema, _ConvertToArrowSchema(metadata)) +def schema_to_pyarrow( + schema: Union[Schema, IcebergType], metadata: Dict[bytes, bytes] = EMPTY_DICT, include_field_ids: bool = True +) -> pa.schema: + return visit(schema, _ConvertToArrowSchema(metadata, include_field_ids)) class _ConvertToArrowSchema(SchemaVisitorPerPrimitiveType[pa.DataType]): _metadata: Dict[bytes, bytes] - def __init__(self, metadata: Dict[bytes, bytes] = EMPTY_DICT) -> None: + def __init__(self, metadata: Dict[bytes, bytes] = EMPTY_DICT, include_field_ids: bool = True) -> None: self._metadata = metadata + self._include_field_ids = include_field_ids def schema(self, _: Schema, struct_result: pa.StructType) -> pa.schema: return pa.schema(list(struct_result), metadata=self._metadata) @@ -486,13 +489,17 @@ def struct(self, _: StructType, field_results: List[pa.DataType]) -> pa.DataType return pa.struct(field_results) def field(self, field: NestedField, field_result: pa.DataType) -> pa.Field: + metadata = {} + if field.doc: + metadata[PYARROW_FIELD_DOC_KEY] = field.doc + if self._include_field_ids: + metadata[PYARROW_PARQUET_FIELD_ID_KEY] = str(field.field_id) + return pa.field( name=field.name, type=field_result, nullable=field.optional, - metadata={PYARROW_FIELD_DOC_KEY: field.doc, PYARROW_PARQUET_FIELD_ID_KEY: str(field.field_id)} - if field.doc - else {PYARROW_PARQUET_FIELD_ID_KEY: str(field.field_id)}, + metadata=metadata, ) def list(self, list_type: ListType, element_result: pa.DataType) -> pa.DataType: @@ -1130,7 +1137,7 @@ def project_table( tables = [f.result() for f in completed_futures if f.result()] if len(tables) < 1: - return pa.Table.from_batches([], schema=schema_to_pyarrow(projected_schema)) + return pa.Table.from_batches([], schema=schema_to_pyarrow(projected_schema, include_field_ids=False)) result = pa.concat_tables(tables) @@ -1161,7 +1168,7 @@ def __init__(self, file_schema: Schema): def _cast_if_needed(self, field: NestedField, values: pa.Array) -> pa.Array: file_field = self.file_schema.find_field(field.field_id) if field.field_type.is_primitive and field.field_type != file_field.field_type: - return values.cast(schema_to_pyarrow(promote(file_field.field_type, field.field_type))) + return values.cast(schema_to_pyarrow(promote(file_field.field_type, field.field_type), include_field_ids=False)) return values def _construct_field(self, field: NestedField, arrow_type: pa.DataType) -> pa.Field: @@ -1188,7 +1195,7 @@ def struct( field_arrays.append(array) fields.append(self._construct_field(field, array.type)) elif field.optional: - arrow_type = schema_to_pyarrow(field.field_type) + arrow_type = schema_to_pyarrow(field.field_type, include_field_ids=False) field_arrays.append(pa.nulls(len(struct_array), type=arrow_type)) fields.append(self._construct_field(field, arrow_type)) else: diff --git a/tests/io/test_pyarrow.py b/tests/io/test_pyarrow.py index ec511f959d..baa9e30824 100644 --- a/tests/io/test_pyarrow.py +++ b/tests/io/test_pyarrow.py @@ -344,7 +344,7 @@ def test_deleting_hdfs_file_not_found() -> None: assert "Cannot delete file, does not exist:" in str(exc_info.value) -def test_schema_to_pyarrow_schema(table_schema_nested: Schema) -> None: +def test_schema_to_pyarrow_schema_include_field_ids(table_schema_nested: Schema) -> 
None: actual = schema_to_pyarrow(table_schema_nested) expected = """foo: string -- field metadata -- @@ -402,6 +402,30 @@ def test_schema_to_pyarrow_schema(table_schema_nested: Schema) -> None: assert repr(actual) == expected +def test_schema_to_pyarrow_schema_exclude_field_ids(table_schema_nested: Schema) -> None: + actual = schema_to_pyarrow(table_schema_nested, include_field_ids=False) + expected = """foo: string +bar: int32 not null +baz: bool +qux: list not null + child 0, element: string not null +quux: map> not null + child 0, entries: struct not null> not null + child 0, key: string not null + child 1, value: map not null + child 0, entries: struct not null + child 0, key: string not null + child 1, value: int32 not null +location: list not null> not null + child 0, element: struct not null + child 0, latitude: float + child 1, longitude: float +person: struct + child 0, name: string + child 1, age: int32 not null""" + assert repr(actual) == expected + + def test_fixed_type_to_pyarrow() -> None: length = 22 iceberg_type = FixedType(length) @@ -945,23 +969,13 @@ def test_projection_add_column(file_int: str) -> None: == """id: int32 list: list child 0, element: int32 - -- field metadata -- - PARQUET:field_id: '21' map: map child 0, entries: struct not null child 0, key: int32 not null - -- field metadata -- - PARQUET:field_id: '31' child 1, value: string - -- field metadata -- - PARQUET:field_id: '32' location: struct child 0, lat: double - -- field metadata -- - PARQUET:field_id: '41' - child 1, lon: double - -- field metadata -- - PARQUET:field_id: '42'""" + child 1, lon: double""" ) @@ -1014,11 +1028,7 @@ def test_projection_add_column_struct(schema_int: Schema, file_int: str) -> None == """id: map child 0, entries: struct not null child 0, key: int32 not null - -- field metadata -- - PARQUET:field_id: '3' - child 1, value: string - -- field metadata -- - PARQUET:field_id: '4'""" + child 1, value: string""" ) @@ -1062,12 +1072,7 @@ def test_projection_concat_files(schema_int: Schema, file_int: str) -> None: def test_projection_filter(schema_int: Schema, file_int: str) -> None: result_table = project(schema_int, [file_int], GreaterThan("id", 4)) assert len(result_table.columns[0]) == 0 - assert ( - repr(result_table.schema) - == """id: int32 - -- field metadata -- - PARQUET:field_id: '1'""" - ) + assert repr(result_table.schema) == """id: int32""" def test_projection_filter_renamed_column(file_int: str) -> None: @@ -1304,11 +1309,7 @@ def test_projection_nested_struct_different_parent_id(file_struct: str) -> None: repr(result_table.schema) == """location: struct child 0, lat: double - -- field metadata -- - PARQUET:field_id: '41' - child 1, long: double - -- field metadata -- - PARQUET:field_id: '42'""" + child 1, long: double""" ) From 18448fde67a285021d5d627dbe9a7ef5d8b533b0 Mon Sep 17 00:00:00 2001 From: Chinmay Bhat <12948588+chinmay-bhat@users.noreply.github.com> Date: Mon, 3 Jun 2024 22:10:47 +0530 Subject: [PATCH 31/68] Support getting snapshot at or right before the given timestamp (#748) --- pyiceberg/table/__init__.py | 12 ++++++++++++ pyiceberg/table/snapshots.py | 16 +++++++++++++++- tests/table/test_init.py | 37 ++++++++++++++++++++++++++++++++++++ 3 files changed, 64 insertions(+), 1 deletion(-) diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py index f160ab2441..2d4b342461 100644 --- a/pyiceberg/table/__init__.py +++ b/pyiceberg/table/__init__.py @@ -1302,6 +1302,18 @@ def snapshot_by_name(self, name: str) -> Optional[Snapshot]: return 
self.snapshot_by_id(ref.snapshot_id) return None + def snapshot_as_of_timestamp(self, timestamp_ms: int, inclusive: bool = True) -> Optional[Snapshot]: + """Get the snapshot that was current as of or right before the given timestamp, or None if there is no matching snapshot. + + Args: + timestamp_ms: Find snapshot that was current at/before this timestamp + inclusive: Includes timestamp_ms in search when True. Excludes timestamp_ms when False + """ + for log_entry in reversed(self.history()): + if (inclusive and log_entry.timestamp_ms <= timestamp_ms) or log_entry.timestamp_ms < timestamp_ms: + return self.snapshot_by_id(log_entry.snapshot_id) + return None + def history(self) -> List[SnapshotLogEntry]: """Get the snapshot history of this table.""" return self.metadata.snapshot_log diff --git a/pyiceberg/table/snapshots.py b/pyiceberg/table/snapshots.py index e2ce3fe4f1..b21a0f5613 100644 --- a/pyiceberg/table/snapshots.py +++ b/pyiceberg/table/snapshots.py @@ -14,10 +14,12 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +from __future__ import annotations + import time from collections import defaultdict from enum import Enum -from typing import Any, DefaultDict, Dict, List, Mapping, Optional +from typing import TYPE_CHECKING, Any, DefaultDict, Dict, Iterable, List, Mapping, Optional from pydantic import Field, PrivateAttr, model_serializer @@ -25,6 +27,9 @@ from pyiceberg.manifest import DataFile, DataFileContent, ManifestFile, read_manifest_list from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionSpec from pyiceberg.schema import Schema + +if TYPE_CHECKING: + from pyiceberg.table.metadata import TableMetadata from pyiceberg.typedef import IcebergBaseModel ADDED_DATA_FILES = "added-data-files" @@ -412,3 +417,12 @@ def _update_totals(total_property: str, added_property: str, removed_property: s def set_when_positive(properties: Dict[str, str], num: int, property_name: str) -> None: if num > 0: properties[property_name] = str(num) + + +def ancestors_of(current_snapshot: Optional[Snapshot], table_metadata: TableMetadata) -> Iterable[Snapshot]: + """Get the ancestors of and including the given snapshot.""" + if current_snapshot: + yield current_snapshot + if current_snapshot.parent_snapshot_id is not None: + if parent := table_metadata.snapshot_by_id(current_snapshot.parent_snapshot_id): + yield from ancestors_of(parent, table_metadata) diff --git a/tests/table/test_init.py b/tests/table/test_init.py index 11d50db8a5..20b77b6abd 100644 --- a/tests/table/test_init.py +++ b/tests/table/test_init.py @@ -76,6 +76,7 @@ Snapshot, SnapshotLogEntry, Summary, + ancestors_of, ) from pyiceberg.table.sorting import ( NullOrder, @@ -204,6 +205,42 @@ def test_snapshot_by_id(table_v2: Table) -> None: ) +def test_snapshot_by_timestamp(table_v2: Table) -> None: + assert table_v2.snapshot_as_of_timestamp(1515100955770) == Snapshot( + snapshot_id=3051729675574597004, + parent_snapshot_id=None, + sequence_number=0, + timestamp_ms=1515100955770, + manifest_list="s3://a/b/1.avro", + summary=Summary(Operation.APPEND), + schema_id=None, + ) + assert table_v2.snapshot_as_of_timestamp(1515100955770, inclusive=False) is None + + +def test_ancestors_of(table_v2: Table) -> None: + assert list(ancestors_of(table_v2.current_snapshot(), table_v2.metadata)) == [ + Snapshot( + snapshot_id=3055729675574597004, + parent_snapshot_id=3051729675574597004, + sequence_number=1, + timestamp_ms=1555100955770, + 
manifest_list="s3://a/b/2.avro", + summary=Summary(Operation.APPEND), + schema_id=1, + ), + Snapshot( + snapshot_id=3051729675574597004, + parent_snapshot_id=None, + sequence_number=0, + timestamp_ms=1515100955770, + manifest_list="s3://a/b/1.avro", + summary=Summary(Operation.APPEND), + schema_id=None, + ), + ] + + def test_snapshot_by_id_does_not_exist(table_v2: Table) -> None: assert table_v2.snapshot_by_id(-1) is None From a09b04cbc2ca7b919a84fb6440421078071af6d7 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 3 Jun 2024 18:56:18 -0700 Subject: [PATCH 32/68] Bump duckdb from 0.10.3 to 1.0.0 (#793) Bumps [duckdb](https://github.com/duckdb/duckdb) from 0.10.3 to 1.0.0. - [Release notes](https://github.com/duckdb/duckdb/releases) - [Changelog](https://github.com/duckdb/duckdb/blob/main/tools/release-pip.py) - [Commits](https://github.com/duckdb/duckdb/compare/v0.10.3...v1.0.0) --- updated-dependencies: - dependency-name: duckdb dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 97 +++++++++++++++++++++++++------------------------- pyproject.toml | 2 +- 2 files changed, 49 insertions(+), 50 deletions(-) diff --git a/poetry.lock b/poetry.lock index f31671936d..eddcc76e29 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1030,58 +1030,57 @@ files = [ [[package]] name = "duckdb" -version = "0.10.3" +version = "1.0.0" description = "DuckDB in-process database" optional = true python-versions = ">=3.7.0" files = [ - {file = "duckdb-0.10.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cd25cc8d001c09a19340739ba59d33e12a81ab285b7a6bed37169655e1cefb31"}, - {file = "duckdb-0.10.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2f9259c637b917ca0f4c63887e8d9b35ec248f5d987c886dfc4229d66a791009"}, - {file = "duckdb-0.10.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b48f5f1542f1e4b184e6b4fc188f497be8b9c48127867e7d9a5f4a3e334f88b0"}, - {file = "duckdb-0.10.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e327f7a3951ea154bb56e3fef7da889e790bd9a67ca3c36afc1beb17d3feb6d6"}, - {file = "duckdb-0.10.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d8b20ed67da004b4481973f4254fd79a0e5af957d2382eac8624b5c527ec48c"}, - {file = "duckdb-0.10.3-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d37680b8d7be04e4709db3a66c8b3eb7ceba2a5276574903528632f2b2cc2e60"}, - {file = "duckdb-0.10.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3d34b86d6a2a6dfe8bb757f90bfe7101a3bd9e3022bf19dbddfa4b32680d26a9"}, - {file = "duckdb-0.10.3-cp310-cp310-win_amd64.whl", hash = "sha256:73b1cb283ca0f6576dc18183fd315b4e487a545667ffebbf50b08eb4e8cdc143"}, - {file = "duckdb-0.10.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:d917dde19fcec8cadcbef1f23946e85dee626ddc133e1e3f6551f15a61a03c61"}, - {file = "duckdb-0.10.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:46757e0cf5f44b4cb820c48a34f339a9ccf83b43d525d44947273a585a4ed822"}, - {file = "duckdb-0.10.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:338c14d8ac53ac4aa9ec03b6f1325ecfe609ceeb72565124d489cb07f8a1e4eb"}, - {file = "duckdb-0.10.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:651fcb429602b79a3cf76b662a39e93e9c3e6650f7018258f4af344c816dab72"}, - {file = 
"duckdb-0.10.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d3ae3c73b98b6215dab93cc9bc936b94aed55b53c34ba01dec863c5cab9f8e25"}, - {file = "duckdb-0.10.3-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:56429b2cfe70e367fb818c2be19f59ce2f6b080c8382c4d10b4f90ba81f774e9"}, - {file = "duckdb-0.10.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b46c02c2e39e3676b1bb0dc7720b8aa953734de4fd1b762e6d7375fbeb1b63af"}, - {file = "duckdb-0.10.3-cp311-cp311-win_amd64.whl", hash = "sha256:bcd460feef56575af2c2443d7394d405a164c409e9794a4d94cb5fdaa24a0ba4"}, - {file = "duckdb-0.10.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:e229a7c6361afbb0d0ab29b1b398c10921263c52957aefe3ace99b0426fdb91e"}, - {file = "duckdb-0.10.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:732b1d3b6b17bf2f32ea696b9afc9e033493c5a3b783c292ca4b0ee7cc7b0e66"}, - {file = "duckdb-0.10.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f5380d4db11fec5021389fb85d614680dc12757ef7c5881262742250e0b58c75"}, - {file = "duckdb-0.10.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:468a4e0c0b13c55f84972b1110060d1b0f854ffeb5900a178a775259ec1562db"}, - {file = "duckdb-0.10.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0fa1e7ff8d18d71defa84e79f5c86aa25d3be80d7cb7bc259a322de6d7cc72da"}, - {file = "duckdb-0.10.3-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ed1063ed97c02e9cf2e7fd1d280de2d1e243d72268330f45344c69c7ce438a01"}, - {file = "duckdb-0.10.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:22f2aad5bb49c007f3bfcd3e81fdedbc16a2ae41f2915fc278724ca494128b0c"}, - {file = "duckdb-0.10.3-cp312-cp312-win_amd64.whl", hash = "sha256:8f9e2bb00a048eb70b73a494bdc868ce7549b342f7ffec88192a78e5a4e164bd"}, - {file = "duckdb-0.10.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:a6c2fc49875b4b54e882d68703083ca6f84b27536d57d623fc872e2f502b1078"}, - {file = "duckdb-0.10.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a66c125d0c30af210f7ee599e7821c3d1a7e09208196dafbf997d4e0cfcb81ab"}, - {file = "duckdb-0.10.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d99dd7a1d901149c7a276440d6e737b2777e17d2046f5efb0c06ad3b8cb066a6"}, - {file = "duckdb-0.10.3-cp37-cp37m-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5ec3bbdb209e6095d202202893763e26c17c88293b88ef986b619e6c8b6715bd"}, - {file = "duckdb-0.10.3-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:2b3dec4ef8ed355d7b7230b40950b30d0def2c387a2e8cd7efc80b9d14134ecf"}, - {file = "duckdb-0.10.3-cp37-cp37m-win_amd64.whl", hash = "sha256:04129f94fb49bba5eea22f941f0fb30337f069a04993048b59e2811f52d564bc"}, - {file = "duckdb-0.10.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:d75d67024fc22c8edfd47747c8550fb3c34fb1cbcbfd567e94939ffd9c9e3ca7"}, - {file = "duckdb-0.10.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f3796e9507c02d0ddbba2e84c994fae131da567ce3d9cbb4cbcd32fadc5fbb26"}, - {file = "duckdb-0.10.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:78e539d85ebd84e3e87ec44d28ad912ca4ca444fe705794e0de9be3dd5550c11"}, - {file = "duckdb-0.10.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7a99b67ac674b4de32073e9bc604b9c2273d399325181ff50b436c6da17bf00a"}, - {file = "duckdb-0.10.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1209a354a763758c4017a1f6a9f9b154a83bed4458287af9f71d84664ddb86b6"}, - 
{file = "duckdb-0.10.3-cp38-cp38-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3b735cea64aab39b67c136ab3a571dbf834067f8472ba2f8bf0341bc91bea820"}, - {file = "duckdb-0.10.3-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:816ffb9f758ed98eb02199d9321d592d7a32a6cb6aa31930f4337eb22cfc64e2"}, - {file = "duckdb-0.10.3-cp38-cp38-win_amd64.whl", hash = "sha256:1631184b94c3dc38b13bce4045bf3ae7e1b0ecbfbb8771eb8d751d8ffe1b59b3"}, - {file = "duckdb-0.10.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:fb98c35fc8dd65043bc08a2414dd9f59c680d7e8656295b8969f3f2061f26c52"}, - {file = "duckdb-0.10.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:7e75c9f5b6a92b2a6816605c001d30790f6d67ce627a2b848d4d6040686efdf9"}, - {file = "duckdb-0.10.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ae786eddf1c2fd003466e13393b9348a44b6061af6fe7bcb380a64cac24e7df7"}, - {file = "duckdb-0.10.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b9387da7b7973707b0dea2588749660dd5dd724273222680e985a2dd36787668"}, - {file = "duckdb-0.10.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:538f943bf9fa8a3a7c4fafa05f21a69539d2c8a68e557233cbe9d989ae232899"}, - {file = "duckdb-0.10.3-cp39-cp39-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6930608f35025a73eb94252964f9f19dd68cf2aaa471da3982cf6694866cfa63"}, - {file = "duckdb-0.10.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:03bc54a9cde5490918aad82d7d2a34290e3dfb78d5b889c6626625c0f141272a"}, - {file = "duckdb-0.10.3-cp39-cp39-win_amd64.whl", hash = "sha256:372b6e3901d85108cafe5df03c872dfb6f0dbff66165a0cf46c47246c1957aa0"}, - {file = "duckdb-0.10.3.tar.gz", hash = "sha256:c5bd84a92bc708d3a6adffe1f554b94c6e76c795826daaaf482afc3d9c636971"}, + {file = "duckdb-1.0.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:4a8ce2d1f9e1c23b9bab3ae4ca7997e9822e21563ff8f646992663f66d050211"}, + {file = "duckdb-1.0.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:19797670f20f430196e48d25d082a264b66150c264c1e8eae8e22c64c2c5f3f5"}, + {file = "duckdb-1.0.0-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:b71c342090fe117b35d866a91ad6bffce61cd6ff3e0cff4003f93fc1506da0d8"}, + {file = "duckdb-1.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25dd69f44ad212c35ae2ea736b0e643ea2b70f204b8dff483af1491b0e2a4cec"}, + {file = "duckdb-1.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8da5f293ecb4f99daa9a9352c5fd1312a6ab02b464653a0c3a25ab7065c45d4d"}, + {file = "duckdb-1.0.0-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3207936da9967ddbb60644ec291eb934d5819b08169bc35d08b2dedbe7068c60"}, + {file = "duckdb-1.0.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:1128d6c9c33e883b1f5df6b57c1eb46b7ab1baf2650912d77ee769aaa05111f9"}, + {file = "duckdb-1.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:02310d263474d0ac238646677feff47190ffb82544c018b2ff732a4cb462c6ef"}, + {file = "duckdb-1.0.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:75586791ab2702719c284157b65ecefe12d0cca9041da474391896ddd9aa71a4"}, + {file = "duckdb-1.0.0-cp311-cp311-macosx_12_0_universal2.whl", hash = "sha256:83bb415fc7994e641344f3489e40430ce083b78963cb1057bf714ac3a58da3ba"}, + {file = "duckdb-1.0.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:bee2e0b415074e84c5a2cefd91f6b5ebeb4283e7196ba4ef65175a7cef298b57"}, + {file = "duckdb-1.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:fa5a4110d2a499312609544ad0be61e85a5cdad90e5b6d75ad16b300bf075b90"}, + {file = "duckdb-1.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5fa389e6a382d4707b5f3d1bc2087895925ebb92b77e9fe3bfb23c9b98372fdc"}, + {file = "duckdb-1.0.0-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7ede6f5277dd851f1a4586b0c78dc93f6c26da45e12b23ee0e88c76519cbdbe0"}, + {file = "duckdb-1.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0b88cdbc0d5c3e3d7545a341784dc6cafd90fc035f17b2f04bf1e870c68456e5"}, + {file = "duckdb-1.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:fd1693cdd15375156f7fff4745debc14e5c54928589f67b87fb8eace9880c370"}, + {file = "duckdb-1.0.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:c65a7fe8a8ce21b985356ee3ec0c3d3b3b2234e288e64b4cfb03356dbe6e5583"}, + {file = "duckdb-1.0.0-cp312-cp312-macosx_12_0_universal2.whl", hash = "sha256:e5a8eda554379b3a43b07bad00968acc14dd3e518c9fbe8f128b484cf95e3d16"}, + {file = "duckdb-1.0.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:a1b6acdd54c4a7b43bd7cb584975a1b2ff88ea1a31607a2b734b17960e7d3088"}, + {file = "duckdb-1.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a677bb1b6a8e7cab4a19874249d8144296e6e39dae38fce66a80f26d15e670df"}, + {file = "duckdb-1.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:752e9d412b0a2871bf615a2ede54be494c6dc289d076974eefbf3af28129c759"}, + {file = "duckdb-1.0.0-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3aadb99d098c5e32d00dc09421bc63a47134a6a0de9d7cd6abf21780b678663c"}, + {file = "duckdb-1.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:83b7091d4da3e9301c4f9378833f5ffe934fb1ad2b387b439ee067b2c10c8bb0"}, + {file = "duckdb-1.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:6a8058d0148b544694cb5ea331db44f6c2a00a7b03776cc4dd1470735c3d5ff7"}, + {file = "duckdb-1.0.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e40cb20e5ee19d44bc66ec99969af791702a049079dc5f248c33b1c56af055f4"}, + {file = "duckdb-1.0.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d7bce1bc0de9af9f47328e24e6e7e39da30093179b1c031897c042dd94a59c8e"}, + {file = "duckdb-1.0.0-cp37-cp37m-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8355507f7a04bc0a3666958f4414a58e06141d603e91c0fa5a7c50e49867fb6d"}, + {file = "duckdb-1.0.0-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:39f1a46f5a45ad2886dc9b02ce5b484f437f90de66c327f86606d9ba4479d475"}, + {file = "duckdb-1.0.0-cp37-cp37m-win_amd64.whl", hash = "sha256:a6d29ba477b27ae41676b62c8fae8d04ee7cbe458127a44f6049888231ca58fa"}, + {file = "duckdb-1.0.0-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:1bea713c1925918714328da76e79a1f7651b2b503511498ccf5e007a7e67d49e"}, + {file = "duckdb-1.0.0-cp38-cp38-macosx_12_0_universal2.whl", hash = "sha256:bfe67f3bcf181edbf6f918b8c963eb060e6aa26697d86590da4edc5707205450"}, + {file = "duckdb-1.0.0-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:dbc6093a75242f002be1d96a6ace3fdf1d002c813e67baff52112e899de9292f"}, + {file = "duckdb-1.0.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ba1881a2b11c507cee18f8fd9ef10100be066fddaa2c20fba1f9a664245cd6d8"}, + {file = "duckdb-1.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:445d0bb35087c522705c724a75f9f1c13f1eb017305b694d2686218d653c8142"}, + {file = "duckdb-1.0.0-cp38-cp38-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", 
hash = "sha256:224553432e84432ffb9684f33206572477049b371ce68cc313a01e214f2fbdda"}, + {file = "duckdb-1.0.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:d3914032e47c4e76636ad986d466b63fdea65e37be8a6dfc484ed3f462c4fde4"}, + {file = "duckdb-1.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:af9128a2eb7e1bb50cd2c2020d825fb2946fdad0a2558920cd5411d998999334"}, + {file = "duckdb-1.0.0-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:dd2659a5dbc0df0de68f617a605bf12fe4da85ba24f67c08730984a0892087e8"}, + {file = "duckdb-1.0.0-cp39-cp39-macosx_12_0_universal2.whl", hash = "sha256:ac5a4afb0bc20725e734e0b2c17e99a274de4801aff0d4e765d276b99dad6d90"}, + {file = "duckdb-1.0.0-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:2c5a53bee3668d6e84c0536164589d5127b23d298e4c443d83f55e4150fafe61"}, + {file = "duckdb-1.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b980713244d7708b25ee0a73de0c65f0e5521c47a0e907f5e1b933d79d972ef6"}, + {file = "duckdb-1.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21cbd4f9fe7b7a56eff96c3f4d6778770dd370469ca2212eddbae5dd63749db5"}, + {file = "duckdb-1.0.0-cp39-cp39-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ed228167c5d49888c5ef36f6f9cbf65011c2daf9dcb53ea8aa7a041ce567b3e4"}, + {file = "duckdb-1.0.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:46d8395fbcea7231fd5032a250b673cc99352fef349b718a23dea2c0dd2b8dec"}, + {file = "duckdb-1.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:6ad1fc1a4d57e7616944166a5f9417bdbca1ea65c490797e3786e3a42e162d8a"}, + {file = "duckdb-1.0.0.tar.gz", hash = "sha256:a2a059b77bc7d5b76ae9d88e267372deff19c291048d59450c431e166233d453"}, ] [[package]] @@ -4462,4 +4461,4 @@ zstandard = ["zstandard"] [metadata] lock-version = "2.0" python-versions = "^3.8" -content-hash = "8024e9ca0aa700346e902b232337c8bad69e5cd6e482db4999446f6177e7646d" +content-hash = "70062b276f3f6683a563c251678e45ad68784a2b906c75f2e5ef274f6553330e" diff --git a/pyproject.toml b/pyproject.toml index 3a928ec47c..cb2f668553 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -60,7 +60,7 @@ zstandard = ">=0.13.0,<1.0.0" tenacity = ">=8.2.3,<9.0.0" pyarrow = { version = ">=9.0.0,<17.0.0", optional = true } pandas = { version = ">=1.0.0,<3.0.0", optional = true } -duckdb = { version = ">=0.5.0,<1.0.0", optional = true } +duckdb = { version = ">=0.5.0,<2.0.0", optional = true } ray = { version = ">=2.0.0,<2.10.0", optional = true } python-snappy = { version = ">=0.6.0,<1.0.0", optional = true } thrift = { version = ">=0.13.0,<1.0.0", optional = true } From 3585778898804b93cc650ec0d4e8aaac79923369 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 3 Jun 2024 23:58:24 -0700 Subject: [PATCH 33/68] Bump typing-extensions from 4.12.0 to 4.12.1 (#794) Bumps [typing-extensions](https://github.com/python/typing_extensions) from 4.12.0 to 4.12.1. - [Release notes](https://github.com/python/typing_extensions/releases) - [Changelog](https://github.com/python/typing_extensions/blob/main/CHANGELOG.md) - [Commits](https://github.com/python/typing_extensions/compare/4.12.0...4.12.1) --- updated-dependencies: - dependency-name: typing-extensions dependency-type: direct:development update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 8 ++++---- pyproject.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/poetry.lock b/poetry.lock index eddcc76e29..5d14cb30bd 100644 --- a/poetry.lock +++ b/poetry.lock @@ -4082,13 +4082,13 @@ telegram = ["requests"] [[package]] name = "typing-extensions" -version = "4.12.0" +version = "4.12.1" description = "Backported and Experimental Type Hints for Python 3.8+" optional = false python-versions = ">=3.8" files = [ - {file = "typing_extensions-4.12.0-py3-none-any.whl", hash = "sha256:b349c66bea9016ac22978d800cfff206d5f9816951f12a7d0ec5578b0a819594"}, - {file = "typing_extensions-4.12.0.tar.gz", hash = "sha256:8cbcdc8606ebcb0d95453ad7dc5065e6237b6aa230a31e81d0f440c30fed5fd8"}, + {file = "typing_extensions-4.12.1-py3-none-any.whl", hash = "sha256:6024b58b69089e5a89c347397254e35f1bf02a907728ec7fee9bf0fe837d203a"}, + {file = "typing_extensions-4.12.1.tar.gz", hash = "sha256:915f5e35ff76f56588223f15fdd5938f9a1cf9195c0de25130c627e4d597f6d1"}, ] [[package]] @@ -4461,4 +4461,4 @@ zstandard = ["zstandard"] [metadata] lock-version = "2.0" python-versions = "^3.8" -content-hash = "70062b276f3f6683a563c251678e45ad68784a2b906c75f2e5ef274f6553330e" +content-hash = "850e3a96dff9279f4e7951109a55f3ae5510d92ebc010bf2c3623ddcb48f1dd3" diff --git a/pyproject.toml b/pyproject.toml index cb2f668553..9604ca8f81 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -82,7 +82,7 @@ fastavro = "1.9.4" coverage = { version = "^7.4.2", extras = ["toml"] } requests-mock = "1.12.1" moto = { version = "^5.0.2", extras = ["server"] } -typing-extensions = "4.12.0" +typing-extensions = "4.12.1" pytest-mock = "3.14.0" pyspark = "3.5.1" cython = "3.0.10" From a11036873990cd9c8aae2c8af667e2974f4bac9d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 3 Jun 2024 23:58:36 -0700 Subject: [PATCH 34/68] Bump pydantic from 2.7.2 to 2.7.3 (#795) Bumps [pydantic](https://github.com/pydantic/pydantic) from 2.7.2 to 2.7.3. - [Release notes](https://github.com/pydantic/pydantic/releases) - [Changelog](https://github.com/pydantic/pydantic/blob/main/HISTORY.md) - [Commits](https://github.com/pydantic/pydantic/compare/v2.7.2...v2.7.3) --- updated-dependencies: - dependency-name: pydantic dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 168 ++++++++++++++++++++++++++-------------------------- 1 file changed, 84 insertions(+), 84 deletions(-) diff --git a/poetry.lock b/poetry.lock index 5d14cb30bd..aa77a714ab 100644 --- a/poetry.lock +++ b/poetry.lock @@ -3023,18 +3023,18 @@ files = [ [[package]] name = "pydantic" -version = "2.7.2" +version = "2.7.3" description = "Data validation using Python type hints" optional = false python-versions = ">=3.8" files = [ - {file = "pydantic-2.7.2-py3-none-any.whl", hash = "sha256:834ab954175f94e6e68258537dc49402c4a5e9d0409b9f1b86b7e934a8372de7"}, - {file = "pydantic-2.7.2.tar.gz", hash = "sha256:71b2945998f9c9b7919a45bde9a50397b289937d215ae141c1d0903ba7149fd7"}, + {file = "pydantic-2.7.3-py3-none-any.whl", hash = "sha256:ea91b002777bf643bb20dd717c028ec43216b24a6001a280f83877fd2655d0b4"}, + {file = "pydantic-2.7.3.tar.gz", hash = "sha256:c46c76a40bb1296728d7a8b99aa73dd70a48c3510111ff290034f860c99c419e"}, ] [package.dependencies] annotated-types = ">=0.4.0" -pydantic-core = "2.18.3" +pydantic-core = "2.18.4" typing-extensions = ">=4.6.1" [package.extras] @@ -3042,90 +3042,90 @@ email = ["email-validator (>=2.0.0)"] [[package]] name = "pydantic-core" -version = "2.18.3" +version = "2.18.4" description = "Core functionality for Pydantic validation and serialization" optional = false python-versions = ">=3.8" files = [ - {file = "pydantic_core-2.18.3-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:744697428fcdec6be5670460b578161d1ffe34743a5c15656be7ea82b008197c"}, - {file = "pydantic_core-2.18.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:37b40c05ced1ba4218b14986fe6f283d22e1ae2ff4c8e28881a70fb81fbfcda7"}, - {file = "pydantic_core-2.18.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:544a9a75622357076efb6b311983ff190fbfb3c12fc3a853122b34d3d358126c"}, - {file = "pydantic_core-2.18.3-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e2e253af04ceaebde8eb201eb3f3e3e7e390f2d275a88300d6a1959d710539e2"}, - {file = "pydantic_core-2.18.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:855ec66589c68aa367d989da5c4755bb74ee92ccad4fdb6af942c3612c067e34"}, - {file = "pydantic_core-2.18.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3d3e42bb54e7e9d72c13ce112e02eb1b3b55681ee948d748842171201a03a98a"}, - {file = "pydantic_core-2.18.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c6ac9ffccc9d2e69d9fba841441d4259cb668ac180e51b30d3632cd7abca2b9b"}, - {file = "pydantic_core-2.18.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c56eca1686539fa0c9bda992e7bd6a37583f20083c37590413381acfc5f192d6"}, - {file = "pydantic_core-2.18.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:17954d784bf8abfc0ec2a633108207ebc4fa2df1a0e4c0c3ccbaa9bb01d2c426"}, - {file = "pydantic_core-2.18.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:98ed737567d8f2ecd54f7c8d4f8572ca7c7921ede93a2e52939416170d357812"}, - {file = "pydantic_core-2.18.3-cp310-none-win32.whl", hash = "sha256:9f9e04afebd3ed8c15d67a564ed0a34b54e52136c6d40d14c5547b238390e779"}, - {file = "pydantic_core-2.18.3-cp310-none-win_amd64.whl", hash = "sha256:45e4ffbae34f7ae30d0047697e724e534a7ec0a82ef9994b7913a412c21462a0"}, - {file = "pydantic_core-2.18.3-cp311-cp311-macosx_10_12_x86_64.whl", hash = 
"sha256:b9ebe8231726c49518b16b237b9fe0d7d361dd221302af511a83d4ada01183ab"}, - {file = "pydantic_core-2.18.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b8e20e15d18bf7dbb453be78a2d858f946f5cdf06c5072453dace00ab652e2b2"}, - {file = "pydantic_core-2.18.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c0d9ff283cd3459fa0bf9b0256a2b6f01ac1ff9ffb034e24457b9035f75587cb"}, - {file = "pydantic_core-2.18.3-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2f7ef5f0ebb77ba24c9970da18b771711edc5feaf00c10b18461e0f5f5949231"}, - {file = "pydantic_core-2.18.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:73038d66614d2e5cde30435b5afdced2b473b4c77d4ca3a8624dd3e41a9c19be"}, - {file = "pydantic_core-2.18.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6afd5c867a74c4d314c557b5ea9520183fadfbd1df4c2d6e09fd0d990ce412cd"}, - {file = "pydantic_core-2.18.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bd7df92f28d351bb9f12470f4c533cf03d1b52ec5a6e5c58c65b183055a60106"}, - {file = "pydantic_core-2.18.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:80aea0ffeb1049336043d07799eace1c9602519fb3192916ff525b0287b2b1e4"}, - {file = "pydantic_core-2.18.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:aaee40f25bba38132e655ffa3d1998a6d576ba7cf81deff8bfa189fb43fd2bbe"}, - {file = "pydantic_core-2.18.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:9128089da8f4fe73f7a91973895ebf2502539d627891a14034e45fb9e707e26d"}, - {file = "pydantic_core-2.18.3-cp311-none-win32.whl", hash = "sha256:fec02527e1e03257aa25b1a4dcbe697b40a22f1229f5d026503e8b7ff6d2eda7"}, - {file = "pydantic_core-2.18.3-cp311-none-win_amd64.whl", hash = "sha256:58ff8631dbab6c7c982e6425da8347108449321f61fe427c52ddfadd66642af7"}, - {file = "pydantic_core-2.18.3-cp311-none-win_arm64.whl", hash = "sha256:3fc1c7f67f34c6c2ef9c213e0f2a351797cda98249d9ca56a70ce4ebcaba45f4"}, - {file = "pydantic_core-2.18.3-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:f0928cde2ae416a2d1ebe6dee324709c6f73e93494d8c7aea92df99aab1fc40f"}, - {file = "pydantic_core-2.18.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0bee9bb305a562f8b9271855afb6ce00223f545de3d68560b3c1649c7c5295e9"}, - {file = "pydantic_core-2.18.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e862823be114387257dacbfa7d78547165a85d7add33b446ca4f4fae92c7ff5c"}, - {file = "pydantic_core-2.18.3-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6a36f78674cbddc165abab0df961b5f96b14461d05feec5e1f78da58808b97e7"}, - {file = "pydantic_core-2.18.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ba905d184f62e7ddbb7a5a751d8a5c805463511c7b08d1aca4a3e8c11f2e5048"}, - {file = "pydantic_core-2.18.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7fdd362f6a586e681ff86550b2379e532fee63c52def1c666887956748eaa326"}, - {file = "pydantic_core-2.18.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:24b214b7ee3bd3b865e963dbed0f8bc5375f49449d70e8d407b567af3222aae4"}, - {file = "pydantic_core-2.18.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:691018785779766127f531674fa82bb368df5b36b461622b12e176c18e119022"}, - {file = "pydantic_core-2.18.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:60e4c625e6f7155d7d0dcac151edf5858102bc61bf959d04469ca6ee4e8381bd"}, - {file = 
"pydantic_core-2.18.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a4e651e47d981c1b701dcc74ab8fec5a60a5b004650416b4abbef13db23bc7be"}, - {file = "pydantic_core-2.18.3-cp312-none-win32.whl", hash = "sha256:ffecbb5edb7f5ffae13599aec33b735e9e4c7676ca1633c60f2c606beb17efc5"}, - {file = "pydantic_core-2.18.3-cp312-none-win_amd64.whl", hash = "sha256:2c8333f6e934733483c7eddffdb094c143b9463d2af7e6bd85ebcb2d4a1b82c6"}, - {file = "pydantic_core-2.18.3-cp312-none-win_arm64.whl", hash = "sha256:7a20dded653e516a4655f4c98e97ccafb13753987434fe7cf044aa25f5b7d417"}, - {file = "pydantic_core-2.18.3-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:eecf63195be644b0396f972c82598cd15693550f0ff236dcf7ab92e2eb6d3522"}, - {file = "pydantic_core-2.18.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:2c44efdd3b6125419c28821590d7ec891c9cb0dff33a7a78d9d5c8b6f66b9702"}, - {file = "pydantic_core-2.18.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6e59fca51ffbdd1638b3856779342ed69bcecb8484c1d4b8bdb237d0eb5a45e2"}, - {file = "pydantic_core-2.18.3-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:70cf099197d6b98953468461d753563b28e73cf1eade2ffe069675d2657ed1d5"}, - {file = "pydantic_core-2.18.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:63081a49dddc6124754b32a3774331467bfc3d2bd5ff8f10df36a95602560361"}, - {file = "pydantic_core-2.18.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:370059b7883485c9edb9655355ff46d912f4b03b009d929220d9294c7fd9fd60"}, - {file = "pydantic_core-2.18.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5a64faeedfd8254f05f5cf6fc755023a7e1606af3959cfc1a9285744cc711044"}, - {file = "pydantic_core-2.18.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:19d2e725de0f90d8671f89e420d36c3dd97639b98145e42fcc0e1f6d492a46dc"}, - {file = "pydantic_core-2.18.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:67bc078025d70ec5aefe6200ef094576c9d86bd36982df1301c758a9fff7d7f4"}, - {file = "pydantic_core-2.18.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:adf952c3f4100e203cbaf8e0c907c835d3e28f9041474e52b651761dc248a3c0"}, - {file = "pydantic_core-2.18.3-cp38-none-win32.whl", hash = "sha256:9a46795b1f3beb167eaee91736d5d17ac3a994bf2215a996aed825a45f897558"}, - {file = "pydantic_core-2.18.3-cp38-none-win_amd64.whl", hash = "sha256:200ad4e3133cb99ed82342a101a5abf3d924722e71cd581cc113fe828f727fbc"}, - {file = "pydantic_core-2.18.3-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:304378b7bf92206036c8ddd83a2ba7b7d1a5b425acafff637172a3aa72ad7083"}, - {file = "pydantic_core-2.18.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c826870b277143e701c9ccf34ebc33ddb4d072612683a044e7cce2d52f6c3fef"}, - {file = "pydantic_core-2.18.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e201935d282707394f3668380e41ccf25b5794d1b131cdd96b07f615a33ca4b1"}, - {file = "pydantic_core-2.18.3-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5560dda746c44b48bf82b3d191d74fe8efc5686a9ef18e69bdabccbbb9ad9442"}, - {file = "pydantic_core-2.18.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6b32c2a1f8032570842257e4c19288eba9a2bba4712af542327de9a1204faff8"}, - {file = "pydantic_core-2.18.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:929c24e9dea3990bc8bcd27c5f2d3916c0c86f5511d2caa69e0d5290115344a9"}, - {file = 
"pydantic_core-2.18.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e1a8376fef60790152564b0eab376b3e23dd6e54f29d84aad46f7b264ecca943"}, - {file = "pydantic_core-2.18.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:dccf3ef1400390ddd1fb55bf0632209d39140552d068ee5ac45553b556780e06"}, - {file = "pydantic_core-2.18.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:41dbdcb0c7252b58fa931fec47937edb422c9cb22528f41cb8963665c372caf6"}, - {file = "pydantic_core-2.18.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:666e45cf071669fde468886654742fa10b0e74cd0fa0430a46ba6056b24fb0af"}, - {file = "pydantic_core-2.18.3-cp39-none-win32.whl", hash = "sha256:f9c08cabff68704a1b4667d33f534d544b8a07b8e5d039c37067fceb18789e78"}, - {file = "pydantic_core-2.18.3-cp39-none-win_amd64.whl", hash = "sha256:4afa5f5973e8572b5c0dcb4e2d4fda7890e7cd63329bd5cc3263a25c92ef0026"}, - {file = "pydantic_core-2.18.3-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:77319771a026f7c7d29c6ebc623de889e9563b7087911b46fd06c044a12aa5e9"}, - {file = "pydantic_core-2.18.3-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:df11fa992e9f576473038510d66dd305bcd51d7dd508c163a8c8fe148454e059"}, - {file = "pydantic_core-2.18.3-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d531076bdfb65af593326ffd567e6ab3da145020dafb9187a1d131064a55f97c"}, - {file = "pydantic_core-2.18.3-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d33ce258e4e6e6038f2b9e8b8a631d17d017567db43483314993b3ca345dcbbb"}, - {file = "pydantic_core-2.18.3-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1f9cd7f5635b719939019be9bda47ecb56e165e51dd26c9a217a433e3d0d59a9"}, - {file = "pydantic_core-2.18.3-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:cd4a032bb65cc132cae1fe3e52877daecc2097965cd3914e44fbd12b00dae7c5"}, - {file = "pydantic_core-2.18.3-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:82f2718430098bcdf60402136c845e4126a189959d103900ebabb6774a5d9fdb"}, - {file = "pydantic_core-2.18.3-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:c0037a92cf0c580ed14e10953cdd26528e8796307bb8bb312dc65f71547df04d"}, - {file = "pydantic_core-2.18.3-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:b95a0972fac2b1ff3c94629fc9081b16371dad870959f1408cc33b2f78ad347a"}, - {file = "pydantic_core-2.18.3-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:a62e437d687cc148381bdd5f51e3e81f5b20a735c55f690c5be94e05da2b0d5c"}, - {file = "pydantic_core-2.18.3-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b367a73a414bbb08507da102dc2cde0fa7afe57d09b3240ce82a16d608a7679c"}, - {file = "pydantic_core-2.18.3-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ecce4b2360aa3f008da3327d652e74a0e743908eac306198b47e1c58b03dd2b"}, - {file = "pydantic_core-2.18.3-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:bd4435b8d83f0c9561a2a9585b1de78f1abb17cb0cef5f39bf6a4b47d19bafe3"}, - {file = "pydantic_core-2.18.3-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:616221a6d473c5b9aa83fa8982745441f6a4a62a66436be9445c65f241b86c94"}, - {file = "pydantic_core-2.18.3-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:7e6382ce89a92bc1d0c0c5edd51e931432202b9080dc921d8d003e616402efd1"}, - {file = "pydantic_core-2.18.3-pp39-pypy39_pp73-win_amd64.whl", hash = 
"sha256:ff58f379345603d940e461eae474b6bbb6dab66ed9a851ecd3cb3709bf4dcf6a"}, - {file = "pydantic_core-2.18.3.tar.gz", hash = "sha256:432e999088d85c8f36b9a3f769a8e2b57aabd817bbb729a90d1fe7f18f6f1f39"}, + {file = "pydantic_core-2.18.4-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:f76d0ad001edd426b92233d45c746fd08f467d56100fd8f30e9ace4b005266e4"}, + {file = "pydantic_core-2.18.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:59ff3e89f4eaf14050c8022011862df275b552caef8082e37b542b066ce1ff26"}, + {file = "pydantic_core-2.18.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a55b5b16c839df1070bc113c1f7f94a0af4433fcfa1b41799ce7606e5c79ce0a"}, + {file = "pydantic_core-2.18.4-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4d0dcc59664fcb8974b356fe0a18a672d6d7cf9f54746c05f43275fc48636851"}, + {file = "pydantic_core-2.18.4-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8951eee36c57cd128f779e641e21eb40bc5073eb28b2d23f33eb0ef14ffb3f5d"}, + {file = "pydantic_core-2.18.4-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4701b19f7e3a06ea655513f7938de6f108123bf7c86bbebb1196eb9bd35cf724"}, + {file = "pydantic_core-2.18.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e00a3f196329e08e43d99b79b286d60ce46bed10f2280d25a1718399457e06be"}, + {file = "pydantic_core-2.18.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:97736815b9cc893b2b7f663628e63f436018b75f44854c8027040e05230eeddb"}, + {file = "pydantic_core-2.18.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:6891a2ae0e8692679c07728819b6e2b822fb30ca7445f67bbf6509b25a96332c"}, + {file = "pydantic_core-2.18.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bc4ff9805858bd54d1a20efff925ccd89c9d2e7cf4986144b30802bf78091c3e"}, + {file = "pydantic_core-2.18.4-cp310-none-win32.whl", hash = "sha256:1b4de2e51bbcb61fdebd0ab86ef28062704f62c82bbf4addc4e37fa4b00b7cbc"}, + {file = "pydantic_core-2.18.4-cp310-none-win_amd64.whl", hash = "sha256:6a750aec7bf431517a9fd78cb93c97b9b0c496090fee84a47a0d23668976b4b0"}, + {file = "pydantic_core-2.18.4-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:942ba11e7dfb66dc70f9ae66b33452f51ac7bb90676da39a7345e99ffb55402d"}, + {file = "pydantic_core-2.18.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b2ebef0e0b4454320274f5e83a41844c63438fdc874ea40a8b5b4ecb7693f1c4"}, + {file = "pydantic_core-2.18.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a642295cd0c8df1b86fc3dced1d067874c353a188dc8e0f744626d49e9aa51c4"}, + {file = "pydantic_core-2.18.4-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5f09baa656c904807e832cf9cce799c6460c450c4ad80803517032da0cd062e2"}, + {file = "pydantic_core-2.18.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:98906207f29bc2c459ff64fa007afd10a8c8ac080f7e4d5beff4c97086a3dabd"}, + {file = "pydantic_core-2.18.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:19894b95aacfa98e7cb093cd7881a0c76f55731efad31073db4521e2b6ff5b7d"}, + {file = "pydantic_core-2.18.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0fbbdc827fe5e42e4d196c746b890b3d72876bdbf160b0eafe9f0334525119c8"}, + {file = "pydantic_core-2.18.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f85d05aa0918283cf29a30b547b4df2fbb56b45b135f9e35b6807cb28bc47951"}, + {file = 
"pydantic_core-2.18.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e85637bc8fe81ddb73fda9e56bab24560bdddfa98aa64f87aaa4e4b6730c23d2"}, + {file = "pydantic_core-2.18.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:2f5966897e5461f818e136b8451d0551a2e77259eb0f73a837027b47dc95dab9"}, + {file = "pydantic_core-2.18.4-cp311-none-win32.whl", hash = "sha256:44c7486a4228413c317952e9d89598bcdfb06399735e49e0f8df643e1ccd0558"}, + {file = "pydantic_core-2.18.4-cp311-none-win_amd64.whl", hash = "sha256:8a7164fe2005d03c64fd3b85649891cd4953a8de53107940bf272500ba8a788b"}, + {file = "pydantic_core-2.18.4-cp311-none-win_arm64.whl", hash = "sha256:4e99bc050fe65c450344421017f98298a97cefc18c53bb2f7b3531eb39bc7805"}, + {file = "pydantic_core-2.18.4-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:6f5c4d41b2771c730ea1c34e458e781b18cc668d194958e0112455fff4e402b2"}, + {file = "pydantic_core-2.18.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2fdf2156aa3d017fddf8aea5adfba9f777db1d6022d392b682d2a8329e087cef"}, + {file = "pydantic_core-2.18.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4748321b5078216070b151d5271ef3e7cc905ab170bbfd27d5c83ee3ec436695"}, + {file = "pydantic_core-2.18.4-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:847a35c4d58721c5dc3dba599878ebbdfd96784f3fb8bb2c356e123bdcd73f34"}, + {file = "pydantic_core-2.18.4-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3c40d4eaad41f78e3bbda31b89edc46a3f3dc6e171bf0ecf097ff7a0ffff7cb1"}, + {file = "pydantic_core-2.18.4-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:21a5e440dbe315ab9825fcd459b8814bb92b27c974cbc23c3e8baa2b76890077"}, + {file = "pydantic_core-2.18.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:01dd777215e2aa86dfd664daed5957704b769e726626393438f9c87690ce78c3"}, + {file = "pydantic_core-2.18.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4b06beb3b3f1479d32befd1f3079cc47b34fa2da62457cdf6c963393340b56e9"}, + {file = "pydantic_core-2.18.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:564d7922e4b13a16b98772441879fcdcbe82ff50daa622d681dd682175ea918c"}, + {file = "pydantic_core-2.18.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:0eb2a4f660fcd8e2b1c90ad566db2b98d7f3f4717c64fe0a83e0adb39766d5b8"}, + {file = "pydantic_core-2.18.4-cp312-none-win32.whl", hash = "sha256:8b8bab4c97248095ae0c4455b5a1cd1cdd96e4e4769306ab19dda135ea4cdb07"}, + {file = "pydantic_core-2.18.4-cp312-none-win_amd64.whl", hash = "sha256:14601cdb733d741b8958224030e2bfe21a4a881fb3dd6fbb21f071cabd48fa0a"}, + {file = "pydantic_core-2.18.4-cp312-none-win_arm64.whl", hash = "sha256:c1322d7dd74713dcc157a2b7898a564ab091ca6c58302d5c7b4c07296e3fd00f"}, + {file = "pydantic_core-2.18.4-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:823be1deb01793da05ecb0484d6c9e20baebb39bd42b5d72636ae9cf8350dbd2"}, + {file = "pydantic_core-2.18.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ebef0dd9bf9b812bf75bda96743f2a6c5734a02092ae7f721c048d156d5fabae"}, + {file = "pydantic_core-2.18.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ae1d6df168efb88d7d522664693607b80b4080be6750c913eefb77e34c12c71a"}, + {file = "pydantic_core-2.18.4-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f9899c94762343f2cc2fc64c13e7cae4c3cc65cdfc87dd810a31654c9b7358cc"}, + {file = "pydantic_core-2.18.4-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash 
= "sha256:99457f184ad90235cfe8461c4d70ab7dd2680e28821c29eca00252ba90308c78"}, + {file = "pydantic_core-2.18.4-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:18f469a3d2a2fdafe99296a87e8a4c37748b5080a26b806a707f25a902c040a8"}, + {file = "pydantic_core-2.18.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b7cdf28938ac6b8b49ae5e92f2735056a7ba99c9b110a474473fd71185c1af5d"}, + {file = "pydantic_core-2.18.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:938cb21650855054dc54dfd9120a851c974f95450f00683399006aa6e8abb057"}, + {file = "pydantic_core-2.18.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:44cd83ab6a51da80fb5adbd9560e26018e2ac7826f9626bc06ca3dc074cd198b"}, + {file = "pydantic_core-2.18.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:972658f4a72d02b8abfa2581d92d59f59897d2e9f7e708fdabe922f9087773af"}, + {file = "pydantic_core-2.18.4-cp38-none-win32.whl", hash = "sha256:1d886dc848e60cb7666f771e406acae54ab279b9f1e4143babc9c2258213daa2"}, + {file = "pydantic_core-2.18.4-cp38-none-win_amd64.whl", hash = "sha256:bb4462bd43c2460774914b8525f79b00f8f407c945d50881568f294c1d9b4443"}, + {file = "pydantic_core-2.18.4-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:44a688331d4a4e2129140a8118479443bd6f1905231138971372fcde37e43528"}, + {file = "pydantic_core-2.18.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a2fdd81edd64342c85ac7cf2753ccae0b79bf2dfa063785503cb85a7d3593223"}, + {file = "pydantic_core-2.18.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:86110d7e1907ab36691f80b33eb2da87d780f4739ae773e5fc83fb272f88825f"}, + {file = "pydantic_core-2.18.4-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:46387e38bd641b3ee5ce247563b60c5ca098da9c56c75c157a05eaa0933ed154"}, + {file = "pydantic_core-2.18.4-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:123c3cec203e3f5ac7b000bd82235f1a3eced8665b63d18be751f115588fea30"}, + {file = "pydantic_core-2.18.4-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dc1803ac5c32ec324c5261c7209e8f8ce88e83254c4e1aebdc8b0a39f9ddb443"}, + {file = "pydantic_core-2.18.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:53db086f9f6ab2b4061958d9c276d1dbe3690e8dd727d6abf2321d6cce37fa94"}, + {file = "pydantic_core-2.18.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:abc267fa9837245cc28ea6929f19fa335f3dc330a35d2e45509b6566dc18be23"}, + {file = "pydantic_core-2.18.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:a0d829524aaefdebccb869eed855e2d04c21d2d7479b6cada7ace5448416597b"}, + {file = "pydantic_core-2.18.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:509daade3b8649f80d4e5ff21aa5673e4ebe58590b25fe42fac5f0f52c6f034a"}, + {file = "pydantic_core-2.18.4-cp39-none-win32.whl", hash = "sha256:ca26a1e73c48cfc54c4a76ff78df3727b9d9f4ccc8dbee4ae3f73306a591676d"}, + {file = "pydantic_core-2.18.4-cp39-none-win_amd64.whl", hash = "sha256:c67598100338d5d985db1b3d21f3619ef392e185e71b8d52bceacc4a7771ea7e"}, + {file = "pydantic_core-2.18.4-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:574d92eac874f7f4db0ca653514d823a0d22e2354359d0759e3f6a406db5d55d"}, + {file = "pydantic_core-2.18.4-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:1f4d26ceb5eb9eed4af91bebeae4b06c3fb28966ca3a8fb765208cf6b51102ab"}, + {file = "pydantic_core-2.18.4-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:77450e6d20016ec41f43ca4a6c63e9fdde03f0ae3fe90e7c27bdbeaece8b1ed4"}, + {file = "pydantic_core-2.18.4-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d323a01da91851a4f17bf592faf46149c9169d68430b3146dcba2bb5e5719abc"}, + {file = "pydantic_core-2.18.4-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:43d447dd2ae072a0065389092a231283f62d960030ecd27565672bd40746c507"}, + {file = "pydantic_core-2.18.4-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:578e24f761f3b425834f297b9935e1ce2e30f51400964ce4801002435a1b41ef"}, + {file = "pydantic_core-2.18.4-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:81b5efb2f126454586d0f40c4d834010979cb80785173d1586df845a632e4e6d"}, + {file = "pydantic_core-2.18.4-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:ab86ce7c8f9bea87b9d12c7f0af71102acbf5ecbc66c17796cff45dae54ef9a5"}, + {file = "pydantic_core-2.18.4-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:90afc12421df2b1b4dcc975f814e21bc1754640d502a2fbcc6d41e77af5ec312"}, + {file = "pydantic_core-2.18.4-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:51991a89639a912c17bef4b45c87bd83593aee0437d8102556af4885811d59f5"}, + {file = "pydantic_core-2.18.4-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:293afe532740370aba8c060882f7d26cfd00c94cae32fd2e212a3a6e3b7bc15e"}, + {file = "pydantic_core-2.18.4-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b48ece5bde2e768197a2d0f6e925f9d7e3e826f0ad2271120f8144a9db18d5c8"}, + {file = "pydantic_core-2.18.4-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:eae237477a873ab46e8dd748e515c72c0c804fb380fbe6c85533c7de51f23a8f"}, + {file = "pydantic_core-2.18.4-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:834b5230b5dfc0c1ec37b2fda433b271cbbc0e507560b5d1588e2cc1148cf1ce"}, + {file = "pydantic_core-2.18.4-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:e858ac0a25074ba4bce653f9b5d0a85b7456eaddadc0ce82d3878c22489fa4ee"}, + {file = "pydantic_core-2.18.4-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:2fd41f6eff4c20778d717af1cc50eca52f5afe7805ee530a4fbd0bae284f16e9"}, + {file = "pydantic_core-2.18.4.tar.gz", hash = "sha256:ec3beeada09ff865c344ff3bc2f427f5e6c26401cc6113d77e372c3fdac73864"}, ] [package.dependencies] From 9acad248a1fb681c15b170db07aa79eb2d348bbf Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 7 Jun 2024 01:21:09 +0200 Subject: [PATCH 35/68] Bump mkdocs-material from 9.5.25 to 9.5.26 (#798) --- mkdocs/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mkdocs/requirements.txt b/mkdocs/requirements.txt index 22ded02b4c..ade0a4ac99 100644 --- a/mkdocs/requirements.txt +++ b/mkdocs/requirements.txt @@ -23,6 +23,6 @@ mkdocstrings-python==1.10.3 mkdocs-literate-nav==0.6.1 mkdocs-autorefs==1.0.1 mkdocs-gen-files==0.5.0 -mkdocs-material==9.5.25 +mkdocs-material==9.5.26 mkdocs-material-extensions==1.3.1 mkdocs-section-index==0.3.9 From 0155405d7da141efba60ba502f6c6c1cbc97bdf9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 7 Jun 2024 01:21:47 +0200 Subject: [PATCH 36/68] Bump mypy-boto3-glue from 1.34.115 to 1.34.121 (#799) --- poetry.lock | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/poetry.lock b/poetry.lock index aa77a714ab..5c66a71e78 100644 --- a/poetry.lock +++ b/poetry.lock @@ 
-2489,13 +2489,13 @@ files = [ [[package]] name = "mypy-boto3-glue" -version = "1.34.115" -description = "Type annotations for boto3.Glue 1.34.115 service generated with mypy-boto3-builder 7.24.0" +version = "1.34.121" +description = "Type annotations for boto3.Glue 1.34.121 service generated with mypy-boto3-builder 7.24.0" optional = true python-versions = ">=3.8" files = [ - {file = "mypy_boto3_glue-1.34.115-py3-none-any.whl", hash = "sha256:d1c4633e81381f0e048e65a6f9fd5dcc221e05d18f928d12b74c6ec981ff72a7"}, - {file = "mypy_boto3_glue-1.34.115.tar.gz", hash = "sha256:dae8c44e3237801f80debf424f17b3d9dfd30aa3425fbe270dd87c572d539d9f"}, + {file = "mypy_boto3_glue-1.34.121-py3-none-any.whl", hash = "sha256:4af39a8d6b36f17bed52e59fa73b1760ec337e88ca0923c50cc33239b2b4a9ab"}, + {file = "mypy_boto3_glue-1.34.121.tar.gz", hash = "sha256:5885f8a7292665e3ee49ea8980be2500fb3411ba6975c287b3c817b418c58e2d"}, ] [package.dependencies] From 33a00188b4528b898a2def793a949413c2ff073a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 9 Jun 2024 12:10:53 +0200 Subject: [PATCH 37/68] Bump typing-extensions from 4.12.1 to 4.12.2 (#802) --- poetry.lock | 8 ++++---- pyproject.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/poetry.lock b/poetry.lock index 5c66a71e78..0d2fb3b964 100644 --- a/poetry.lock +++ b/poetry.lock @@ -4082,13 +4082,13 @@ telegram = ["requests"] [[package]] name = "typing-extensions" -version = "4.12.1" +version = "4.12.2" description = "Backported and Experimental Type Hints for Python 3.8+" optional = false python-versions = ">=3.8" files = [ - {file = "typing_extensions-4.12.1-py3-none-any.whl", hash = "sha256:6024b58b69089e5a89c347397254e35f1bf02a907728ec7fee9bf0fe837d203a"}, - {file = "typing_extensions-4.12.1.tar.gz", hash = "sha256:915f5e35ff76f56588223f15fdd5938f9a1cf9195c0de25130c627e4d597f6d1"}, + {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"}, + {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"}, ] [[package]] @@ -4461,4 +4461,4 @@ zstandard = ["zstandard"] [metadata] lock-version = "2.0" python-versions = "^3.8" -content-hash = "850e3a96dff9279f4e7951109a55f3ae5510d92ebc010bf2c3623ddcb48f1dd3" +content-hash = "6e68bbd21368ac70baa311ed9567b5ad971b134207972549b1835718f76402a6" diff --git a/pyproject.toml b/pyproject.toml index 9604ca8f81..fe8fe4ed0a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -82,7 +82,7 @@ fastavro = "1.9.4" coverage = { version = "^7.4.2", extras = ["toml"] } requests-mock = "1.12.1" moto = { version = "^5.0.2", extras = ["server"] } -typing-extensions = "4.12.1" +typing-extensions = "4.12.2" pytest-mock = "3.14.0" pyspark = "3.5.1" cython = "3.0.10" From 94e8a9835995e3b61f07f0dfb48d8a22a1e1d1b0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 9 Jun 2024 12:11:27 +0200 Subject: [PATCH 38/68] Bump getdaft from 0.2.25 to 0.2.27 (#801) --- poetry.lock | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/poetry.lock b/poetry.lock index 0d2fb3b964..ab239059c3 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1343,17 +1343,17 @@ gcsfuse = ["fusepy"] [[package]] name = "getdaft" -version = "0.2.25" +version = "0.2.27" description = "Distributed Dataframes for Multimodal Data" optional = true python-versions = ">=3.8" files = [ - 
{file = "getdaft-0.2.25-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:7aab5bdf4af6b9bb0f7e0555cd36762d57da97ed026017f3a4b00f97bf5bf7f1"}, - {file = "getdaft-0.2.25-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:12a95f0ce9206c77a439ace0dc705d13acbe0e8278907ad2e57f62e0c01330ad"}, - {file = "getdaft-0.2.25-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2cfeef90e2f446f65e0e7292431e5354995fe693cf9bbbd434dafd4b8971ea83"}, - {file = "getdaft-0.2.25-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1b86a42e7310de613a0fb30d68a70ee0678e6605023e48a3c1dd28f8752d380e"}, - {file = "getdaft-0.2.25-cp38-abi3-win_amd64.whl", hash = "sha256:fbb3437e666478d06e661d961e5fd10b8cc33385bd2bafafcd22daf403fe6df1"}, - {file = "getdaft-0.2.25.tar.gz", hash = "sha256:60b2ca7d39447ba4b19eab6ccfd6fc706914ecf43d0080a13c832b013dda589b"}, + {file = "getdaft-0.2.27-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:13f75cd4fa5037760757743fbd04fdcdf5c8294dd7975cc369081f9a2c53e49a"}, + {file = "getdaft-0.2.27-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:2d31c0ecb211e8801c158702c53659be13db82c1656aac67cdaa4f8dad6e29e9"}, + {file = "getdaft-0.2.27-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f71bd99964105dc8464fe568c53464f6a44db116bc743cdbc7a5cc83fb126318"}, + {file = "getdaft-0.2.27-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:de90e30ebd727423afe32cd2333a7bfa4fceff6a5cc69e3af3839af37f0afdd7"}, + {file = "getdaft-0.2.27-cp38-abi3-win_amd64.whl", hash = "sha256:9eba98926f7fac3e15d63a82a2b510afae454e6e6e509e2026aeebe3a3f74b3d"}, + {file = "getdaft-0.2.27.tar.gz", hash = "sha256:fcb62ddc260c7a8ac8cfaada87d5dd38b46886b02d9b8fe57a27d2aa176325d3"}, ] [package.dependencies] From 1b3673c04d89493e5fde11b6133f2b2c11e422b6 Mon Sep 17 00:00:00 2001 From: Sung Yun <107272191+syun64@users.noreply.github.com> Date: Mon, 10 Jun 2024 16:57:23 -0400 Subject: [PATCH 39/68] Set `AssertTableUUID` by default on a transaction (#804) --- pyiceberg/table/__init__.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py index 2d4b342461..2d7f81a67d 100644 --- a/pyiceberg/table/__init__.py +++ b/pyiceberg/table/__init__.py @@ -511,6 +511,7 @@ def commit_transaction(self) -> Table: The table with the updates applied. """ if len(self._updates) > 0: + self._requirements += (AssertTableUUID(uuid=self.table_metadata.table_uuid),) self._table._do_commit( # pylint: disable=W0212 updates=self._updates, requirements=self._requirements, @@ -565,7 +566,11 @@ def commit_transaction(self) -> Table: The table with the updates applied. 
""" self._requirements = (AssertCreate(),) - return super().commit_transaction() + self._table._do_commit( # pylint: disable=W0212 + updates=self._updates, + requirements=self._requirements, + ) + return self._table class AssignUUIDUpdate(IcebergBaseModel): @@ -2919,10 +2924,7 @@ def _commit(self) -> UpdatesAndRequirements: snapshot_id=self._snapshot_id, parent_snapshot_id=self._parent_snapshot_id, ref_name="main", type="branch" ), ), - ( - AssertTableUUID(uuid=self._transaction.table_metadata.table_uuid), - AssertRefSnapshotId(snapshot_id=self._parent_snapshot_id, ref="main"), - ), + (AssertRefSnapshotId(snapshot_id=self._parent_snapshot_id, ref="main"),), ) From a6858f77751e2ee6ce0c1c3feb40259acab0c38a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 10 Jun 2024 17:08:45 -0700 Subject: [PATCH 40/68] Bump pypa/cibuildwheel from 2.18.1 to 2.19.0 (#805) Bumps [pypa/cibuildwheel](https://github.com/pypa/cibuildwheel) from 2.18.1 to 2.19.0. - [Release notes](https://github.com/pypa/cibuildwheel/releases) - [Changelog](https://github.com/pypa/cibuildwheel/blob/main/docs/changelog.md) - [Commits](https://github.com/pypa/cibuildwheel/compare/v2.18.1...v2.19.0) --- updated-dependencies: - dependency-name: pypa/cibuildwheel dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/python-release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-release.yml b/.github/workflows/python-release.yml index b8d9b5dae3..d7e39284dc 100644 --- a/.github/workflows/python-release.yml +++ b/.github/workflows/python-release.yml @@ -59,7 +59,7 @@ jobs: if: startsWith(matrix.os, 'ubuntu') - name: Build wheels - uses: pypa/cibuildwheel@v2.18.1 + uses: pypa/cibuildwheel@v2.19.0 with: output-dir: wheelhouse config-file: "pyproject.toml" From df691652707a44e6d0b57a19ff5408dfc4b6c04f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 10 Jun 2024 17:08:55 -0700 Subject: [PATCH 41/68] Bump griffe from 0.45.2 to 0.45.3 (#806) Bumps [griffe](https://github.com/mkdocstrings/griffe) from 0.45.2 to 0.45.3. - [Release notes](https://github.com/mkdocstrings/griffe/releases) - [Changelog](https://github.com/mkdocstrings/griffe/blob/main/CHANGELOG.md) - [Commits](https://github.com/mkdocstrings/griffe/compare/0.45.2...0.45.3) --- updated-dependencies: - dependency-name: griffe dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- mkdocs/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mkdocs/requirements.txt b/mkdocs/requirements.txt index ade0a4ac99..1de5c07914 100644 --- a/mkdocs/requirements.txt +++ b/mkdocs/requirements.txt @@ -16,7 +16,7 @@ # under the License. mkdocs==1.6.0 -griffe==0.45.2 +griffe==0.45.3 jinja2==3.1.4 mkdocstrings==0.25.1 mkdocstrings-python==1.10.3 From d01a7b54649d530118197c0ed039ccfec0fd0fbe Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 12 Jun 2024 01:37:18 -0700 Subject: [PATCH 42/68] Bump msal from 1.26.0 to 1.28.0 (#812) Bumps [msal](https://github.com/AzureAD/microsoft-authentication-library-for-python) from 1.26.0 to 1.28.0. 
- [Release notes](https://github.com/AzureAD/microsoft-authentication-library-for-python/releases) - [Commits](https://github.com/AzureAD/microsoft-authentication-library-for-python/compare/1.26.0...1.28.0) --- updated-dependencies: - dependency-name: msal dependency-type: indirect ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/poetry.lock b/poetry.lock index ab239059c3..dfaff2cae9 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2287,22 +2287,22 @@ tests = ["pytest (>=4.6)"] [[package]] name = "msal" -version = "1.26.0" +version = "1.28.0" description = "The Microsoft Authentication Library (MSAL) for Python library enables your app to access the Microsoft Cloud by supporting authentication of users with Microsoft Azure Active Directory accounts (AAD) and Microsoft Accounts (MSA) using industry standard OAuth2 and OpenID Connect." -optional = true -python-versions = ">=2.7" +optional = false +python-versions = ">=3.7" files = [ - {file = "msal-1.26.0-py2.py3-none-any.whl", hash = "sha256:be77ba6a8f49c9ff598bbcdc5dfcf1c9842f3044300109af738e8c3e371065b5"}, - {file = "msal-1.26.0.tar.gz", hash = "sha256:224756079fe338be838737682b49f8ebc20a87c1c5eeaf590daae4532b83de15"}, + {file = "msal-1.28.0-py3-none-any.whl", hash = "sha256:3064f80221a21cd535ad8c3fafbb3a3582cd9c7e9af0bb789ae14f726a0ca99b"}, + {file = "msal-1.28.0.tar.gz", hash = "sha256:80bbabe34567cb734efd2ec1869b2d98195c927455369d8077b3c542088c5c9d"}, ] [package.dependencies] -cryptography = ">=0.6,<44" +cryptography = ">=0.6,<45" PyJWT = {version = ">=1.0.0,<3", extras = ["crypto"]} requests = ">=2.0.0,<3" [package.extras] -broker = ["pymsalruntime (>=0.13.2,<0.14)"] +broker = ["pymsalruntime (>=0.13.2,<0.15)"] [[package]] name = "msal-extensions" @@ -3150,7 +3150,7 @@ windows-terminal = ["colorama (>=0.4.6)"] name = "pyjwt" version = "2.8.0" description = "JSON Web Token implementation in Python" -optional = true +optional = false python-versions = ">=3.7" files = [ {file = "PyJWT-2.8.0-py3-none-any.whl", hash = "sha256:59127c392cc44c2da5bb3192169a91f429924e17aff6534d70fdc02ab3e04320"}, From de2b2995e26e17a76a4569879764d2f0b46da219 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 12 Jun 2024 06:13:42 -0700 Subject: [PATCH 43/68] Bump azure-identity from 1.15.0 to 1.16.1 (#811) Bumps [azure-identity](https://github.com/Azure/azure-sdk-for-python) from 1.15.0 to 1.16.1. - [Release notes](https://github.com/Azure/azure-sdk-for-python/releases) - [Changelog](https://github.com/Azure/azure-sdk-for-python/blob/main/doc/esrp_release.md) - [Commits](https://github.com/Azure/azure-sdk-for-python/compare/azure-identity_1.15.0...azure-identity_1.16.1) --- updated-dependencies: - dependency-name: azure-identity dependency-type: indirect ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/poetry.lock b/poetry.lock index dfaff2cae9..90beb2c1df 100644 --- a/poetry.lock +++ b/poetry.lock @@ -262,7 +262,7 @@ wrapt = "*" name = "azure-core" version = "1.30.0" description = "Microsoft Azure Core Library for Python" -optional = true +optional = false python-versions = ">=3.7" files = [ {file = "azure-core-1.30.0.tar.gz", hash = "sha256:6f3a7883ef184722f6bd997262eddaf80cfe7e5b3e0caaaf8db1695695893d35"}, @@ -295,20 +295,20 @@ requests = ">=2.20.0" [[package]] name = "azure-identity" -version = "1.15.0" +version = "1.16.1" description = "Microsoft Azure Identity Library for Python" -optional = true -python-versions = ">=3.7" +optional = false +python-versions = ">=3.8" files = [ - {file = "azure-identity-1.15.0.tar.gz", hash = "sha256:4c28fc246b7f9265610eb5261d65931183d019a23d4b0e99357facb2e6c227c8"}, - {file = "azure_identity-1.15.0-py3-none-any.whl", hash = "sha256:a14b1f01c7036f11f148f22cd8c16e05035293d714458d6b44ddf534d93eb912"}, + {file = "azure-identity-1.16.1.tar.gz", hash = "sha256:6d93f04468f240d59246d8afde3091494a5040d4f141cad0f49fc0c399d0d91e"}, + {file = "azure_identity-1.16.1-py3-none-any.whl", hash = "sha256:8fb07c25642cd4ac422559a8b50d3e77f73dcc2bbfaba419d06d6c9d7cff6726"}, ] [package.dependencies] -azure-core = ">=1.23.0,<2.0.0" +azure-core = ">=1.23.0" cryptography = ">=2.5" -msal = ">=1.24.0,<2.0.0" -msal-extensions = ">=0.3.0,<2.0.0" +msal = ">=1.24.0" +msal-extensions = ">=0.3.0" [[package]] name = "azure-storage-blob" @@ -2308,7 +2308,7 @@ broker = ["pymsalruntime (>=0.13.2,<0.15)"] name = "msal-extensions" version = "1.1.0" description = "Microsoft Authentication Library extensions (MSAL EX) provides a persistence API that can save your data on disk, encrypted on Windows, macOS and Linux. Concurrent data access will be coordinated by a file lock mechanism." -optional = true +optional = false python-versions = ">=3.7" files = [ {file = "msal-extensions-1.1.0.tar.gz", hash = "sha256:6ab357867062db7b253d0bd2df6d411c7891a0ee7308d54d1e4317c1d1c54252"}, @@ -2778,7 +2778,7 @@ files = [ name = "portalocker" version = "2.8.2" description = "Wraps the portalocker recipe for easy usage" -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "portalocker-2.8.2-py3-none-any.whl", hash = "sha256:cfb86acc09b9aa7c3b43594e19be1345b9d16af3feb08bf92f23d4dce513a28e"}, From 194e2ef80031a378077b411443ce0210db2e7631 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 14 Jun 2024 21:40:12 +0200 Subject: [PATCH 44/68] Bump pydantic from 2.7.3 to 2.7.4 (#816) Bumps [pydantic](https://github.com/pydantic/pydantic) from 2.7.3 to 2.7.4. - [Release notes](https://github.com/pydantic/pydantic/releases) - [Changelog](https://github.com/pydantic/pydantic/blob/main/HISTORY.md) - [Commits](https://github.com/pydantic/pydantic/compare/v2.7.3...v2.7.4) --- updated-dependencies: - dependency-name: pydantic dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/poetry.lock b/poetry.lock index 90beb2c1df..75414b2b8c 100644 --- a/poetry.lock +++ b/poetry.lock @@ -262,7 +262,7 @@ wrapt = "*" name = "azure-core" version = "1.30.0" description = "Microsoft Azure Core Library for Python" -optional = false +optional = true python-versions = ">=3.7" files = [ {file = "azure-core-1.30.0.tar.gz", hash = "sha256:6f3a7883ef184722f6bd997262eddaf80cfe7e5b3e0caaaf8db1695695893d35"}, @@ -297,7 +297,7 @@ requests = ">=2.20.0" name = "azure-identity" version = "1.16.1" description = "Microsoft Azure Identity Library for Python" -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "azure-identity-1.16.1.tar.gz", hash = "sha256:6d93f04468f240d59246d8afde3091494a5040d4f141cad0f49fc0c399d0d91e"}, @@ -2289,7 +2289,7 @@ tests = ["pytest (>=4.6)"] name = "msal" version = "1.28.0" description = "The Microsoft Authentication Library (MSAL) for Python library enables your app to access the Microsoft Cloud by supporting authentication of users with Microsoft Azure Active Directory accounts (AAD) and Microsoft Accounts (MSA) using industry standard OAuth2 and OpenID Connect." -optional = false +optional = true python-versions = ">=3.7" files = [ {file = "msal-1.28.0-py3-none-any.whl", hash = "sha256:3064f80221a21cd535ad8c3fafbb3a3582cd9c7e9af0bb789ae14f726a0ca99b"}, @@ -2308,7 +2308,7 @@ broker = ["pymsalruntime (>=0.13.2,<0.15)"] name = "msal-extensions" version = "1.1.0" description = "Microsoft Authentication Library extensions (MSAL EX) provides a persistence API that can save your data on disk, encrypted on Windows, macOS and Linux. Concurrent data access will be coordinated by a file lock mechanism." 
-optional = false +optional = true python-versions = ">=3.7" files = [ {file = "msal-extensions-1.1.0.tar.gz", hash = "sha256:6ab357867062db7b253d0bd2df6d411c7891a0ee7308d54d1e4317c1d1c54252"}, @@ -2778,7 +2778,7 @@ files = [ name = "portalocker" version = "2.8.2" description = "Wraps the portalocker recipe for easy usage" -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "portalocker-2.8.2-py3-none-any.whl", hash = "sha256:cfb86acc09b9aa7c3b43594e19be1345b9d16af3feb08bf92f23d4dce513a28e"}, @@ -3023,13 +3023,13 @@ files = [ [[package]] name = "pydantic" -version = "2.7.3" +version = "2.7.4" description = "Data validation using Python type hints" optional = false python-versions = ">=3.8" files = [ - {file = "pydantic-2.7.3-py3-none-any.whl", hash = "sha256:ea91b002777bf643bb20dd717c028ec43216b24a6001a280f83877fd2655d0b4"}, - {file = "pydantic-2.7.3.tar.gz", hash = "sha256:c46c76a40bb1296728d7a8b99aa73dd70a48c3510111ff290034f860c99c419e"}, + {file = "pydantic-2.7.4-py3-none-any.whl", hash = "sha256:ee8538d41ccb9c0a9ad3e0e5f07bf15ed8015b481ced539a1759d8cc89ae90d0"}, + {file = "pydantic-2.7.4.tar.gz", hash = "sha256:0c84efd9548d545f63ac0060c1e4d39bb9b14db8b3c0652338aecc07b5adec52"}, ] [package.dependencies] @@ -3150,7 +3150,7 @@ windows-terminal = ["colorama (>=0.4.6)"] name = "pyjwt" version = "2.8.0" description = "JSON Web Token implementation in Python" -optional = false +optional = true python-versions = ">=3.7" files = [ {file = "PyJWT-2.8.0-py3-none-any.whl", hash = "sha256:59127c392cc44c2da5bb3192169a91f429924e17aff6534d70fdc02ab3e04320"}, From 2407a3c865a9a84a9d99c04fbffc37aa240aeaa1 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 14 Jun 2024 21:57:18 +0200 Subject: [PATCH 45/68] Bump pypa/cibuildwheel from 2.19.0 to 2.19.1 (#814) Bumps [pypa/cibuildwheel](https://github.com/pypa/cibuildwheel) from 2.19.0 to 2.19.1. - [Release notes](https://github.com/pypa/cibuildwheel/releases) - [Changelog](https://github.com/pypa/cibuildwheel/blob/main/docs/changelog.md) - [Commits](https://github.com/pypa/cibuildwheel/compare/v2.19.0...v2.19.1) --- updated-dependencies: - dependency-name: pypa/cibuildwheel dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/python-release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-release.yml b/.github/workflows/python-release.yml index d7e39284dc..9519bad0b4 100644 --- a/.github/workflows/python-release.yml +++ b/.github/workflows/python-release.yml @@ -59,7 +59,7 @@ jobs: if: startsWith(matrix.os, 'ubuntu') - name: Build wheels - uses: pypa/cibuildwheel@v2.19.0 + uses: pypa/cibuildwheel@v2.19.1 with: output-dir: wheelhouse config-file: "pyproject.toml" From d4a4eedee247c6a14f31383b0e43a91169d28539 Mon Sep 17 00:00:00 2001 From: Sung Yun <107272191+syun64@users.noreply.github.com> Date: Fri, 14 Jun 2024 16:13:19 -0400 Subject: [PATCH 46/68] Cast PyArrow schema to `large_*` types (#807) * _pyarrow_with * fix * fix test * adopt review feedback * revert accidental conf change * adopt-nit --- pyiceberg/io/pyarrow.py | 56 ++++++++-- tests/catalog/test_sql.py | 10 +- tests/conftest.py | 4 +- tests/integration/test_writes/test_writes.py | 54 ++++++++++ tests/io/test_pyarrow.py | 102 +++++++++---------- tests/io/test_pyarrow_visitor.py | 39 ++++++- tests/test_schema.py | 2 +- 7 files changed, 197 insertions(+), 70 deletions(-) diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py index 71925c27cd..935b78cece 100644 --- a/pyiceberg/io/pyarrow.py +++ b/pyiceberg/io/pyarrow.py @@ -504,7 +504,7 @@ def field(self, field: NestedField, field_result: pa.DataType) -> pa.Field: def list(self, list_type: ListType, element_result: pa.DataType) -> pa.DataType: element_field = self.field(list_type.element_field, element_result) - return pa.list_(value_type=element_field) + return pa.large_list(value_type=element_field) def map(self, map_type: MapType, key_result: pa.DataType, value_result: pa.DataType) -> pa.DataType: key_field = self.field(map_type.key_field, key_result) @@ -548,7 +548,7 @@ def visit_timestamptz(self, _: TimestamptzType) -> pa.DataType: return pa.timestamp(unit="us", tz="UTC") def visit_string(self, _: StringType) -> pa.DataType: - return pa.string() + return pa.large_string() def visit_uuid(self, _: UUIDType) -> pa.DataType: return pa.binary(16) @@ -680,6 +680,10 @@ def _pyarrow_to_schema_without_ids(schema: pa.Schema) -> Schema: return visit_pyarrow(schema, _ConvertToIcebergWithoutIDs()) +def _pyarrow_schema_ensure_large_types(schema: pa.Schema) -> pa.Schema: + return visit_pyarrow(schema, _ConvertToLargeTypes()) + + @singledispatch def visit_pyarrow(obj: Union[pa.DataType, pa.Schema], visitor: PyArrowSchemaVisitor[T]) -> T: """Apply a pyarrow schema visitor to any point within a schema. 
@@ -952,6 +956,30 @@ def after_map_value(self, element: pa.Field) -> None: self._field_names.pop() +class _ConvertToLargeTypes(PyArrowSchemaVisitor[Union[pa.DataType, pa.Schema]]): + def schema(self, schema: pa.Schema, struct_result: pa.StructType) -> pa.Schema: + return pa.schema(struct_result) + + def struct(self, struct: pa.StructType, field_results: List[pa.Field]) -> pa.StructType: + return pa.struct(field_results) + + def field(self, field: pa.Field, field_result: pa.DataType) -> pa.Field: + return field.with_type(field_result) + + def list(self, list_type: pa.ListType, element_result: pa.DataType) -> pa.DataType: + return pa.large_list(element_result) + + def map(self, map_type: pa.MapType, key_result: pa.DataType, value_result: pa.DataType) -> pa.DataType: + return pa.map_(key_result, value_result) + + def primitive(self, primitive: pa.DataType) -> pa.DataType: + if primitive == pa.string(): + return pa.large_string() + elif primitive == pa.binary(): + return pa.large_binary() + return primitive + + class _ConvertToIcebergWithoutIDs(_ConvertToIceberg): """ Converts PyArrowSchema to Iceberg Schema with all -1 ids. @@ -998,7 +1026,9 @@ def _task_to_table( fragment_scanner = ds.Scanner.from_fragment( fragment=fragment, - schema=physical_schema, + # We always use large types in memory as it uses larger offsets + # That can chunk more row values into the buffers + schema=_pyarrow_schema_ensure_large_types(physical_schema), # This will push down the query to Arrow. # But in case there are positional deletes, we have to apply them first filter=pyarrow_filter if not positional_deletes else None, @@ -1167,8 +1197,14 @@ def __init__(self, file_schema: Schema): def _cast_if_needed(self, field: NestedField, values: pa.Array) -> pa.Array: file_field = self.file_schema.find_field(field.field_id) - if field.field_type.is_primitive and field.field_type != file_field.field_type: - return values.cast(schema_to_pyarrow(promote(file_field.field_type, field.field_type), include_field_ids=False)) + if field.field_type.is_primitive: + if field.field_type != file_field.field_type: + return values.cast(schema_to_pyarrow(promote(file_field.field_type, field.field_type), include_field_ids=False)) + elif (target_type := schema_to_pyarrow(field.field_type, include_field_ids=False)) != values.type: + # if file_field and field_type (e.g. String) are the same + # but the pyarrow type of the array is different from the expected type + # (e.g. 
string vs larger_string), we want to cast the array to the larger type + return values.cast(target_type) return values def _construct_field(self, field: NestedField, arrow_type: pa.DataType) -> pa.Field: @@ -1207,13 +1243,13 @@ def field(self, field: NestedField, _: Optional[pa.Array], field_array: Optional return field_array def list(self, list_type: ListType, list_array: Optional[pa.Array], value_array: Optional[pa.Array]) -> Optional[pa.Array]: - if isinstance(list_array, pa.ListArray) and value_array is not None: + if isinstance(list_array, (pa.ListArray, pa.LargeListArray, pa.FixedSizeListArray)) and value_array is not None: if isinstance(value_array, pa.StructArray): # This can be removed once this has been fixed: # https://github.com/apache/arrow/issues/38809 - list_array = pa.ListArray.from_arrays(list_array.offsets, value_array) + list_array = pa.LargeListArray.from_arrays(list_array.offsets, value_array) - arrow_field = pa.list_(self._construct_field(list_type.element_field, value_array.type)) + arrow_field = pa.large_list(self._construct_field(list_type.element_field, value_array.type)) return list_array.cast(arrow_field) else: return None @@ -1263,7 +1299,7 @@ def field_partner(self, partner_struct: Optional[pa.Array], field_id: int, _: st return None def list_element_partner(self, partner_list: Optional[pa.Array]) -> Optional[pa.Array]: - return partner_list.values if isinstance(partner_list, pa.ListArray) else None + return partner_list.values if isinstance(partner_list, (pa.ListArray, pa.LargeListArray, pa.FixedSizeListArray)) else None def map_key_partner(self, partner_map: Optional[pa.Array]) -> Optional[pa.Array]: return partner_map.keys if isinstance(partner_map, pa.MapArray) else None @@ -1800,10 +1836,10 @@ def write_parquet(task: WriteTask) -> DataFile: # otherwise use the original schema if (sanitized_schema := sanitize_column_names(table_schema)) != table_schema: file_schema = sanitized_schema - arrow_table = to_requested_schema(requested_schema=file_schema, file_schema=table_schema, table=arrow_table) else: file_schema = table_schema + arrow_table = to_requested_schema(requested_schema=file_schema, file_schema=table_schema, table=arrow_table) file_path = f'{table_metadata.location}/data/{task.generate_data_file_path("parquet")}' fo = io.new_output(file_path) with fo.create(overwrite=True) as fos: diff --git a/tests/catalog/test_sql.py b/tests/catalog/test_sql.py index 545916223a..24adfb88ab 100644 --- a/tests/catalog/test_sql.py +++ b/tests/catalog/test_sql.py @@ -288,7 +288,7 @@ def test_write_pyarrow_schema(catalog: SqlCatalog, table_identifier: Identifier) pa.array([None, "A", "B", "C"]), # 'large' column ], schema=pa.schema([ - pa.field("foo", pa.string(), nullable=True), + pa.field("foo", pa.large_string(), nullable=True), pa.field("bar", pa.int32(), nullable=False), pa.field("baz", pa.bool_(), nullable=True), pa.field("large", pa.large_string(), nullable=True), @@ -1325,7 +1325,7 @@ def test_write_and_evolve(catalog: SqlCatalog, format_version: int) -> None: { "foo": ["a", None, "z"], }, - schema=pa.schema([pa.field("foo", pa.string(), nullable=True)]), + schema=pa.schema([pa.field("foo", pa.large_string(), nullable=True)]), ) tbl = catalog.create_table(identifier=identifier, schema=pa_table.schema, properties={"format-version": str(format_version)}) @@ -1336,7 +1336,7 @@ def test_write_and_evolve(catalog: SqlCatalog, format_version: int) -> None: "bar": [19, None, 25], }, schema=pa.schema([ - pa.field("foo", pa.string(), nullable=True), + pa.field("foo", 
pa.large_string(), nullable=True), pa.field("bar", pa.int32(), nullable=True), ]), ) @@ -1375,7 +1375,7 @@ def test_create_table_transaction(catalog: SqlCatalog, format_version: int) -> N { "foo": ["a", None, "z"], }, - schema=pa.schema([pa.field("foo", pa.string(), nullable=True)]), + schema=pa.schema([pa.field("foo", pa.large_string(), nullable=True)]), ) pa_table_with_column = pa.Table.from_pydict( @@ -1384,7 +1384,7 @@ def test_create_table_transaction(catalog: SqlCatalog, format_version: int) -> N "bar": [19, None, 25], }, schema=pa.schema([ - pa.field("foo", pa.string(), nullable=True), + pa.field("foo", pa.large_string(), nullable=True), pa.field("bar", pa.int32(), nullable=True), ]), ) diff --git a/tests/conftest.py b/tests/conftest.py index d3f23689a2..a160322ea3 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2116,8 +2116,8 @@ def pa_schema() -> "pa.Schema": return pa.schema([ ("bool", pa.bool_()), - ("string", pa.string()), - ("string_long", pa.string()), + ("string", pa.large_string()), + ("string_long", pa.large_string()), ("int", pa.int32()), ("long", pa.int64()), ("float", pa.float32()), diff --git a/tests/integration/test_writes/test_writes.py b/tests/integration/test_writes/test_writes.py index e329adcd5c..4585406cbb 100644 --- a/tests/integration/test_writes/test_writes.py +++ b/tests/integration/test_writes/test_writes.py @@ -340,6 +340,60 @@ def test_python_writes_dictionary_encoded_column_with_spark_reads( assert spark_df.equals(pyiceberg_df) +@pytest.mark.integration +@pytest.mark.parametrize("format_version", [1, 2]) +def test_python_writes_with_small_and_large_types_spark_reads( + spark: SparkSession, session_catalog: Catalog, format_version: int +) -> None: + identifier = "default.python_writes_with_small_and_large_types_spark_reads" + TEST_DATA = { + "foo": ["a", None, "z"], + "id": [1, 2, 3], + "name": ["AB", "CD", "EF"], + "address": [ + {"street": "123", "city": "SFO", "zip": 12345, "bar": "a"}, + {"street": "456", "city": "SW", "zip": 67890, "bar": "b"}, + {"street": "789", "city": "Random", "zip": 10112, "bar": "c"}, + ], + } + pa_schema = pa.schema([ + pa.field("foo", pa.large_string()), + pa.field("id", pa.int32()), + pa.field("name", pa.string()), + pa.field( + "address", + pa.struct([ + pa.field("street", pa.string()), + pa.field("city", pa.string()), + pa.field("zip", pa.int32()), + pa.field("bar", pa.large_string()), + ]), + ), + ]) + arrow_table = pa.Table.from_pydict(TEST_DATA, schema=pa_schema) + tbl = _create_table(session_catalog, identifier, {"format-version": format_version}, schema=pa_schema) + + tbl.overwrite(arrow_table) + spark_df = spark.sql(f"SELECT * FROM {identifier}").toPandas() + pyiceberg_df = tbl.scan().to_pandas() + assert spark_df.equals(pyiceberg_df) + arrow_table_on_read = tbl.scan().to_arrow() + assert arrow_table_on_read.schema == pa.schema([ + pa.field("foo", pa.large_string()), + pa.field("id", pa.int32()), + pa.field("name", pa.large_string()), + pa.field( + "address", + pa.struct([ + pa.field("street", pa.large_string()), + pa.field("city", pa.large_string()), + pa.field("zip", pa.int32()), + pa.field("bar", pa.large_string()), + ]), + ), + ]) + + @pytest.mark.integration def test_write_bin_pack_data_files(spark: SparkSession, session_catalog: Catalog, arrow_table_with_null: pa.Table) -> None: identifier = "default.write_bin_pack_data_files" diff --git a/tests/io/test_pyarrow.py b/tests/io/test_pyarrow.py index baa9e30824..ecb946a98b 100644 --- a/tests/io/test_pyarrow.py +++ b/tests/io/test_pyarrow.py @@ -346,7 
+346,7 @@ def test_deleting_hdfs_file_not_found() -> None: def test_schema_to_pyarrow_schema_include_field_ids(table_schema_nested: Schema) -> None: actual = schema_to_pyarrow(table_schema_nested) - expected = """foo: string + expected = """foo: large_string -- field metadata -- PARQUET:field_id: '1' bar: int32 not null @@ -355,20 +355,20 @@ def test_schema_to_pyarrow_schema_include_field_ids(table_schema_nested: Schema) baz: bool -- field metadata -- PARQUET:field_id: '3' -qux: list not null - child 0, element: string not null +qux: large_list not null + child 0, element: large_string not null -- field metadata -- PARQUET:field_id: '5' -- field metadata -- PARQUET:field_id: '4' -quux: map> not null - child 0, entries: struct not null> not null - child 0, key: string not null +quux: map> not null + child 0, entries: struct not null> not null + child 0, key: large_string not null -- field metadata -- PARQUET:field_id: '7' - child 1, value: map not null - child 0, entries: struct not null - child 0, key: string not null + child 1, value: map not null + child 0, entries: struct not null + child 0, key: large_string not null -- field metadata -- PARQUET:field_id: '9' child 1, value: int32 not null @@ -378,7 +378,7 @@ def test_schema_to_pyarrow_schema_include_field_ids(table_schema_nested: Schema) PARQUET:field_id: '8' -- field metadata -- PARQUET:field_id: '6' -location: list not null> not null +location: large_list not null> not null child 0, element: struct not null child 0, latitude: float -- field metadata -- @@ -390,8 +390,8 @@ def test_schema_to_pyarrow_schema_include_field_ids(table_schema_nested: Schema) PARQUET:field_id: '12' -- field metadata -- PARQUET:field_id: '11' -person: struct - child 0, name: string +person: struct + child 0, name: large_string -- field metadata -- PARQUET:field_id: '16' child 1, age: int32 not null @@ -404,24 +404,24 @@ def test_schema_to_pyarrow_schema_include_field_ids(table_schema_nested: Schema) def test_schema_to_pyarrow_schema_exclude_field_ids(table_schema_nested: Schema) -> None: actual = schema_to_pyarrow(table_schema_nested, include_field_ids=False) - expected = """foo: string + expected = """foo: large_string bar: int32 not null baz: bool -qux: list not null - child 0, element: string not null -quux: map> not null - child 0, entries: struct not null> not null - child 0, key: string not null - child 1, value: map not null - child 0, entries: struct not null - child 0, key: string not null +qux: large_list not null + child 0, element: large_string not null +quux: map> not null + child 0, entries: struct not null> not null + child 0, key: large_string not null + child 1, value: map not null + child 0, entries: struct not null + child 0, key: large_string not null child 1, value: int32 not null -location: list not null> not null +location: large_list not null> not null child 0, element: struct not null child 0, latitude: float child 1, longitude: float -person: struct - child 0, name: string +person: struct + child 0, name: large_string child 1, age: int32 not null""" assert repr(actual) == expected @@ -486,7 +486,7 @@ def test_timestamptz_type_to_pyarrow() -> None: def test_string_type_to_pyarrow() -> None: iceberg_type = StringType() - assert visit(iceberg_type, _ConvertToArrowSchema()) == pa.string() + assert visit(iceberg_type, _ConvertToArrowSchema()) == pa.large_string() def test_binary_type_to_pyarrow() -> None: @@ -496,7 +496,7 @@ def test_binary_type_to_pyarrow() -> None: def test_struct_type_to_pyarrow(table_schema_simple: Schema) -> None: 
expected = pa.struct([ - pa.field("foo", pa.string(), nullable=True, metadata={"field_id": "1"}), + pa.field("foo", pa.large_string(), nullable=True, metadata={"field_id": "1"}), pa.field("bar", pa.int32(), nullable=False, metadata={"field_id": "2"}), pa.field("baz", pa.bool_(), nullable=True, metadata={"field_id": "3"}), ]) @@ -513,7 +513,7 @@ def test_map_type_to_pyarrow() -> None: ) assert visit(iceberg_map, _ConvertToArrowSchema()) == pa.map_( pa.field("key", pa.int32(), nullable=False, metadata={"field_id": "1"}), - pa.field("value", pa.string(), nullable=False, metadata={"field_id": "2"}), + pa.field("value", pa.large_string(), nullable=False, metadata={"field_id": "2"}), ) @@ -523,7 +523,7 @@ def test_list_type_to_pyarrow() -> None: element_type=IntegerType(), element_required=True, ) - assert visit(iceberg_map, _ConvertToArrowSchema()) == pa.list_( + assert visit(iceberg_map, _ConvertToArrowSchema()) == pa.large_list( pa.field("element", pa.int32(), nullable=False, metadata={"field_id": "1"}) ) @@ -606,11 +606,11 @@ def test_expr_less_than_or_equal_to_pyarrow(bound_reference: BoundReference[str] def test_expr_in_to_pyarrow(bound_reference: BoundReference[str]) -> None: assert repr(expression_to_pyarrow(BoundIn(bound_reference, {literal("hello"), literal("world")}))) in ( - """""", - """""", @@ -619,11 +619,11 @@ def test_expr_in_to_pyarrow(bound_reference: BoundReference[str]) -> None: def test_expr_not_in_to_pyarrow(bound_reference: BoundReference[str]) -> None: assert repr(expression_to_pyarrow(BoundNotIn(bound_reference, {literal("hello"), literal("world")}))) in ( - """""", - """""", @@ -967,12 +967,12 @@ def test_projection_add_column(file_int: str) -> None: assert ( repr(result_table.schema) == """id: int32 -list: list +list: large_list child 0, element: int32 -map: map - child 0, entries: struct not null +map: map + child 0, entries: struct not null child 0, key: int32 not null - child 1, value: string + child 1, value: large_string location: struct child 0, lat: double child 1, lon: double""" @@ -988,7 +988,7 @@ def test_read_list(schema_list: Schema, file_list: str) -> None: assert ( repr(result_table.schema) - == """ids: list + == """ids: large_list child 0, element: int32""" ) @@ -1002,10 +1002,10 @@ def test_read_map(schema_map: Schema, file_map: str) -> None: assert ( repr(result_table.schema) - == """properties: map - child 0, entries: struct not null - child 0, key: string not null - child 1, value: string not null""" + == """properties: map + child 0, entries: struct not null + child 0, key: large_string not null + child 1, value: large_string not null""" ) @@ -1025,10 +1025,10 @@ def test_projection_add_column_struct(schema_int: Schema, file_int: str) -> None assert r.as_py() is None assert ( repr(result_table.schema) - == """id: map - child 0, entries: struct not null + == """id: map + child 0, entries: struct not null child 0, key: int32 not null - child 1, value: string""" + child 1, value: large_string""" ) @@ -1231,7 +1231,7 @@ def test_projection_list_of_structs(schema_list_of_structs: Schema, file_list_of ] assert ( repr(result_table.schema) - == """locations: list> + == """locations: large_list> child 0, element: struct child 0, latitude: double not null child 1, longitude: double not null @@ -1279,9 +1279,9 @@ def test_projection_maps_of_structs(schema_map_of_structs: Schema, file_map_of_s assert actual.as_py() == expected assert ( repr(result_table.schema) - == """locations: map> - child 0, entries: struct not null> not null - child 0, key: string not 
null + == """locations: map> + child 0, entries: struct not null> not null + child 0, key: large_string not null child 1, value: struct not null child 0, latitude: double not null child 1, longitude: double not null @@ -1378,7 +1378,7 @@ def test_delete(deletes_file: str, example_task: FileScanTask, table_schema_simp assert ( str(with_deletes) == """pyarrow.Table -foo: string +foo: large_string bar: int32 not null baz: bool ---- @@ -1416,7 +1416,7 @@ def test_delete_duplicates(deletes_file: str, example_task: FileScanTask, table_ assert ( str(with_deletes) == """pyarrow.Table -foo: string +foo: large_string bar: int32 not null baz: bool ---- @@ -1447,7 +1447,7 @@ def test_pyarrow_wrap_fsspec(example_task: FileScanTask, table_schema_simple: Sc assert ( str(projection) == """pyarrow.Table -foo: string +foo: large_string bar: int32 not null baz: bool ---- diff --git a/tests/io/test_pyarrow_visitor.py b/tests/io/test_pyarrow_visitor.py index c8571dacf1..d3b6217c7b 100644 --- a/tests/io/test_pyarrow_visitor.py +++ b/tests/io/test_pyarrow_visitor.py @@ -25,6 +25,7 @@ _ConvertToIceberg, _ConvertToIcebergWithoutIDs, _HasIds, + _pyarrow_schema_ensure_large_types, pyarrow_to_schema, schema_to_pyarrow, visit_pyarrow, @@ -209,7 +210,7 @@ def test_pyarrow_timestamp_tz_invalid_tz() -> None: def test_pyarrow_string_to_iceberg() -> None: - pyarrow_type = pa.string() + pyarrow_type = pa.large_string() converted_iceberg_type = visit_pyarrow(pyarrow_type, _ConvertToIceberg()) assert converted_iceberg_type == StringType() assert visit(converted_iceberg_type, _ConvertToArrowSchema()) == pyarrow_type @@ -543,3 +544,39 @@ def test_pyarrow_schema_to_schema_fresh_ids_nested_schema( pyarrow_schema_nested_without_ids: pa.Schema, iceberg_schema_nested_no_ids: Schema ) -> None: assert visit_pyarrow(pyarrow_schema_nested_without_ids, _ConvertToIcebergWithoutIDs()) == iceberg_schema_nested_no_ids + + +def test_pyarrow_schema_ensure_large_types(pyarrow_schema_nested_without_ids: pa.Schema) -> None: + expected_schema = pa.schema([ + pa.field("foo", pa.large_string(), nullable=False), + pa.field("bar", pa.int32(), nullable=False), + pa.field("baz", pa.bool_(), nullable=True), + pa.field("qux", pa.large_list(pa.large_string()), nullable=False), + pa.field( + "quux", + pa.map_( + pa.large_string(), + pa.map_(pa.large_string(), pa.int32()), + ), + nullable=False, + ), + pa.field( + "location", + pa.large_list( + pa.struct([ + pa.field("latitude", pa.float32(), nullable=False), + pa.field("longitude", pa.float32(), nullable=False), + ]), + ), + nullable=False, + ), + pa.field( + "person", + pa.struct([ + pa.field("name", pa.large_string(), nullable=True), + pa.field("age", pa.int32(), nullable=False), + ]), + nullable=True, + ), + ]) + assert _pyarrow_schema_ensure_large_types(pyarrow_schema_nested_without_ids) == expected_schema diff --git a/tests/test_schema.py b/tests/test_schema.py index 96109ce9c2..23b42ef49e 100644 --- a/tests/test_schema.py +++ b/tests/test_schema.py @@ -1610,7 +1610,7 @@ def test_arrow_schema() -> None: ) expected_schema = pa.schema([ - pa.field("foo", pa.string(), nullable=False), + pa.field("foo", pa.large_string(), nullable=False), pa.field("bar", pa.int32(), nullable=True), pa.field("baz", pa.bool_(), nullable=True), ]) From c579e9f68546b58b43b96e2785be2d577efdb86b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 14 Jun 2024 22:13:43 +0200 Subject: [PATCH 47/68] Bump mypy-boto3-glue from 1.34.121 to 1.34.126 (#815) Bumps 
[mypy-boto3-glue](https://github.com/youtype/mypy_boto3_builder) from 1.34.121 to 1.34.126. - [Release notes](https://github.com/youtype/mypy_boto3_builder/releases) - [Commits](https://github.com/youtype/mypy_boto3_builder/commits) --- updated-dependencies: - dependency-name: mypy-boto3-glue dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/poetry.lock b/poetry.lock index 75414b2b8c..a9b9b1a5a7 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2489,13 +2489,13 @@ files = [ [[package]] name = "mypy-boto3-glue" -version = "1.34.121" -description = "Type annotations for boto3.Glue 1.34.121 service generated with mypy-boto3-builder 7.24.0" +version = "1.34.126" +description = "Type annotations for boto3.Glue 1.34.126 service generated with mypy-boto3-builder 7.24.0" optional = true python-versions = ">=3.8" files = [ - {file = "mypy_boto3_glue-1.34.121-py3-none-any.whl", hash = "sha256:4af39a8d6b36f17bed52e59fa73b1760ec337e88ca0923c50cc33239b2b4a9ab"}, - {file = "mypy_boto3_glue-1.34.121.tar.gz", hash = "sha256:5885f8a7292665e3ee49ea8980be2500fb3411ba6975c287b3c817b418c58e2d"}, + {file = "mypy_boto3_glue-1.34.126-py3-none-any.whl", hash = "sha256:d6c447c2bf1cc9025043e2721c1a0bdbf2cd792fffc42cbb828de522a25337e0"}, + {file = "mypy_boto3_glue-1.34.126.tar.gz", hash = "sha256:ab14d2ad4df02e2d91e0be89bc123e7d3456cdd1ce9ff7104d6f99d0b7bb650d"}, ] [package.dependencies] From 1dde51a09097984d8bf298db08171b9d299ffd59 Mon Sep 17 00:00:00 2001 From: Chinmay Bhat <12948588+chinmay-bhat@users.noreply.github.com> Date: Sat, 15 Jun 2024 21:42:58 +0530 Subject: [PATCH 48/68] Support snapshot management operations like creating tags by adding `ManageSnapshots` API (#728) --- dev/provision.py | 47 +++++ mkdocs/docs/api.md | 22 +++ pyiceberg/table/__init__.py | 176 ++++++++++++++++++ tests/integration/test_snapshot_operations.py | 42 +++++ tests/table/test_init.py | 24 +++ 5 files changed, 311 insertions(+) create mode 100644 tests/integration/test_snapshot_operations.py diff --git a/dev/provision.py b/dev/provision.py index 44086caf20..6c8fe366d7 100644 --- a/dev/provision.py +++ b/dev/provision.py @@ -342,3 +342,50 @@ (array(), map(), array(struct(1))) """ ) + + spark.sql( + f""" + CREATE OR REPLACE TABLE {catalog_name}.default.test_table_snapshot_operations ( + number integer + ) + USING iceberg + TBLPROPERTIES ( + 'format-version'='2' + ); + """ + ) + + spark.sql( + f""" + INSERT INTO {catalog_name}.default.test_table_snapshot_operations + VALUES (1) + """ + ) + + spark.sql( + f""" + INSERT INTO {catalog_name}.default.test_table_snapshot_operations + VALUES (2) + """ + ) + + spark.sql( + f""" + DELETE FROM {catalog_name}.default.test_table_snapshot_operations + WHERE number = 2 + """ + ) + + spark.sql( + f""" + INSERT INTO {catalog_name}.default.test_table_snapshot_operations + VALUES (3) + """ + ) + + spark.sql( + f""" + INSERT INTO {catalog_name}.default.test_table_snapshot_operations + VALUES (4) + """ + ) diff --git a/mkdocs/docs/api.md b/mkdocs/docs/api.md index 70b5fd62eb..6bbd9abea1 100644 --- a/mkdocs/docs/api.md +++ b/mkdocs/docs/api.md @@ -913,6 +913,28 @@ tbl.overwrite(df, snapshot_properties={"abc": "def"}) assert tbl.metadata.snapshots[-1].summary["abc"] == "def" ``` +## Snapshot Management + +Manage snapshots with operations through the `Table` API: + +```python +# 
To run a specific operation +table.manage_snapshots().create_tag(snapshot_id, "tag123").commit() +# To run multiple operations +table.manage_snapshots() + .create_tag(snapshot_id1, "tag123") + .create_tag(snapshot_id2, "tag456") + .commit() +# Operations are applied on commit. +``` + +You can also use context managers to make more changes: + +```python +with table.manage_snapshots() as ms: + ms.create_branch(snapshot_id1, "Branch_A").create_tag(snapshot_id2, "tag789") +``` + ## Query the data To query a table, a table scan is needed. A table scan accepts a filter, columns, optionally a limit and a snapshot ID: diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py index 2d7f81a67d..9a10fc6bf5 100644 --- a/pyiceberg/table/__init__.py +++ b/pyiceberg/table/__init__.py @@ -138,6 +138,7 @@ ) from pyiceberg.utils.concurrent import ExecutorFactory from pyiceberg.utils.datetime import datetime_to_millis +from pyiceberg.utils.deprecated import deprecated from pyiceberg.utils.singleton import _convert_to_hashable_type if TYPE_CHECKING: @@ -351,6 +352,88 @@ def set_properties(self, properties: Properties = EMPTY_DICT, **kwargs: Any) -> updates = properties or kwargs return self._apply((SetPropertiesUpdate(updates=updates),)) + @deprecated( + deprecated_in="0.7.0", + removed_in="0.8.0", + help_message="Please use one of the functions in ManageSnapshots instead", + ) + def add_snapshot(self, snapshot: Snapshot) -> Transaction: + """Add a new snapshot to the table. + + Returns: + The transaction with the add-snapshot staged. + """ + updates = (AddSnapshotUpdate(snapshot=snapshot),) + + return self._apply(updates, ()) + + @deprecated( + deprecated_in="0.7.0", + removed_in="0.8.0", + help_message="Please use one of the functions in ManageSnapshots instead", + ) + def set_ref_snapshot( + self, + snapshot_id: int, + parent_snapshot_id: Optional[int], + ref_name: str, + type: str, + max_ref_age_ms: Optional[int] = None, + max_snapshot_age_ms: Optional[int] = None, + min_snapshots_to_keep: Optional[int] = None, + ) -> Transaction: + """Update a ref to a snapshot. + + Returns: + The transaction with the set-snapshot-ref staged + """ + updates = ( + SetSnapshotRefUpdate( + snapshot_id=snapshot_id, + ref_name=ref_name, + type=type, + max_ref_age_ms=max_ref_age_ms, + max_snapshot_age_ms=max_snapshot_age_ms, + min_snapshots_to_keep=min_snapshots_to_keep, + ), + ) + + requirements = (AssertRefSnapshotId(snapshot_id=parent_snapshot_id, ref="main"),) + return self._apply(updates, requirements) + + def _set_ref_snapshot( + self, + snapshot_id: int, + ref_name: str, + type: str, + max_ref_age_ms: Optional[int] = None, + max_snapshot_age_ms: Optional[int] = None, + min_snapshots_to_keep: Optional[int] = None, + ) -> UpdatesAndRequirements: + """Update a ref to a snapshot. + + Returns: + The updates and requirements for the set-snapshot-ref staged + """ + updates = ( + SetSnapshotRefUpdate( + snapshot_id=snapshot_id, + ref_name=ref_name, + type=type, + max_ref_age_ms=max_ref_age_ms, + max_snapshot_age_ms=max_snapshot_age_ms, + min_snapshots_to_keep=min_snapshots_to_keep, + ), + ) + requirements = ( + AssertRefSnapshotId( + snapshot_id=self.table_metadata.refs[ref_name].snapshot_id if ref_name in self.table_metadata.refs else None, + ref=ref_name, + ), + ) + + return updates, requirements + def update_schema(self, allow_incompatible_changes: bool = False, case_sensitive: bool = True) -> UpdateSchema: """Create a new UpdateSchema to alter the columns of this table. 
@@ -1323,6 +1406,21 @@ def history(self) -> List[SnapshotLogEntry]: """Get the snapshot history of this table.""" return self.metadata.snapshot_log + def manage_snapshots(self) -> ManageSnapshots: + """ + Shorthand to run snapshot management operations like create branch, create tag, etc. + + Use table.manage_snapshots().().commit() to run a specific operation. + Use table.manage_snapshots().().().commit() to run multiple operations. + Pending changes are applied on commit. + + We can also use context managers to make more changes. For example, + + with table.manage_snapshots() as ms: + ms.create_tag(snapshot_id1, "Tag_A").create_tag(snapshot_id2, "Tag_B") + """ + return ManageSnapshots(transaction=Transaction(self, autocommit=True)) + def update_schema(self, allow_incompatible_changes: bool = False, case_sensitive: bool = True) -> UpdateSchema: """Create a new UpdateSchema to alter the columns of this table. @@ -1835,6 +1933,84 @@ def __enter__(self) -> U: return self # type: ignore +class ManageSnapshots(UpdateTableMetadata["ManageSnapshots"]): + """ + Run snapshot management operations using APIs. + + APIs include create branch, create tag, etc. + + Use table.manage_snapshots().().commit() to run a specific operation. + Use table.manage_snapshots().().().commit() to run multiple operations. + Pending changes are applied on commit. + + We can also use context managers to make more changes. For example, + + with table.manage_snapshots() as ms: + ms.create_tag(snapshot_id1, "Tag_A").create_tag(snapshot_id2, "Tag_B") + """ + + _updates: Tuple[TableUpdate, ...] = () + _requirements: Tuple[TableRequirement, ...] = () + + def _commit(self) -> UpdatesAndRequirements: + """Apply the pending changes and commit.""" + return self._updates, self._requirements + + def create_tag(self, snapshot_id: int, tag_name: str, max_ref_age_ms: Optional[int] = None) -> ManageSnapshots: + """ + Create a new tag pointing to the given snapshot id. + + Args: + snapshot_id (int): snapshot id of the existing snapshot to tag + tag_name (str): name of the tag + max_ref_age_ms (Optional[int]): max ref age in milliseconds + + Returns: + This for method chaining + """ + update, requirement = self._transaction._set_ref_snapshot( + snapshot_id=snapshot_id, + ref_name=tag_name, + type="tag", + max_ref_age_ms=max_ref_age_ms, + ) + self._updates += update + self._requirements += requirement + return self + + def create_branch( + self, + snapshot_id: int, + branch_name: str, + max_ref_age_ms: Optional[int] = None, + max_snapshot_age_ms: Optional[int] = None, + min_snapshots_to_keep: Optional[int] = None, + ) -> ManageSnapshots: + """ + Create a new branch pointing to the given snapshot id. + + Args: + snapshot_id (int): snapshot id of existing snapshot at which the branch is created. 
+ branch_name (str): name of the new branch + max_ref_age_ms (Optional[int]): max ref age in milliseconds + max_snapshot_age_ms (Optional[int]): max age of snapshots to keep in milliseconds + min_snapshots_to_keep (Optional[int]): min number of snapshots to keep in milliseconds + Returns: + This for method chaining + """ + update, requirement = self._transaction._set_ref_snapshot( + snapshot_id=snapshot_id, + ref_name=branch_name, + type="branch", + max_ref_age_ms=max_ref_age_ms, + max_snapshot_age_ms=max_snapshot_age_ms, + min_snapshots_to_keep=min_snapshots_to_keep, + ) + self._updates += update + self._requirements += requirement + return self + + class UpdateSchema(UpdateTableMetadata["UpdateSchema"]): _schema: Schema _last_column_id: itertools.count[int] diff --git a/tests/integration/test_snapshot_operations.py b/tests/integration/test_snapshot_operations.py new file mode 100644 index 0000000000..639193383e --- /dev/null +++ b/tests/integration/test_snapshot_operations.py @@ -0,0 +1,42 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
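The integration test below drives the new API end to end. As a usage sketch (catalog and table names are placeholders, and note that the multi-call chain in the `api.md` example above must be parenthesized or held in a variable to be valid Python):

```python
from pyiceberg.catalog import load_catalog

# Placeholder names; assumes a catalog configured for this environment.
table = load_catalog("default").load_table("default.taxi_dataset")

history = table.history()
tag_snapshot_id = history[-1].snapshot_id
branch_snapshot_id = history[-2].snapshot_id

# Single operation, applied on commit()
table.manage_snapshots().create_tag(tag_snapshot_id, "v1.0").commit()

# Multiple operations in one chain
(
    table.manage_snapshots()
    .create_tag(tag_snapshot_id, "audited")
    .create_branch(branch_snapshot_id, "backfill")
    .commit()
)

# Or as a context manager; pending changes are committed on exit
with table.manage_snapshots() as ms:
    ms.create_branch(branch_snapshot_id, "experiment")
```

Each `create_tag`/`create_branch` call stages a `SetSnapshotRefUpdate` plus an `AssertRefSnapshotId` requirement via `_set_ref_snapshot`, so a conflicting change to the same ref fails the requirement check at commit time.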
+import pytest + +from pyiceberg.catalog import Catalog +from pyiceberg.table.refs import SnapshotRef + + +@pytest.mark.integration +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) +def test_create_tag(catalog: Catalog) -> None: + identifier = "default.test_table_snapshot_operations" + tbl = catalog.load_table(identifier) + assert len(tbl.history()) > 3 + tag_snapshot_id = tbl.history()[-3].snapshot_id + tbl.manage_snapshots().create_tag(snapshot_id=tag_snapshot_id, tag_name="tag123").commit() + assert tbl.metadata.refs["tag123"] == SnapshotRef(snapshot_id=tag_snapshot_id, snapshot_ref_type="tag") + + +@pytest.mark.integration +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) +def test_create_branch(catalog: Catalog) -> None: + identifier = "default.test_table_snapshot_operations" + tbl = catalog.load_table(identifier) + assert len(tbl.history()) > 2 + branch_snapshot_id = tbl.history()[-2].snapshot_id + tbl.manage_snapshots().create_branch(snapshot_id=branch_snapshot_id, branch_name="branch123").commit() + assert tbl.metadata.refs["branch123"] == SnapshotRef(snapshot_id=branch_snapshot_id, snapshot_ref_type="branch") diff --git a/tests/table/test_init.py b/tests/table/test_init.py index 20b77b6abd..c97b3a4aff 100644 --- a/tests/table/test_init.py +++ b/tests/table/test_init.py @@ -689,6 +689,30 @@ def test_update_metadata_add_snapshot(table_v2: Table) -> None: assert new_metadata.last_updated_ms == new_snapshot.timestamp_ms +def test_update_metadata_set_ref_snapshot(table_v2: Table) -> None: + update, _ = table_v2.transaction()._set_ref_snapshot( + snapshot_id=3051729675574597004, + ref_name="main", + type="branch", + max_ref_age_ms=123123123, + max_snapshot_age_ms=12312312312, + min_snapshots_to_keep=1, + ) + + new_metadata = update_table_metadata(table_v2.metadata, update) + assert len(new_metadata.snapshot_log) == 3 + assert new_metadata.snapshot_log[2].snapshot_id == 3051729675574597004 + assert new_metadata.current_snapshot_id == 3051729675574597004 + assert new_metadata.last_updated_ms > table_v2.metadata.last_updated_ms + assert new_metadata.refs["main"] == SnapshotRef( + snapshot_id=3051729675574597004, + snapshot_ref_type="branch", + min_snapshots_to_keep=1, + max_snapshot_age_ms=12312312312, + max_ref_age_ms=123123123, + ) + + def test_update_metadata_set_snapshot_ref(table_v2: Table) -> None: update = SetSnapshotRefUpdate( ref_name="main", From 772faad2e1b66a5a56453a24e4708534799e4ca5 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 18 Jun 2024 09:17:03 +0200 Subject: [PATCH 49/68] Bump mkdocs-material from 9.5.26 to 9.5.27 (#826) Bumps [mkdocs-material](https://github.com/squidfunk/mkdocs-material) from 9.5.26 to 9.5.27. - [Release notes](https://github.com/squidfunk/mkdocs-material/releases) - [Changelog](https://github.com/squidfunk/mkdocs-material/blob/master/CHANGELOG) - [Commits](https://github.com/squidfunk/mkdocs-material/compare/9.5.26...9.5.27) --- updated-dependencies: - dependency-name: mkdocs-material dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- mkdocs/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mkdocs/requirements.txt b/mkdocs/requirements.txt index 1de5c07914..9a2d24ca35 100644 --- a/mkdocs/requirements.txt +++ b/mkdocs/requirements.txt @@ -23,6 +23,6 @@ mkdocstrings-python==1.10.3 mkdocs-literate-nav==0.6.1 mkdocs-autorefs==1.0.1 mkdocs-gen-files==0.5.0 -mkdocs-material==9.5.26 +mkdocs-material==9.5.27 mkdocs-material-extensions==1.3.1 mkdocs-section-index==0.3.9 From a32bd7b21e20052870551d129e9717184035577e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 18 Jun 2024 09:19:16 +0200 Subject: [PATCH 50/68] Bump mypy-boto3-glue from 1.34.126 to 1.34.128 (#825) Bumps [mypy-boto3-glue](https://github.com/youtype/mypy_boto3_builder) from 1.34.126 to 1.34.128. - [Release notes](https://github.com/youtype/mypy_boto3_builder/releases) - [Commits](https://github.com/youtype/mypy_boto3_builder/commits) --- updated-dependencies: - dependency-name: mypy-boto3-glue dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/poetry.lock b/poetry.lock index a9b9b1a5a7..a24bb18310 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2489,13 +2489,13 @@ files = [ [[package]] name = "mypy-boto3-glue" -version = "1.34.126" -description = "Type annotations for boto3.Glue 1.34.126 service generated with mypy-boto3-builder 7.24.0" +version = "1.34.128" +description = "Type annotations for boto3.Glue 1.34.128 service generated with mypy-boto3-builder 7.24.0" optional = true python-versions = ">=3.8" files = [ - {file = "mypy_boto3_glue-1.34.126-py3-none-any.whl", hash = "sha256:d6c447c2bf1cc9025043e2721c1a0bdbf2cd792fffc42cbb828de522a25337e0"}, - {file = "mypy_boto3_glue-1.34.126.tar.gz", hash = "sha256:ab14d2ad4df02e2d91e0be89bc123e7d3456cdd1ce9ff7104d6f99d0b7bb650d"}, + {file = "mypy_boto3_glue-1.34.128-py3-none-any.whl", hash = "sha256:34e8fffd2652590ef49789a52a076e5cd1165fc7073da3a0b99c4ec472a76b66"}, + {file = "mypy_boto3_glue-1.34.128.tar.gz", hash = "sha256:69b52458ade731c7691807d7b5af96e8f9a62d74cf614b5d9c77f906842e9d12"}, ] [package.dependencies] From f1e3107c7ba245964e1f742eddd3f424f472b210 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 18 Jun 2024 09:19:41 +0200 Subject: [PATCH 51/68] Bump griffe from 0.45.3 to 0.46.1 (#824) Bumps [griffe](https://github.com/mkdocstrings/griffe) from 0.45.3 to 0.46.1. - [Release notes](https://github.com/mkdocstrings/griffe/releases) - [Changelog](https://github.com/mkdocstrings/griffe/blob/main/CHANGELOG.md) - [Commits](https://github.com/mkdocstrings/griffe/compare/0.45.3...0.46.1) --- updated-dependencies: - dependency-name: griffe dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- mkdocs/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mkdocs/requirements.txt b/mkdocs/requirements.txt index 9a2d24ca35..6a9916fc5d 100644 --- a/mkdocs/requirements.txt +++ b/mkdocs/requirements.txt @@ -16,7 +16,7 @@ # under the License. 
mkdocs==1.6.0 -griffe==0.45.3 +griffe==0.46.1 jinja2==3.1.4 mkdocstrings==0.25.1 mkdocstrings-python==1.10.3 From 5c9fa7eb44b8560f9b6c68ff63dfe5936aaf8bb2 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 18 Jun 2024 09:20:08 +0200 Subject: [PATCH 52/68] Bump urllib3 from 1.26.18 to 1.26.19 (#823) Bumps [urllib3](https://github.com/urllib3/urllib3) from 1.26.18 to 1.26.19. - [Release notes](https://github.com/urllib3/urllib3/releases) - [Changelog](https://github.com/urllib3/urllib3/blob/1.26.19/CHANGES.rst) - [Commits](https://github.com/urllib3/urllib3/compare/1.26.18...1.26.19) --- updated-dependencies: - dependency-name: urllib3 dependency-type: indirect ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 25 ++++--------------------- 1 file changed, 4 insertions(+), 21 deletions(-) diff --git a/poetry.lock b/poetry.lock index a24bb18310..8cf72d26ba 100644 --- a/poetry.lock +++ b/poetry.lock @@ -4104,13 +4104,13 @@ files = [ [[package]] name = "urllib3" -version = "1.26.18" +version = "1.26.19" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" files = [ - {file = "urllib3-1.26.18-py2.py3-none-any.whl", hash = "sha256:34b97092d7e0a3a8cf7cd10e386f401b3737364026c45e622aa02903dffe0f07"}, - {file = "urllib3-1.26.18.tar.gz", hash = "sha256:f8ecc1bba5667413457c529ab955bf8c67b45db799d159066261719e328580a0"}, + {file = "urllib3-1.26.19-py2.py3-none-any.whl", hash = "sha256:37a0344459b199fce0e80b0d3569837ec6b6937435c5244e7fd73fa6006830f3"}, + {file = "urllib3-1.26.19.tar.gz", hash = "sha256:3e3d753a8618b86d7de333b4223005f68720bcd6a7d2bcb9fbd2229ec7c1e429"}, ] [package.extras] @@ -4118,23 +4118,6 @@ brotli = ["brotli (==1.0.9)", "brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotl secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"] socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] -[[package]] -name = "urllib3" -version = "2.0.7" -description = "HTTP library with thread-safe connection pooling, file post, and more." 
-optional = false -python-versions = ">=3.7" -files = [ - {file = "urllib3-2.0.7-py3-none-any.whl", hash = "sha256:fdb6d215c776278489906c2f8916e6e7d4f5a9b602ccbcfdf7f016fc8da0596e"}, - {file = "urllib3-2.0.7.tar.gz", hash = "sha256:c97dfde1f7bd43a71c8d2a58e369e9b2bf692d1334ea9f9cae55add7d0dd0f84"}, -] - -[package.extras] -brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] -secure = ["certifi", "cryptography (>=1.9)", "idna (>=2.0.0)", "pyopenssl (>=17.1.0)", "urllib3-secure-extra"] -socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] -zstd = ["zstandard (>=0.18.0)"] - [[package]] name = "virtualenv" version = "20.25.0" From a29491af52dc4aff46a325bbaac4a11c2f2bfabc Mon Sep 17 00:00:00 2001 From: Andre Luis Anastacio Date: Tue, 18 Jun 2024 13:35:32 -0300 Subject: [PATCH 53/68] Remove recursive call from `ancestors_of` (#821) --- pyiceberg/table/snapshots.py | 11 ++--- tests/conftest.py | 86 +++++++++++++++++++++++++++++++++++- tests/table/test_init.py | 15 +++++++ 3 files changed, 106 insertions(+), 6 deletions(-) diff --git a/pyiceberg/table/snapshots.py b/pyiceberg/table/snapshots.py index b21a0f5613..d6a3ff1654 100644 --- a/pyiceberg/table/snapshots.py +++ b/pyiceberg/table/snapshots.py @@ -421,8 +421,9 @@ def set_when_positive(properties: Dict[str, str], num: int, property_name: str) def ancestors_of(current_snapshot: Optional[Snapshot], table_metadata: TableMetadata) -> Iterable[Snapshot]: """Get the ancestors of and including the given snapshot.""" - if current_snapshot: - yield current_snapshot - if current_snapshot.parent_snapshot_id is not None: - if parent := table_metadata.snapshot_by_id(current_snapshot.parent_snapshot_id): - yield from ancestors_of(parent, table_metadata) + snapshot = current_snapshot + while snapshot is not None: + yield snapshot + if snapshot.parent_snapshot_id is None: + break + snapshot = table_metadata.snapshot_by_id(snapshot.parent_snapshot_id) diff --git a/tests/conftest.py b/tests/conftest.py index a160322ea3..2092d93d0e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -29,10 +29,11 @@ import re import socket import string +import time import uuid from datetime import date, datetime, timezone from pathlib import Path -from random import choice +from random import choice, randint from tempfile import TemporaryDirectory from typing import ( TYPE_CHECKING, @@ -731,6 +732,77 @@ def example_table_metadata_no_snapshot_v1() -> Dict[str, Any]: return EXAMPLE_TABLE_METADATA_NO_SNAPSHOT_V1 +@pytest.fixture +def example_table_metadata_v2_with_extensive_snapshots() -> Dict[str, Any]: + def generate_snapshot( + snapshot_id: int, + parent_snapshot_id: Optional[int] = None, + timestamp_ms: Optional[int] = None, + sequence_number: int = 0, + ) -> Dict[str, Any]: + return { + "snapshot-id": snapshot_id, + "parent-snapshot-id": parent_snapshot_id, + "timestamp-ms": timestamp_ms or int(time.time() * 1000), + "sequence-number": sequence_number, + "summary": {"operation": "append"}, + "manifest-list": f"s3://a/b/{snapshot_id}.avro", + } + + snapshots = [] + snapshot_log = [] + initial_snapshot_id = 3051729675574597004 + + for i in range(2000): + snapshot_id = initial_snapshot_id + i + parent_snapshot_id = snapshot_id - 1 if i > 0 else None + timestamp_ms = int(time.time() * 1000) - randint(0, 1000000) + snapshots.append(generate_snapshot(snapshot_id, parent_snapshot_id, timestamp_ms, i)) + snapshot_log.append({"snapshot-id": snapshot_id, "timestamp-ms": timestamp_ms}) + + return { + "format-version": 2, + "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1", + "location": 
"s3://bucket/test/location", + "last-sequence-number": 34, + "last-updated-ms": 1602638573590, + "last-column-id": 3, + "current-schema-id": 1, + "schemas": [ + {"type": "struct", "schema-id": 0, "fields": [{"id": 1, "name": "x", "required": True, "type": "long"}]}, + { + "type": "struct", + "schema-id": 1, + "identifier-field-ids": [1, 2], + "fields": [ + {"id": 1, "name": "x", "required": True, "type": "long"}, + {"id": 2, "name": "y", "required": True, "type": "long", "doc": "comment"}, + {"id": 3, "name": "z", "required": True, "type": "long"}, + ], + }, + ], + "default-spec-id": 0, + "partition-specs": [{"spec-id": 0, "fields": [{"name": "x", "transform": "identity", "source-id": 1, "field-id": 1000}]}], + "last-partition-id": 1000, + "default-sort-order-id": 3, + "sort-orders": [ + { + "order-id": 3, + "fields": [ + {"transform": "identity", "source-id": 2, "direction": "asc", "null-order": "nulls-first"}, + {"transform": "bucket[4]", "source-id": 3, "direction": "desc", "null-order": "nulls-last"}, + ], + } + ], + "properties": {"read.split.target.size": "134217728"}, + "current-snapshot-id": initial_snapshot_id + 1999, + "snapshots": snapshots, + "snapshot-log": snapshot_log, + "metadata-log": [{"metadata-file": "s3://bucket/.../v1.json", "timestamp-ms": 1515100}], + "refs": {"test": {"snapshot-id": initial_snapshot_id, "type": "tag", "max-ref-age-ms": 10000000}}, + } + + EXAMPLE_TABLE_METADATA_V2 = { "format-version": 2, "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1", @@ -1992,6 +2064,18 @@ def table_v2(example_table_metadata_v2: Dict[str, Any]) -> Table: ) +@pytest.fixture +def table_v2_with_extensive_snapshots(example_table_metadata_v2_with_extensive_snapshots: Dict[str, Any]) -> Table: + table_metadata = TableMetadataV2(**example_table_metadata_v2_with_extensive_snapshots) + return Table( + identifier=("database", "table"), + metadata=table_metadata, + metadata_location=f"{table_metadata.location}/uuid.metadata.json", + io=load_file_io(), + catalog=NoopCatalog("NoopCatalog"), + ) + + @pytest.fixture def bound_reference_str() -> BoundReference[str]: return BoundReference(field=NestedField(1, "field", StringType(), required=False), accessor=Accessor(position=0, inner=None)) diff --git a/tests/table/test_init.py b/tests/table/test_init.py index c97b3a4aff..6f8260fa56 100644 --- a/tests/table/test_init.py +++ b/tests/table/test_init.py @@ -241,6 +241,21 @@ def test_ancestors_of(table_v2: Table) -> None: ] +def test_ancestors_of_recursive_error(table_v2_with_extensive_snapshots: Table) -> None: + # Test RecursionError: maximum recursion depth exceeded + assert ( + len( + list( + ancestors_of( + table_v2_with_extensive_snapshots.current_snapshot(), + table_v2_with_extensive_snapshots.metadata, + ) + ) + ) + == 2000 + ) + + def test_snapshot_by_id_does_not_exist(table_v2: Table) -> None: assert table_v2.snapshot_by_id(-1) is None From 4c0d2182c2fc819d81cf34ff9b144a0570fc4deb Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 20 Jun 2024 09:53:32 +0200 Subject: [PATCH 54/68] Bump mkdocstrings-python from 1.10.3 to 1.10.5 (#839) Bumps [mkdocstrings-python](https://github.com/mkdocstrings/python) from 1.10.3 to 1.10.5. 
- [Release notes](https://github.com/mkdocstrings/python/releases) - [Changelog](https://github.com/mkdocstrings/python/blob/main/CHANGELOG.md) - [Commits](https://github.com/mkdocstrings/python/compare/1.10.3...1.10.5) --- updated-dependencies: - dependency-name: mkdocstrings-python dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- mkdocs/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mkdocs/requirements.txt b/mkdocs/requirements.txt index 6a9916fc5d..ce88c71366 100644 --- a/mkdocs/requirements.txt +++ b/mkdocs/requirements.txt @@ -19,7 +19,7 @@ mkdocs==1.6.0 griffe==0.46.1 jinja2==3.1.4 mkdocstrings==0.25.1 -mkdocstrings-python==1.10.3 +mkdocstrings-python==1.10.5 mkdocs-literate-nav==0.6.1 mkdocs-autorefs==1.0.1 mkdocs-gen-files==0.5.0 From 25d51869ba3f8b4aeb7727475ce388a45ab6a9fd Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 20 Jun 2024 10:09:27 +0200 Subject: [PATCH 55/68] Bump griffe from 0.46.1 to 0.47.0 (#831) Bumps [griffe](https://github.com/mkdocstrings/griffe) from 0.46.1 to 0.47.0. - [Release notes](https://github.com/mkdocstrings/griffe/releases) - [Changelog](https://github.com/mkdocstrings/griffe/blob/main/CHANGELOG.md) - [Commits](https://github.com/mkdocstrings/griffe/compare/0.46.1...0.47.0) --- updated-dependencies: - dependency-name: griffe dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- mkdocs/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mkdocs/requirements.txt b/mkdocs/requirements.txt index ce88c71366..292036d9ef 100644 --- a/mkdocs/requirements.txt +++ b/mkdocs/requirements.txt @@ -16,7 +16,7 @@ # under the License. mkdocs==1.6.0 -griffe==0.46.1 +griffe==0.47.0 jinja2==3.1.4 mkdocstrings==0.25.1 mkdocstrings-python==1.10.5 From a537d2abefbe0f6537bfc47914c9cc9c88969a95 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 20 Jun 2024 12:29:10 +0200 Subject: [PATCH 56/68] Bump getdaft from 0.2.27 to 0.2.28 (#834) Bumps [getdaft](https://github.com/Eventual-Inc/Daft) from 0.2.27 to 0.2.28. - [Release notes](https://github.com/Eventual-Inc/Daft/releases) - [Commits](https://github.com/Eventual-Inc/Daft/compare/v0.2.27...v0.2.28) --- updated-dependencies: - dependency-name: getdaft dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/poetry.lock b/poetry.lock index 8cf72d26ba..3e41a0ed5b 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1343,17 +1343,17 @@ gcsfuse = ["fusepy"] [[package]] name = "getdaft" -version = "0.2.27" +version = "0.2.28" description = "Distributed Dataframes for Multimodal Data" optional = true python-versions = ">=3.8" files = [ - {file = "getdaft-0.2.27-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:13f75cd4fa5037760757743fbd04fdcdf5c8294dd7975cc369081f9a2c53e49a"}, - {file = "getdaft-0.2.27-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:2d31c0ecb211e8801c158702c53659be13db82c1656aac67cdaa4f8dad6e29e9"}, - {file = "getdaft-0.2.27-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f71bd99964105dc8464fe568c53464f6a44db116bc743cdbc7a5cc83fb126318"}, - {file = "getdaft-0.2.27-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:de90e30ebd727423afe32cd2333a7bfa4fceff6a5cc69e3af3839af37f0afdd7"}, - {file = "getdaft-0.2.27-cp38-abi3-win_amd64.whl", hash = "sha256:9eba98926f7fac3e15d63a82a2b510afae454e6e6e509e2026aeebe3a3f74b3d"}, - {file = "getdaft-0.2.27.tar.gz", hash = "sha256:fcb62ddc260c7a8ac8cfaada87d5dd38b46886b02d9b8fe57a27d2aa176325d3"}, + {file = "getdaft-0.2.28-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:052632bf156dfabc61b00bc3e055f11c045ed1011818ed398e82bee549346510"}, + {file = "getdaft-0.2.28-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:6d120504f05dadac6fa0c170558f2635e5654d1e49ffcd95c20952847427e069"}, + {file = "getdaft-0.2.28-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:477d77f695129843d1bdfe3896d17cd5af43024e06c1956077f6afe2069e4dcf"}, + {file = "getdaft-0.2.28-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:da7be6b900798061090ea99f474ad1d128fb493f958c17854248eacfad68a969"}, + {file = "getdaft-0.2.28-cp38-abi3-win_amd64.whl", hash = "sha256:679a9d26f76f695f4fa3c51c732c02f511eeb5a832b305bbd237c2e62333f815"}, + {file = "getdaft-0.2.28.tar.gz", hash = "sha256:1389ef47caa61f0daf3217b4bd5042b50e854bfb1315b104341110c09a6c072f"}, ] [package.dependencies] @@ -1363,7 +1363,7 @@ tqdm = "*" typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.10\""} [package.extras] -all = ["getdaft[aws,azure,deltalake,gcp,iceberg,numpy,pandas,ray,sql]"] +all = ["getdaft[aws,azure,deltalake,gcp,iceberg,numpy,pandas,ray,sql,unity]"] aws = ["boto3"] deltalake = ["deltalake"] hudi = ["pyarrow (>=8.0.0)"] @@ -1373,6 +1373,7 @@ numpy = ["numpy"] pandas = ["pandas"] ray = ["packaging", "ray[client,data] (>=2.0.0)", "ray[client,data] (>=2.10.0)"] sql = ["connectorx", "sqlalchemy", "sqlglot"] +unity = ["unitycatalog"] [[package]] name = "google-api-core" From 4767d1d4863a8f5101f4d03d9ebacf0f254111e0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 20 Jun 2024 22:12:13 +0200 Subject: [PATCH 57/68] Bump tenacity from 8.3.0 to 8.4.1 (#833) Bumps [tenacity](https://github.com/jd/tenacity) from 8.3.0 to 8.4.1. - [Release notes](https://github.com/jd/tenacity/releases) - [Commits](https://github.com/jd/tenacity/compare/8.3.0...8.4.1) --- updated-dependencies: - dependency-name: tenacity dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/poetry.lock b/poetry.lock index 3e41a0ed5b..3e9cb61340 100644 --- a/poetry.lock +++ b/poetry.lock @@ -4019,13 +4019,13 @@ mpmath = ">=0.19" [[package]] name = "tenacity" -version = "8.3.0" +version = "8.4.1" description = "Retry code until it succeeds" optional = false python-versions = ">=3.8" files = [ - {file = "tenacity-8.3.0-py3-none-any.whl", hash = "sha256:3649f6443dbc0d9b01b9d8020a9c4ec7a1ff5f6f3c6c8a036ef371f573fe9185"}, - {file = "tenacity-8.3.0.tar.gz", hash = "sha256:953d4e6ad24357bceffbc9707bc74349aca9d245f68eb65419cf0c249a1949a2"}, + {file = "tenacity-8.4.1-py3-none-any.whl", hash = "sha256:28522e692eda3e1b8f5e99c51464efcc0b9fc86933da92415168bc1c4e2308fa"}, + {file = "tenacity-8.4.1.tar.gz", hash = "sha256:54b1412b878ddf7e1f1577cd49527bad8cdef32421bd599beac0c6c3f10582fd"}, ] [package.extras] From a94463afaf77527590620dbca82c1740863f4442 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 21 Jun 2024 00:50:13 +0200 Subject: [PATCH 58/68] Bump sqlalchemy from 2.0.30 to 2.0.31 (#842) Bumps [sqlalchemy](https://github.com/sqlalchemy/sqlalchemy) from 2.0.30 to 2.0.31. - [Release notes](https://github.com/sqlalchemy/sqlalchemy/releases) - [Changelog](https://github.com/sqlalchemy/sqlalchemy/blob/main/CHANGES.rst) - [Commits](https://github.com/sqlalchemy/sqlalchemy/commits) --- updated-dependencies: - dependency-name: sqlalchemy dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 102 ++++++++++++++++++++++++++-------------------------- 1 file changed, 51 insertions(+), 51 deletions(-) diff --git a/poetry.lock b/poetry.lock index 3e9cb61340..cbc6bc1526 100644 --- a/poetry.lock +++ b/poetry.lock @@ -3904,64 +3904,64 @@ files = [ [[package]] name = "sqlalchemy" -version = "2.0.30" +version = "2.0.31" description = "Database Abstraction Library" optional = true python-versions = ">=3.7" files = [ - {file = "SQLAlchemy-2.0.30-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:3b48154678e76445c7ded1896715ce05319f74b1e73cf82d4f8b59b46e9c0ddc"}, - {file = "SQLAlchemy-2.0.30-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2753743c2afd061bb95a61a51bbb6a1a11ac1c44292fad898f10c9839a7f75b2"}, - {file = "SQLAlchemy-2.0.30-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a7bfc726d167f425d4c16269a9a10fe8630ff6d14b683d588044dcef2d0f6be7"}, - {file = "SQLAlchemy-2.0.30-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c4f61ada6979223013d9ab83a3ed003ded6959eae37d0d685db2c147e9143797"}, - {file = "SQLAlchemy-2.0.30-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:3a365eda439b7a00732638f11072907c1bc8e351c7665e7e5da91b169af794af"}, - {file = "SQLAlchemy-2.0.30-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bba002a9447b291548e8d66fd8c96a6a7ed4f2def0bb155f4f0a1309fd2735d5"}, - {file = "SQLAlchemy-2.0.30-cp310-cp310-win32.whl", hash = "sha256:0138c5c16be3600923fa2169532205d18891b28afa817cb49b50e08f62198bb8"}, - {file = "SQLAlchemy-2.0.30-cp310-cp310-win_amd64.whl", hash = "sha256:99650e9f4cf3ad0d409fed3eec4f071fadd032e9a5edc7270cd646a26446feeb"}, - {file = "SQLAlchemy-2.0.30-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:955991a09f0992c68a499791a753523f50f71a6885531568404fa0f231832aa0"}, - {file = "SQLAlchemy-2.0.30-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f69e4c756ee2686767eb80f94c0125c8b0a0b87ede03eacc5c8ae3b54b99dc46"}, - {file = "SQLAlchemy-2.0.30-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69c9db1ce00e59e8dd09d7bae852a9add716efdc070a3e2068377e6ff0d6fdaa"}, - {file = "SQLAlchemy-2.0.30-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a1429a4b0f709f19ff3b0cf13675b2b9bfa8a7e79990003207a011c0db880a13"}, - {file = "SQLAlchemy-2.0.30-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:efedba7e13aa9a6c8407c48facfdfa108a5a4128e35f4c68f20c3407e4376aa9"}, - {file = "SQLAlchemy-2.0.30-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:16863e2b132b761891d6c49f0a0f70030e0bcac4fd208117f6b7e053e68668d0"}, - {file = "SQLAlchemy-2.0.30-cp311-cp311-win32.whl", hash = "sha256:2ecabd9ccaa6e914e3dbb2aa46b76dede7eadc8cbf1b8083c94d936bcd5ffb49"}, - {file = "SQLAlchemy-2.0.30-cp311-cp311-win_amd64.whl", hash = "sha256:0b3f4c438e37d22b83e640f825ef0f37b95db9aa2d68203f2c9549375d0b2260"}, - {file = "SQLAlchemy-2.0.30-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:5a79d65395ac5e6b0c2890935bad892eabb911c4aa8e8015067ddb37eea3d56c"}, - {file = "SQLAlchemy-2.0.30-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9a5baf9267b752390252889f0c802ea13b52dfee5e369527da229189b8bd592e"}, - {file = "SQLAlchemy-2.0.30-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3cb5a646930c5123f8461f6468901573f334c2c63c795b9af350063a736d0134"}, - {file = "SQLAlchemy-2.0.30-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:296230899df0b77dec4eb799bcea6fbe39a43707ce7bb166519c97b583cfcab3"}, - {file = "SQLAlchemy-2.0.30-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:c62d401223f468eb4da32627bffc0c78ed516b03bb8a34a58be54d618b74d472"}, - {file = "SQLAlchemy-2.0.30-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:3b69e934f0f2b677ec111b4d83f92dc1a3210a779f69bf905273192cf4ed433e"}, - {file = "SQLAlchemy-2.0.30-cp312-cp312-win32.whl", hash = "sha256:77d2edb1f54aff37e3318f611637171e8ec71472f1fdc7348b41dcb226f93d90"}, - {file = "SQLAlchemy-2.0.30-cp312-cp312-win_amd64.whl", hash = "sha256:b6c7ec2b1f4969fc19b65b7059ed00497e25f54069407a8701091beb69e591a5"}, - {file = "SQLAlchemy-2.0.30-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:5a8e3b0a7e09e94be7510d1661339d6b52daf202ed2f5b1f9f48ea34ee6f2d57"}, - {file = "SQLAlchemy-2.0.30-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b60203c63e8f984df92035610c5fb76d941254cf5d19751faab7d33b21e5ddc0"}, - {file = "SQLAlchemy-2.0.30-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f1dc3eabd8c0232ee8387fbe03e0a62220a6f089e278b1f0aaf5e2d6210741ad"}, - {file = "SQLAlchemy-2.0.30-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:40ad017c672c00b9b663fcfcd5f0864a0a97828e2ee7ab0c140dc84058d194cf"}, - {file = "SQLAlchemy-2.0.30-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:e42203d8d20dc704604862977b1470a122e4892791fe3ed165f041e4bf447a1b"}, - {file = "SQLAlchemy-2.0.30-cp37-cp37m-win32.whl", hash = "sha256:2a4f4da89c74435f2bc61878cd08f3646b699e7d2eba97144030d1be44e27584"}, - {file = "SQLAlchemy-2.0.30-cp37-cp37m-win_amd64.whl", hash = "sha256:b6bf767d14b77f6a18b6982cbbf29d71bede087edae495d11ab358280f304d8e"}, - {file = "SQLAlchemy-2.0.30-cp38-cp38-macosx_10_9_x86_64.whl", hash = 
"sha256:bc0c53579650a891f9b83fa3cecd4e00218e071d0ba00c4890f5be0c34887ed3"}, - {file = "SQLAlchemy-2.0.30-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:311710f9a2ee235f1403537b10c7687214bb1f2b9ebb52702c5aa4a77f0b3af7"}, - {file = "SQLAlchemy-2.0.30-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:408f8b0e2c04677e9c93f40eef3ab22f550fecb3011b187f66a096395ff3d9fd"}, - {file = "SQLAlchemy-2.0.30-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:37a4b4fb0dd4d2669070fb05b8b8824afd0af57587393015baee1cf9890242d9"}, - {file = "SQLAlchemy-2.0.30-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:a943d297126c9230719c27fcbbeab57ecd5d15b0bd6bfd26e91bfcfe64220621"}, - {file = "SQLAlchemy-2.0.30-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:0a089e218654e740a41388893e090d2e2c22c29028c9d1353feb38638820bbeb"}, - {file = "SQLAlchemy-2.0.30-cp38-cp38-win32.whl", hash = "sha256:fa561138a64f949f3e889eb9ab8c58e1504ab351d6cf55259dc4c248eaa19da6"}, - {file = "SQLAlchemy-2.0.30-cp38-cp38-win_amd64.whl", hash = "sha256:7d74336c65705b986d12a7e337ba27ab2b9d819993851b140efdf029248e818e"}, - {file = "SQLAlchemy-2.0.30-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ae8c62fe2480dd61c532ccafdbce9b29dacc126fe8be0d9a927ca3e699b9491a"}, - {file = "SQLAlchemy-2.0.30-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:2383146973a15435e4717f94c7509982770e3e54974c71f76500a0136f22810b"}, - {file = "SQLAlchemy-2.0.30-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8409de825f2c3b62ab15788635ccaec0c881c3f12a8af2b12ae4910a0a9aeef6"}, - {file = "SQLAlchemy-2.0.30-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0094c5dc698a5f78d3d1539853e8ecec02516b62b8223c970c86d44e7a80f6c7"}, - {file = "SQLAlchemy-2.0.30-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:edc16a50f5e1b7a06a2dcc1f2205b0b961074c123ed17ebda726f376a5ab0953"}, - {file = "SQLAlchemy-2.0.30-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:f7703c2010355dd28f53deb644a05fc30f796bd8598b43f0ba678878780b6e4c"}, - {file = "SQLAlchemy-2.0.30-cp39-cp39-win32.whl", hash = "sha256:1f9a727312ff6ad5248a4367358e2cf7e625e98b1028b1d7ab7b806b7d757513"}, - {file = "SQLAlchemy-2.0.30-cp39-cp39-win_amd64.whl", hash = "sha256:a0ef36b28534f2a5771191be6edb44cc2673c7b2edf6deac6562400288664221"}, - {file = "SQLAlchemy-2.0.30-py3-none-any.whl", hash = "sha256:7108d569d3990c71e26a42f60474b4c02c8586c4681af5fd67e51a044fdea86a"}, - {file = "SQLAlchemy-2.0.30.tar.gz", hash = "sha256:2b1708916730f4830bc69d6f49d37f7698b5bd7530aca7f04f785f8849e95255"}, + {file = "SQLAlchemy-2.0.31-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f2a213c1b699d3f5768a7272de720387ae0122f1becf0901ed6eaa1abd1baf6c"}, + {file = "SQLAlchemy-2.0.31-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9fea3d0884e82d1e33226935dac990b967bef21315cbcc894605db3441347443"}, + {file = "SQLAlchemy-2.0.31-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f3ad7f221d8a69d32d197e5968d798217a4feebe30144986af71ada8c548e9fa"}, + {file = "SQLAlchemy-2.0.31-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9f2bee229715b6366f86a95d497c347c22ddffa2c7c96143b59a2aa5cc9eebbc"}, + {file = "SQLAlchemy-2.0.31-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:cd5b94d4819c0c89280b7c6109c7b788a576084bf0a480ae17c227b0bc41e109"}, + {file = "SQLAlchemy-2.0.31-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:750900a471d39a7eeba57580b11983030517a1f512c2cb287d5ad0fcf3aebd58"}, + {file = 
"SQLAlchemy-2.0.31-cp310-cp310-win32.whl", hash = "sha256:7bd112be780928c7f493c1a192cd8c5fc2a2a7b52b790bc5a84203fb4381c6be"}, + {file = "SQLAlchemy-2.0.31-cp310-cp310-win_amd64.whl", hash = "sha256:5a48ac4d359f058474fadc2115f78a5cdac9988d4f99eae44917f36aa1476327"}, + {file = "SQLAlchemy-2.0.31-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f68470edd70c3ac3b6cd5c2a22a8daf18415203ca1b036aaeb9b0fb6f54e8298"}, + {file = "SQLAlchemy-2.0.31-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2e2c38c2a4c5c634fe6c3c58a789712719fa1bf9b9d6ff5ebfce9a9e5b89c1ca"}, + {file = "SQLAlchemy-2.0.31-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bd15026f77420eb2b324dcb93551ad9c5f22fab2c150c286ef1dc1160f110203"}, + {file = "SQLAlchemy-2.0.31-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2196208432deebdfe3b22185d46b08f00ac9d7b01284e168c212919891289396"}, + {file = "SQLAlchemy-2.0.31-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:352b2770097f41bff6029b280c0e03b217c2dcaddc40726f8f53ed58d8a85da4"}, + {file = "SQLAlchemy-2.0.31-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:56d51ae825d20d604583f82c9527d285e9e6d14f9a5516463d9705dab20c3740"}, + {file = "SQLAlchemy-2.0.31-cp311-cp311-win32.whl", hash = "sha256:6e2622844551945db81c26a02f27d94145b561f9d4b0c39ce7bfd2fda5776dac"}, + {file = "SQLAlchemy-2.0.31-cp311-cp311-win_amd64.whl", hash = "sha256:ccaf1b0c90435b6e430f5dd30a5aede4764942a695552eb3a4ab74ed63c5b8d3"}, + {file = "SQLAlchemy-2.0.31-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:3b74570d99126992d4b0f91fb87c586a574a5872651185de8297c6f90055ae42"}, + {file = "SQLAlchemy-2.0.31-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6f77c4f042ad493cb8595e2f503c7a4fe44cd7bd59c7582fd6d78d7e7b8ec52c"}, + {file = "SQLAlchemy-2.0.31-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cd1591329333daf94467e699e11015d9c944f44c94d2091f4ac493ced0119449"}, + {file = "SQLAlchemy-2.0.31-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:74afabeeff415e35525bf7a4ecdab015f00e06456166a2eba7590e49f8db940e"}, + {file = "SQLAlchemy-2.0.31-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b9c01990d9015df2c6f818aa8f4297d42ee71c9502026bb074e713d496e26b67"}, + {file = "SQLAlchemy-2.0.31-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:66f63278db425838b3c2b1c596654b31939427016ba030e951b292e32b99553e"}, + {file = "SQLAlchemy-2.0.31-cp312-cp312-win32.whl", hash = "sha256:0b0f658414ee4e4b8cbcd4a9bb0fd743c5eeb81fc858ca517217a8013d282c96"}, + {file = "SQLAlchemy-2.0.31-cp312-cp312-win_amd64.whl", hash = "sha256:fa4b1af3e619b5b0b435e333f3967612db06351217c58bfb50cee5f003db2a5a"}, + {file = "SQLAlchemy-2.0.31-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:f43e93057cf52a227eda401251c72b6fbe4756f35fa6bfebb5d73b86881e59b0"}, + {file = "SQLAlchemy-2.0.31-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d337bf94052856d1b330d5fcad44582a30c532a2463776e1651bd3294ee7e58b"}, + {file = "SQLAlchemy-2.0.31-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c06fb43a51ccdff3b4006aafee9fcf15f63f23c580675f7734245ceb6b6a9e05"}, + {file = "SQLAlchemy-2.0.31-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:b6e22630e89f0e8c12332b2b4c282cb01cf4da0d26795b7eae16702a608e7ca1"}, + {file = "SQLAlchemy-2.0.31-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:79a40771363c5e9f3a77f0e28b3302801db08040928146e6808b5b7a40749c88"}, + {file = 
"SQLAlchemy-2.0.31-cp37-cp37m-win32.whl", hash = "sha256:501ff052229cb79dd4c49c402f6cb03b5a40ae4771efc8bb2bfac9f6c3d3508f"}, + {file = "SQLAlchemy-2.0.31-cp37-cp37m-win_amd64.whl", hash = "sha256:597fec37c382a5442ffd471f66ce12d07d91b281fd474289356b1a0041bdf31d"}, + {file = "SQLAlchemy-2.0.31-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:dc6d69f8829712a4fd799d2ac8d79bdeff651c2301b081fd5d3fe697bd5b4ab9"}, + {file = "SQLAlchemy-2.0.31-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:23b9fbb2f5dd9e630db70fbe47d963c7779e9c81830869bd7d137c2dc1ad05fb"}, + {file = "SQLAlchemy-2.0.31-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2a21c97efcbb9f255d5c12a96ae14da873233597dfd00a3a0c4ce5b3e5e79704"}, + {file = "SQLAlchemy-2.0.31-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:26a6a9837589c42b16693cf7bf836f5d42218f44d198f9343dd71d3164ceeeac"}, + {file = "SQLAlchemy-2.0.31-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:dc251477eae03c20fae8db9c1c23ea2ebc47331bcd73927cdcaecd02af98d3c3"}, + {file = "SQLAlchemy-2.0.31-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:2fd17e3bb8058359fa61248c52c7b09a97cf3c820e54207a50af529876451808"}, + {file = "SQLAlchemy-2.0.31-cp38-cp38-win32.whl", hash = "sha256:c76c81c52e1e08f12f4b6a07af2b96b9b15ea67ccdd40ae17019f1c373faa227"}, + {file = "SQLAlchemy-2.0.31-cp38-cp38-win_amd64.whl", hash = "sha256:4b600e9a212ed59355813becbcf282cfda5c93678e15c25a0ef896b354423238"}, + {file = "SQLAlchemy-2.0.31-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5b6cf796d9fcc9b37011d3f9936189b3c8074a02a4ed0c0fbbc126772c31a6d4"}, + {file = "SQLAlchemy-2.0.31-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:78fe11dbe37d92667c2c6e74379f75746dc947ee505555a0197cfba9a6d4f1a4"}, + {file = "SQLAlchemy-2.0.31-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2fc47dc6185a83c8100b37acda27658fe4dbd33b7d5e7324111f6521008ab4fe"}, + {file = "SQLAlchemy-2.0.31-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8a41514c1a779e2aa9a19f67aaadeb5cbddf0b2b508843fcd7bafdf4c6864005"}, + {file = "SQLAlchemy-2.0.31-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:afb6dde6c11ea4525318e279cd93c8734b795ac8bb5dda0eedd9ebaca7fa23f1"}, + {file = "SQLAlchemy-2.0.31-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:3f9faef422cfbb8fd53716cd14ba95e2ef655400235c3dfad1b5f467ba179c8c"}, + {file = "SQLAlchemy-2.0.31-cp39-cp39-win32.whl", hash = "sha256:fc6b14e8602f59c6ba893980bea96571dd0ed83d8ebb9c4479d9ed5425d562e9"}, + {file = "SQLAlchemy-2.0.31-cp39-cp39-win_amd64.whl", hash = "sha256:3cb8a66b167b033ec72c3812ffc8441d4e9f5f78f5e31e54dcd4c90a4ca5bebc"}, + {file = "SQLAlchemy-2.0.31-py3-none-any.whl", hash = "sha256:69f3e3c08867a8e4856e92d7afb618b95cdee18e0bc1647b77599722c9a28911"}, + {file = "SQLAlchemy-2.0.31.tar.gz", hash = "sha256:b607489dd4a54de56984a0c7656247504bd5523d9d0ba799aef59d4add009484"}, ] [package.dependencies] -greenlet = {version = "!=0.4.17", markers = "platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\""} +greenlet = {version = "!=0.4.17", markers = "python_version < \"3.13\" and (platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\")"} 
typing-extensions = ">=4.6.0" [package.extras] From 34d08fde54fd00154bf7353644fa51493e3aab75 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 21 Jun 2024 13:07:39 +0200 Subject: [PATCH 59/68] Bump mypy-boto3-glue from 1.34.128 to 1.34.131 (#844) Bumps [mypy-boto3-glue](https://github.com/youtype/mypy_boto3_builder) from 1.34.128 to 1.34.131. - [Release notes](https://github.com/youtype/mypy_boto3_builder/releases) - [Commits](https://github.com/youtype/mypy_boto3_builder/commits) --- updated-dependencies: - dependency-name: mypy-boto3-glue dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/poetry.lock b/poetry.lock index cbc6bc1526..1914d1835d 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2490,13 +2490,13 @@ files = [ [[package]] name = "mypy-boto3-glue" -version = "1.34.128" -description = "Type annotations for boto3.Glue 1.34.128 service generated with mypy-boto3-builder 7.24.0" +version = "1.34.131" +description = "Type annotations for boto3.Glue 1.34.131 service generated with mypy-boto3-builder 7.24.0" optional = true python-versions = ">=3.8" files = [ - {file = "mypy_boto3_glue-1.34.128-py3-none-any.whl", hash = "sha256:34e8fffd2652590ef49789a52a076e5cd1165fc7073da3a0b99c4ec472a76b66"}, - {file = "mypy_boto3_glue-1.34.128.tar.gz", hash = "sha256:69b52458ade731c7691807d7b5af96e8f9a62d74cf614b5d9c77f906842e9d12"}, + {file = "mypy_boto3_glue-1.34.131-py3-none-any.whl", hash = "sha256:b1c6cc67749f991f95106454eaad9061057d1381b8d290ed8020d15005e15071"}, + {file = "mypy_boto3_glue-1.34.131.tar.gz", hash = "sha256:4a67430aa68456817d0c1afa52ff529e0d8eccf8478a322020b143cc4be04ef5"}, ] [package.dependencies] From 2182060836e2c39a73052ac4c8cf30141becd500 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 21 Jun 2024 14:29:48 +0200 Subject: [PATCH 60/68] Bump python-snappy from 0.7.1 to 0.7.2 (#843) Bumps [python-snappy](https://github.com/intake/python-snappy) from 0.7.1 to 0.7.2. - [Commits](https://github.com/intake/python-snappy/compare/0.7.1...0.7.2) --- updated-dependencies: - dependency-name: python-snappy dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/poetry.lock b/poetry.lock index 1914d1835d..085d6836fc 100644 --- a/poetry.lock +++ b/poetry.lock @@ -3304,13 +3304,13 @@ six = ">=1.5" [[package]] name = "python-snappy" -version = "0.7.1" +version = "0.7.2" description = "Python library for the snappy compression library from Google" optional = true python-versions = "*" files = [ - {file = "python-snappy-0.7.1.tar.gz", hash = "sha256:1bc29d36211d44bb9f04f3d7ccfbaeaebbc2f62b6d40f4fc4edd1fb16bc52c13"}, - {file = "python_snappy-0.7.1-py3-none-any.whl", hash = "sha256:7c9111be1ae1dcbf4ce32b752366d4a5d4f07898d517691c4003d41e04b03488"}, + {file = "python_snappy-0.7.2-py3-none-any.whl", hash = "sha256:b4b2c39142064925d5a554672a09de4188fc4f2b2494a2ecb35042930e129444"}, + {file = "python_snappy-0.7.2.tar.gz", hash = "sha256:04bf182f9d9f67b7a846dae2f1df36180ceeee8d3380e4b6799deff5272c4978"}, ] [package.dependencies] From b8c5bb77c5ea436aeced17676aa30d09c1224ed9 Mon Sep 17 00:00:00 2001 From: Sung Yun <107272191+syun64@users.noreply.github.com> Date: Fri, 21 Jun 2024 09:44:24 -0400 Subject: [PATCH 61/68] Support `Table.to_arrow_batch_reader` (#786) * _task_to_table to _task_to_record_batches * to_arrow_batches * tests * fix * fix * deletes * batch reader * merge main * adopt review feedback --- mkdocs/docs/api.md | 9 ++ pyiceberg/io/pyarrow.py | 155 ++++++++++++++++++++++++-------- pyiceberg/table/__init__.py | 18 ++++ tests/integration/test_reads.py | 126 ++++++++++++++++++++++++++ 4 files changed, 269 insertions(+), 39 deletions(-) diff --git a/mkdocs/docs/api.md b/mkdocs/docs/api.md index 6bbd9abea1..54f4a20c57 100644 --- a/mkdocs/docs/api.md +++ b/mkdocs/docs/api.md @@ -1003,6 +1003,15 @@ tpep_dropoff_datetime: [[2021-04-01 00:47:59.000000,...,2021-05-01 00:14:47.0000 This will only pull in the files that that might contain matching rows. 
+One can also return a PyArrow RecordBatchReader, if reading one record batch at a time is preferred: + +```python +table.scan( + row_filter=GreaterThanOrEqual("trip_distance", 10.0), + selected_fields=("VendorID", "tpep_pickup_datetime", "tpep_dropoff_datetime"), +).to_arrow_batch_reader() +``` + ### Pandas diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py index 935b78cece..e6490ae156 100644 --- a/pyiceberg/io/pyarrow.py +++ b/pyiceberg/io/pyarrow.py @@ -655,12 +655,12 @@ def _read_deletes(fs: FileSystem, data_file: DataFile) -> Dict[str, pa.ChunkedAr } -def _combine_positional_deletes(positional_deletes: List[pa.ChunkedArray], rows: int) -> pa.Array: +def _combine_positional_deletes(positional_deletes: List[pa.ChunkedArray], start_index: int, end_index: int) -> pa.Array: if len(positional_deletes) == 1: all_chunks = positional_deletes[0] else: all_chunks = pa.chunked_array(itertools.chain(*[arr.chunks for arr in positional_deletes])) - return np.setdiff1d(np.arange(rows), all_chunks, assume_unique=False) + return np.subtract(np.setdiff1d(np.arange(start_index, end_index), all_chunks, assume_unique=False), start_index) def pyarrow_to_schema(schema: pa.Schema, name_mapping: Optional[NameMapping] = None) -> Schema: @@ -995,7 +995,7 @@ def _field_id(self, field: pa.Field) -> int: return -1 -def _task_to_table( +def _task_to_record_batches( fs: FileSystem, task: FileScanTask, bound_row_filter: BooleanExpression, @@ -1003,9 +1003,8 @@ def _task_to_table( projected_field_ids: Set[int], positional_deletes: Optional[List[ChunkedArray]], case_sensitive: bool, - limit: Optional[int] = None, name_mapping: Optional[NameMapping] = None, -) -> Optional[pa.Table]: +) -> Iterator[pa.RecordBatch]: _, _, path = PyArrowFileIO.parse_location(task.file.file_path) arrow_format = ds.ParquetFileFormat(pre_buffer=True, buffer_size=(ONE_MEGABYTE * 8)) with fs.open_input_file(path) as fin: @@ -1035,36 +1034,39 @@ def _task_to_table( columns=[col.name for col in file_project_schema.columns], ) - if positional_deletes: - # Create the mask of indices that we're interested in - indices = _combine_positional_deletes(positional_deletes, fragment.count_rows()) - - if limit: - if pyarrow_filter is not None: - # In case of the filter, we don't exactly know how many rows - # we need to fetch upfront, can be optimized in the future: - # https://github.com/apache/arrow/issues/35301 - arrow_table = fragment_scanner.take(indices) - arrow_table = arrow_table.filter(pyarrow_filter) - arrow_table = arrow_table.slice(0, limit) - else: - arrow_table = fragment_scanner.take(indices[0:limit]) - else: - arrow_table = fragment_scanner.take(indices) + current_index = 0 + batches = fragment_scanner.to_batches() + for batch in batches: + if positional_deletes: + # Create the mask of indices that we're interested in + indices = _combine_positional_deletes(positional_deletes, current_index, current_index + len(batch)) + batch = batch.take(indices) # Apply the user filter if pyarrow_filter is not None: + # we need to switch back and forth between RecordBatch and Table + # as Expression filter isn't yet supported in RecordBatch + # https://github.com/apache/arrow/issues/39220 + arrow_table = pa.Table.from_batches([batch]) arrow_table = arrow_table.filter(pyarrow_filter) - else: - # If there are no deletes, we can just take the head - # and the user-filter is already applied - if limit: - arrow_table = fragment_scanner.head(limit) - else: - arrow_table = fragment_scanner.to_table() + batch = arrow_table.to_batches()[0] + yield 
to_requested_schema(projected_schema, file_project_schema, batch) + current_index += len(batch) - if len(arrow_table) < 1: - return None - return to_requested_schema(projected_schema, file_project_schema, arrow_table) + +def _task_to_table( + fs: FileSystem, + task: FileScanTask, + bound_row_filter: BooleanExpression, + projected_schema: Schema, + projected_field_ids: Set[int], + positional_deletes: Optional[List[ChunkedArray]], + case_sensitive: bool, + name_mapping: Optional[NameMapping] = None, +) -> pa.Table: + batches = _task_to_record_batches( + fs, task, bound_row_filter, projected_schema, projected_field_ids, positional_deletes, case_sensitive, name_mapping + ) + return pa.Table.from_batches(batches, schema=schema_to_pyarrow(projected_schema, include_field_ids=False)) def _read_all_delete_files(fs: FileSystem, tasks: Iterable[FileScanTask]) -> Dict[str, List[ChunkedArray]]: @@ -1143,7 +1145,6 @@ def project_table( projected_field_ids, deletes_per_file.get(task.file.file_path), case_sensitive, - limit, table_metadata.name_mapping(), ) for task in tasks @@ -1177,8 +1178,78 @@ def project_table( return result -def to_requested_schema(requested_schema: Schema, file_schema: Schema, table: pa.Table) -> pa.Table: - struct_array = visit_with_partner(requested_schema, table, ArrowProjectionVisitor(file_schema), ArrowAccessor(file_schema)) +def project_batches( + tasks: Iterable[FileScanTask], + table_metadata: TableMetadata, + io: FileIO, + row_filter: BooleanExpression, + projected_schema: Schema, + case_sensitive: bool = True, + limit: Optional[int] = None, +) -> Iterator[pa.RecordBatch]: + """Resolve the right columns based on the identifier. + + Args: + tasks (Iterable[FileScanTask]): A URI or a path to a local file. + table_metadata (TableMetadata): The table metadata of the table that's being queried + io (FileIO): A FileIO to open streams to the object store + row_filter (BooleanExpression): The expression for filtering rows. + projected_schema (Schema): The output schema. + case_sensitive (bool): Case sensitivity when looking up column names. + limit (Optional[int]): Limit the number of records. + + Raises: + ResolveError: When an incompatible query is done. 
+ """ + scheme, netloc, _ = PyArrowFileIO.parse_location(table_metadata.location) + if isinstance(io, PyArrowFileIO): + fs = io.fs_by_scheme(scheme, netloc) + else: + try: + from pyiceberg.io.fsspec import FsspecFileIO + + if isinstance(io, FsspecFileIO): + from pyarrow.fs import PyFileSystem + + fs = PyFileSystem(FSSpecHandler(io.get_fs(scheme))) + else: + raise ValueError(f"Expected PyArrowFileIO or FsspecFileIO, got: {io}") + except ModuleNotFoundError as e: + # When FsSpec is not installed + raise ValueError(f"Expected PyArrowFileIO or FsspecFileIO, got: {io}") from e + + bound_row_filter = bind(table_metadata.schema(), row_filter, case_sensitive=case_sensitive) + + projected_field_ids = { + id for id in projected_schema.field_ids if not isinstance(projected_schema.find_type(id), (MapType, ListType)) + }.union(extract_field_ids(bound_row_filter)) + + deletes_per_file = _read_all_delete_files(fs, tasks) + + total_row_count = 0 + + for task in tasks: + batches = _task_to_record_batches( + fs, + task, + bound_row_filter, + projected_schema, + projected_field_ids, + deletes_per_file.get(task.file.file_path), + case_sensitive, + table_metadata.name_mapping(), + ) + for batch in batches: + if limit is not None: + if total_row_count + len(batch) >= limit: + yield batch.slice(0, limit - total_row_count) + break + yield batch + total_row_count += len(batch) + + +def to_requested_schema(requested_schema: Schema, file_schema: Schema, batch: pa.RecordBatch) -> pa.RecordBatch: + struct_array = visit_with_partner(requested_schema, batch, ArrowProjectionVisitor(file_schema), ArrowAccessor(file_schema)) arrays = [] fields = [] @@ -1186,7 +1257,7 @@ def to_requested_schema(requested_schema: Schema, file_schema: Schema, table: pa array = struct_array.field(pos) arrays.append(array) fields.append(pa.field(field.name, array.type, field.optional)) - return pa.Table.from_arrays(arrays, schema=pa.schema(fields)) + return pa.RecordBatch.from_arrays(arrays, schema=pa.schema(fields)) class ArrowProjectionVisitor(SchemaWithPartnerVisitor[pa.Array, Optional[pa.Array]]): @@ -1293,8 +1364,10 @@ def field_partner(self, partner_struct: Optional[pa.Array], field_id: int, _: st if isinstance(partner_struct, pa.StructArray): return partner_struct.field(name) - elif isinstance(partner_struct, pa.Table): - return partner_struct.column(name).combine_chunks() + elif isinstance(partner_struct, pa.RecordBatch): + return partner_struct.column(name) + else: + raise ValueError(f"Cannot find {name} in expected partner_struct type {type(partner_struct)}") return None @@ -1831,7 +1904,7 @@ def write_file(io: FileIO, table_metadata: TableMetadata, tasks: Iterator[WriteT def write_parquet(task: WriteTask) -> DataFile: table_schema = task.schema - arrow_table = pa.Table.from_batches(task.record_batches) + # if schema needs to be transformed, use the transformed schema and adjust the arrow table accordingly # otherwise use the original schema if (sanitized_schema := sanitize_column_names(table_schema)) != table_schema: @@ -1839,7 +1912,11 @@ def write_parquet(task: WriteTask) -> DataFile: else: file_schema = table_schema - arrow_table = to_requested_schema(requested_schema=file_schema, file_schema=table_schema, table=arrow_table) + batches = [ + to_requested_schema(requested_schema=file_schema, file_schema=table_schema, batch=batch) + for batch in task.record_batches + ] + arrow_table = pa.Table.from_batches(batches) file_path = f'{table_metadata.location}/data/{task.generate_data_file_path("parquet")}' fo = io.new_output(file_path) 
with fo.create(overwrite=True) as fos: diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py index 9a10fc6bf5..c78e005cac 100644 --- a/pyiceberg/table/__init__.py +++ b/pyiceberg/table/__init__.py @@ -1878,6 +1878,24 @@ def to_arrow(self) -> pa.Table: limit=self.limit, ) + def to_arrow_batch_reader(self) -> pa.RecordBatchReader: + import pyarrow as pa + + from pyiceberg.io.pyarrow import project_batches, schema_to_pyarrow + + return pa.RecordBatchReader.from_batches( + schema_to_pyarrow(self.projection()), + project_batches( + self.plan_files(), + self.table_metadata, + self.io, + self.row_filter, + self.projection(), + case_sensitive=self.case_sensitive, + limit=self.limit, + ), + ) + def to_pandas(self, **kwargs: Any) -> pd.DataFrame: return self.to_arrow().to_pandas(**kwargs) diff --git a/tests/integration/test_reads.py b/tests/integration/test_reads.py index 80a6f18632..078abf406a 100644 --- a/tests/integration/test_reads.py +++ b/tests/integration/test_reads.py @@ -21,6 +21,7 @@ import uuid from urllib.parse import urlparse +import pyarrow as pa import pyarrow.parquet as pq import pytest from hive_metastore.ttypes import LockRequest, LockResponse, LockState, UnlockRequest @@ -174,6 +175,47 @@ def test_pyarrow_not_nan_count(catalog: Catalog) -> None: assert len(not_nan) == 2 +@pytest.mark.integration +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) +def test_pyarrow_batches_nan(catalog: Catalog) -> None: + table_test_null_nan = catalog.load_table("default.test_null_nan") + arrow_batch_reader = table_test_null_nan.scan( + row_filter=IsNaN("col_numeric"), selected_fields=("idx", "col_numeric") + ).to_arrow_batch_reader() + assert isinstance(arrow_batch_reader, pa.RecordBatchReader) + arrow_table = arrow_batch_reader.read_all() + assert len(arrow_table) == 1 + assert arrow_table["idx"][0].as_py() == 1 + assert math.isnan(arrow_table["col_numeric"][0].as_py()) + + +@pytest.mark.integration +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) +def test_pyarrow_batches_nan_rewritten(catalog: Catalog) -> None: + table_test_null_nan_rewritten = catalog.load_table("default.test_null_nan_rewritten") + arrow_batch_reader = table_test_null_nan_rewritten.scan( + row_filter=IsNaN("col_numeric"), selected_fields=("idx", "col_numeric") + ).to_arrow_batch_reader() + assert isinstance(arrow_batch_reader, pa.RecordBatchReader) + arrow_table = arrow_batch_reader.read_all() + assert len(arrow_table) == 1 + assert arrow_table["idx"][0].as_py() == 1 + assert math.isnan(arrow_table["col_numeric"][0].as_py()) + + +@pytest.mark.integration +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) +@pytest.mark.skip(reason="Fixing issues with NaN's: https://github.com/apache/arrow/issues/34162") +def test_pyarrow_batches_not_nan_count(catalog: Catalog) -> None: + table_test_null_nan = catalog.load_table("default.test_null_nan") + arrow_batch_reader = table_test_null_nan.scan( + row_filter=NotNaN("col_numeric"), selected_fields=("idx",) + ).to_arrow_batch_reader() + assert isinstance(arrow_batch_reader, pa.RecordBatchReader) + arrow_table = arrow_batch_reader.read_all() + assert len(arrow_table) == 2 + + @pytest.mark.integration @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_duckdb_nan(catalog: Catalog) 
-> None: @@ -354,6 +396,90 @@ def test_pyarrow_deletes_double(catalog: Catalog) -> None: assert arrow_table["number"].to_pylist() == [1, 2, 3, 4, 5, 7, 8, 10] +@pytest.mark.integration +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) +def test_pyarrow_batches_deletes(catalog: Catalog) -> None: + # number, letter + # (1, 'a'), + # (2, 'b'), + # (3, 'c'), + # (4, 'd'), + # (5, 'e'), + # (6, 'f'), + # (7, 'g'), + # (8, 'h'), + # (9, 'i'), <- deleted + # (10, 'j'), + # (11, 'k'), + # (12, 'l') + test_positional_mor_deletes = catalog.load_table("default.test_positional_mor_deletes") + arrow_table = test_positional_mor_deletes.scan().to_arrow_batch_reader().read_all() + assert arrow_table["number"].to_pylist() == [1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12] + + # Checking the filter + arrow_table = ( + test_positional_mor_deletes.scan(row_filter=And(GreaterThanOrEqual("letter", "e"), LessThan("letter", "k"))) + .to_arrow_batch_reader() + .read_all() + ) + assert arrow_table["number"].to_pylist() == [5, 6, 7, 8, 10] + + # Testing the combination of a filter and a limit + arrow_table = ( + test_positional_mor_deletes.scan(row_filter=And(GreaterThanOrEqual("letter", "e"), LessThan("letter", "k")), limit=1) + .to_arrow_batch_reader() + .read_all() + ) + assert arrow_table["number"].to_pylist() == [5] + + # Testing the slicing of indices + arrow_table = test_positional_mor_deletes.scan(limit=3).to_arrow_batch_reader().read_all() + assert arrow_table["number"].to_pylist() == [1, 2, 3] + + +@pytest.mark.integration +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) +def test_pyarrow_batches_deletes_double(catalog: Catalog) -> None: + # number, letter + # (1, 'a'), + # (2, 'b'), + # (3, 'c'), + # (4, 'd'), + # (5, 'e'), + # (6, 'f'), <- second delete + # (7, 'g'), + # (8, 'h'), + # (9, 'i'), <- first delete + # (10, 'j'), + # (11, 'k'), + # (12, 'l') + test_positional_mor_double_deletes = catalog.load_table("default.test_positional_mor_double_deletes") + arrow_table = test_positional_mor_double_deletes.scan().to_arrow_batch_reader().read_all() + assert arrow_table["number"].to_pylist() == [1, 2, 3, 4, 5, 7, 8, 10, 11, 12] + + # Checking the filter + arrow_table = ( + test_positional_mor_double_deletes.scan(row_filter=And(GreaterThanOrEqual("letter", "e"), LessThan("letter", "k"))) + .to_arrow_batch_reader() + .read_all() + ) + assert arrow_table["number"].to_pylist() == [5, 7, 8, 10] + + # Testing the combination of a filter and a limit + arrow_table = ( + test_positional_mor_double_deletes.scan( + row_filter=And(GreaterThanOrEqual("letter", "e"), LessThan("letter", "k")), limit=1 + ) + .to_arrow_batch_reader() + .read_all() + ) + assert arrow_table["number"].to_pylist() == [5] + + # Testing the slicing of indices + arrow_table = test_positional_mor_double_deletes.scan(limit=8).to_arrow_batch_reader().read_all() + assert arrow_table["number"].to_pylist() == [1, 2, 3, 4, 5, 7, 8, 10] + + @pytest.mark.integration @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_partitioned_tables(catalog: Catalog) -> None: From e581b402159564a3b7af1c4328f1c469cefcc8da Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Mon, 24 Jun 2024 07:20:31 +0200 Subject: [PATCH 62/68] Github: Add 0.6.1 to issue template (#841) * Github: Add 0.6.1 to issue template * Add to docs as a release step --- 
.github/ISSUE_TEMPLATE/iceberg_bug_report.yml | 3 ++- mkdocs/docs/how-to-release.md | 4 ++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/ISSUE_TEMPLATE/iceberg_bug_report.yml b/.github/ISSUE_TEMPLATE/iceberg_bug_report.yml index f907f681b1..c1e94674f1 100644 --- a/.github/ISSUE_TEMPLATE/iceberg_bug_report.yml +++ b/.github/ISSUE_TEMPLATE/iceberg_bug_report.yml @@ -9,7 +9,8 @@ body: description: What Apache Iceberg version are you using? multiple: false options: - - "0.6.0 (latest release)" + - "0.6.1 (latest release)" + - "0.6.0" - "0.5.0" - "0.4.0" - "0.3.0" diff --git a/mkdocs/docs/how-to-release.md b/mkdocs/docs/how-to-release.md index 2ecb635610..429af5a3b8 100644 --- a/mkdocs/docs/how-to-release.md +++ b/mkdocs/docs/how-to-release.md @@ -214,3 +214,7 @@ Thanks to everyone for contributing! ## Release the docs A committer triggers the [`Python Docs` Github Actions](https://github.com/apache/iceberg-python/actions/workflows/python-ci-docs.yml) through the UI by selecting the branch that just has been released. This will publish the new docs. + +## Update the Github template + +Make sure to create a PR to update the [GitHub issues template](https://github.com/apache/iceberg-python/blob/main/.github/ISSUE_TEMPLATE/iceberg_bug_report.yml) with the latest version. From 8cdf4abdc5e4779ff888c62041f027bb3309d4c5 Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Mon, 24 Jun 2024 08:47:35 +0200 Subject: [PATCH 63/68] =?UTF-8?q?=F0=9F=90=9B=20Write=20fields=20instead?= =?UTF-8?q?=20of=20spec=20object=20(#846)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pyiceberg/manifest.py | 46 ++++++++++++++++-------------------- tests/utils/test_manifest.py | 4 ++-- 2 files changed, 23 insertions(+), 27 deletions(-) diff --git a/pyiceberg/manifest.py b/pyiceberg/manifest.py index defe5958c5..bf5749ce9b 100644 --- a/pyiceberg/manifest.py +++ b/pyiceberg/manifest.py @@ -31,13 +31,15 @@ Type, ) +from pydantic_core import to_json + from pyiceberg.avro.file import AvroFile, AvroOutputFile from pyiceberg.conversions import to_bytes from pyiceberg.exceptions import ValidationError from pyiceberg.io import FileIO, InputFile, OutputFile from pyiceberg.partitioning import PartitionSpec from pyiceberg.schema import Schema -from pyiceberg.typedef import EMPTY_DICT, Record, TableVersion +from pyiceberg.typedef import Record, TableVersion from pyiceberg.types import ( BinaryType, BooleanType, @@ -645,7 +647,6 @@ class ManifestWriter(ABC): _output_file: OutputFile _writer: AvroOutputFile[ManifestEntry] _snapshot_id: int - _meta: Dict[str, str] _added_files: int _added_rows: int _existing_files: int @@ -655,15 +656,12 @@ class ManifestWriter(ABC): _min_data_sequence_number: Optional[int] _partitions: List[Record] - def __init__( - self, spec: PartitionSpec, schema: Schema, output_file: OutputFile, snapshot_id: int, meta: Dict[str, str] = EMPTY_DICT - ) -> None: + def __init__(self, spec: PartitionSpec, schema: Schema, output_file: OutputFile, snapshot_id: int) -> None: self.closed = False self._spec = spec self._schema = schema self._output_file = output_file self._snapshot_id = snapshot_id - self._meta = meta self._added_files = 0 self._added_rows = 0 @@ -697,6 +695,15 @@ def content(self) -> ManifestContent: ... @abstractmethod def version(self) -> TableVersion: ... 
+ @property + def _meta(self) -> Dict[str, str]: + return { + "schema": self._schema.model_dump_json(), + "partition-spec": to_json(self._spec.fields).decode("utf-8"), + "partition-spec-id": str(self._spec.spec_id), + "format-version": str(self.version), + } + def _with_partition(self, format_version: TableVersion) -> Schema: data_file_type = data_file_with_partition( format_version=format_version, partition_type=self._spec.partition_type(self._schema) @@ -771,12 +778,6 @@ def __init__(self, spec: PartitionSpec, schema: Schema, output_file: OutputFile, schema, output_file, snapshot_id, - { - "schema": schema.model_dump_json(), - "partition-spec": spec.model_dump_json(), - "partition-spec-id": str(spec.spec_id), - "format-version": "1", - }, ) def content(self) -> ManifestContent: @@ -792,19 +793,7 @@ def prepare_entry(self, entry: ManifestEntry) -> ManifestEntry: class ManifestWriterV2(ManifestWriter): def __init__(self, spec: PartitionSpec, schema: Schema, output_file: OutputFile, snapshot_id: int): - super().__init__( - spec, - schema, - output_file, - snapshot_id, - meta={ - "schema": schema.model_dump_json(), - "partition-spec": spec.model_dump_json(), - "partition-spec-id": str(spec.spec_id), - "format-version": "2", - "content": "data", - }, - ) + super().__init__(spec, schema, output_file, snapshot_id) def content(self) -> ManifestContent: return ManifestContent.DATA @@ -813,6 +802,13 @@ def content(self) -> ManifestContent: def version(self) -> TableVersion: return 2 + @property + def _meta(self) -> Dict[str, str]: + return { + **super()._meta, + "content": "data", + } + def prepare_entry(self, entry: ManifestEntry) -> ManifestEntry: if entry.data_sequence_number is None: if entry.snapshot_id is not None and entry.snapshot_id != self._snapshot_id: diff --git a/tests/utils/test_manifest.py b/tests/utils/test_manifest.py index 8bb03cd80e..a812b384fc 100644 --- a/tests/utils/test_manifest.py +++ b/tests/utils/test_manifest.py @@ -348,8 +348,8 @@ def test_write_manifest( expected_metadata = { "schema": test_schema.model_dump_json(), - "partition-spec": test_spec.model_dump_json(), - "partition-spec-id": str(test_spec.spec_id), + "partition-spec": """[{"source-id":1,"field-id":1,"transform":"identity","name":"VendorID"},{"source-id":2,"field-id":2,"transform":"identity","name":"tpep_pickup_datetime"}]""", + "partition-spec-id": str(demo_manifest_file.partition_spec_id), "format-version": str(format_version), } _verify_metadata_with_fastavro( From a6cd0cf325b87b360077bad1d79262611ea64424 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 25 Jun 2024 11:12:59 +0200 Subject: [PATCH 64/68] Bump tenacity from 8.4.1 to 8.4.2 (#852) Bumps [tenacity](https://github.com/jd/tenacity) from 8.4.1 to 8.4.2. - [Release notes](https://github.com/jd/tenacity/releases) - [Commits](https://github.com/jd/tenacity/compare/8.4.1...8.4.2) --- updated-dependencies: - dependency-name: tenacity dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/poetry.lock b/poetry.lock index 085d6836fc..6b8d1ab03f 100644 --- a/poetry.lock +++ b/poetry.lock @@ -4019,13 +4019,13 @@ mpmath = ">=0.19" [[package]] name = "tenacity" -version = "8.4.1" +version = "8.4.2" description = "Retry code until it succeeds" optional = false python-versions = ">=3.8" files = [ - {file = "tenacity-8.4.1-py3-none-any.whl", hash = "sha256:28522e692eda3e1b8f5e99c51464efcc0b9fc86933da92415168bc1c4e2308fa"}, - {file = "tenacity-8.4.1.tar.gz", hash = "sha256:54b1412b878ddf7e1f1577cd49527bad8cdef32421bd599beac0c6c3f10582fd"}, + {file = "tenacity-8.4.2-py3-none-any.whl", hash = "sha256:9e6f7cf7da729125c7437222f8a522279751cdfbe6b67bfe64f75d3a348661b2"}, + {file = "tenacity-8.4.2.tar.gz", hash = "sha256:cd80a53a79336edba8489e767f729e4f391c896956b57140b5d7511a64bbd3ef"}, ] [package.extras] From 9cb3cd5f1192b3cc008138aea5eff02550c65146 Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Wed, 26 Jun 2024 12:06:23 -0700 Subject: [PATCH 65/68] Metadata Log Entries metadata table (#667) --- mkdocs/docs/api.md | 23 ++++++++++++++ pyiceberg/table/__init__.py | 34 +++++++++++++++++++++ pyiceberg/table/metadata.py | 7 +++++ pyiceberg/table/snapshots.py | 4 ++- tests/integration/test_inspect_table.py | 40 +++++++++++++++++++++++++ tests/table/test_snapshots.py | 1 + 6 files changed, 108 insertions(+), 1 deletion(-) diff --git a/mkdocs/docs/api.md b/mkdocs/docs/api.md index 54f4a20c57..14c7259504 100644 --- a/mkdocs/docs/api.md +++ b/mkdocs/docs/api.md @@ -656,6 +656,29 @@ partition_summaries: [[ -- is_valid: all not null ["test"]]] ``` +### Metadata Log Entries + +To show table metadata log entries: + +```python +table.inspect.metadata_log_entries() +``` + +``` +pyarrow.Table +timestamp: timestamp[ms] not null +file: string not null +latest_snapshot_id: int64 +latest_schema_id: int32 +latest_sequence_number: int64 +---- +timestamp: [[2024-04-28 17:03:00.214,2024-04-28 17:03:00.352,2024-04-28 17:03:00.445,2024-04-28 17:03:00.498]] +file: [["s3://warehouse/default/table_metadata_log_entries/metadata/00000-0b3b643b-0f3a-4787-83ad-601ba57b7319.metadata.json","s3://warehouse/default/table_metadata_log_entries/metadata/00001-f74e4b2c-0f89-4f55-822d-23d099fd7d54.metadata.json","s3://warehouse/default/table_metadata_log_entries/metadata/00002-97e31507-e4d9-4438-aff1-3c0c5304d271.metadata.json","s3://warehouse/default/table_metadata_log_entries/metadata/00003-6c8b7033-6ad8-4fe4-b64d-d70381aeaddc.metadata.json"]] +latest_snapshot_id: [[null,3958871664825505738,1289234307021405706,7640277914614648349]] +latest_schema_id: [[null,0,0,0]] +latest_sequence_number: [[null,0,0,0]] +``` + ## Add Files Expert Iceberg users may choose to commit existing parquet files to the Iceberg table as data files, without rewriting them. 
diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py index c78e005cac..dced94de9e 100644 --- a/pyiceberg/table/__init__.py +++ b/pyiceberg/table/__init__.py @@ -3845,6 +3845,40 @@ def _partition_summaries_to_rows( schema=manifest_schema, ) + def metadata_log_entries(self) -> "pa.Table": + import pyarrow as pa + + from pyiceberg.table.snapshots import MetadataLogEntry + + table_schema = pa.schema([ + pa.field("timestamp", pa.timestamp(unit="ms"), nullable=False), + pa.field("file", pa.string(), nullable=False), + pa.field("latest_snapshot_id", pa.int64(), nullable=True), + pa.field("latest_schema_id", pa.int32(), nullable=True), + pa.field("latest_sequence_number", pa.int64(), nullable=True), + ]) + + def metadata_log_entry_to_row(metadata_entry: MetadataLogEntry) -> Dict[str, Any]: + latest_snapshot = self.tbl.snapshot_as_of_timestamp(metadata_entry.timestamp_ms) + return { + "timestamp": metadata_entry.timestamp_ms, + "file": metadata_entry.metadata_file, + "latest_snapshot_id": latest_snapshot.snapshot_id if latest_snapshot else None, + "latest_schema_id": latest_snapshot.schema_id if latest_snapshot else None, + "latest_sequence_number": latest_snapshot.sequence_number if latest_snapshot else None, + } + + # similar to MetadataLogEntriesTable in Java + # https://github.com/apache/iceberg/blob/8a70fe0ff5f241aec8856f8091c77fdce35ad256/core/src/main/java/org/apache/iceberg/MetadataLogEntriesTable.java#L62-L66 + metadata_log_entries = self.tbl.metadata.metadata_log + [ + MetadataLogEntry(metadata_file=self.tbl.metadata_location, timestamp_ms=self.tbl.metadata.last_updated_ms) + ] + + return pa.Table.from_pylist( + [metadata_log_entry_to_row(entry) for entry in metadata_log_entries], + schema=table_schema, + ) + @dataclass(frozen=True) class TablePartition: diff --git a/pyiceberg/table/metadata.py b/pyiceberg/table/metadata.py index 8c3c389318..1fea33010c 100644 --- a/pyiceberg/table/metadata.py +++ b/pyiceberg/table/metadata.py @@ -311,6 +311,13 @@ def serialize_current_snapshot_id(self, current_snapshot_id: Optional[int]) -> O return -1 return current_snapshot_id + @field_serializer("snapshots") + def serialize_snapshots(self, snapshots: List[Snapshot]) -> List[Snapshot]: + # Snapshot field `sequence-number` should not be written for v1 metadata + if self.format_version == 1: + return [snapshot.model_copy(update={"sequence_number": None}) for snapshot in snapshots] + return snapshots + def _generate_snapshot_id() -> int: """Generate a new Snapshot ID from a UUID. diff --git a/pyiceberg/table/snapshots.py b/pyiceberg/table/snapshots.py index d6a3ff1654..842d42522a 100644 --- a/pyiceberg/table/snapshots.py +++ b/pyiceberg/table/snapshots.py @@ -58,6 +58,8 @@ CHANGED_PARTITION_PREFIX = "partitions." OPERATION = "operation" +INITIAL_SEQUENCE_NUMBER = 0 + class Operation(Enum): """Describes the operation. 
@@ -231,7 +233,7 @@ def __eq__(self, other: Any) -> bool: class Snapshot(IcebergBaseModel): snapshot_id: int = Field(alias="snapshot-id") parent_snapshot_id: Optional[int] = Field(alias="parent-snapshot-id", default=None) - sequence_number: Optional[int] = Field(alias="sequence-number", default=None) + sequence_number: Optional[int] = Field(alias="sequence-number", default=INITIAL_SEQUENCE_NUMBER) timestamp_ms: int = Field(alias="timestamp-ms", default_factory=lambda: int(time.time() * 1000)) manifest_list: Optional[str] = Field( alias="manifest-list", description="Location of the snapshot's manifest list file", default=None diff --git a/tests/integration/test_inspect_table.py b/tests/integration/test_inspect_table.py index 1f2b9a3ead..2840fb0b16 100644 --- a/tests/integration/test_inspect_table.py +++ b/tests/integration/test_inspect_table.py @@ -528,3 +528,43 @@ def test_inspect_manifests(spark: SparkSession, session_catalog: Catalog, format for column in df.column_names: for left, right in zip(lhs[column].to_list(), rhs[column].to_list()): assert left == right, f"Difference in column {column}: {left} != {right}" + + +@pytest.mark.integration +@pytest.mark.parametrize("format_version", [1, 2]) +def test_inspect_metadata_log_entries( + spark: SparkSession, session_catalog: Catalog, arrow_table_with_null: pa.Table, format_version: int +) -> None: + from pandas.testing import assert_frame_equal + + identifier = "default.table_metadata_log_entries" + try: + session_catalog.drop_table(identifier=identifier) + except NoSuchTableError: + pass + + tbl = _create_table(session_catalog, identifier, properties={"format-version": format_version}) + + # Write some data + tbl.append(arrow_table_with_null) + tbl.append(arrow_table_with_null) + tbl.append(arrow_table_with_null) + + df = tbl.inspect.metadata_log_entries() + spark_df = spark.sql(f"SELECT * FROM {identifier}.metadata_log_entries") + lhs = df.to_pandas() + rhs = spark_df.toPandas() + + # Timestamp in the last row of `metadata_log_entries` table is based on when the table was read + # Therefore, the timestamp of the last row for pyiceberg dataframe and spark dataframe will be different + left_before_last, left_last = lhs[:-1], lhs[-1:] + right_before_last, right_last = rhs[:-1], rhs[-1:] + + # compare all rows except for the last row + assert_frame_equal(left_before_last, right_before_last, check_dtype=False) + # compare the last row, except for the timestamp + for column in df.column_names: + for left, right in zip(left_last[column], right_last[column]): + if column == "timestamp": + continue + assert left == right, f"Difference in column {column}: {left} != {right}" diff --git a/tests/table/test_snapshots.py b/tests/table/test_snapshots.py index 2569a11dc2..fa3464052a 100644 --- a/tests/table/test_snapshots.py +++ b/tests/table/test_snapshots.py @@ -77,6 +77,7 @@ def test_serialize_snapshot_without_sequence_number() -> None: snapshot = Snapshot( snapshot_id=25, parent_snapshot_id=19, + sequence_number=None, timestamp_ms=1602638573590, manifest_list="s3:/a/b/c.avro", summary=Summary(Operation.APPEND), From 132208a95c05183ed2bbd11386178a0bf5bb12a5 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 26 Jun 2024 22:28:25 +0200 Subject: [PATCH 66/68] Bump coverage from 7.5.3 to 7.5.4 (#854) Bumps [coverage](https://github.com/nedbat/coveragepy) from 7.5.3 to 7.5.4. 
- [Release notes](https://github.com/nedbat/coveragepy/releases) - [Changelog](https://github.com/nedbat/coveragepy/blob/master/CHANGES.rst) - [Commits](https://github.com/nedbat/coveragepy/compare/7.5.3...7.5.4) --- updated-dependencies: - dependency-name: coverage dependency-type: direct:development update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 106 ++++++++++++++++++++++++++-------------------------- 1 file changed, 53 insertions(+), 53 deletions(-) diff --git a/poetry.lock b/poetry.lock index 6b8d1ab03f..db148196d8 100644 --- a/poetry.lock +++ b/poetry.lock @@ -652,63 +652,63 @@ files = [ [[package]] name = "coverage" -version = "7.5.3" +version = "7.5.4" description = "Code coverage measurement for Python" optional = false python-versions = ">=3.8" files = [ - {file = "coverage-7.5.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a6519d917abb15e12380406d721e37613e2a67d166f9fb7e5a8ce0375744cd45"}, - {file = "coverage-7.5.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:aea7da970f1feccf48be7335f8b2ca64baf9b589d79e05b9397a06696ce1a1ec"}, - {file = "coverage-7.5.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:923b7b1c717bd0f0f92d862d1ff51d9b2b55dbbd133e05680204465f454bb286"}, - {file = "coverage-7.5.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:62bda40da1e68898186f274f832ef3e759ce929da9a9fd9fcf265956de269dbc"}, - {file = "coverage-7.5.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d8b7339180d00de83e930358223c617cc343dd08e1aa5ec7b06c3a121aec4e1d"}, - {file = "coverage-7.5.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:25a5caf742c6195e08002d3b6c2dd6947e50efc5fc2c2205f61ecb47592d2d83"}, - {file = "coverage-7.5.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:05ac5f60faa0c704c0f7e6a5cbfd6f02101ed05e0aee4d2822637a9e672c998d"}, - {file = "coverage-7.5.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:239a4e75e09c2b12ea478d28815acf83334d32e722e7433471fbf641c606344c"}, - {file = "coverage-7.5.3-cp310-cp310-win32.whl", hash = "sha256:a5812840d1d00eafae6585aba38021f90a705a25b8216ec7f66aebe5b619fb84"}, - {file = "coverage-7.5.3-cp310-cp310-win_amd64.whl", hash = "sha256:33ca90a0eb29225f195e30684ba4a6db05dbef03c2ccd50b9077714c48153cac"}, - {file = "coverage-7.5.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f81bc26d609bf0fbc622c7122ba6307993c83c795d2d6f6f6fd8c000a770d974"}, - {file = "coverage-7.5.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7cec2af81f9e7569280822be68bd57e51b86d42e59ea30d10ebdbb22d2cb7232"}, - {file = "coverage-7.5.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:55f689f846661e3f26efa535071775d0483388a1ccfab899df72924805e9e7cd"}, - {file = "coverage-7.5.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:50084d3516aa263791198913a17354bd1dc627d3c1639209640b9cac3fef5807"}, - {file = "coverage-7.5.3-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:341dd8f61c26337c37988345ca5c8ccabeff33093a26953a1ac72e7d0103c4fb"}, - {file = "coverage-7.5.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ab0b028165eea880af12f66086694768f2c3139b2c31ad5e032c8edbafca6ffc"}, - {file = 
"coverage-7.5.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:5bc5a8c87714b0c67cfeb4c7caa82b2d71e8864d1a46aa990b5588fa953673b8"}, - {file = "coverage-7.5.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:38a3b98dae8a7c9057bd91fbf3415c05e700a5114c5f1b5b0ea5f8f429ba6614"}, - {file = "coverage-7.5.3-cp311-cp311-win32.whl", hash = "sha256:fcf7d1d6f5da887ca04302db8e0e0cf56ce9a5e05f202720e49b3e8157ddb9a9"}, - {file = "coverage-7.5.3-cp311-cp311-win_amd64.whl", hash = "sha256:8c836309931839cca658a78a888dab9676b5c988d0dd34ca247f5f3e679f4e7a"}, - {file = "coverage-7.5.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:296a7d9bbc598e8744c00f7a6cecf1da9b30ae9ad51c566291ff1314e6cbbed8"}, - {file = "coverage-7.5.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:34d6d21d8795a97b14d503dcaf74226ae51eb1f2bd41015d3ef332a24d0a17b3"}, - {file = "coverage-7.5.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e317953bb4c074c06c798a11dbdd2cf9979dbcaa8ccc0fa4701d80042d4ebf1"}, - {file = "coverage-7.5.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:705f3d7c2b098c40f5b81790a5fedb274113373d4d1a69e65f8b68b0cc26f6db"}, - {file = "coverage-7.5.3-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b1196e13c45e327d6cd0b6e471530a1882f1017eb83c6229fc613cd1a11b53cd"}, - {file = "coverage-7.5.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:015eddc5ccd5364dcb902eaecf9515636806fa1e0d5bef5769d06d0f31b54523"}, - {file = "coverage-7.5.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:fd27d8b49e574e50caa65196d908f80e4dff64d7e592d0c59788b45aad7e8b35"}, - {file = "coverage-7.5.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:33fc65740267222fc02975c061eb7167185fef4cc8f2770267ee8bf7d6a42f84"}, - {file = "coverage-7.5.3-cp312-cp312-win32.whl", hash = "sha256:7b2a19e13dfb5c8e145c7a6ea959485ee8e2204699903c88c7d25283584bfc08"}, - {file = "coverage-7.5.3-cp312-cp312-win_amd64.whl", hash = "sha256:0bbddc54bbacfc09b3edaec644d4ac90c08ee8ed4844b0f86227dcda2d428fcb"}, - {file = "coverage-7.5.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f78300789a708ac1f17e134593f577407d52d0417305435b134805c4fb135adb"}, - {file = "coverage-7.5.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b368e1aee1b9b75757942d44d7598dcd22a9dbb126affcbba82d15917f0cc155"}, - {file = "coverage-7.5.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f836c174c3a7f639bded48ec913f348c4761cbf49de4a20a956d3431a7c9cb24"}, - {file = "coverage-7.5.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:244f509f126dc71369393ce5fea17c0592c40ee44e607b6d855e9c4ac57aac98"}, - {file = "coverage-7.5.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c4c2872b3c91f9baa836147ca33650dc5c172e9273c808c3c3199c75490e709d"}, - {file = "coverage-7.5.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:dd4b3355b01273a56b20c219e74e7549e14370b31a4ffe42706a8cda91f19f6d"}, - {file = "coverage-7.5.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:f542287b1489c7a860d43a7d8883e27ca62ab84ca53c965d11dac1d3a1fab7ce"}, - {file = "coverage-7.5.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:75e3f4e86804023e991096b29e147e635f5e2568f77883a1e6eed74512659ab0"}, - {file = "coverage-7.5.3-cp38-cp38-win32.whl", hash = 
"sha256:c59d2ad092dc0551d9f79d9d44d005c945ba95832a6798f98f9216ede3d5f485"}, - {file = "coverage-7.5.3-cp38-cp38-win_amd64.whl", hash = "sha256:fa21a04112c59ad54f69d80e376f7f9d0f5f9123ab87ecd18fbb9ec3a2beed56"}, - {file = "coverage-7.5.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f5102a92855d518b0996eb197772f5ac2a527c0ec617124ad5242a3af5e25f85"}, - {file = "coverage-7.5.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d1da0a2e3b37b745a2b2a678a4c796462cf753aebf94edcc87dcc6b8641eae31"}, - {file = "coverage-7.5.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8383a6c8cefba1b7cecc0149415046b6fc38836295bc4c84e820872eb5478b3d"}, - {file = "coverage-7.5.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9aad68c3f2566dfae84bf46295a79e79d904e1c21ccfc66de88cd446f8686341"}, - {file = "coverage-7.5.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2e079c9ec772fedbade9d7ebc36202a1d9ef7291bc9b3a024ca395c4d52853d7"}, - {file = "coverage-7.5.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:bde997cac85fcac227b27d4fb2c7608a2c5f6558469b0eb704c5726ae49e1c52"}, - {file = "coverage-7.5.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:990fb20b32990b2ce2c5f974c3e738c9358b2735bc05075d50a6f36721b8f303"}, - {file = "coverage-7.5.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:3d5a67f0da401e105753d474369ab034c7bae51a4c31c77d94030d59e41df5bd"}, - {file = "coverage-7.5.3-cp39-cp39-win32.whl", hash = "sha256:e08c470c2eb01977d221fd87495b44867a56d4d594f43739a8028f8646a51e0d"}, - {file = "coverage-7.5.3-cp39-cp39-win_amd64.whl", hash = "sha256:1d2a830ade66d3563bb61d1e3c77c8def97b30ed91e166c67d0632c018f380f0"}, - {file = "coverage-7.5.3-pp38.pp39.pp310-none-any.whl", hash = "sha256:3538d8fb1ee9bdd2e2692b3b18c22bb1c19ffbefd06880f5ac496e42d7bb3884"}, - {file = "coverage-7.5.3.tar.gz", hash = "sha256:04aefca5190d1dc7a53a4c1a5a7f8568811306d7a8ee231c42fb69215571944f"}, + {file = "coverage-7.5.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6cfb5a4f556bb51aba274588200a46e4dd6b505fb1a5f8c5ae408222eb416f99"}, + {file = "coverage-7.5.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2174e7c23e0a454ffe12267a10732c273243b4f2d50d07544a91198f05c48f47"}, + {file = "coverage-7.5.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2214ee920787d85db1b6a0bd9da5f8503ccc8fcd5814d90796c2f2493a2f4d2e"}, + {file = "coverage-7.5.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1137f46adb28e3813dec8c01fefadcb8c614f33576f672962e323b5128d9a68d"}, + {file = "coverage-7.5.4-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b385d49609f8e9efc885790a5a0e89f2e3ae042cdf12958b6034cc442de428d3"}, + {file = "coverage-7.5.4-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:b4a474f799456e0eb46d78ab07303286a84a3140e9700b9e154cfebc8f527016"}, + {file = "coverage-7.5.4-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:5cd64adedf3be66f8ccee418473c2916492d53cbafbfcff851cbec5a8454b136"}, + {file = "coverage-7.5.4-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:e564c2cf45d2f44a9da56f4e3a26b2236504a496eb4cb0ca7221cd4cc7a9aca9"}, + {file = "coverage-7.5.4-cp310-cp310-win32.whl", hash = "sha256:7076b4b3a5f6d2b5d7f1185fde25b1e54eb66e647a1dfef0e2c2bfaf9b4c88c8"}, + {file = "coverage-7.5.4-cp310-cp310-win_amd64.whl", hash = 
"sha256:018a12985185038a5b2bcafab04ab833a9a0f2c59995b3cec07e10074c78635f"}, + {file = "coverage-7.5.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:db14f552ac38f10758ad14dd7b983dbab424e731588d300c7db25b6f89e335b5"}, + {file = "coverage-7.5.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3257fdd8e574805f27bb5342b77bc65578e98cbc004a92232106344053f319ba"}, + {file = "coverage-7.5.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3a6612c99081d8d6134005b1354191e103ec9705d7ba2754e848211ac8cacc6b"}, + {file = "coverage-7.5.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d45d3cbd94159c468b9b8c5a556e3f6b81a8d1af2a92b77320e887c3e7a5d080"}, + {file = "coverage-7.5.4-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ed550e7442f278af76d9d65af48069f1fb84c9f745ae249c1a183c1e9d1b025c"}, + {file = "coverage-7.5.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:7a892be37ca35eb5019ec85402c3371b0f7cda5ab5056023a7f13da0961e60da"}, + {file = "coverage-7.5.4-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:8192794d120167e2a64721d88dbd688584675e86e15d0569599257566dec9bf0"}, + {file = "coverage-7.5.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:820bc841faa502e727a48311948e0461132a9c8baa42f6b2b84a29ced24cc078"}, + {file = "coverage-7.5.4-cp311-cp311-win32.whl", hash = "sha256:6aae5cce399a0f065da65c7bb1e8abd5c7a3043da9dceb429ebe1b289bc07806"}, + {file = "coverage-7.5.4-cp311-cp311-win_amd64.whl", hash = "sha256:d2e344d6adc8ef81c5a233d3a57b3c7d5181f40e79e05e1c143da143ccb6377d"}, + {file = "coverage-7.5.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:54317c2b806354cbb2dc7ac27e2b93f97096912cc16b18289c5d4e44fc663233"}, + {file = "coverage-7.5.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:042183de01f8b6d531e10c197f7f0315a61e8d805ab29c5f7b51a01d62782747"}, + {file = "coverage-7.5.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a6bb74ed465d5fb204b2ec41d79bcd28afccf817de721e8a807d5141c3426638"}, + {file = "coverage-7.5.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b3d45ff86efb129c599a3b287ae2e44c1e281ae0f9a9bad0edc202179bcc3a2e"}, + {file = "coverage-7.5.4-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5013ed890dc917cef2c9f765c4c6a8ae9df983cd60dbb635df8ed9f4ebc9f555"}, + {file = "coverage-7.5.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1014fbf665fef86cdfd6cb5b7371496ce35e4d2a00cda501cf9f5b9e6fced69f"}, + {file = "coverage-7.5.4-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:3684bc2ff328f935981847082ba4fdc950d58906a40eafa93510d1b54c08a66c"}, + {file = "coverage-7.5.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:581ea96f92bf71a5ec0974001f900db495488434a6928a2ca7f01eee20c23805"}, + {file = "coverage-7.5.4-cp312-cp312-win32.whl", hash = "sha256:73ca8fbc5bc622e54627314c1a6f1dfdd8db69788f3443e752c215f29fa87a0b"}, + {file = "coverage-7.5.4-cp312-cp312-win_amd64.whl", hash = "sha256:cef4649ec906ea7ea5e9e796e68b987f83fa9a718514fe147f538cfeda76d7a7"}, + {file = "coverage-7.5.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:cdd31315fc20868c194130de9ee6bfd99755cc9565edff98ecc12585b90be882"}, + {file = "coverage-7.5.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:02ff6e898197cc1e9fa375581382b72498eb2e6d5fc0b53f03e496cfee3fac6d"}, + {file = 
"coverage-7.5.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d05c16cf4b4c2fc880cb12ba4c9b526e9e5d5bb1d81313d4d732a5b9fe2b9d53"}, + {file = "coverage-7.5.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c5986ee7ea0795a4095ac4d113cbb3448601efca7f158ec7f7087a6c705304e4"}, + {file = "coverage-7.5.4-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5df54843b88901fdc2f598ac06737f03d71168fd1175728054c8f5a2739ac3e4"}, + {file = "coverage-7.5.4-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:ab73b35e8d109bffbda9a3e91c64e29fe26e03e49addf5b43d85fc426dde11f9"}, + {file = "coverage-7.5.4-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:aea072a941b033813f5e4814541fc265a5c12ed9720daef11ca516aeacd3bd7f"}, + {file = "coverage-7.5.4-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:16852febd96acd953b0d55fc842ce2dac1710f26729b31c80b940b9afcd9896f"}, + {file = "coverage-7.5.4-cp38-cp38-win32.whl", hash = "sha256:8f894208794b164e6bd4bba61fc98bf6b06be4d390cf2daacfa6eca0a6d2bb4f"}, + {file = "coverage-7.5.4-cp38-cp38-win_amd64.whl", hash = "sha256:e2afe743289273209c992075a5a4913e8d007d569a406ffed0bd080ea02b0633"}, + {file = "coverage-7.5.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b95c3a8cb0463ba9f77383d0fa8c9194cf91f64445a63fc26fb2327e1e1eb088"}, + {file = "coverage-7.5.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3d7564cc09dd91b5a6001754a5b3c6ecc4aba6323baf33a12bd751036c998be4"}, + {file = "coverage-7.5.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:44da56a2589b684813f86d07597fdf8a9c6ce77f58976727329272f5a01f99f7"}, + {file = "coverage-7.5.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e16f3d6b491c48c5ae726308e6ab1e18ee830b4cdd6913f2d7f77354b33f91c8"}, + {file = "coverage-7.5.4-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dbc5958cb471e5a5af41b0ddaea96a37e74ed289535e8deca404811f6cb0bc3d"}, + {file = "coverage-7.5.4-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:a04e990a2a41740b02d6182b498ee9796cf60eefe40cf859b016650147908029"}, + {file = "coverage-7.5.4-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:ddbd2f9713a79e8e7242d7c51f1929611e991d855f414ca9996c20e44a895f7c"}, + {file = "coverage-7.5.4-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:b1ccf5e728ccf83acd313c89f07c22d70d6c375a9c6f339233dcf792094bcbf7"}, + {file = "coverage-7.5.4-cp39-cp39-win32.whl", hash = "sha256:56b4eafa21c6c175b3ede004ca12c653a88b6f922494b023aeb1e836df953ace"}, + {file = "coverage-7.5.4-cp39-cp39-win_amd64.whl", hash = "sha256:65e528e2e921ba8fd67d9055e6b9f9e34b21ebd6768ae1c1723f4ea6ace1234d"}, + {file = "coverage-7.5.4-pp38.pp39.pp310-none-any.whl", hash = "sha256:79b356f3dd5b26f3ad23b35c75dbdaf1f9e2450b6bcefc6d0825ea0aa3f86ca5"}, + {file = "coverage-7.5.4.tar.gz", hash = "sha256:a44963520b069e12789d0faea4e9fdb1e410cdc4aab89d94f7f55cbb7fef0353"}, ] [package.dependencies] From 4049971aab4b69d4e9c7204fa871b7a8df84f804 Mon Sep 17 00:00:00 2001 From: edson duarte Date: Wed, 26 Jun 2024 18:35:20 -0300 Subject: [PATCH 67/68] Add mkdocs toc config section (#858) --- mkdocs/mkdocs.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mkdocs/mkdocs.yml b/mkdocs/mkdocs.yml index 90892ac73b..679aff2578 100644 --- a/mkdocs/mkdocs.yml +++ b/mkdocs/mkdocs.yml @@ -53,8 +53,11 @@ theme: toggle: icon: 
material/brightness-4 name: Switch to light mode + markdown_extensions: - admonition - pymdownx.highlight: anchor_linenums: true - pymdownx.superfences + - toc: + permalink: true From 0e381fa28bb7c18dc6ba5dc46bedf55d77057e7f Mon Sep 17 00:00:00 2001 From: Andre Luis Anastacio Date: Wed, 26 Jun 2024 18:37:39 -0300 Subject: [PATCH 68/68] Add history inspect table (#828) --- mkdocs/docs/api.md | 21 ++++++++ pyiceberg/table/__init__.py | 28 ++++++++++ tests/integration/test_inspect_table.py | 69 +++++++++++++++++++++++++ 3 files changed, 118 insertions(+) diff --git a/mkdocs/docs/api.md b/mkdocs/docs/api.md index 14c7259504..6da2fc3a8b 100644 --- a/mkdocs/docs/api.md +++ b/mkdocs/docs/api.md @@ -679,6 +679,27 @@ latest_schema_id: [[null,0,0,0]] latest_sequence_number: [[null,0,0,0]] ``` +### History + +To show a table's history: + +```python +table.inspect.history() +``` + +``` +pyarrow.Table +made_current_at: timestamp[ms] not null +snapshot_id: int64 not null +parent_id: int64 +is_current_ancestor: bool not null +---- +made_current_at: [[2024-06-18 16:17:48.768,2024-06-18 16:17:49.240,2024-06-18 16:17:49.343,2024-06-18 16:17:49.511]] +snapshot_id: [[4358109269873137077,3380769165026943338,4358109269873137077,3089420140651211776]] +parent_id: [[null,4358109269873137077,null,4358109269873137077]] +is_current_ancestor: [[true,false,true,true]] +``` + ## Add Files Expert Iceberg users may choose to commit existing parquet files to the Iceberg table as data files, without rewriting them. diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py index dced94de9e..8c1493974b 100644 --- a/pyiceberg/table/__init__.py +++ b/pyiceberg/table/__init__.py @@ -113,6 +113,7 @@ SnapshotLogEntry, SnapshotSummaryCollector, Summary, + ancestors_of, update_snapshot_summaries, ) from pyiceberg.table.sorting import UNSORTED_SORT_ORDER, SortOrder @@ -3879,6 +3880,33 @@ def metadata_log_entry_to_row(metadata_entry: MetadataLogEntry) -> Dict[str, Any schema=table_schema, ) + def history(self) -> "pa.Table": + import pyarrow as pa + + history_schema = pa.schema([ + pa.field("made_current_at", pa.timestamp(unit="ms"), nullable=False), + pa.field("snapshot_id", pa.int64(), nullable=False), + pa.field("parent_id", pa.int64(), nullable=True), + pa.field("is_current_ancestor", pa.bool_(), nullable=False), + ]) + + ancestors_ids = {snapshot.snapshot_id for snapshot in ancestors_of(self.tbl.current_snapshot(), self.tbl.metadata)} + + history = [] + metadata = self.tbl.metadata + + for snapshot_entry in metadata.snapshot_log: + snapshot = metadata.snapshot_by_id(snapshot_entry.snapshot_id) + + history.append({ + "made_current_at": datetime.utcfromtimestamp(snapshot_entry.timestamp_ms / 1000.0), + "snapshot_id": snapshot_entry.snapshot_id, + "parent_id": snapshot.parent_snapshot_id if snapshot else None, + "is_current_ancestor": snapshot_entry.snapshot_id in ancestors_ids, + }) + + return pa.Table.from_pylist(history, schema=history_schema) + @dataclass(frozen=True) class TablePartition: diff --git a/tests/integration/test_inspect_table.py b/tests/integration/test_inspect_table.py index 2840fb0b16..8414fba333 100644 --- a/tests/integration/test_inspect_table.py +++ b/tests/integration/test_inspect_table.py @@ -568,3 +568,72 @@ def test_inspect_metadata_log_entries( if column == "timestamp": continue assert left == right, f"Difference in column {column}: {left} != {right}" + + +@pytest.mark.integration +@pytest.mark.parametrize("format_version", [1, 2]) +def test_inspect_history(spark: SparkSession, 
session_catalog: Catalog, format_version: int) -> None: + identifier = "default.table_history" + + try: + session_catalog.drop_table(identifier=identifier) + except NoSuchTableError: + pass + + spark.sql( + f""" + CREATE TABLE {identifier} ( + id int, + data string + ) + PARTITIONED BY (data) + """ + ) + + spark.sql( + f""" + INSERT INTO {identifier} VALUES (1, "a") + """ + ) + + table = session_catalog.load_table(identifier) + first_snapshot = table.current_snapshot() + snapshot_id = None if not first_snapshot else first_snapshot.snapshot_id + + spark.sql( + f""" + INSERT INTO {identifier} VALUES (2, "b") + """ + ) + + spark.sql( + f""" + CALL integration.system.rollback_to_snapshot('{identifier}', {snapshot_id}) + """ + ) + + spark.sql( + f""" + INSERT INTO {identifier} VALUES (3, "c") + """ + ) + + table.refresh() + + df = table.inspect.history() + + assert df.column_names == [ + "made_current_at", + "snapshot_id", + "parent_id", + "is_current_ancestor", + ] + + lhs = spark.table(f"{identifier}.history").toPandas() + rhs = df.to_pandas() + for column in df.column_names: + for left, right in zip(lhs[column].to_list(), rhs[column].to_list()): + if isinstance(left, float) and math.isnan(left) and isinstance(right, float) and math.isnan(right): + # NaN != NaN in Python + continue + assert left == right, f"Difference in column {column}: {left} != {right}"