diff --git a/tests/integration/test_writes/test_writes.py b/tests/integration/test_writes/test_writes.py
index 2cf2c9ef5c..8bebc53d92 100644
--- a/tests/integration/test_writes/test_writes.py
+++ b/tests/integration/test_writes/test_writes.py
@@ -36,7 +36,11 @@
 from pyiceberg.catalog.hive import HiveCatalog
 from pyiceberg.catalog.sql import SqlCatalog
 from pyiceberg.exceptions import NoSuchTableError
+from pyiceberg.partitioning import PartitionField, PartitionSpec
+from pyiceberg.schema import Schema
 from pyiceberg.table import TableProperties, _dataframe_to_data_files
+from pyiceberg.transforms import IdentityTransform
+from pyiceberg.types import IntegerType, NestedField
 from tests.conftest import TEST_DATA_WITH_NULL
 from utils import _create_table
 
@@ -807,3 +811,25 @@ def test_hive_catalog_storage_descriptor(
     assert len(tbl.scan().to_arrow()) == 3
     # check if spark can read the table
     assert spark.sql("SELECT * FROM hive.default.test_storage_descriptor").count() == 3
+
+
+@pytest.mark.integration
+@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')])
+def test_sanitize_character_partitioned(catalog: Catalog) -> None:
+    table_name = "default.test_table_partitioned_sanitized_character"
+    try:
+        catalog.drop_table(table_name)
+    except NoSuchTableError:
+        pass
+
+    tbl = _create_table(
+        session_catalog=catalog,
+        identifier=table_name,
+        schema=Schema(NestedField(field_id=1, name="some.id", type=IntegerType(), required=True)),
+        partition_spec=PartitionSpec(
+            PartitionField(source_id=1, field_id=1000, name="some.id_identity", transform=IdentityTransform())
+        ),
+        data=[pa.Table.from_arrays([range(22)], schema=pa.schema([pa.field("some.id", pa.int32(), nullable=False)]))],
+    )
+
+    assert len(tbl.scan().to_arrow()) == 22
diff --git a/tests/integration/test_writes/utils.py b/tests/integration/test_writes/utils.py
index 742b1e14fc..9f1f6df043 100644
--- a/tests/integration/test_writes/utils.py
+++ b/tests/integration/test_writes/utils.py
@@ -21,10 +21,10 @@
 
 from pyiceberg.catalog import Catalog
 from pyiceberg.exceptions import NoSuchTableError
-from pyiceberg.partitioning import PartitionSpec
+from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionSpec
 from pyiceberg.schema import Schema
 from pyiceberg.table import Table
-from pyiceberg.typedef import Properties
+from pyiceberg.typedef import EMPTY_DICT, Properties
 from pyiceberg.types import (
     BinaryType,
     BooleanType,
@@ -62,9 +62,9 @@
 def _create_table(
     session_catalog: Catalog,
     identifier: str,
-    properties: Properties,
+    properties: Properties = EMPTY_DICT,
     data: Optional[List[pa.Table]] = None,
-    partition_spec: Optional[PartitionSpec] = None,
+    partition_spec: PartitionSpec = UNPARTITIONED_PARTITION_SPEC,
     schema: Union[Schema, "pa.Schema"] = TABLE_SCHEMA,
 ) -> Table:
     try:
@@ -72,14 +72,9 @@ def _create_table(
     except NoSuchTableError:
         pass
 
-    if partition_spec:
-        tbl = session_catalog.create_table(
-            identifier=identifier, schema=schema, properties=properties, partition_spec=partition_spec
-        )
-    else:
-        tbl = session_catalog.create_table(identifier=identifier, schema=schema, properties=properties)
+    tbl = session_catalog.create_table(identifier=identifier, schema=schema, properties=properties, partition_spec=partition_spec)
 
-    if data:
+    if data is not None:
         for d in data:
             tbl.append(d)
 