From c6c32360aaab038c3a27912dcc7fb8dfb2c56745 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Tue, 10 Jun 2025 15:07:15 -0500 Subject: [PATCH] default is true --- python/pyarrow/parquet/core.py | 12 ++++++------ python/pyarrow/tests/parquet/test_data_types.py | 7 +++++-- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/python/pyarrow/parquet/core.py b/python/pyarrow/parquet/core.py index a84fd5e8b7a..e98a8133954 100644 --- a/python/pyarrow/parquet/core.py +++ b/python/pyarrow/parquet/core.py @@ -261,7 +261,7 @@ class ParquetFile: it will be parsed as an URI to determine the filesystem. page_checksum_verification : bool, default False If True, verify the checksum for each page read from the file. - arrow_extensions_enabled : bool, default False + arrow_extensions_enabled : bool, default True If True, read Parquet logical types as Arrow extension types where possible, (e.g., read JSON as the canonical `arrow.json` extension type or UUID as the canonical `arrow.uuid` extension type). @@ -314,7 +314,7 @@ def __init__(self, source, *, metadata=None, common_metadata=None, coerce_int96_timestamp_unit=None, decryption_properties=None, thrift_string_size_limit=None, thrift_container_size_limit=None, filesystem=None, - page_checksum_verification=False, arrow_extensions_enabled=False): + page_checksum_verification=False, arrow_extensions_enabled=True): self._close_source = getattr(source, 'closed', True) @@ -1321,7 +1321,7 @@ class ParquetDataset: sufficient for most Parquet files. page_checksum_verification : bool, default False If True, verify the page checksum for each page read from the file. -arrow_extensions_enabled : bool, default False +arrow_extensions_enabled : bool, default True If True, read Parquet logical types as Arrow extension types where possible, (e.g., read JSON as the canonical `arrow.json` extension type or UUID as the canonical `arrow.uuid` extension type). @@ -1339,7 +1339,7 @@ def __init__(self, path_or_paths, filesystem=None, schema=None, *, filters=None, decryption_properties=None, thrift_string_size_limit=None, thrift_container_size_limit=None, page_checksum_verification=False, - arrow_extensions_enabled=False): + arrow_extensions_enabled=True): import pyarrow.dataset as ds # map format arguments @@ -1739,7 +1739,7 @@ def partitioning(self): sufficient for most Parquet files. page_checksum_verification : bool, default False If True, verify the checksum for each page read from the file. -arrow_extensions_enabled : bool, default False +arrow_extensions_enabled : bool, default True If True, read Parquet logical types as Arrow extension types where possible, (e.g., read JSON as the canonical `arrow.json` extension type or UUID as the canonical `arrow.uuid` extension type). @@ -1839,7 +1839,7 @@ def read_table(source, *, columns=None, use_threads=True, decryption_properties=None, thrift_string_size_limit=None, thrift_container_size_limit=None, page_checksum_verification=False, - arrow_extensions_enabled=False): + arrow_extensions_enabled=True): try: dataset = ParquetDataset( diff --git a/python/pyarrow/tests/parquet/test_data_types.py b/python/pyarrow/tests/parquet/test_data_types.py index 351221f64df..c546bc1532a 100644 --- a/python/pyarrow/tests/parquet/test_data_types.py +++ b/python/pyarrow/tests/parquet/test_data_types.py @@ -569,6 +569,7 @@ def test_json_extension_type(storage_type): _check_roundtrip( table, pa.table({"ext": pa.array(data, pa.string())}), + {"arrow_extensions_enabled": False}, store_schema=False) # With arrow_extensions_enabled=True on read, we get a arrow.json back @@ -576,7 +577,7 @@ def test_json_extension_type(storage_type): _check_roundtrip( table, pa.table({"ext": pa.array(data, pa.json_(pa.string()))}), - read_table_kwargs={"arrow_extensions_enabled": True}, + {"arrow_extensions_enabled": True}, store_schema=False) @@ -594,11 +595,13 @@ def test_uuid_extension_type(): _check_roundtrip( table, pa.table({"ext": pa.array(data, pa.binary(16))}), + {"arrow_extensions_enabled": False}, store_schema=False) _check_roundtrip( table, table, - {"arrow_extensions_enabled": True}, store_schema=False) + {"arrow_extensions_enabled": True}, + store_schema=False) def test_undefined_logical_type(parquet_test_datadir):