diff --git a/python/pyarrow/parquet/__init__.py b/python/pyarrow/parquet/__init__.py index 867babdaf81..f616b04e1d2 100644 --- a/python/pyarrow/parquet/__init__.py +++ b/python/pyarrow/parquet/__init__.py @@ -1059,7 +1059,7 @@ def __init__(self, path, open_file_func=partial(open, mode='rb'), warnings.warn( "ParquetDatasetPiece is deprecated as of pyarrow 5.0.0 and will " "be removed in a future version.", - DeprecationWarning, stacklevel=2) + FutureWarning, stacklevel=2) self._init( path, open_file_func, file_options, row_group, partition_keys) @@ -1692,7 +1692,7 @@ def __init__(self, path_or_paths, filesystem=None, schema=None, "Specifying the 'metadata_nthreads' argument is deprecated as " "of pyarrow 8.0.0, and the argument will be removed in a " "future version", - DeprecationWarning, stacklevel=2, + FutureWarning, stacklevel=2, ) else: metadata_nthreads = 1 @@ -1742,7 +1742,7 @@ def __init__(self, path_or_paths, filesystem=None, schema=None, "specify it in combination with 'use_legacy_dataet=False', " "but in that case you need to specify a pyarrow.Schema " "instead of a ParquetSchema.", - DeprecationWarning, stacklevel=2) + FutureWarning, stacklevel=2) self._schema = schema self.split_row_groups = split_row_groups @@ -1953,7 +1953,7 @@ def pieces(self): " Specify 'use_legacy_dataset=False' while constructing the " "ParquetDataset, and then use the '.fragments' attribute " "instead."), - DeprecationWarning, stacklevel=2) + FutureWarning, stacklevel=2) return self._pieces @property @@ -1967,7 +1967,7 @@ def partitions(self): " Specify 'use_legacy_dataset=False' while constructing the " "ParquetDataset, and then use the '.partitioning' attribute " "instead."), - DeprecationWarning, stacklevel=2) + FutureWarning, stacklevel=2) return self._partitions @property @@ -1979,7 +1979,7 @@ def schema(self): "ParquetDataset, and then use the '.schema' attribute " "instead (which will return an Arrow schema instead of a " "Parquet schema)."), - DeprecationWarning, stacklevel=2) + FutureWarning, stacklevel=2) return self._schema @property @@ -1989,7 +1989,7 @@ def memory_map(self): """ warnings.warn( _DEPR_MSG.format("ParquetDataset.memory_map", ""), - DeprecationWarning, stacklevel=2) + FutureWarning, stacklevel=2) return self._metadata.memory_map @property @@ -1999,7 +1999,7 @@ def read_dictionary(self): """ warnings.warn( _DEPR_MSG.format("ParquetDataset.read_dictionary", ""), - DeprecationWarning, stacklevel=2) + FutureWarning, stacklevel=2) return self._metadata.read_dictionary @property @@ -2009,7 +2009,7 @@ def buffer_size(self): """ warnings.warn( _DEPR_MSG.format("ParquetDataset.buffer_size", ""), - DeprecationWarning, stacklevel=2) + FutureWarning, stacklevel=2) return self._metadata.buffer_size _fs = property( @@ -2027,7 +2027,7 @@ def fs(self): " Specify 'use_legacy_dataset=False' while constructing the " "ParquetDataset, and then use the '.filesystem' attribute " "instead."), - DeprecationWarning, stacklevel=2) + FutureWarning, stacklevel=2) return self._metadata.fs _common_metadata = property( @@ -2041,7 +2041,7 @@ def common_metadata(self): """ warnings.warn( _DEPR_MSG.format("ParquetDataset.common_metadata", ""), - DeprecationWarning, stacklevel=2) + FutureWarning, stacklevel=2) return self._metadata.common_metadata @property @@ -2453,7 +2453,7 @@ def pieces(self): warnings.warn( _DEPR_MSG.format("ParquetDataset.pieces", " Use the '.fragments' attribute instead"), - DeprecationWarning, stacklevel=2) + FutureWarning, stacklevel=2) return list(self._dataset.get_fragments()) @property @@ -2744,7 +2744,7 @@ def read_table(source, columns=None, use_threads=True, metadata=None, "Passing 'use_legacy_dataset=True' to get the legacy behaviour is " "deprecated as of pyarrow 8.0.0, and the legacy implementation will " "be removed in a future version.", - DeprecationWarning, stacklevel=2) + FutureWarning, stacklevel=2) if ignore_prefixes is not None: raise ValueError( diff --git a/python/pyarrow/tests/parquet/common.py b/python/pyarrow/tests/parquet/common.py index 551db4a8fe6..b11e412e63d 100644 --- a/python/pyarrow/tests/parquet/common.py +++ b/python/pyarrow/tests/parquet/common.py @@ -23,14 +23,25 @@ import pyarrow as pa from pyarrow.tests import util +legacy_filter_mark = pytest.mark.filterwarnings( + "ignore:Passing 'use_legacy:FutureWarning" +) + parametrize_legacy_dataset = pytest.mark.parametrize( "use_legacy_dataset", - [True, pytest.param(False, marks=pytest.mark.dataset)]) + [pytest.param(True, marks=legacy_filter_mark), + pytest.param(False, marks=pytest.mark.dataset)] +) parametrize_legacy_dataset_not_supported = pytest.mark.parametrize( - "use_legacy_dataset", [True, pytest.param(False, marks=pytest.mark.skip)]) + "use_legacy_dataset", + [pytest.param(True, marks=legacy_filter_mark), + pytest.param(False, marks=pytest.mark.skip)] +) parametrize_legacy_dataset_fixed = pytest.mark.parametrize( - "use_legacy_dataset", [pytest.param(True, marks=pytest.mark.xfail), - pytest.param(False, marks=pytest.mark.dataset)]) + "use_legacy_dataset", + [pytest.param(True, marks=[pytest.mark.xfail, legacy_filter_mark]), + pytest.param(False, marks=pytest.mark.dataset)] +) # Marks all of the tests in this module # Ignore these with pytest ... -m 'not parquet' @@ -58,7 +69,7 @@ def _read_table(*args, **kwargs): def _roundtrip_table(table, read_table_kwargs=None, - write_table_kwargs=None, use_legacy_dataset=True): + write_table_kwargs=None, use_legacy_dataset=False): read_table_kwargs = read_table_kwargs or {} write_table_kwargs = write_table_kwargs or {} @@ -70,7 +81,7 @@ def _roundtrip_table(table, read_table_kwargs=None, def _check_roundtrip(table, expected=None, read_table_kwargs=None, - use_legacy_dataset=True, **write_table_kwargs): + use_legacy_dataset=False, **write_table_kwargs): if expected is None: expected = table @@ -87,7 +98,7 @@ def _check_roundtrip(table, expected=None, read_table_kwargs=None, assert result.equals(expected) -def _roundtrip_pandas_dataframe(df, write_kwargs, use_legacy_dataset=True): +def _roundtrip_pandas_dataframe(df, write_kwargs, use_legacy_dataset=False): table = pa.Table.from_pandas(df) result = _roundtrip_table( table, write_table_kwargs=write_kwargs, diff --git a/python/pyarrow/tests/parquet/test_basic.py b/python/pyarrow/tests/parquet/test_basic.py index e82e3a36df2..edcfb0dc4cc 100644 --- a/python/pyarrow/tests/parquet/test_basic.py +++ b/python/pyarrow/tests/parquet/test_basic.py @@ -795,6 +795,6 @@ def test_read_table_legacy_deprecated(tempdir): pq.write_table(table, path) with pytest.warns( - DeprecationWarning, match="Passing 'use_legacy_dataset=True'" + FutureWarning, match="Passing 'use_legacy_dataset=True'" ): pq.read_table(path, use_legacy_dataset=True) diff --git a/python/pyarrow/tests/parquet/test_dataset.py b/python/pyarrow/tests/parquet/test_dataset.py index 6326743113f..6477132dcd3 100644 --- a/python/pyarrow/tests/parquet/test_dataset.py +++ b/python/pyarrow/tests/parquet/test_dataset.py @@ -55,7 +55,7 @@ def test_parquet_piece_read(tempdir): path = tempdir / 'parquet_piece_read.parquet' _write_table(table, path, version='2.6') - with pytest.warns(DeprecationWarning): + with pytest.warns(FutureWarning): piece1 = pq.ParquetDatasetPiece(path) result = piece1.read() @@ -70,7 +70,7 @@ def test_parquet_piece_open_and_get_metadata(tempdir): path = tempdir / 'parquet_piece_read.parquet' _write_table(table, path, version='2.6') - with pytest.warns(DeprecationWarning): + with pytest.warns(FutureWarning): piece = pq.ParquetDatasetPiece(path) table1 = piece.read() assert isinstance(table1, pa.Table) @@ -80,7 +80,7 @@ def test_parquet_piece_open_and_get_metadata(tempdir): assert table.equals(table1) -@pytest.mark.filterwarnings("ignore:ParquetDatasetPiece:DeprecationWarning") +@pytest.mark.filterwarnings("ignore:ParquetDatasetPiece:FutureWarning") def test_parquet_piece_basics(): path = '/baz.parq' @@ -140,7 +140,7 @@ def test_read_partitioned_directory(tempdir, use_legacy_dataset): _partition_test_for_filesystem(fs, tempdir, use_legacy_dataset) -@pytest.mark.filterwarnings("ignore:'ParquetDataset:DeprecationWarning") +@pytest.mark.filterwarnings("ignore:'ParquetDataset:FutureWarning") @pytest.mark.pandas def test_create_parquet_dataset_multi_threaded(tempdir): fs = LocalFileSystem._get_instance() @@ -151,7 +151,7 @@ def test_create_parquet_dataset_multi_threaded(tempdir): manifest = pq.ParquetManifest(base_path, filesystem=fs, metadata_nthreads=1) with pytest.warns( - DeprecationWarning, match="Specifying the 'metadata_nthreads'" + FutureWarning, match="Specifying the 'metadata_nthreads'" ): dataset = pq.ParquetDataset( base_path, filesystem=fs, metadata_nthreads=16) @@ -801,14 +801,14 @@ def _test_read_common_metadata_files(fs, base_path): @pytest.mark.pandas -@pytest.mark.filterwarnings("ignore:'ParquetDataset.schema:DeprecationWarning") +@pytest.mark.filterwarnings("ignore:'ParquetDataset.schema:FutureWarning") def test_read_common_metadata_files(tempdir): fs = LocalFileSystem._get_instance() _test_read_common_metadata_files(fs, tempdir) @pytest.mark.pandas -@pytest.mark.filterwarnings("ignore:'ParquetDataset.schema:DeprecationWarning") +@pytest.mark.filterwarnings("ignore:'ParquetDataset.schema:FutureWarning") def test_read_metadata_files(tempdir): fs = LocalFileSystem._get_instance() @@ -922,7 +922,7 @@ def read_multiple_files(paths, columns=None, use_threads=True, **kwargs): result2 = read_multiple_files(paths, metadata=metadata) assert result2.equals(expected) - with pytest.warns(DeprecationWarning, match="Specifying the 'schema'"): + with pytest.warns(FutureWarning, match="Specifying the 'schema'"): result3 = pq.ParquetDataset(dirpath, schema=metadata.schema).read() assert result3.equals(expected) else: @@ -968,7 +968,7 @@ def read_multiple_files(paths, columns=None, use_threads=True, **kwargs): mixed_paths = [bad_apple_path, paths[0]] with pytest.raises(ValueError): - with pytest.warns(DeprecationWarning, match="Specifying the 'schema'"): + with pytest.warns(FutureWarning, match="Specifying the 'schema'"): read_multiple_files(mixed_paths, schema=bad_meta.schema) with pytest.raises(ValueError): @@ -1014,7 +1014,7 @@ def test_dataset_read_pandas(tempdir, use_legacy_dataset): tm.assert_frame_equal(result.reindex(columns=expected.columns), expected) -@pytest.mark.filterwarnings("ignore:'ParquetDataset:DeprecationWarning") +@pytest.mark.filterwarnings("ignore:'ParquetDataset:FutureWarning") @pytest.mark.pandas @parametrize_legacy_dataset def test_dataset_memory_map(tempdir, use_legacy_dataset): @@ -1217,7 +1217,7 @@ def test_empty_directory(tempdir, use_legacy_dataset): assert result.num_columns == 0 -@pytest.mark.filterwarnings("ignore:'ParquetDataset.schema:DeprecationWarning") +@pytest.mark.filterwarnings("ignore:'ParquetDataset.schema:FutureWarning") def _test_write_to_dataset_with_partitions(base_path, use_legacy_dataset=True, filesystem=None, @@ -1259,7 +1259,7 @@ def _test_write_to_dataset_with_partitions(base_path, use_legacy_dataset=use_legacy_dataset) # ARROW-2209: Ensure the dataset schema also includes the partition columns if use_legacy_dataset: - with pytest.warns(DeprecationWarning, match="'ParquetDataset.schema'"): + with pytest.warns(FutureWarning, match="'ParquetDataset.schema'"): dataset_cols = set(dataset.schema.to_arrow_schema().names) else: # NB schema property is an arrow and not parquet schema @@ -1409,7 +1409,7 @@ def test_write_to_dataset_no_partitions_s3fs( path, use_legacy_dataset, filesystem=fs) -@pytest.mark.filterwarnings("ignore:'ParquetDataset:DeprecationWarning") +@pytest.mark.filterwarnings("ignore:'ParquetDataset:FutureWarning") @pytest.mark.pandas @parametrize_legacy_dataset_not_supported def test_write_to_dataset_with_partitions_and_custom_filenames( @@ -1569,6 +1569,7 @@ def test_dataset_read_dictionary(tempdir, use_legacy_dataset): @pytest.mark.dataset @pytest.mark.pandas +@pytest.mark.filterwarnings("ignore:Passing 'use_legacy:FutureWarning") def test_read_table_schema(tempdir): # test that schema keyword is passed through in read_table table = pa.table({'a': pa.array([1, 2, 3], pa.int32())}) @@ -1622,6 +1623,7 @@ def test_dataset_unsupported_keywords(): @pytest.mark.dataset +@pytest.mark.filterwarnings("ignore:Passing 'use_legacy:FutureWarning") def test_dataset_partitioning(tempdir): import pyarrow.dataset as ds @@ -1669,7 +1671,7 @@ def test_parquet_dataset_new_filesystem(tempdir): assert result.equals(table) -@pytest.mark.filterwarnings("ignore:'ParquetDataset:DeprecationWarning") +@pytest.mark.filterwarnings("ignore:'ParquetDataset:FutureWarning") def test_parquet_dataset_partitions_piece_path_with_fsspec(tempdir): # ARROW-10462 ensure that on Windows we properly use posix-style paths # as used by fsspec @@ -1693,30 +1695,33 @@ def test_parquet_dataset_deprecated_properties(tempdir): pq.write_table(table, path) dataset = pq.ParquetDataset(path) - with pytest.warns(DeprecationWarning, match="'ParquetDataset.pieces"): + with pytest.warns(FutureWarning, match="'ParquetDataset.pieces"): dataset.pieces - with pytest.warns(DeprecationWarning, match="'ParquetDataset.partitions"): + with pytest.warns(FutureWarning, match="'ParquetDataset.partitions"): dataset.partitions - with pytest.warns(DeprecationWarning, match="'ParquetDataset.memory_map"): + with pytest.warns(FutureWarning, match="'ParquetDataset.memory_map"): dataset.memory_map - with pytest.warns(DeprecationWarning, match="'ParquetDataset.read_dictio"): + with pytest.warns(FutureWarning, match="'ParquetDataset.read_dictio"): dataset.read_dictionary - with pytest.warns(DeprecationWarning, match="'ParquetDataset.buffer_size"): + with pytest.warns(FutureWarning, match="'ParquetDataset.buffer_size"): dataset.buffer_size - with pytest.warns(DeprecationWarning, match="'ParquetDataset.fs"): + with pytest.warns(FutureWarning, match="'ParquetDataset.fs"): dataset.fs - with pytest.warns(DeprecationWarning, match="'ParquetDataset.schema'"): + with pytest.warns(FutureWarning, match="'ParquetDataset.schema'"): dataset.schema + with pytest.warns(FutureWarning, match="'ParquetDataset.common_metadata'"): + dataset.common_metadata + dataset2 = pq.ParquetDataset(path, use_legacy_dataset=False) - with pytest.warns(DeprecationWarning, match="'ParquetDataset.pieces"): + with pytest.warns(FutureWarning, match="'ParquetDataset.pieces"): dataset2.pieces