python/pyarrow/parquet/__init__.py (13 additions & 13 deletions)
@@ -1059,7 +1059,7 @@ def __init__(self, path, open_file_func=partial(open, mode='rb'),
         warnings.warn(
             "ParquetDatasetPiece is deprecated as of pyarrow 5.0.0 and will "
             "be removed in a future version.",
-            DeprecationWarning, stacklevel=2)
+            FutureWarning, stacklevel=2)
         self._init(
             path, open_file_func, file_options, row_group, partition_keys)

@@ -1692,7 +1692,7 @@ def __init__(self, path_or_paths, filesystem=None, schema=None,
                 "Specifying the 'metadata_nthreads' argument is deprecated as "
                 "of pyarrow 8.0.0, and the argument will be removed in a "
                 "future version",
-                DeprecationWarning, stacklevel=2,
+                FutureWarning, stacklevel=2,
             )
         else:
             metadata_nthreads = 1
@@ -1742,7 +1742,7 @@ def __init__(self, path_or_paths, filesystem=None, schema=None,
                 "specify it in combination with 'use_legacy_dataet=False', "
                 "but in that case you need to specify a pyarrow.Schema "
                 "instead of a ParquetSchema.",
-                DeprecationWarning, stacklevel=2)
+                FutureWarning, stacklevel=2)
         self._schema = schema

         self.split_row_groups = split_row_groups
@@ -1953,7 +1953,7 @@ def pieces(self):
                 " Specify 'use_legacy_dataset=False' while constructing the "
                 "ParquetDataset, and then use the '.fragments' attribute "
                 "instead."),
-            DeprecationWarning, stacklevel=2)
+            FutureWarning, stacklevel=2)
         return self._pieces

     @property
@@ -1967,7 +1967,7 @@ def partitions(self):
                 " Specify 'use_legacy_dataset=False' while constructing the "
                 "ParquetDataset, and then use the '.partitioning' attribute "
                 "instead."),
-            DeprecationWarning, stacklevel=2)
+            FutureWarning, stacklevel=2)
         return self._partitions

     @property
@@ -1979,7 +1979,7 @@ def schema(self):
                 "ParquetDataset, and then use the '.schema' attribute "
                 "instead (which will return an Arrow schema instead of a "
                 "Parquet schema)."),
-            DeprecationWarning, stacklevel=2)
+            FutureWarning, stacklevel=2)
         return self._schema

     @property
@@ -1989,7 +1989,7 @@ def memory_map(self):
         """
         warnings.warn(
             _DEPR_MSG.format("ParquetDataset.memory_map", ""),
-            DeprecationWarning, stacklevel=2)
+            FutureWarning, stacklevel=2)
         return self._metadata.memory_map

     @property
@@ -1999,7 +1999,7 @@ def read_dictionary(self):
         """
         warnings.warn(
             _DEPR_MSG.format("ParquetDataset.read_dictionary", ""),
-            DeprecationWarning, stacklevel=2)
+            FutureWarning, stacklevel=2)
         return self._metadata.read_dictionary

     @property
@@ -2009,7 +2009,7 @@ def buffer_size(self):
         """
         warnings.warn(
             _DEPR_MSG.format("ParquetDataset.buffer_size", ""),
-            DeprecationWarning, stacklevel=2)
+            FutureWarning, stacklevel=2)
         return self._metadata.buffer_size

     _fs = property(
@@ -2027,7 +2027,7 @@ def fs(self):
                 " Specify 'use_legacy_dataset=False' while constructing the "
                 "ParquetDataset, and then use the '.filesystem' attribute "
                 "instead."),
-            DeprecationWarning, stacklevel=2)
+            FutureWarning, stacklevel=2)
         return self._metadata.fs

     _common_metadata = property(
@@ -2041,7 +2041,7 @@ def common_metadata(self):
         """
         warnings.warn(
             _DEPR_MSG.format("ParquetDataset.common_metadata", ""),
-            DeprecationWarning, stacklevel=2)
+            FutureWarning, stacklevel=2)
         return self._metadata.common_metadata

     @property
@@ -2453,7 +2453,7 @@ def pieces(self):
         warnings.warn(
             _DEPR_MSG.format("ParquetDataset.pieces",
                              " Use the '.fragments' attribute instead"),
-            DeprecationWarning, stacklevel=2)
+            FutureWarning, stacklevel=2)
         return list(self._dataset.get_fragments())

     @property
@@ -2744,7 +2744,7 @@ def read_table(source, columns=None, use_threads=True, metadata=None,
             "Passing 'use_legacy_dataset=True' to get the legacy behaviour is "
             "deprecated as of pyarrow 8.0.0, and the legacy implementation will "
             "be removed in a future version.",
-            DeprecationWarning, stacklevel=2)
+            FutureWarning, stacklevel=2)

     if ignore_prefixes is not None:
         raise ValueError(
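The choice of warning class is the substance of this file's changes: with CPython's default warning filters, DeprecationWarning is only displayed when the warning is triggered from code running in __main__, whereas FutureWarning is always displayed, so the deprecations above become visible to pyarrow end users rather than only to developers running the library's own code. A minimal standalone sketch of the difference, with hypothetical function names (not part of this diff):

import warnings


def legacy_api():
    # With default filters (Python 3.7+), this is hidden when triggered from
    # imported library code; only callers in __main__, or users opting in via
    # -W / warnings.simplefilter(), ever see it.
    warnings.warn("legacy_api is deprecated", DeprecationWarning, stacklevel=2)


def legacy_api_visible():
    # FutureWarning is not ignored by default, so end users actually see it.
    warnings.warn("legacy_api is deprecated", FutureWarning, stacklevel=2)


legacy_api()          # silent when this module is imported
legacy_api_visible()  # prints a FutureWarning to stderr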
python/pyarrow/tests/parquet/common.py (18 additions & 7 deletions)
@@ -23,14 +23,25 @@
 import pyarrow as pa
 from pyarrow.tests import util

+legacy_filter_mark = pytest.mark.filterwarnings(
+    "ignore:Passing 'use_legacy:FutureWarning"
+)
+
 parametrize_legacy_dataset = pytest.mark.parametrize(
     "use_legacy_dataset",
-    [True, pytest.param(False, marks=pytest.mark.dataset)])
+    [pytest.param(True, marks=legacy_filter_mark),
+     pytest.param(False, marks=pytest.mark.dataset)]
+)
 parametrize_legacy_dataset_not_supported = pytest.mark.parametrize(
-    "use_legacy_dataset", [True, pytest.param(False, marks=pytest.mark.skip)])
+    "use_legacy_dataset",
+    [pytest.param(True, marks=legacy_filter_mark),
+     pytest.param(False, marks=pytest.mark.skip)]
+)
 parametrize_legacy_dataset_fixed = pytest.mark.parametrize(
-    "use_legacy_dataset", [pytest.param(True, marks=pytest.mark.xfail),
-                           pytest.param(False, marks=pytest.mark.dataset)])
+    "use_legacy_dataset",
+    [pytest.param(True, marks=[pytest.mark.xfail, legacy_filter_mark]),
+     pytest.param(False, marks=pytest.mark.dataset)]
+)

 # Marks all of the tests in this module
 # Ignore these with pytest ... -m 'not parquet'
@@ -58,7 +69,7 @@ def _read_table(*args, **kwargs):


 def _roundtrip_table(table, read_table_kwargs=None,
-                     write_table_kwargs=None, use_legacy_dataset=True):
+                     write_table_kwargs=None, use_legacy_dataset=False):
     read_table_kwargs = read_table_kwargs or {}
     write_table_kwargs = write_table_kwargs or {}

@@ -70,7 +81,7 @@ def _roundtrip_table(table, read_table_kwargs=None,


 def _check_roundtrip(table, expected=None, read_table_kwargs=None,
-                     use_legacy_dataset=True, **write_table_kwargs):
+                     use_legacy_dataset=False, **write_table_kwargs):
     if expected is None:
         expected = table

@@ -87,7 +98,7 @@ def _check_roundtrip(table, expected=None, read_table_kwargs=None,
     assert result.equals(expected)


-def _roundtrip_pandas_dataframe(df, write_kwargs, use_legacy_dataset=True):
+def _roundtrip_pandas_dataframe(df, write_kwargs, use_legacy_dataset=False):
     table = pa.Table.from_pandas(df)
     result = _roundtrip_table(
         table, write_table_kwargs=write_kwargs,
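The new legacy_filter_mark above works because pytest applies a filterwarnings mark per parametrized case: attaching the mark through pytest.param silences the FutureWarning only when use_legacy_dataset=True is the parameter under test, while the False case still surfaces any unexpected warnings. The mark string follows Python's -W "action:message:category" syntax, where the message part is a regex matched against the start of the warning text. A self-contained sketch under assumed names (quiet_legacy and test_roundtrip are illustrative, not from this PR):

import warnings

import pytest

# Ignore the deprecation chatter only for the legacy parameter value.
quiet_legacy = pytest.mark.filterwarnings(
    "ignore:legacy mode is deprecated:FutureWarning"
)


@pytest.mark.parametrize(
    "use_legacy",
    [pytest.param(True, marks=quiet_legacy),  # warning ignored here only
     pytest.param(False)]
)
def test_roundtrip(use_legacy):
    if use_legacy:
        # Without the mark, this would fail a suite run under -W error.
        warnings.warn("legacy mode is deprecated", FutureWarning)
    assert use_legacy in (True, False)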
python/pyarrow/tests/parquet/test_basic.py (1 addition & 1 deletion)
@@ -795,6 +795,6 @@ def test_read_table_legacy_deprecated(tempdir):
     pq.write_table(table, path)

     with pytest.warns(
-        DeprecationWarning, match="Passing 'use_legacy_dataset=True'"
+        FutureWarning, match="Passing 'use_legacy_dataset=True'"
     ):
         pq.read_table(path, use_legacy_dataset=True)
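This assertion works because pytest.warns treats match as a regular expression searched within the warning message; the quoted fragment above contains no regex metacharacters, so it matches literally. A small sketch with a hypothetical trigger function standing in for pq.read_table:

import warnings

import pytest


def trigger_legacy():
    warnings.warn(
        "Passing 'use_legacy_dataset=True' to get the legacy behaviour is "
        "deprecated", FutureWarning, stacklevel=2)


def test_legacy_warns():
    # match is a regex; re.escape would be needed if the expected text
    # contained metacharacters such as '(' or '+'.
    with pytest.warns(FutureWarning, match="Passing 'use_legacy_dataset=True'"):
        trigger_legacy()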
python/pyarrow/tests/parquet/test_dataset.py (27 additions & 22 deletions)
@@ -55,7 +55,7 @@ def test_parquet_piece_read(tempdir):
     path = tempdir / 'parquet_piece_read.parquet'
     _write_table(table, path, version='2.6')

-    with pytest.warns(DeprecationWarning):
+    with pytest.warns(FutureWarning):
         piece1 = pq.ParquetDatasetPiece(path)

     result = piece1.read()
@@ -70,7 +70,7 @@ def test_parquet_piece_open_and_get_metadata(tempdir):
     path = tempdir / 'parquet_piece_read.parquet'
     _write_table(table, path, version='2.6')

-    with pytest.warns(DeprecationWarning):
+    with pytest.warns(FutureWarning):
         piece = pq.ParquetDatasetPiece(path)
     table1 = piece.read()
     assert isinstance(table1, pa.Table)
@@ -80,7 +80,7 @@ def test_parquet_piece_open_and_get_metadata(tempdir):
     assert table.equals(table1)


-@pytest.mark.filterwarnings("ignore:ParquetDatasetPiece:DeprecationWarning")
+@pytest.mark.filterwarnings("ignore:ParquetDatasetPiece:FutureWarning")
 def test_parquet_piece_basics():
     path = '/baz.parq'

@@ -140,7 +140,7 @@ def test_read_partitioned_directory(tempdir, use_legacy_dataset):
     _partition_test_for_filesystem(fs, tempdir, use_legacy_dataset)


-@pytest.mark.filterwarnings("ignore:'ParquetDataset:DeprecationWarning")
+@pytest.mark.filterwarnings("ignore:'ParquetDataset:FutureWarning")
 @pytest.mark.pandas
 def test_create_parquet_dataset_multi_threaded(tempdir):
     fs = LocalFileSystem._get_instance()
@@ -151,7 +151,7 @@ def test_create_parquet_dataset_multi_threaded(tempdir):
     manifest = pq.ParquetManifest(base_path, filesystem=fs,
                                   metadata_nthreads=1)
     with pytest.warns(
-        DeprecationWarning, match="Specifying the 'metadata_nthreads'"
+        FutureWarning, match="Specifying the 'metadata_nthreads'"
     ):
         dataset = pq.ParquetDataset(
             base_path, filesystem=fs, metadata_nthreads=16)
@@ -801,14 +801,14 @@ def _test_read_common_metadata_files(fs, base_path):


 @pytest.mark.pandas
-@pytest.mark.filterwarnings("ignore:'ParquetDataset.schema:DeprecationWarning")
+@pytest.mark.filterwarnings("ignore:'ParquetDataset.schema:FutureWarning")
 def test_read_common_metadata_files(tempdir):
     fs = LocalFileSystem._get_instance()
     _test_read_common_metadata_files(fs, tempdir)


 @pytest.mark.pandas
-@pytest.mark.filterwarnings("ignore:'ParquetDataset.schema:DeprecationWarning")
+@pytest.mark.filterwarnings("ignore:'ParquetDataset.schema:FutureWarning")
 def test_read_metadata_files(tempdir):
     fs = LocalFileSystem._get_instance()

@@ -922,7 +922,7 @@ def read_multiple_files(paths, columns=None, use_threads=True, **kwargs):
         result2 = read_multiple_files(paths, metadata=metadata)
         assert result2.equals(expected)

-        with pytest.warns(DeprecationWarning, match="Specifying the 'schema'"):
+        with pytest.warns(FutureWarning, match="Specifying the 'schema'"):
             result3 = pq.ParquetDataset(dirpath, schema=metadata.schema).read()
         assert result3.equals(expected)
     else:
@@ -968,7 +968,7 @@ def read_multiple_files(paths, columns=None, use_threads=True, **kwargs):
     mixed_paths = [bad_apple_path, paths[0]]

     with pytest.raises(ValueError):
-        with pytest.warns(DeprecationWarning, match="Specifying the 'schema'"):
+        with pytest.warns(FutureWarning, match="Specifying the 'schema'"):
             read_multiple_files(mixed_paths, schema=bad_meta.schema)

     with pytest.raises(ValueError):
@@ -1014,7 +1014,7 @@ def test_dataset_read_pandas(tempdir, use_legacy_dataset):
     tm.assert_frame_equal(result.reindex(columns=expected.columns), expected)


-@pytest.mark.filterwarnings("ignore:'ParquetDataset:DeprecationWarning")
+@pytest.mark.filterwarnings("ignore:'ParquetDataset:FutureWarning")
 @pytest.mark.pandas
 @parametrize_legacy_dataset
 def test_dataset_memory_map(tempdir, use_legacy_dataset):
@@ -1217,7 +1217,7 @@ def test_empty_directory(tempdir, use_legacy_dataset):
     assert result.num_columns == 0


-@pytest.mark.filterwarnings("ignore:'ParquetDataset.schema:DeprecationWarning")
+@pytest.mark.filterwarnings("ignore:'ParquetDataset.schema:FutureWarning")
 def _test_write_to_dataset_with_partitions(base_path,
                                            use_legacy_dataset=True,
                                            filesystem=None,
@@ -1259,7 +1259,7 @@ def _test_write_to_dataset_with_partitions(base_path,
                                 use_legacy_dataset=use_legacy_dataset)
     # ARROW-2209: Ensure the dataset schema also includes the partition columns
     if use_legacy_dataset:
-        with pytest.warns(DeprecationWarning, match="'ParquetDataset.schema'"):
+        with pytest.warns(FutureWarning, match="'ParquetDataset.schema'"):
             dataset_cols = set(dataset.schema.to_arrow_schema().names)
     else:
         # NB schema property is an arrow and not parquet schema
@@ -1409,7 +1409,7 @@ def test_write_to_dataset_no_partitions_s3fs(
         path, use_legacy_dataset, filesystem=fs)


-@pytest.mark.filterwarnings("ignore:'ParquetDataset:DeprecationWarning")
+@pytest.mark.filterwarnings("ignore:'ParquetDataset:FutureWarning")
 @pytest.mark.pandas
 @parametrize_legacy_dataset_not_supported
 def test_write_to_dataset_with_partitions_and_custom_filenames(
@@ -1569,6 +1569,7 @@ def test_dataset_read_dictionary(tempdir, use_legacy_dataset):

 @pytest.mark.dataset
 @pytest.mark.pandas
+@pytest.mark.filterwarnings("ignore:Passing 'use_legacy:FutureWarning")
 def test_read_table_schema(tempdir):
     # test that schema keyword is passed through in read_table
     table = pa.table({'a': pa.array([1, 2, 3], pa.int32())})
@@ -1622,6 +1623,7 @@ def test_dataset_unsupported_keywords():


 @pytest.mark.dataset
+@pytest.mark.filterwarnings("ignore:Passing 'use_legacy:FutureWarning")
 def test_dataset_partitioning(tempdir):
     import pyarrow.dataset as ds

@@ -1669,7 +1671,7 @@ def test_parquet_dataset_new_filesystem(tempdir):
     assert result.equals(table)


-@pytest.mark.filterwarnings("ignore:'ParquetDataset:DeprecationWarning")
+@pytest.mark.filterwarnings("ignore:'ParquetDataset:FutureWarning")
 def test_parquet_dataset_partitions_piece_path_with_fsspec(tempdir):
     # ARROW-10462 ensure that on Windows we properly use posix-style paths
     # as used by fsspec
@@ -1693,30 +1695,33 @@ def test_parquet_dataset_deprecated_properties(tempdir):
     pq.write_table(table, path)
     dataset = pq.ParquetDataset(path)

-    with pytest.warns(DeprecationWarning, match="'ParquetDataset.pieces"):
+    with pytest.warns(FutureWarning, match="'ParquetDataset.pieces"):
         dataset.pieces

-    with pytest.warns(DeprecationWarning, match="'ParquetDataset.partitions"):
+    with pytest.warns(FutureWarning, match="'ParquetDataset.partitions"):
         dataset.partitions

-    with pytest.warns(DeprecationWarning, match="'ParquetDataset.memory_map"):
+    with pytest.warns(FutureWarning, match="'ParquetDataset.memory_map"):
         dataset.memory_map

-    with pytest.warns(DeprecationWarning, match="'ParquetDataset.read_dictio"):
+    with pytest.warns(FutureWarning, match="'ParquetDataset.read_dictio"):
         dataset.read_dictionary

-    with pytest.warns(DeprecationWarning, match="'ParquetDataset.buffer_size"):
+    with pytest.warns(FutureWarning, match="'ParquetDataset.buffer_size"):
         dataset.buffer_size

-    with pytest.warns(DeprecationWarning, match="'ParquetDataset.fs"):
+    with pytest.warns(FutureWarning, match="'ParquetDataset.fs"):
         dataset.fs

-    with pytest.warns(DeprecationWarning, match="'ParquetDataset.schema'"):
+    with pytest.warns(FutureWarning, match="'ParquetDataset.schema'"):
         dataset.schema

+    with pytest.warns(FutureWarning, match="'ParquetDataset.common_metadata'"):
+        dataset.common_metadata
+
     dataset2 = pq.ParquetDataset(path, use_legacy_dataset=False)

-    with pytest.warns(DeprecationWarning, match="'ParquetDataset.pieces"):
+    with pytest.warns(FutureWarning, match="'ParquetDataset.pieces"):
         dataset2.pieces