python/pyarrow/parquet/__init__.py (13 additions & 13 deletions)
@@ -1059,7 +1059,7 @@ def __init__(self, path, open_file_func=partial(open, mode='rb'),
         warnings.warn(
             "ParquetDatasetPiece is deprecated as of pyarrow 5.0.0 and will "
             "be removed in a future version.",
-            DeprecationWarning, stacklevel=2)
+            FutureWarning, stacklevel=2)
         self._init(
             path, open_file_func, file_options, row_group, partition_keys)

@@ -1692,7 +1692,7 @@ def __init__(self, path_or_paths, filesystem=None, schema=None,
                 "Specifying the 'metadata_nthreads' argument is deprecated as "
                 "of pyarrow 8.0.0, and the argument will be removed in a "
                 "future version",
-                DeprecationWarning, stacklevel=2,
+                FutureWarning, stacklevel=2,
             )
         else:
             metadata_nthreads = 1
@@ -1742,7 +1742,7 @@ def __init__(self, path_or_paths, filesystem=None, schema=None,
                 "specify it in combination with 'use_legacy_dataet=False', "
                 "but in that case you need to specify a pyarrow.Schema "
                 "instead of a ParquetSchema.",
-                DeprecationWarning, stacklevel=2)
+                FutureWarning, stacklevel=2)
         self._schema = schema

         self.split_row_groups = split_row_groups
@@ -1953,7 +1953,7 @@ def pieces(self):
                 " Specify 'use_legacy_dataset=False' while constructing the "
                 "ParquetDataset, and then use the '.fragments' attribute "
                 "instead."),
-            DeprecationWarning, stacklevel=2)
+            FutureWarning, stacklevel=2)
         return self._pieces

     @property
@@ -1967,7 +1967,7 @@ def partitions(self):
                 " Specify 'use_legacy_dataset=False' while constructing the "
                 "ParquetDataset, and then use the '.partitioning' attribute "
                 "instead."),
-            DeprecationWarning, stacklevel=2)
+            FutureWarning, stacklevel=2)
         return self._partitions

     @property
@@ -1979,7 +1979,7 @@ def schema(self):
                 "ParquetDataset, and then use the '.schema' attribute "
                 "instead (which will return an Arrow schema instead of a "
                 "Parquet schema)."),
-            DeprecationWarning, stacklevel=2)
+            FutureWarning, stacklevel=2)
         return self._schema

     @property
@@ -1989,7 +1989,7 @@ def memory_map(self):
         """
         warnings.warn(
             _DEPR_MSG.format("ParquetDataset.memory_map", ""),
-            DeprecationWarning, stacklevel=2)
+            FutureWarning, stacklevel=2)
         return self._metadata.memory_map

     @property
@@ -1999,7 +1999,7 @@ def read_dictionary(self):
         """
         warnings.warn(
             _DEPR_MSG.format("ParquetDataset.read_dictionary", ""),
-            DeprecationWarning, stacklevel=2)
+            FutureWarning, stacklevel=2)
         return self._metadata.read_dictionary

     @property
@@ -2009,7 +2009,7 @@ def buffer_size(self):
         """
         warnings.warn(
             _DEPR_MSG.format("ParquetDataset.buffer_size", ""),
-            DeprecationWarning, stacklevel=2)
+            FutureWarning, stacklevel=2)
         return self._metadata.buffer_size

     _fs = property(
@@ -2027,7 +2027,7 @@ def fs(self):
                 " Specify 'use_legacy_dataset=False' while constructing the "
                 "ParquetDataset, and then use the '.filesystem' attribute "
                 "instead."),
-            DeprecationWarning, stacklevel=2)
+            FutureWarning, stacklevel=2)
         return self._metadata.fs

     _common_metadata = property(
@@ -2041,7 +2041,7 @@ def common_metadata(self):
         """
         warnings.warn(
             _DEPR_MSG.format("ParquetDataset.common_metadata", ""),
-            DeprecationWarning, stacklevel=2)
+            FutureWarning, stacklevel=2)
         return self._metadata.common_metadata

     @property
@@ -2453,7 +2453,7 @@ def pieces(self):
         warnings.warn(
             _DEPR_MSG.format("ParquetDataset.pieces",
                              " Use the '.fragments' attribute instead"),
-            DeprecationWarning, stacklevel=2)
+            FutureWarning, stacklevel=2)
         return list(self._dataset.get_fragments())

     @property
@@ -2744,7 +2744,7 @@ def read_table(source, columns=None, use_threads=True, metadata=None,
             "Passing 'use_legacy_dataset=True' to get the legacy behaviour is "
             "deprecated as of pyarrow 8.0.0, and the legacy implementation will "
             "be removed in a future version.",
-            DeprecationWarning, stacklevel=2)
+            FutureWarning, stacklevel=2)

     if ignore_prefixes is not None:
         raise ValueError(
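The choice of warning class is the substance of this file's changes: with CPython's default warning filters, DeprecationWarning is only displayed when the warning is triggered from code running in __main__, whereas FutureWarning is always displayed, so the deprecations above become visible to pyarrow end users rather than only to developers running the library's own code. A minimal standalone sketch of the difference, with hypothetical function names (not part of this diff):

import warnings


def legacy_api():
    # With default filters (Python 3.7+), this is hidden when triggered from
    # imported library code; only callers in __main__, or users opting in via
    # -W / warnings.simplefilter(), ever see it.
    warnings.warn("legacy_api is deprecated", DeprecationWarning, stacklevel=2)


def legacy_api_visible():
    # FutureWarning is not ignored by default, so end users actually see it.
    warnings.warn("legacy_api is deprecated", FutureWarning, stacklevel=2)


legacy_api()          # silent when this module is imported
legacy_api_visible()  # prints a FutureWarning to stderr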
python/pyarrow/tests/parquet/common.py (18 additions & 7 deletions)
@@ -23,14 +23,25 @@
 import pyarrow as pa
 from pyarrow.tests import util

+legacy_filter_mark = pytest.mark.filterwarnings(
+    "ignore:Passing 'use_legacy:FutureWarning"
+)
+
 parametrize_legacy_dataset = pytest.mark.parametrize(
     "use_legacy_dataset",
-    [True, pytest.param(False, marks=pytest.mark.dataset)])
+    [pytest.param(True, marks=legacy_filter_mark),
+     pytest.param(False, marks=pytest.mark.dataset)]
+)
 parametrize_legacy_dataset_not_supported = pytest.mark.parametrize(
-    "use_legacy_dataset", [True, pytest.param(False, marks=pytest.mark.skip)])
+    "use_legacy_dataset",
+    [pytest.param(True, marks=legacy_filter_mark),
+     pytest.param(False, marks=pytest.mark.skip)]
+)
 parametrize_legacy_dataset_fixed = pytest.mark.parametrize(
-    "use_legacy_dataset", [pytest.param(True, marks=pytest.mark.xfail),
-                           pytest.param(False, marks=pytest.mark.dataset)])
+    "use_legacy_dataset",
+    [pytest.param(True, marks=[pytest.mark.xfail, legacy_filter_mark]),
+     pytest.param(False, marks=pytest.mark.dataset)]
+)

 # Marks all of the tests in this module
 # Ignore these with pytest ... -m 'not parquet'
@@ -58,7 +69,7 @@ def _read_table(*args, **kwargs):


 def _roundtrip_table(table, read_table_kwargs=None,
-                     write_table_kwargs=None, use_legacy_dataset=True):
+                     write_table_kwargs=None, use_legacy_dataset=False):
     read_table_kwargs = read_table_kwargs or {}
     write_table_kwargs = write_table_kwargs or {}

@@ -70,7 +81,7 @@ def _roundtrip_table(table, read_table_kwargs=None,


 def _check_roundtrip(table, expected=None, read_table_kwargs=None,
-                     use_legacy_dataset=True, **write_table_kwargs):
+                     use_legacy_dataset=False, **write_table_kwargs):
     if expected is None:
         expected = table

@@ -87,7 +98,7 @@ def _check_roundtrip(table, expected=None, read_table_kwargs=None,
     assert result.equals(expected)


-def _roundtrip_pandas_dataframe(df, write_kwargs, use_legacy_dataset=True):
+def _roundtrip_pandas_dataframe(df, write_kwargs, use_legacy_dataset=False):
     table = pa.Table.from_pandas(df)
     result = _roundtrip_table(
         table, write_table_kwargs=write_kwargs,
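The new legacy_filter_mark above works because pytest applies a filterwarnings mark per parametrized case: attaching the mark through pytest.param silences the FutureWarning only when use_legacy_dataset=True is the parameter under test, while the False case still surfaces any unexpected warnings. The mark string follows Python's -W "action:message:category" syntax, where the message part is a regex matched against the start of the warning text. A self-contained sketch under assumed names (quiet_legacy and test_roundtrip are illustrative, not from this PR):

import warnings

import pytest

# Ignore the deprecation chatter only for the legacy parameter value.
quiet_legacy = pytest.mark.filterwarnings(
    "ignore:legacy mode is deprecated:FutureWarning"
)


@pytest.mark.parametrize(
    "use_legacy",
    [pytest.param(True, marks=quiet_legacy),  # warning ignored here only
     pytest.param(False)]
)
def test_roundtrip(use_legacy):
    if use_legacy:
        # Without the mark, this would fail a suite run under -W error.
        warnings.warn("legacy mode is deprecated", FutureWarning)
    assert use_legacy in (True, False)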
python/pyarrow/tests/parquet/test_basic.py (1 addition & 1 deletion)
@@ -795,6 +795,6 @@ def test_read_table_legacy_deprecated(tempdir):
     pq.write_table(table, path)

     with pytest.warns(
-        DeprecationWarning, match="Passing 'use_legacy_dataset=True'"
+        FutureWarning, match="Passing 'use_legacy_dataset=True'"
     ):
         pq.read_table(path, use_legacy_dataset=True)
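This assertion works because pytest.warns treats match as a regular expression searched within the warning message; the quoted fragment above contains no regex metacharacters, so it matches literally. A small sketch with a hypothetical trigger function standing in for pq.read_table:

import warnings

import pytest


def trigger_legacy():
    warnings.warn(
        "Passing 'use_legacy_dataset=True' to get the legacy behaviour is "
        "deprecated", FutureWarning, stacklevel=2)


def test_legacy_warns():
    # match is a regex; re.escape would be needed if the expected text
    # contained metacharacters such as '(' or '+'.
    with pytest.warns(FutureWarning, match="Passing 'use_legacy_dataset=True'"):
        trigger_legacy()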
python/pyarrow/tests/parquet/test_dataset.py (27 additions & 22 deletions)
@@ -55,7 +55,7 @@ def test_parquet_piece_read(tempdir):
     path = tempdir / 'parquet_piece_read.parquet'
     _write_table(table, path, version='2.6')

-    with pytest.warns(DeprecationWarning):
+    with pytest.warns(FutureWarning):
         piece1 = pq.ParquetDatasetPiece(path)

     result = piece1.read()
@@ -70,7 +70,7 @@ def test_parquet_piece_open_and_get_metadata(tempdir):
     path = tempdir / 'parquet_piece_read.parquet'
     _write_table(table, path, version='2.6')

-    with pytest.warns(DeprecationWarning):
+    with pytest.warns(FutureWarning):
         piece = pq.ParquetDatasetPiece(path)
     table1 = piece.read()
     assert isinstance(table1, pa.Table)
@@ -80,7 +80,7 @@ def test_parquet_piece_open_and_get_metadata(tempdir):
     assert table.equals(table1)


-@pytest.mark.filterwarnings("ignore:ParquetDatasetPiece:DeprecationWarning")
+@pytest.mark.filterwarnings("ignore:ParquetDatasetPiece:FutureWarning")
 def test_parquet_piece_basics():
     path = '/baz.parq'

@@ -140,7 +140,7 @@ def test_read_partitioned_directory(tempdir, use_legacy_dataset):
     _partition_test_for_filesystem(fs, tempdir, use_legacy_dataset)


-@pytest.mark.filterwarnings("ignore:'ParquetDataset:DeprecationWarning")
+@pytest.mark.filterwarnings("ignore:'ParquetDataset:FutureWarning")
 @pytest.mark.pandas
 def test_create_parquet_dataset_multi_threaded(tempdir):
     fs = LocalFileSystem._get_instance()
@@ -151,7 +151,7 @@ def test_create_parquet_dataset_multi_threaded(tempdir):
     manifest = pq.ParquetManifest(base_path, filesystem=fs,
                                   metadata_nthreads=1)
     with pytest.warns(
-        DeprecationWarning, match="Specifying the 'metadata_nthreads'"
+        FutureWarning, match="Specifying the 'metadata_nthreads'"
     ):
         dataset = pq.ParquetDataset(
             base_path, filesystem=fs, metadata_nthreads=16)
@@ -801,14 +801,14 @@ def _test_read_common_metadata_files(fs, base_path):


 @pytest.mark.pandas
-@pytest.mark.filterwarnings("ignore:'ParquetDataset.schema:DeprecationWarning")
+@pytest.mark.filterwarnings("ignore:'ParquetDataset.schema:FutureWarning")
 def test_read_common_metadata_files(tempdir):
     fs = LocalFileSystem._get_instance()
     _test_read_common_metadata_files(fs, tempdir)


 @pytest.mark.pandas
-@pytest.mark.filterwarnings("ignore:'ParquetDataset.schema:DeprecationWarning")
+@pytest.mark.filterwarnings("ignore:'ParquetDataset.schema:FutureWarning")
 def test_read_metadata_files(tempdir):
     fs = LocalFileSystem._get_instance()

@@ -922,7 +922,7 @@ def read_multiple_files(paths, columns=None, use_threads=True, **kwargs):
         result2 = read_multiple_files(paths, metadata=metadata)
         assert result2.equals(expected)

-        with pytest.warns(DeprecationWarning, match="Specifying the 'schema'"):
+        with pytest.warns(FutureWarning, match="Specifying the 'schema'"):
             result3 = pq.ParquetDataset(dirpath, schema=metadata.schema).read()
         assert result3.equals(expected)
     else:
@@ -968,7 +968,7 @@ def read_multiple_files(paths, columns=None, use_threads=True, **kwargs):
     mixed_paths = [bad_apple_path, paths[0]]

     with pytest.raises(ValueError):
-        with pytest.warns(DeprecationWarning, match="Specifying the 'schema'"):
+        with pytest.warns(FutureWarning, match="Specifying the 'schema'"):
             read_multiple_files(mixed_paths, schema=bad_meta.schema)

     with pytest.raises(ValueError):
@@ -1014,7 +1014,7 @@ def test_dataset_read_pandas(tempdir, use_legacy_dataset):
     tm.assert_frame_equal(result.reindex(columns=expected.columns), expected)


-@pytest.mark.filterwarnings("ignore:'ParquetDataset:DeprecationWarning")
+@pytest.mark.filterwarnings("ignore:'ParquetDataset:FutureWarning")
 @pytest.mark.pandas
 @parametrize_legacy_dataset
 def test_dataset_memory_map(tempdir, use_legacy_dataset):
@@ -1217,7 +1217,7 @@ def test_empty_directory(tempdir, use_legacy_dataset):
     assert result.num_columns == 0


-@pytest.mark.filterwarnings("ignore:'ParquetDataset.schema:DeprecationWarning")
+@pytest.mark.filterwarnings("ignore:'ParquetDataset.schema:FutureWarning")
 def _test_write_to_dataset_with_partitions(base_path,
                                            use_legacy_dataset=True,
                                            filesystem=None,
@@ -1259,7 +1259,7 @@ def _test_write_to_dataset_with_partitions(base_path,
                                 use_legacy_dataset=use_legacy_dataset)
     # ARROW-2209: Ensure the dataset schema also includes the partition columns
     if use_legacy_dataset:
-        with pytest.warns(DeprecationWarning, match="'ParquetDataset.schema'"):
+        with pytest.warns(FutureWarning, match="'ParquetDataset.schema'"):
             dataset_cols = set(dataset.schema.to_arrow_schema().names)
     else:
         # NB schema property is an arrow and not parquet schema
@@ -1409,7 +1409,7 @@ def test_write_to_dataset_no_partitions_s3fs(
         path, use_legacy_dataset, filesystem=fs)


-@pytest.mark.filterwarnings("ignore:'ParquetDataset:DeprecationWarning")
+@pytest.mark.filterwarnings("ignore:'ParquetDataset:FutureWarning")
 @pytest.mark.pandas
 @parametrize_legacy_dataset_not_supported
 def test_write_to_dataset_with_partitions_and_custom_filenames(
@@ -1569,6 +1569,7 @@ def test_dataset_read_dictionary(tempdir, use_legacy_dataset):

 @pytest.mark.dataset
 @pytest.mark.pandas
+@pytest.mark.filterwarnings("ignore:Passing 'use_legacy:FutureWarning")
 def test_read_table_schema(tempdir):
     # test that schema keyword is passed through in read_table
     table = pa.table({'a': pa.array([1, 2, 3], pa.int32())})
@@ -1622,6 +1623,7 @@ def test_dataset_unsupported_keywords():


 @pytest.mark.dataset
+@pytest.mark.filterwarnings("ignore:Passing 'use_legacy:FutureWarning")
 def test_dataset_partitioning(tempdir):
     import pyarrow.dataset as ds

@@ -1669,7 +1671,7 @@ def test_parquet_dataset_new_filesystem(tempdir):
     assert result.equals(table)


-@pytest.mark.filterwarnings("ignore:'ParquetDataset:DeprecationWarning")
+@pytest.mark.filterwarnings("ignore:'ParquetDataset:FutureWarning")
 def test_parquet_dataset_partitions_piece_path_with_fsspec(tempdir):
     # ARROW-10462 ensure that on Windows we properly use posix-style paths
     # as used by fsspec
@@ -1693,30 +1695,33 @@ def test_parquet_dataset_deprecated_properties(tempdir):
     pq.write_table(table, path)
     dataset = pq.ParquetDataset(path)

-    with pytest.warns(DeprecationWarning, match="'ParquetDataset.pieces"):
+    with pytest.warns(FutureWarning, match="'ParquetDataset.pieces"):
         dataset.pieces

-    with pytest.warns(DeprecationWarning, match="'ParquetDataset.partitions"):
+    with pytest.warns(FutureWarning, match="'ParquetDataset.partitions"):
         dataset.partitions

-    with pytest.warns(DeprecationWarning, match="'ParquetDataset.memory_map"):
+    with pytest.warns(FutureWarning, match="'ParquetDataset.memory_map"):
         dataset.memory_map

-    with pytest.warns(DeprecationWarning, match="'ParquetDataset.read_dictio"):
+    with pytest.warns(FutureWarning, match="'ParquetDataset.read_dictio"):
         dataset.read_dictionary

-    with pytest.warns(DeprecationWarning, match="'ParquetDataset.buffer_size"):
+    with pytest.warns(FutureWarning, match="'ParquetDataset.buffer_size"):
         dataset.buffer_size

-    with pytest.warns(DeprecationWarning, match="'ParquetDataset.fs"):
+    with pytest.warns(FutureWarning, match="'ParquetDataset.fs"):
         dataset.fs

-    with pytest.warns(DeprecationWarning, match="'ParquetDataset.schema'"):
+    with pytest.warns(FutureWarning, match="'ParquetDataset.schema'"):
         dataset.schema

+    with pytest.warns(FutureWarning, match="'ParquetDataset.common_metadata'"):
+        dataset.common_metadata
+
     dataset2 = pq.ParquetDataset(path, use_legacy_dataset=False)

-    with pytest.warns(DeprecationWarning, match="'ParquetDataset.pieces"):
+    with pytest.warns(FutureWarning, match="'ParquetDataset.pieces"):
         dataset2.pieces