Merged

20 commits
9452f02
Fix resource warnings originating from tests/util.py
milesgranger Nov 24, 2022
c2d6a91
Ignore filesystem.LocalFileSystem warning
milesgranger Nov 24, 2022
860c309
Fix pytest.warns(None) deprecation warning
milesgranger Nov 24, 2022
b6b30d7
Ignore PytestUnraisableExceptionWarning spawning from pyx __del__
milesgranger Nov 24, 2022
df21ec2
Ignore warnings for explicitly used deprecated parquet versions
milesgranger Nov 24, 2022
3038aea
Fix ResourceWarning unclosed socket in StreamReaderServer
milesgranger Nov 24, 2022
2d8fb4a
Fix process still running warning - wait
milesgranger Nov 25, 2022
cbcd6c1
Ignore plasma ResourceWarnings
milesgranger Nov 25, 2022
5842379
Fix ResourceWarning unclosed file in get_metadata
milesgranger Nov 25, 2022
4308f13
Ignore UserWarning on overflow during explicit testing
milesgranger Nov 25, 2022
2146ba4
Fix pandas UserWarning - ORC value set on copy
milesgranger Nov 25, 2022
23692a3
Ignore DeprecationWarning in explicit test of parsing tz aware datetimes
milesgranger Nov 25, 2022
38c93ec
get_metadata: Flag reader should close source
milesgranger Nov 28, 2022
eb3feab
Add FutureWarning message to match against in read_table_doesnt_warn
milesgranger Nov 28, 2022
eb4962f
Add back ignoring VisibleDeprecationWarning
milesgranger Nov 29, 2022
8b576c1
pandas.testing specific element-wise comparison warnings
milesgranger Nov 29, 2022
a599d65
CI Test: are 'discarding nonzero nanoseconds' and 'parsing timezone a…
milesgranger Nov 29, 2022
20b1a30
Don't ignore substrait invalid_table_name PytestUnraisableExceptionWa…
milesgranger Nov 29, 2022
ee97c6b
Add specific message to warning filters
milesgranger Nov 29, 2022
ab4a31a
Move warnings catch to specific overflow line
milesgranger Nov 29, 2022
4 changes: 4 additions & 0 deletions python/pyarrow/parquet/core.py
@@ -1175,6 +1175,10 @@ def open(self):
         reader = self.open_file_func(self.path)
         if not isinstance(reader, ParquetFile):
             reader = ParquetFile(reader, **self.file_options)
+
+        # ensure reader knows it's responsible for closing source
+        # since we opened the source here internally.
+        reader._close_source = True
         return reader
 
     def read(self, columns=None, use_threads=True, partitions=None,
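The new flag records ownership: open() created the source itself, so the reader it hands back must close that source rather than leave it to the caller. A minimal sketch of the pattern with illustrative names (not the actual pyarrow internals):

class Reader:
    # illustrative stand-in for ParquetFile
    def __init__(self, source):
        self.source = source
        self._close_source = False  # default: the caller owns the handle

    def close(self):
        if self._close_source:
            self.source.close()  # we opened it, so we close it

def open_reader(path):
    source = open(path, 'rb')    # opened internally
    reader = Reader(source)
    reader._close_source = True  # mirrors the change above
    return reader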
2 changes: 2 additions & 0 deletions python/pyarrow/tests/conftest.py
@@ -174,6 +174,7 @@ def s3_server(s3_connection):
     finally:
         if proc is not None:
             proc.kill()
+            proc.wait()
 
 
 @pytest.fixture(scope='session')
@@ -194,3 +195,4 @@ def gcs_server():
     finally:
         if proc is not None:
             proc.kill()
+            proc.wait()
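kill() only sends the signal; until the child is reaped, Popen.__del__ reports "subprocess N is still running" as a ResourceWarning at garbage collection. A minimal sketch of the fixture teardown pattern:

import subprocess

proc = subprocess.Popen(['sleep', '60'])
try:
    pass  # exercise the server ...
finally:
    proc.kill()   # ask the child to die
    proc.wait()   # reap it, so Popen.__del__ has nothing to warn about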
16 changes: 9 additions & 7 deletions python/pyarrow/tests/parquet/test_basic.py
@@ -17,6 +17,7 @@
 
 from collections import OrderedDict
 import io
+import warnings
 
 import numpy as np
 import pytest
@@ -617,15 +618,16 @@ def test_read_non_existent_file(tempdir, use_legacy_dataset):
 
 @parametrize_legacy_dataset
 def test_read_table_doesnt_warn(datadir, use_legacy_dataset):
-    with pytest.warns(None) as record:
-        pq.read_table(datadir / 'v0.7.1.parquet',
-                      use_legacy_dataset=use_legacy_dataset)
-
     if use_legacy_dataset:
-        # FutureWarning: 'use_legacy_dataset=True'
-        assert len(record) == 1
+        msg = "Passing 'use_legacy_dataset=True'"
+        with pytest.warns(FutureWarning, match=msg):
+            pq.read_table(datadir / 'v0.7.1.parquet',
+                          use_legacy_dataset=use_legacy_dataset)
     else:
-        assert len(record) == 0
+        with warnings.catch_warnings():
+            warnings.simplefilter(action="error")
+            pq.read_table(datadir / 'v0.7.1.parquet',
+                          use_legacy_dataset=use_legacy_dataset)
 
 
 @pytest.mark.pandas
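pytest.warns(None) was how a test asserted that no warning fired, but pytest 7 deprecated it (it now means "expect some warning"). The replacement idiom used above escalates every warning to an error instead. A minimal self-contained sketch:

import warnings

def quiet():
    # stand-in for the code under test; emits no warnings
    return 1 + 1

def test_doesnt_warn():
    # replacement for the deprecated `pytest.warns(None)`: any warning
    # raised inside this block becomes an error and fails the test
    with warnings.catch_warnings():
        warnings.simplefilter("error")
        quiet()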
1 change: 1 addition & 0 deletions python/pyarrow/tests/parquet/test_dataset.py
@@ -81,6 +81,7 @@ def test_parquet_piece_open_and_get_metadata(tempdir):
 
     with pytest.warns(FutureWarning):
         piece = pq.ParquetDatasetPiece(path)
+
     table1 = piece.read()
     assert isinstance(table1, pa.Table)
     meta1 = piece.get_metadata()
7 changes: 6 additions & 1 deletion python/pyarrow/tests/parquet/test_datetime.py
@@ -17,6 +17,7 @@
 
 import datetime
 import io
+import warnings
 
 import numpy as np
 import pytest
@@ -321,7 +322,11 @@ def get_table(pq_reader_method, filename, **kwargs):
     # with the default resolution of ns, we get wrong values for INT96
     # that are out of bounds for nanosecond range
    tab_error = get_table(pq_reader_method, filename)
-    assert tab_error["a"].to_pylist() != oob_dts
+    with warnings.catch_warnings():
+        warnings.filterwarnings("ignore",
+                                "Discarding nonzero nanoseconds in conversion",
+                                UserWarning)
+        assert tab_error["a"].to_pylist() != oob_dts
 
     # avoid this overflow by specifying the resolution to use for INT96 values
     tab_correct = get_table(
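warnings.filterwarnings is called positionally above: the action, then a regex that must match the start of the warning message, then the category. Only warnings matching all parts are suppressed; everything else still surfaces. A small sketch of the same call shape:

import warnings

with warnings.catch_warnings():
    # action, message-prefix regex, category
    warnings.filterwarnings("ignore",
                            "Discarding nonzero nanoseconds",
                            UserWarning)
    # matches the filter -> silenced
    warnings.warn("Discarding nonzero nanoseconds in conversion", UserWarning)
    # different category -> still shown
    warnings.warn("Discarding nonzero nanoseconds", FutureWarning)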
1 change: 1 addition & 0 deletions python/pyarrow/tests/parquet/test_metadata.py
@@ -400,6 +400,7 @@ def test_multi_dataset_metadata(tempdir):
     assert md['serialized_size'] > 0
 
 
+@pytest.mark.filterwarnings("ignore:Parquet format:FutureWarning")
 def test_write_metadata(tempdir):
     path = str(tempdir / "metadata")
     schema = pa.schema([("a", "int64"), ("b", "float64")])
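The filterwarnings mark takes the same action:message:category string as pytest's filterwarnings ini option, where the message part is a regex matched against the start of the warning text, so "ignore:Parquet format" covers both the '1.0' and '2.0' deprecation messages. A small usage sketch:

import warnings
import pytest

@pytest.mark.filterwarnings("ignore:Parquet format:FutureWarning")
def test_legacy_format_versions():
    # both messages start with "Parquet format", so both are ignored
    warnings.warn("Parquet format '2.0' is deprecated", FutureWarning)
    warnings.warn("Parquet format '1.0' is deprecated", FutureWarning)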
1 change: 1 addition & 0 deletions python/pyarrow/tests/parquet/test_pandas.py
@@ -314,6 +314,7 @@ def test_pandas_parquet_configuration_options(tempdir, use_legacy_dataset):
 
 
 @pytest.mark.pandas
+@pytest.mark.filterwarnings("ignore:Parquet format '2.0':FutureWarning")
 def test_spark_flavor_preserves_pandas_metadata():
     df = _test_dataframe(size=100)
     df.index = np.arange(0, 10 * len(df), 10)
1 change: 1 addition & 0 deletions python/pyarrow/tests/test_filesystem.py
@@ -63,6 +63,7 @@ def test_resolve_local_path():
     assert path == uri
 
 
+@pytest.mark.filterwarnings("ignore:pyarrow.filesystem.LocalFileSystem")
 def test_resolve_home_directory():
     uri = '~/myfile.parquet'
     fs, path = filesystem.resolve_filesystem_and_path(uri)
1 change: 1 addition & 0 deletions python/pyarrow/tests/test_ipc.py
@@ -846,6 +846,7 @@ def run(self):
                 self._batches.append(batch)
         finally:
             connection.close()
+            self._sock.close()
 
     def get_result(self):
         return (self._schema, self._table if self._do_read_all
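The server thread previously closed only the accepted connection; the listening socket itself stayed open and was reported by the garbage collector. A minimal sketch of closing both:

import socket

listener = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
listener.bind(('127.0.0.1', 0))
listener.listen(1)
try:
    pass  # accept and service one connection ...
finally:
    # close the listening socket too; an unclosed socket shows up as
    # "ResourceWarning: unclosed <socket ...>" when it is collected
    listener.close()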
4 changes: 3 additions & 1 deletion python/pyarrow/tests/test_orc.py
@@ -52,8 +52,10 @@ def fix_example_values(actual_cols, expected_cols):
         if (name == "map" and
                 [d.keys() == {'key', 'value'} for m in expected for d in m]):
             # convert [{'key': k, 'value': v}, ...] to [(k, v), ...]
+            col = expected_cols[name].copy()
             for i, m in enumerate(expected):
-                expected_cols[name][i] = [(d['key'], d['value']) for d in m]
+                col[i] = [(d['key'], d['value']) for d in m]
+            expected_cols[name] = col
             continue
 
         typ = actual[0].__class__
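Writing through expected_cols[name][i] is chained assignment: when expected_cols is a pandas DataFrame, expected_cols[name] may be a view, and pandas warns that a value is being set on a copy of a slice. The change above copies the column, mutates the copy, and assigns it back. A minimal sketch with hypothetical data:

import pandas as pd

df = pd.DataFrame({"map": [[1], [2], [3]]})

# chained assignment (df["map"][0] = ...) can trigger
# SettingWithCopyWarning; mutate an explicit copy instead
col = df["map"].copy()
for i in range(len(col)):
    col[i] = [v * 10 for v in col[i]]
df["map"] = col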
42 changes: 29 additions & 13 deletions python/pyarrow/tests/test_pandas.py
@@ -20,6 +20,7 @@
 import json
 import multiprocessing as mp
 import sys
+import warnings
 
 from collections import OrderedDict
 from datetime import date, datetime, time, timedelta, timezone
@@ -95,9 +96,12 @@ def _check_pandas_roundtrip(df, expected=None, use_threads=False,
     if expected is None:
         expected = df
 
-    tm.assert_frame_equal(result, expected, check_dtype=check_dtype,
-                          check_index_type=('equiv' if preserve_index
-                                            else False))
+    with warnings.catch_warnings():
+        warnings.filterwarnings(
+            "ignore", "elementwise comparison failed", DeprecationWarning)
+        tm.assert_frame_equal(result, expected, check_dtype=check_dtype,
+                              check_index_type=('equiv' if preserve_index
+                                                else False))
 
 
 def _check_series_roundtrip(s, type_=None, expected_pa_type=None):
@@ -239,11 +243,10 @@ def test_rangeindex_doesnt_warn(self):
         # attributes -> can be removed if support < pd 0.25 is dropped
         df = pd.DataFrame(np.random.randn(4, 2), columns=['a', 'b'])
 
-        with pytest.warns(None) as record:
+        with warnings.catch_warnings():
+            warnings.simplefilter(action="error")
             _check_pandas_roundtrip(df, preserve_index=True)
 
-        assert len(record) == 0, [r.message for r in record]
-
     def test_multiindex_columns(self):
         columns = pd.MultiIndex.from_arrays([
             ['one', 'two'], ['X', 'Y']
@@ -290,11 +293,10 @@ def test_multiindex_doesnt_warn(self):
         columns = pd.MultiIndex.from_arrays([['one', 'two'], ['X', 'Y']])
         df = pd.DataFrame([(1, 'a'), (2, 'b'), (3, 'c')], columns=columns)
 
-        with pytest.warns(None) as record:
+        with warnings.catch_warnings():
+            warnings.simplefilter(action="error")
             _check_pandas_roundtrip(df, preserve_index=True)
 
-        assert len(record) == 0, [r.message for r in record]
-
     def test_integer_index_column(self):
         df = pd.DataFrame([(1, 'a'), (2, 'b'), (3, 'c')])
         _check_pandas_roundtrip(df, preserve_index=True)
@@ -2111,7 +2113,13 @@ def test_to_list_of_structs_pandas(self):
         ])
 
         series = pd.Series(data.to_pandas())
-        tm.assert_series_equal(series, expected)
+
+        # pandas.testing generates a
+        # DeprecationWarning: elementwise comparison failed
+        with warnings.catch_warnings():
+            warnings.filterwarnings("ignore", "elementwise comparison failed",
+                                    DeprecationWarning)
+            tm.assert_series_equal(series, expected)
 
     @pytest.mark.parametrize('t,data,expected', [
         (
@@ -2164,9 +2172,16 @@ def test_nested_large_list(self):
         s = (pa.array([[[1, 2, 3], [4]], None],
                       type=pa.large_list(pa.large_list(pa.int64())))
             .to_pandas())
-        tm.assert_series_equal(
-            s, pd.Series([[[1, 2, 3], [4]], None], dtype=object),
-            check_names=False)
+
+        with warnings.catch_warnings():
+            warnings.filterwarnings("ignore",
+                                    "Creating an ndarray from ragged nested",
+                                    np.VisibleDeprecationWarning)
+            warnings.filterwarnings("ignore", "elementwise comparison failed",
+                                    DeprecationWarning)
+            tm.assert_series_equal(
+                s, pd.Series([[[1, 2, 3], [4]], None], dtype=object),
+                check_names=False)
 
     def test_large_binary_list(self):
         for list_type_factory in (pa.list_, pa.large_list):
@@ -4378,6 +4393,7 @@ def make_df_with_timestamps():
 
 
 @pytest.mark.parquet
+@pytest.mark.filterwarnings("ignore:Parquet format '2.0':FutureWarning")
 def test_timestamp_as_object_parquet(tempdir):
     # Timestamps can be stored as Parquet and reloaded into Pandas with no loss
     # of information if the timestamp_as_object option is True.
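Two suppressions recur in this file: NumPy's VisibleDeprecationWarning fires when an ndarray is built from ragged nested sequences, and comparing such object arrays is what makes pandas.testing emit "elementwise comparison failed" as a DeprecationWarning. A rough reproduction, assuming a NumPy version (roughly 1.19 to 1.23) where ragged array creation still warns rather than raises:

import warnings
import numpy as np

with warnings.catch_warnings():
    warnings.filterwarnings("ignore",
                            "Creating an ndarray from ragged nested",
                            np.VisibleDeprecationWarning)
    # rows of unequal length -> object array; warns on the NumPy
    # versions assumed above, raises ValueError on newer ones
    arr = np.array([[1, 2, 3], [4]])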
10 changes: 7 additions & 3 deletions python/pyarrow/tests/test_plasma.py
@@ -30,9 +30,13 @@
 import pyarrow as pa
 
 
-# ignore all Plasma deprecation warnings in this file, we test that the
-# warnings are actually raised in test_plasma_deprecated.py
-pytestmark = pytest.mark.filterwarnings("ignore:Plasma:DeprecationWarning")
+pytestmark = [
+    # ignore all Plasma deprecation warnings in this file, we test that the
+    # warnings are actually raised in test_plasma_deprecated.py
+    pytest.mark.filterwarnings("ignore:Plasma:DeprecationWarning"),
+    # Ignore other ResourceWarning as plasma is soon to be removed in ~12.0.0
+    pytest.mark.filterwarnings("ignore:subprocess:ResourceWarning")
+]
 
 DEFAULT_PLASMA_STORE_MEMORY = 10 ** 8
 USE_VALGRIND = os.getenv("PLASMA_VALGRIND") == "1"
1 change: 1 addition & 0 deletions python/pyarrow/tests/test_substrait.py
@@ -17,6 +17,7 @@
 
 import os
 import pathlib
+
 import pytest
 
 import pyarrow as pa
45 changes: 23 additions & 22 deletions python/pyarrow/tests/util.py
@@ -357,18 +357,19 @@ def signal_wakeup_fd(*, warn_on_full_buffer=False):
 
 def _ensure_minio_component_version(component, minimum_year):
     full_args = [component, '--version']
-    proc = subprocess.Popen(full_args, stdout=subprocess.PIPE,
-                            stderr=subprocess.PIPE, encoding='utf-8')
-    if proc.wait(10) != 0:
-        return False
-    stdout = proc.stdout.read()
-    pattern = component + r' version RELEASE\.(\d+)-.*'
-    version_match = re.search(pattern, stdout)
-    if version_match:
-        version_year = version_match.group(1)
-        return int(version_year) >= minimum_year
-    else:
-        raise FileNotFoundError("minio component older than the minimum year")
+    with subprocess.Popen(full_args, stdout=subprocess.PIPE,
+                          stderr=subprocess.PIPE, encoding='utf-8') as proc:
+        if proc.wait(10) != 0:
+            return False
+        stdout = proc.stdout.read()
+        pattern = component + r' version RELEASE\.(\d+)-.*'
+        version_match = re.search(pattern, stdout)
+        if version_match:
+            version_year = version_match.group(1)
+            return int(version_year) >= minimum_year
+        else:
+            raise FileNotFoundError(
+                "minio component older than the minimum year")
 
 
 def _wait_for_minio_startup(mcdir, address, access_key, secret_key):
@@ -385,16 +386,16 @@ def _wait_for_minio_startup(mcdir, address, access_key, secret_key):
 
 def _run_mc_command(mcdir, *args):
     full_args = ['mc', '-C', mcdir] + list(args)
-    proc = subprocess.Popen(full_args, stdout=subprocess.PIPE,
-                            stderr=subprocess.PIPE, encoding='utf-8')
-    retval = proc.wait(10)
-    cmd_str = ' '.join(full_args)
-    print(f'Cmd: {cmd_str}')
-    print(f'  Return: {retval}')
-    print(f'  Stdout: {proc.stdout.read()}')
-    print(f'  Stderr: {proc.stderr.read()}')
-    if retval != 0:
-        raise ChildProcessError("Could not run mc")
+    with subprocess.Popen(full_args, stdout=subprocess.PIPE,
+                          stderr=subprocess.PIPE, encoding='utf-8') as proc:
+        retval = proc.wait(10)
+        cmd_str = ' '.join(full_args)
+        print(f'Cmd: {cmd_str}')
+        print(f'  Return: {retval}')
+        print(f'  Stdout: {proc.stdout.read()}')
+        print(f'  Stderr: {proc.stderr.read()}')
+        if retval != 0:
+            raise ChildProcessError("Could not run mc")
 
 
 def _configure_s3_limited_user(s3_server, policy):
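Using Popen as a context manager is what fixes the unclosed-file warnings here: on exit it closes the stdout/stderr pipe objects and waits for the process. A minimal sketch:

import subprocess

# __exit__ closes the pipe file objects and reaps the process, so no
# "unclosed file" ResourceWarning is emitted for stdout/stderr
with subprocess.Popen(['echo', 'hello'], stdout=subprocess.PIPE,
                      stderr=subprocess.PIPE, encoding='utf-8') as proc:
    proc.wait(10)
    print(proc.stdout.read())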