From 9452f02ed2bc49cd33d8b020ec56070fc53a3b71 Mon Sep 17 00:00:00 2001 From: Miles Granger Date: Thu, 24 Nov 2022 09:52:26 +0100 Subject: [PATCH 01/20] Fix resource warnings originating from tests/util.py --- python/pyarrow/tests/util.py | 45 ++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/python/pyarrow/tests/util.py b/python/pyarrow/tests/util.py index ddeca128791..df7936371ee 100644 --- a/python/pyarrow/tests/util.py +++ b/python/pyarrow/tests/util.py @@ -357,18 +357,19 @@ def signal_wakeup_fd(*, warn_on_full_buffer=False): def _ensure_minio_component_version(component, minimum_year): full_args = [component, '--version'] - proc = subprocess.Popen(full_args, stdout=subprocess.PIPE, - stderr=subprocess.PIPE, encoding='utf-8') - if proc.wait(10) != 0: - return False - stdout = proc.stdout.read() - pattern = component + r' version RELEASE\.(\d+)-.*' - version_match = re.search(pattern, stdout) - if version_match: - version_year = version_match.group(1) - return int(version_year) >= minimum_year - else: - raise FileNotFoundError("minio component older than the minimum year") + with subprocess.Popen(full_args, stdout=subprocess.PIPE, + stderr=subprocess.PIPE, encoding='utf-8') as proc: + if proc.wait(10) != 0: + return False + stdout = proc.stdout.read() + pattern = component + r' version RELEASE\.(\d+)-.*' + version_match = re.search(pattern, stdout) + if version_match: + version_year = version_match.group(1) + return int(version_year) >= minimum_year + else: + raise FileNotFoundError( + "minio component older than the minimum year") def _wait_for_minio_startup(mcdir, address, access_key, secret_key): @@ -385,16 +386,16 @@ def _wait_for_minio_startup(mcdir, address, access_key, secret_key): def _run_mc_command(mcdir, *args): full_args = ['mc', '-C', mcdir] + list(args) - proc = subprocess.Popen(full_args, stdout=subprocess.PIPE, - stderr=subprocess.PIPE, encoding='utf-8') - retval = proc.wait(10) - cmd_str = ' 
'.join(full_args) - print(f'Cmd: {cmd_str}') - print(f' Return: {retval}') - print(f' Stdout: {proc.stdout.read()}') - print(f' Stderr: {proc.stderr.read()}') - if retval != 0: - raise ChildProcessError("Could not run mc") + with subprocess.Popen(full_args, stdout=subprocess.PIPE, + stderr=subprocess.PIPE, encoding='utf-8') as proc: + retval = proc.wait(10) + cmd_str = ' '.join(full_args) + print(f'Cmd: {cmd_str}') + print(f' Return: {retval}') + print(f' Stdout: {proc.stdout.read()}') + print(f' Stderr: {proc.stderr.read()}') + if retval != 0: + raise ChildProcessError("Could not run mc") def _configure_s3_limited_user(s3_server, policy): From c2d6a917ec82653fe88212bd5e13850619ec909a Mon Sep 17 00:00:00 2001 From: Miles Granger Date: Thu, 24 Nov 2022 13:16:57 +0100 Subject: [PATCH 02/20] Ignore filesystem.LocalFileSystem warning Use fs.LocalFileSystem instead --- python/pyarrow/tests/test_filesystem.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/pyarrow/tests/test_filesystem.py b/python/pyarrow/tests/test_filesystem.py index 33ac4b8517f..9862c5990d7 100644 --- a/python/pyarrow/tests/test_filesystem.py +++ b/python/pyarrow/tests/test_filesystem.py @@ -63,6 +63,7 @@ def test_resolve_local_path(): assert path == uri +@pytest.mark.filterwarnings("ignore:pyarrow.filesystem.LocalFileSystem") def test_resolve_home_directory(): uri = '~/myfile.parquet' fs, path = filesystem.resolve_filesystem_and_path(uri) From 860c309542c288be3c98c755089ec3294203b6e8 Mon Sep 17 00:00:00 2001 From: Miles Granger Date: Thu, 24 Nov 2022 13:33:06 +0100 Subject: [PATCH 03/20] Fix pytest.warns(None) deprecation warning https://docs.pytest.org/en/latest/how-to/capture-warnings.html#additional-use-cases-of-warnings-in-tests --- python/pyarrow/tests/parquet/test_basic.py | 15 ++++++++------- python/pyarrow/tests/test_pandas.py | 11 +++++------ 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/python/pyarrow/tests/parquet/test_basic.py 
b/python/pyarrow/tests/parquet/test_basic.py index 62ea19d422d..3a7401795f7 100644 --- a/python/pyarrow/tests/parquet/test_basic.py +++ b/python/pyarrow/tests/parquet/test_basic.py @@ -17,6 +17,7 @@ from collections import OrderedDict import io +import warnings import numpy as np import pytest @@ -617,15 +618,15 @@ def test_read_non_existent_file(tempdir, use_legacy_dataset): @parametrize_legacy_dataset def test_read_table_doesnt_warn(datadir, use_legacy_dataset): - with pytest.warns(None) as record: - pq.read_table(datadir / 'v0.7.1.parquet', - use_legacy_dataset=use_legacy_dataset) - if use_legacy_dataset: - # FutureWarning: 'use_legacy_dataset=True' - assert len(record) == 1 + with pytest.warns(FutureWarning): + pq.read_table(datadir / 'v0.7.1.parquet', + use_legacy_dataset=use_legacy_dataset) else: - assert len(record) == 0 + with warnings.catch_warnings(): + warnings.simplefilter(action="error") + pq.read_table(datadir / 'v0.7.1.parquet', + use_legacy_dataset=use_legacy_dataset) @pytest.mark.pandas diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py index a1ab4d43881..d6e15bc7e5f 100644 --- a/python/pyarrow/tests/test_pandas.py +++ b/python/pyarrow/tests/test_pandas.py @@ -20,6 +20,7 @@ import json import multiprocessing as mp import sys +import warnings from collections import OrderedDict from datetime import date, datetime, time, timedelta, timezone @@ -239,11 +240,10 @@ def test_rangeindex_doesnt_warn(self): # attributes -> can be removed if support < pd 0.25 is dropped df = pd.DataFrame(np.random.randn(4, 2), columns=['a', 'b']) - with pytest.warns(None) as record: + with warnings.catch_warnings(): + warnings.simplefilter(action="error") _check_pandas_roundtrip(df, preserve_index=True) - assert len(record) == 0, [r.message for r in record] - def test_multiindex_columns(self): columns = pd.MultiIndex.from_arrays([ ['one', 'two'], ['X', 'Y'] @@ -290,11 +290,10 @@ def test_multiindex_doesnt_warn(self): columns = 
pd.MultiIndex.from_arrays([['one', 'two'], ['X', 'Y']]) df = pd.DataFrame([(1, 'a'), (2, 'b'), (3, 'c')], columns=columns) - with pytest.warns(None) as record: + with warnings.catch_warnings(): + warnings.simplefilter(action="error") _check_pandas_roundtrip(df, preserve_index=True) - assert len(record) == 0, [r.message for r in record] - def test_integer_index_column(self): df = pd.DataFrame([(1, 'a'), (2, 'b'), (3, 'c')]) _check_pandas_roundtrip(df, preserve_index=True) From b6b30d7f32df97bc25167e11178998a6ecd5adce Mon Sep 17 00:00:00 2001 From: Miles Granger Date: Thu, 24 Nov 2022 14:40:46 +0100 Subject: [PATCH 04/20] Ignore PytestUnraisableExceptionWarning spawning from pyx __del__ --- python/pyarrow/tests/test_substrait.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/python/pyarrow/tests/test_substrait.py b/python/pyarrow/tests/test_substrait.py index 030e4aad820..1ad3718312b 100644 --- a/python/pyarrow/tests/test_substrait.py +++ b/python/pyarrow/tests/test_substrait.py @@ -17,6 +17,7 @@ import os import pathlib + import pytest import pyarrow as pa @@ -223,7 +224,11 @@ def table_provider(names): assert res_tb == test_table_1 +@pytest.mark.filterwarnings("ignore::pytest.PytestUnraisableExceptionWarning") def test_named_table_invalid_table_name(): + # Warning generated from pyx which is unable to be propagated to caller + # ref: https://docs.pytest.org/en/6.2.x/usage.html#warning-about + # -unraisable-exceptions-and-unhandled-thread-exceptions test_table_1 = pa.Table.from_pydict({"x": [1, 2, 3]}) def table_provider(names): From df21ec2c352cd8723161ac76cd7f08e63276cbf7 Mon Sep 17 00:00:00 2001 From: Miles Granger Date: Thu, 24 Nov 2022 14:46:56 +0100 Subject: [PATCH 05/20] Ignore warnings for explicitly used deprecated parquet versions --- python/pyarrow/tests/parquet/test_metadata.py | 1 + python/pyarrow/tests/parquet/test_pandas.py | 1 + python/pyarrow/tests/test_pandas.py | 1 + 3 files changed, 3 insertions(+) diff --git 
a/python/pyarrow/tests/parquet/test_metadata.py b/python/pyarrow/tests/parquet/test_metadata.py index 2c6f250452d..fef1cc564b4 100644 --- a/python/pyarrow/tests/parquet/test_metadata.py +++ b/python/pyarrow/tests/parquet/test_metadata.py @@ -400,6 +400,7 @@ def test_multi_dataset_metadata(tempdir): assert md['serialized_size'] > 0 +@pytest.mark.filterwarnings("ignore:Parquet format:FutureWarning") def test_write_metadata(tempdir): path = str(tempdir / "metadata") schema = pa.schema([("a", "int64"), ("b", "float64")]) diff --git a/python/pyarrow/tests/parquet/test_pandas.py b/python/pyarrow/tests/parquet/test_pandas.py index 3bc204c978a..c153db4aa00 100644 --- a/python/pyarrow/tests/parquet/test_pandas.py +++ b/python/pyarrow/tests/parquet/test_pandas.py @@ -314,6 +314,7 @@ def test_pandas_parquet_configuration_options(tempdir, use_legacy_dataset): @pytest.mark.pandas +@pytest.mark.filterwarnings("ignore:Parquet format '2.0':FutureWarning") def test_spark_flavor_preserves_pandas_metadata(): df = _test_dataframe(size=100) df.index = np.arange(0, 10 * len(df), 10) diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py index d6e15bc7e5f..d9f24ab3454 100644 --- a/python/pyarrow/tests/test_pandas.py +++ b/python/pyarrow/tests/test_pandas.py @@ -4377,6 +4377,7 @@ def make_df_with_timestamps(): @pytest.mark.parquet +@pytest.mark.filterwarnings("ignore:Parquet format '2.0':FutureWarning") def test_timestamp_as_object_parquet(tempdir): # Timestamps can be stored as Parquet and reloaded into Pandas with no loss # of information if the timestamp_as_object option is True. 
From 3038aea678bf3a2100da73be1da6e9639507c74b Mon Sep 17 00:00:00 2001 From: Miles Granger Date: Thu, 24 Nov 2022 16:29:19 +0100 Subject: [PATCH 06/20] Fix ResourceWarning unclosed socket in StreamReaderServer --- python/pyarrow/tests/test_ipc.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/pyarrow/tests/test_ipc.py b/python/pyarrow/tests/test_ipc.py index 5237e3b4f03..d9abe987ae6 100644 --- a/python/pyarrow/tests/test_ipc.py +++ b/python/pyarrow/tests/test_ipc.py @@ -846,6 +846,7 @@ def run(self): self._batches.append(batch) finally: connection.close() + self._sock.close() def get_result(self): return (self._schema, self._table if self._do_read_all From 2d8fb4aff212d17f048d845b5ecea06a6622d1e7 Mon Sep 17 00:00:00 2001 From: Miles Granger Date: Fri, 25 Nov 2022 10:20:50 +0100 Subject: [PATCH 07/20] Fix process still running warning - wait --- python/pyarrow/tests/conftest.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/pyarrow/tests/conftest.py b/python/pyarrow/tests/conftest.py index a06ac92095b..154ef79dcd5 100644 --- a/python/pyarrow/tests/conftest.py +++ b/python/pyarrow/tests/conftest.py @@ -174,6 +174,7 @@ def s3_server(s3_connection): finally: if proc is not None: proc.kill() + proc.wait() @pytest.fixture(scope='session') @@ -194,3 +195,4 @@ def gcs_server(): finally: if proc is not None: proc.kill() + proc.wait() From cbcd6c1d5164c51650486bf36e329ad1eb7718f2 Mon Sep 17 00:00:00 2001 From: Miles Granger Date: Fri, 25 Nov 2022 11:04:08 +0100 Subject: [PATCH 08/20] Ignore plasma ResourceWarnings --- python/pyarrow/tests/test_plasma.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/python/pyarrow/tests/test_plasma.py b/python/pyarrow/tests/test_plasma.py index cc5fd0357dd..9b5862e6483 100644 --- a/python/pyarrow/tests/test_plasma.py +++ b/python/pyarrow/tests/test_plasma.py @@ -30,9 +30,13 @@ import pyarrow as pa -# ignore all Plasma deprecation warnings in this file, we test that the -# warnings are 
actually raised in test_plasma_deprecated.py -pytestmark = pytest.mark.filterwarnings("ignore:Plasma:DeprecationWarning") +pytestmark = [ + # ignore all Plasma deprecation warnings in this file, we test that the + # warnings are actually raised in test_plasma_deprecated.py + pytest.mark.filterwarnings("ignore:Plasma:DeprecationWarning"), + # Ignore other ResourceWarning as plasma is soon to be removed in ~12.0.0 + pytest.mark.filterwarnings("ignore:subprocess:ResourceWarning") +] DEFAULT_PLASMA_STORE_MEMORY = 10 ** 8 USE_VALGRIND = os.getenv("PLASMA_VALGRIND") == "1" From 584237957253578eb56afa2173af2b8e22858225 Mon Sep 17 00:00:00 2001 From: Miles Granger Date: Fri, 25 Nov 2022 11:22:03 +0100 Subject: [PATCH 09/20] Fix ResourceWarning unclosed file in get_metadata --- python/pyarrow/parquet/core.py | 6 ++++-- python/pyarrow/tests/parquet/test_dataset.py | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/python/pyarrow/parquet/core.py b/python/pyarrow/parquet/core.py index 34856beb78d..e1dcd232939 100644 --- a/python/pyarrow/parquet/core.py +++ b/python/pyarrow/parquet/core.py @@ -1165,8 +1165,10 @@ def get_metadata(self): ------- metadata : FileMetaData """ - with self.open() as parquet: - return parquet.metadata + parquet = self.open() + meta = parquet.metadata + parquet.close(True) + return meta def open(self): """ diff --git a/python/pyarrow/tests/parquet/test_dataset.py b/python/pyarrow/tests/parquet/test_dataset.py index 654fd4ddc11..0fabbb5c977 100644 --- a/python/pyarrow/tests/parquet/test_dataset.py +++ b/python/pyarrow/tests/parquet/test_dataset.py @@ -81,6 +81,7 @@ def test_parquet_piece_open_and_get_metadata(tempdir): with pytest.warns(FutureWarning): piece = pq.ParquetDatasetPiece(path) + table1 = piece.read() assert isinstance(table1, pa.Table) meta1 = piece.get_metadata() From 4308f132a74622d8f799aee3f4a2b025296497e7 Mon Sep 17 00:00:00 2001 From: Miles Granger Date: Fri, 25 Nov 2022 11:26:10 +0100 Subject: [PATCH 10/20] Ignore 
UserWarning on overflow during explicit testing --- python/pyarrow/tests/parquet/test_datetime.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/pyarrow/tests/parquet/test_datetime.py b/python/pyarrow/tests/parquet/test_datetime.py index e10d4fd776d..686ff64c175 100644 --- a/python/pyarrow/tests/parquet/test_datetime.py +++ b/python/pyarrow/tests/parquet/test_datetime.py @@ -297,6 +297,8 @@ def test_coerce_int96_timestamp_unit(unit): @pytest.mark.pandas @pytest.mark.parametrize('pq_reader_method', ['ParquetFile', 'read_table']) +@pytest.mark.filterwarnings( + "ignore:Discarding nonzero nanoseconds in conversion:UserWarning") def test_coerce_int96_timestamp_overflow(pq_reader_method, tempdir): def get_table(pq_reader_method, filename, **kwargs): From 2146ba452555dbc8acd0d1cd443e4d3303a98929 Mon Sep 17 00:00:00 2001 From: Miles Granger Date: Fri, 25 Nov 2022 12:00:01 +0100 Subject: [PATCH 11/20] Fix pandas UserWarning - ORC value set on copy --- python/pyarrow/tests/test_orc.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/pyarrow/tests/test_orc.py b/python/pyarrow/tests/test_orc.py index a109ab3472f..1b467d52330 100644 --- a/python/pyarrow/tests/test_orc.py +++ b/python/pyarrow/tests/test_orc.py @@ -52,8 +52,10 @@ def fix_example_values(actual_cols, expected_cols): if (name == "map" and [d.keys() == {'key', 'value'} for m in expected for d in m]): # convert [{'key': k, 'value': v}, ...] to [(k, v), ...] 
+ col = expected_cols[name].copy() for i, m in enumerate(expected): - expected_cols[name][i] = [(d['key'], d['value']) for d in m] + col[i] = [(d['key'], d['value']) for d in m] + expected_cols[name] = col continue typ = actual[0].__class__ From 23692a3fba177156a6e003beaf64e7b6eb18cc31 Mon Sep 17 00:00:00 2001 From: Miles Granger Date: Fri, 25 Nov 2022 12:34:30 +0100 Subject: [PATCH 12/20] Ignore DeprecationWarning in explicit test of parsing tz aware datetimes --- python/pyarrow/tests/test_pandas.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py index d9f24ab3454..9c6af974715 100644 --- a/python/pyarrow/tests/test_pandas.py +++ b/python/pyarrow/tests/test_pandas.py @@ -183,6 +183,8 @@ def test_column_index_names_are_preserved(self): df.columns.names = ['a'] _check_pandas_roundtrip(df, preserve_index=True) + @pytest.mark.filterwarnings( + "ignore:parsing timezone aware datetimes:DeprecationWarning") def test_column_index_names_with_tz(self): # ARROW-13756 # Bug if index is timezone aware DataTimeIndex From 38c93ece8451134537b609182f2ed8ca7996cde4 Mon Sep 17 00:00:00 2001 From: Miles Granger Date: Mon, 28 Nov 2022 15:19:52 +0100 Subject: [PATCH 13/20] get_metadata: Flag reader should close source --- python/pyarrow/parquet/core.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/python/pyarrow/parquet/core.py b/python/pyarrow/parquet/core.py index e1dcd232939..16ef9d09a0a 100644 --- a/python/pyarrow/parquet/core.py +++ b/python/pyarrow/parquet/core.py @@ -1165,10 +1165,8 @@ def get_metadata(self): ------- metadata : FileMetaData """ - parquet = self.open() - meta = parquet.metadata - parquet.close(True) - return meta + with self.open() as parquet: + return parquet.metadata def open(self): """ @@ -1177,6 +1175,10 @@ def open(self): reader = self.open_file_func(self.path) if not isinstance(reader, ParquetFile): reader = ParquetFile(reader, **self.file_options) + 
# ensure reader knows it's responsible for closing source + # since we opened the source here internally. + reader._close_source = True return reader def read(self, columns=None, use_threads=True, partitions=None, From eb3feab8242702caf361726b91b6609023203c43 Mon Sep 17 00:00:00 2001 From: Miles Granger Date: Mon, 28 Nov 2022 15:29:16 +0100 Subject: [PATCH 14/20] Add FutureWarning message to match against in read_table_doesnt_warn --- python/pyarrow/tests/parquet/test_basic.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/pyarrow/tests/parquet/test_basic.py b/python/pyarrow/tests/parquet/test_basic.py index 3a7401795f7..05321a937b5 100644 --- a/python/pyarrow/tests/parquet/test_basic.py +++ b/python/pyarrow/tests/parquet/test_basic.py @@ -619,7 +619,8 @@ def test_read_non_existent_file(tempdir, use_legacy_dataset): @parametrize_legacy_dataset def test_read_table_doesnt_warn(datadir, use_legacy_dataset): if use_legacy_dataset: - with pytest.warns(FutureWarning): + msg = "Passing 'use_legacy_dataset=True'" + with pytest.warns(FutureWarning, match=msg): pq.read_table(datadir / 'v0.7.1.parquet', use_legacy_dataset=use_legacy_dataset) else: From eb4962f9d0362db00a2bf0f06d70568681c3ee9a Mon Sep 17 00:00:00 2001 From: Miles Granger Date: Tue, 29 Nov 2022 10:52:17 +0100 Subject: [PATCH 15/20] Add back ignoring VisibleDeprecationWarning --- python/pyarrow/tests/test_pandas.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py index 9c6af974715..5f12a3e792d 100644 --- a/python/pyarrow/tests/test_pandas.py +++ b/python/pyarrow/tests/test_pandas.py @@ -2165,9 +2165,15 @@ def test_nested_large_list(self): s = (pa.array([[[1, 2, 3], [4]], None], type=pa.large_list(pa.large_list(pa.int64()))) .to_pandas()) - tm.assert_series_equal( - s, pd.Series([[[1, 2, 3], [4]], None], dtype=object), - check_names=False) + + # pandas.testing generates a + # 
numpy.VisibleDeprecationWarning: Creating an ndarray + # from ragged nested sequences ... + with warnings.catch_warnings(): + warnings.simplefilter("ignore", np.VisibleDeprecationWarning) + tm.assert_series_equal( + s, pd.Series([[[1, 2, 3], [4]], None], dtype=object), + check_names=False) def test_large_binary_list(self): for list_type_factory in (pa.list_, pa.large_list): From 8b576c1f61179b62181a354383baea42eabf6584 Mon Sep 17 00:00:00 2001 From: Miles Granger Date: Tue, 29 Nov 2022 11:00:32 +0100 Subject: [PATCH 16/20] pandas.testing specific element-wise comparison warnings --- python/pyarrow/tests/test_pandas.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py index 5f12a3e792d..a857e545e6f 100644 --- a/python/pyarrow/tests/test_pandas.py +++ b/python/pyarrow/tests/test_pandas.py @@ -96,9 +96,13 @@ def _check_pandas_roundtrip(df, expected=None, use_threads=False, if expected is None: expected = df - tm.assert_frame_equal(result, expected, check_dtype=check_dtype, - check_index_type=('equiv' if preserve_index - else False)) + # pandas.testing generates a + # DeprecationWarning: elementwise comparison failed + with warnings.catch_warnings(): + warnings.simplefilter("ignore", DeprecationWarning) + tm.assert_frame_equal(result, expected, check_dtype=check_dtype, + check_index_type=('equiv' if preserve_index + else False)) def _check_series_roundtrip(s, type_=None, expected_pa_type=None): @@ -2112,7 +2116,12 @@ def test_to_list_of_structs_pandas(self): ]) series = pd.Series(data.to_pandas()) - tm.assert_series_equal(series, expected) + + # pandas.testing generates a + # DeprecationWarning: elementwise comparison failed + with warnings.catch_warnings(): + warnings.simplefilter("ignore", DeprecationWarning) + tm.assert_series_equal(series, expected) @pytest.mark.parametrize('t,data,expected', [ ( @@ -2167,10 +2176,12 @@ def test_nested_large_list(self): 
.to_pandas()) # pandas.testing generates a + # DeprecationWarning: elementwise comparison failed # numpy.VisibleDeprecationWarning: Creating an ndarray # from ragged nested sequences ... with warnings.catch_warnings(): warnings.simplefilter("ignore", np.VisibleDeprecationWarning) + warnings.simplefilter("ignore", DeprecationWarning) tm.assert_series_equal( s, pd.Series([[[1, 2, 3], [4]], None], dtype=object), check_names=False) From a599d65fd09579c5d1a4195a7f41f356b958d544 Mon Sep 17 00:00:00 2001 From: Miles Granger Date: Tue, 29 Nov 2022 11:48:33 +0100 Subject: [PATCH 17/20] CI Test: are 'discarding nonzero nanoseconds' and 'parsing timezone aware datetimes' warnings gone? --- python/pyarrow/tests/parquet/test_datetime.py | 2 -- python/pyarrow/tests/test_pandas.py | 2 -- 2 files changed, 4 deletions(-) diff --git a/python/pyarrow/tests/parquet/test_datetime.py b/python/pyarrow/tests/parquet/test_datetime.py index 686ff64c175..e10d4fd776d 100644 --- a/python/pyarrow/tests/parquet/test_datetime.py +++ b/python/pyarrow/tests/parquet/test_datetime.py @@ -297,8 +297,6 @@ def test_coerce_int96_timestamp_unit(unit): @pytest.mark.pandas @pytest.mark.parametrize('pq_reader_method', ['ParquetFile', 'read_table']) -@pytest.mark.filterwarnings( - "ignore:Discarding nonzero nanoseconds in conversion:UserWarning") def test_coerce_int96_timestamp_overflow(pq_reader_method, tempdir): def get_table(pq_reader_method, filename, **kwargs): diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py index a857e545e6f..bee1fcb14e9 100644 --- a/python/pyarrow/tests/test_pandas.py +++ b/python/pyarrow/tests/test_pandas.py @@ -187,8 +187,6 @@ def test_column_index_names_are_preserved(self): df.columns.names = ['a'] _check_pandas_roundtrip(df, preserve_index=True) - @pytest.mark.filterwarnings( - "ignore:parsing timezone aware datetimes:DeprecationWarning") def test_column_index_names_with_tz(self): # ARROW-13756 # Bug if index is timezone aware DataTimeIndex 
From 20b1a30622ecf722783c1f0485d5f37c110a8deb Mon Sep 17 00:00:00 2001 From: Miles Granger Date: Tue, 29 Nov 2022 12:12:03 +0100 Subject: [PATCH 18/20] Don't ignore substrait invalid_table_name PytestUnraisableExceptionWarning [skip ci] --- python/pyarrow/tests/test_substrait.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/python/pyarrow/tests/test_substrait.py b/python/pyarrow/tests/test_substrait.py index 1ad3718312b..7cd4459f6fd 100644 --- a/python/pyarrow/tests/test_substrait.py +++ b/python/pyarrow/tests/test_substrait.py @@ -224,11 +224,7 @@ def table_provider(names): assert res_tb == test_table_1 -@pytest.mark.filterwarnings("ignore::pytest.PytestUnraisableExceptionWarning") def test_named_table_invalid_table_name(): - # Warning generated from pyx which is unable to be propagated to caller - # ref: https://docs.pytest.org/en/6.2.x/usage.html#warning-about - # -unraisable-exceptions-and-unhandled-thread-exceptions test_table_1 = pa.Table.from_pydict({"x": [1, 2, 3]}) def table_provider(names): From ee97c6bdc475b6e8e78c827cfea278272189f5c8 Mon Sep 17 00:00:00 2001 From: Miles Granger Date: Tue, 29 Nov 2022 12:33:24 +0100 Subject: [PATCH 19/20] Add specific message to warning filters --- python/pyarrow/tests/test_pandas.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py index bee1fcb14e9..c7b0bf783dc 100644 --- a/python/pyarrow/tests/test_pandas.py +++ b/python/pyarrow/tests/test_pandas.py @@ -96,10 +96,9 @@ def _check_pandas_roundtrip(df, expected=None, use_threads=False, if expected is None: expected = df - # pandas.testing generates a - # DeprecationWarning: elementwise comparison failed with warnings.catch_warnings(): - warnings.simplefilter("ignore", DeprecationWarning) + warnings.filterwarnings( + "ignore", "elementwise comparison failed", DeprecationWarning) tm.assert_frame_equal(result, expected, check_dtype=check_dtype, 
check_index_type=('equiv' if preserve_index else False)) @@ -2118,7 +2117,8 @@ def test_to_list_of_structs_pandas(self): # pandas.testing generates a # DeprecationWarning: elementwise comparison failed with warnings.catch_warnings(): - warnings.simplefilter("ignore", DeprecationWarning) + warnings.filterwarnings("ignore", "elementwise comparison failed", + DeprecationWarning) tm.assert_series_equal(series, expected) @pytest.mark.parametrize('t,data,expected', [ @@ -2173,13 +2173,12 @@ def test_nested_large_list(self): type=pa.large_list(pa.large_list(pa.int64()))) .to_pandas()) - # pandas.testing generates a - # DeprecationWarning: elementwise comparison failed - # numpy.VisibleDeprecationWarning: Creating an ndarray - # from ragged nested sequences ... with warnings.catch_warnings(): - warnings.simplefilter("ignore", np.VisibleDeprecationWarning) - warnings.simplefilter("ignore", DeprecationWarning) + warnings.filterwarnings("ignore", + "Creating an ndarray from ragged nested", + np.VisibleDeprecationWarning) + warnings.filterwarnings("ignore", "elementwise comparison failed", + DeprecationWarning) tm.assert_series_equal( s, pd.Series([[[1, 2, 3], [4]], None], dtype=object), check_names=False) From ab4a31a4a1dbca63f8e35eb3553f8b43f9d0f71a Mon Sep 17 00:00:00 2001 From: Miles Granger Date: Tue, 29 Nov 2022 13:29:52 +0100 Subject: [PATCH 20/20] Move warnings catch to specific overflow line --- python/pyarrow/tests/parquet/test_datetime.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/python/pyarrow/tests/parquet/test_datetime.py b/python/pyarrow/tests/parquet/test_datetime.py index e10d4fd776d..52f3f5ce4a0 100644 --- a/python/pyarrow/tests/parquet/test_datetime.py +++ b/python/pyarrow/tests/parquet/test_datetime.py @@ -17,6 +17,7 @@ import datetime import io +import warnings import numpy as np import pytest @@ -321,7 +322,11 @@ def get_table(pq_reader_method, filename, **kwargs): # with the default resolution of ns, we get wrong values for 
INT96 # that are out of bounds for nanosecond range tab_error = get_table(pq_reader_method, filename) - assert tab_error["a"].to_pylist() != oob_dts + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", + "Discarding nonzero nanoseconds in conversion", + UserWarning) + assert tab_error["a"].to_pylist() != oob_dts # avoid this overflow by specifying the resolution to use for INT96 values tab_correct = get_table(