From 7a324f5ed2513c43e9911d708f456137bcd97a9a Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 12 Oct 2020 14:39:22 +0200 Subject: [PATCH 1/2] ARROW-10281: [Python] Fix warnings when running tests --- python/pyarrow/tests/test_convert_builtin.py | 2 +- python/pyarrow/tests/test_dataset.py | 4 ++-- python/pyarrow/tests/test_io.py | 2 +- python/pyarrow/tests/test_ipc.py | 1 + python/pyarrow/tests/test_pandas.py | 10 +++++++--- python/pyarrow/tests/test_parquet.py | 1 + python/pyarrow/tests/test_plasma.py | 6 +++++- 7 files changed, 18 insertions(+), 8 deletions(-) diff --git a/python/pyarrow/tests/test_convert_builtin.py b/python/pyarrow/tests/test_convert_builtin.py index 91624d71b8f6..cb6b4b3b1338 100644 --- a/python/pyarrow/tests/test_convert_builtin.py +++ b/python/pyarrow/tests/test_convert_builtin.py @@ -569,7 +569,7 @@ def test_ndarray_nested_numpy_double(from_pandas, inner_seq): inner_seq([1., 2., 3.]), inner_seq([np.nan]), None - ]) + ], dtype=object) arr = pa.array(data, from_pandas=from_pandas) assert len(arr) == 4 assert arr.null_count == 1 diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py index 8c7ebc0ee1a5..d2953e606241 100644 --- a/python/pyarrow/tests/test_dataset.py +++ b/python/pyarrow/tests/test_dataset.py @@ -169,9 +169,9 @@ def multisourcefs(request): with mockfs.open_output_stream(path) as out: pq.write_table(_table_from_pandas(chunk), out) - # create one with schema partitioning by week and color + # create one with schema partitioning by weekday and color mockfs.create_dir('schema') - for part, chunk in df_b.groupby([df_b.date.dt.week, df_b.color]): + for part, chunk in df_b.groupby([df_b.date.dt.dayofweek, df_b.color]): folder = 'schema/{}/{}'.format(*part) path = '{}/chunk.parquet'.format(folder) mockfs.create_dir(folder) diff --git a/python/pyarrow/tests/test_io.py b/python/pyarrow/tests/test_io.py index 7d3237af5053..69dc1355d28c 100644 --- a/python/pyarrow/tests/test_io.py +++ b/python/pyarrow/tests/test_io.py @@ -573,7 +573,7 @@ def test_compress_decompress(compression): INPUT_SIZE = 10000 test_data = (np.random.randint(0, 255, size=INPUT_SIZE) .astype(np.uint8) - .tostring()) + .tobytes()) test_buf = pa.py_buffer(test_data) compressed_buf = pa.compress(test_buf, codec=compression) diff --git a/python/pyarrow/tests/test_ipc.py b/python/pyarrow/tests/test_ipc.py index 3d3e72e61653..62bb1e708a59 100644 --- a/python/pyarrow/tests/test_ipc.py +++ b/python/pyarrow/tests/test_ipc.py @@ -765,6 +765,7 @@ def test_serialize_pandas_no_preserve_index(): assert_frame_equal(result, df) +@pytest.mark.filterwarnings("ignore:'pyarrow:DeprecationWarning") def test_serialize_with_pandas_objects(): df = pd.DataFrame({'a': [1, 2, 3]}, index=[1, 2, 3]) s = pd.Series([1, 2, 3, 4]) diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py index 54f4574932eb..f46e75fc3acf 100644 --- a/python/pyarrow/tests/test_pandas.py +++ b/python/pyarrow/tests/test_pandas.py @@ -1793,7 +1793,7 @@ def test_column_of_arrays_to_py(self): np.arange(5, dtype=dtype), None, np.arange(1, dtype=dtype) - ]) + ], dtype=object) type_ = pa.list_(pa.int8()) parr = pa.array(arr, type=type_) @@ -2090,7 +2090,7 @@ def test_nested_large_list(self): type=pa.large_list(pa.large_list(pa.int64()))) .to_pandas()) tm.assert_series_equal( - s, pd.Series([[[1, 2, 3], [4]], None]), + s, pd.Series([[[1, 2, 3], [4]], None], dtype=object), check_names=False) def test_large_binary_list(self): @@ -2717,7 +2717,11 @@ def test_safe_unsafe_casts(self): def test_error_sparse(self): # ARROW-2818 - df = pd.DataFrame({'a': pd.SparseArray([1, np.nan, 3])}) + try: + df = pd.DataFrame({'a': pd.arrays.SparseArray([1, np.nan, 3])}) + except AttributeError: + # pandas.arrays module introduced in pandas 0.24 + df = pd.DataFrame({'a': pd.SparseArray([1, np.nan, 3])}) with pytest.raises(TypeError, match="Sparse pandas data"): pa.Table.from_pandas(df) diff --git a/python/pyarrow/tests/test_parquet.py b/python/pyarrow/tests/test_parquet.py index 34528b68ba1a..37e104303af4 100644 --- a/python/pyarrow/tests/test_parquet.py +++ b/python/pyarrow/tests/test_parquet.py @@ -4195,6 +4195,7 @@ def test_filter_before_validate_schema(tempdir, use_legacy_dataset): @pytest.mark.pandas @pytest.mark.fastparquet @pytest.mark.filterwarnings("ignore:RangeIndex:FutureWarning") +@pytest.mark.filterwarnings("ignore:tostring:DeprecationWarning:fastparquet") def test_fastparquet_cross_compatibility(tempdir): fp = pytest.importorskip('fastparquet') diff --git a/python/pyarrow/tests/test_plasma.py b/python/pyarrow/tests/test_plasma.py index c5747359a5ea..3c00c29bb366 100644 --- a/python/pyarrow/tests/test_plasma.py +++ b/python/pyarrow/tests/test_plasma.py @@ -307,6 +307,8 @@ def test_put_and_get(self): [result] = self.plasma_client.get([object_id], timeout_ms=0) assert result == pa.plasma.ObjectNotAvailable + @pytest.mark.filterwarnings( + "ignore:'pyarrow.deserialize':DeprecationWarning") def test_put_and_get_raw_buffer(self): temp_id = random_object_id() use_meta = b"RAW" @@ -338,6 +340,8 @@ def deserialize_or_output(data_tuple): result = deserialize_or_output(result) assert result == pa.plasma.ObjectNotAvailable + @pytest.mark.filterwarnings( + "ignore:'serialization_context':DeprecationWarning") def test_put_and_get_serialization_context(self): class CustomType: @@ -349,7 +353,7 @@ def __init__(self, val): with pytest.raises(pa.ArrowSerializationError): self.plasma_client.put(val) - serialization_context = pa.SerializationContext() + serialization_context = pa.lib.SerializationContext() serialization_context.register_type(CustomType, 20*"\x00") object_id = self.plasma_client.put( From 5d5649d15bcd23106b338e1332a865d1b61f3014 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 12 Oct 2020 14:45:40 +0200 Subject: [PATCH 2/2] remove usage of pandas.util.testing --- python/pyarrow/tests/test_adhoc_memory_leak.py | 2 +- python/scripts/test_leak.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/python/pyarrow/tests/test_adhoc_memory_leak.py b/python/pyarrow/tests/test_adhoc_memory_leak.py index d95444d2d469..cd381cf427dc 100644 --- a/python/pyarrow/tests/test_adhoc_memory_leak.py +++ b/python/pyarrow/tests/test_adhoc_memory_leak.py @@ -32,7 +32,7 @@ @pytest.mark.pandas def test_deserialize_pandas_arrow_7956(): df = pd.DataFrame({'a': np.arange(10000), - 'b': [pd.util.testing.rands(5) for _ in range(10000)]}) + 'b': [test_util.rands(5) for _ in range(10000)]}) def action(): df_bytes = pa.ipc.serialize_pandas(df).to_pybytes() diff --git a/python/scripts/test_leak.py b/python/scripts/test_leak.py index 83aac1349696..f2bbe8d051bf 100644 --- a/python/scripts/test_leak.py +++ b/python/scripts/test_leak.py @@ -20,7 +20,7 @@ import pyarrow as pa import numpy as np import pandas as pd -import pandas.util.testing as tm +from pyarrow.tests.util import rands import memory_profiler import gc import io @@ -85,7 +85,7 @@ def test_leak3(): for i in range(50)}) table = pa.Table.from_pandas(df, preserve_index=False) - writer = pq.ParquetWriter('leak_test_' + tm.rands(5) + '.parquet', + writer = pq.ParquetWriter('leak_test_' + rands(5) + '.parquet', table.schema) def func():