diff --git a/ci/travis_lint.sh b/ci/travis_lint.sh
index 9b7b474524f..8c956646cb3 100755
--- a/ci/travis_lint.sh
+++ b/ci/travis_lint.sh
@@ -31,10 +31,10 @@ popd
 # Fail fast on style checks
 sudo pip install flake8
 
-PYARROW_DIR=$TRAVIS_BUILD_DIR/python
+PYARROW_DIR=$TRAVIS_BUILD_DIR/python/pyarrow
 
-flake8 --count $PYTHON_DIR/pyarrow
+flake8 --count $PYARROW_DIR
 
 # Check Cython files with some checks turned off
 flake8 --count --config=$PYTHON_DIR/.flake8.cython \
-    $PYTHON_DIR/pyarrow
+    $PYARROW_DIR
diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index f402defc9b0..2129e705875 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -47,7 +47,7 @@ cdef _is_array_like(obj):
     try:
         import pandas
         return isinstance(obj, (np.ndarray, pd.Series, pd.Index, Categorical))
-    except:
+    except ImportError:
         return isinstance(obj, np.ndarray)
 
 
diff --git a/python/pyarrow/feather.py b/python/pyarrow/feather.py
index aba76a008a0..2091c9154fd 100644
--- a/python/pyarrow/feather.py
+++ b/python/pyarrow/feather.py
@@ -116,7 +116,7 @@ def write_feather(df, dest):
     writer = FeatherWriter(dest)
     try:
         writer.write(df)
-    except:
+    except Exception:
         # Try to make sure the resource is closed
         import gc
         writer = None
diff --git a/python/pyarrow/io-hdfs.pxi b/python/pyarrow/io-hdfs.pxi
index e6285e465d2..e6538132358 100644
--- a/python/pyarrow/io-hdfs.pxi
+++ b/python/pyarrow/io-hdfs.pxi
@@ -32,7 +32,7 @@ def have_libhdfs():
         with nogil:
             check_status(HaveLibHdfs())
         return True
-    except:
+    except Exception:
         return False
 
 
@@ -41,7 +41,7 @@ def have_libhdfs3():
         with nogil:
             check_status(HaveLibHdfs3())
         return True
-    except:
+    except Exception:
         return False
 
 
diff --git a/python/pyarrow/parquet.py b/python/pyarrow/parquet.py
index b6a7b1244e0..0a40f5fb7fd 100644
--- a/python/pyarrow/parquet.py
+++ b/python/pyarrow/parquet.py
@@ -915,7 +915,7 @@ def write_table(table, where, row_group_size=None, version='1.0',
             use_deprecated_int96_timestamps=use_deprecated_int96_timestamps,
             **kwargs)
         writer.write_table(table, row_group_size=row_group_size)
-    except:
+    except Exception:
         if writer is not None:
             writer.close()
         if isinstance(where, six.string_types):
diff --git a/python/pyarrow/serialization.py b/python/pyarrow/serialization.py
index eed6aae8373..9dc8ee6dee9 100644
--- a/python/pyarrow/serialization.py
+++ b/python/pyarrow/serialization.py
@@ -20,10 +20,10 @@
 
 import numpy as np
 
-import pyarrow as pa
 from pyarrow import serialize_pandas, deserialize_pandas
 from pyarrow.lib import _default_serialization_context
 
+
 def register_default_serialization_handlers(serialization_context):
 
     # ----------------------------------------------------------------------
@@ -43,58 +43,48 @@ def register_default_serialization_handlers(serialization_context):
             custom_serializer=lambda obj: str(obj),
             custom_deserializer=lambda data: long(data))  # noqa: F821
 
-
     def _serialize_ordered_dict(obj):
         return list(obj.keys()), list(obj.values())
 
-
     def _deserialize_ordered_dict(data):
         return OrderedDict(zip(data[0], data[1]))
 
-
     serialization_context.register_type(
         OrderedDict, "OrderedDict",
         custom_serializer=_serialize_ordered_dict,
         custom_deserializer=_deserialize_ordered_dict)
 
-
     def _serialize_default_dict(obj):
         return list(obj.keys()), list(obj.values()), obj.default_factory
 
-
     def _deserialize_default_dict(data):
         return defaultdict(data[2], zip(data[0], data[1]))
 
-
     serialization_context.register_type(
         defaultdict, "defaultdict",
         custom_serializer=_serialize_default_dict,
         custom_deserializer=_deserialize_default_dict)
 
-
     serialization_context.register_type(
         type(lambda: 0), "function",
         pickle=True)
 
     # ----------------------------------------------------------------------
     # Set up serialization for numpy with dtype object (primitive types are
-    # handled efficiently with Arrow's Tensor facilities, see python_to_arrow.cc)
-
+    # handled efficiently with Arrow's Tensor facilities, see
+    # python_to_arrow.cc)
 
     def _serialize_numpy_array(obj):
         return obj.tolist(), obj.dtype.str
 
-
     def _deserialize_numpy_array(data):
         return np.array(data[0], dtype=np.dtype(data[1]))
 
-
     serialization_context.register_type(
         np.ndarray, 'np.array',
         custom_serializer=_serialize_numpy_array,
         custom_deserializer=_deserialize_numpy_array)
 
-
     # ----------------------------------------------------------------------
     # Set up serialization for pandas Series and DataFrame
 
diff --git a/python/pyarrow/tests/test_feather.py b/python/pyarrow/tests/test_feather.py
index 76f0844fa48..810ee3c8cf8 100644
--- a/python/pyarrow/tests/test_feather.py
+++ b/python/pyarrow/tests/test_feather.py
@@ -289,7 +289,7 @@ def test_delete_partial_file_on_error(self):
         path = random_path()
         try:
             write_feather(df, path)
-        except:
+        except Exception:
             pass
 
         assert not os.path.exists(path)
diff --git a/python/pyarrow/tests/test_serialization.py b/python/pyarrow/tests/test_serialization.py
index 67798ac31dc..7878a09228d 100644
--- a/python/pyarrow/tests/test_serialization.py
+++ b/python/pyarrow/tests/test_serialization.py
@@ -62,7 +62,7 @@ def assert_equal(obj1, obj2):
         # Workaround to make comparison of OrderedDicts work on Python 2.7
         if obj1 == obj2:
             return
-    except:
+    except Exception:
         pass
     if obj1.__dict__ == {}:
         print("WARNING: Empty dict in ", obj1)
@@ -300,6 +300,7 @@ def test_datetime_serialization(large_memory_map):
         for d in data:
             serialization_roundtrip(d, mmap)
 
+
 def test_torch_serialization(large_memory_map):
     pytest.importorskip("torch")
     import torch
@@ -311,6 +312,7 @@ def test_torch_serialization(large_memory_map):
             obj = torch.from_numpy(np.random.randn(1000).astype(t))
             serialization_roundtrip(obj, mmap)
 
+
 def test_numpy_immutable(large_memory_map):
     with pa.memory_map(large_memory_map, mode="r+") as mmap:
         obj = np.zeros([10])
@@ -342,6 +344,7 @@ def deserialize_dummy_class(serialized_obj):
 
     pa.serialize(DummyClass())
 
+
 def test_buffer_serialization():
 
     class BufferClass(object):
@@ -371,24 +374,24 @@ def huge_memory_map(temp_dir):
         # Test that objects that are too large for Arrow throw a Python
         # exception. These tests give out of memory errors on Travis and need
         # to be run on a machine with lots of RAM.
-        l = 2 ** 29 * [1.0]
-        serialization_roundtrip(l, mmap)
-        del l
-        l = 2 ** 29 * ["s"]
-        serialization_roundtrip(l, mmap)
-        del l
-        l = 2 ** 29 * [["1"], 2, 3, [{"s": 4}]]
-        serialization_roundtrip(l, mmap)
-        del l
-        l = 2 ** 29 * [{"s": 1}] + 2 ** 29 * [1.0]
-        serialization_roundtrip(l, mmap)
-        del l
-        l = np.zeros(2 ** 25)
-        serialization_roundtrip(l, mmap)
-        del l
-        l = [np.zeros(2 ** 18) for _ in range(2 ** 7)]
-        serialization_roundtrip(l, mmap)
-        del l
+        x = 2 ** 29 * [1.0]
+        serialization_roundtrip(x, mmap)
+        del x
+        x = 2 ** 29 * ["s"]
+        serialization_roundtrip(x, mmap)
+        del x
+        x = 2 ** 29 * [["1"], 2, 3, [{"s": 4}]]
+        serialization_roundtrip(x, mmap)
+        del x
+        x = 2 ** 29 * [{"s": 1}] + 2 ** 29 * [1.0]
+        serialization_roundtrip(x, mmap)
+        del x
+        x = np.zeros(2 ** 25)
+        serialization_roundtrip(x, mmap)
+        del x
+        x = [np.zeros(2 ** 18) for _ in range(2 ** 7)]
+        serialization_roundtrip(x, mmap)
+        del x
 
 
 def test_serialization_callback_error():