From 2a58a1b61b6e8cfb7b3ce3141a4fc731fa820f6e Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Mon, 3 Apr 2017 20:29:58 -0400 Subject: [PATCH 1/2] Add a nicer exception hierarchy. Unknown errors return as ValueError Change-Id: Id73771a791180dd48a6855355d0e42b605fdbd47 --- python/pyarrow/__init__.py | 6 ++++- python/pyarrow/error.pyx | 33 +++++++++++++++++++++++++--- python/pyarrow/includes/common.pxd | 1 + python/pyarrow/tests/test_feather.py | 2 +- 4 files changed, 37 insertions(+), 5 deletions(-) diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py index 6860f986fb6..d6c7231abe0 100644 --- a/python/pyarrow/__init__.py +++ b/python/pyarrow/__init__.py @@ -38,7 +38,11 @@ ListArray, StringArray, DictionaryArray) -from pyarrow.error import ArrowException +from pyarrow.error import (ArrowException, + ArrowKeyError, + ArrowIOError, + ArrowMemoryError, + ArrowNotImplementedError) from pyarrow.filesystem import Filesystem, HdfsClient, LocalFilesystem from pyarrow.io import (HdfsFile, NativeFile, PythonFileInterface, diff --git a/python/pyarrow/error.pyx b/python/pyarrow/error.pyx index b8a82b3754c..0cf4df6282f 100644 --- a/python/pyarrow/error.pyx +++ b/python/pyarrow/error.pyx @@ -19,13 +19,40 @@ from pyarrow.includes.libarrow cimport CStatus from pyarrow.includes.common cimport c_string from pyarrow.compat import frombytes -class ArrowException(Exception): + +class ArrowException(ValueError): + pass + + +class ArrowMemoryError(MemoryError): + pass + + +class ArrowIOError(IOError): pass + +class ArrowKeyError(KeyError): + pass + + +class ArrowNotImplementedError(NotImplementedError): + pass + + cdef int check_status(const CStatus& status) nogil except -1: if status.ok(): return 0 - cdef c_string c_message = status.ToString() with gil: - raise ArrowException(frombytes(c_message)) + message = frombytes(status.ToString()) + if status.IsIOError(): + raise ArrowIOError(message) + elif status.IsOutOfMemory(): + raise ArrowMemoryError(message) + elif status.IsKeyError(): + raise ArrowKeyError(message) + elif status.IsNotImplemented(): + raise ArrowNotImplementedError(message) + else: + raise ArrowException(message) diff --git a/python/pyarrow/includes/common.pxd b/python/pyarrow/includes/common.pxd index f689bdc3fd8..8edb5fdb431 100644 --- a/python/pyarrow/includes/common.pxd +++ b/python/pyarrow/includes/common.pxd @@ -43,6 +43,7 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: c_string ToString() c_bool ok() + c_bool IsIOError() c_bool IsOutOfMemory() c_bool IsKeyError() c_bool IsNotImplemented() diff --git a/python/pyarrow/tests/test_feather.py b/python/pyarrow/tests/test_feather.py index c7b4f1e9973..cba9464354a 100644 --- a/python/pyarrow/tests/test_feather.py +++ b/python/pyarrow/tests/test_feather.py @@ -45,7 +45,7 @@ def tearDown(self): pass def test_file_not_exist(self): - with self.assertRaises(pa.ArrowException): + with self.assertRaises(pa.ArrowIOError): FeatherReader('test_invalid_file') def _get_null_counts(self, path, columns=None): From 74c43dfcd81707a2fc386a923ad320797c2ae5de Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Mon, 3 Apr 2017 20:42:21 -0400 Subject: [PATCH 2/2] Make a nicer Exception hierachy, with more intuitive bases for thirdparty users Change-Id: Ib137246f3ee39bd622a27ff261f438366a6e257b --- cpp/src/arrow/python/builtin_convert.cc | 8 +- cpp/src/arrow/python/pandas_convert.cc | 6 +- cpp/src/arrow/status.h | 2 +- python/pyarrow/__init__.py | 4 +- python/pyarrow/error.pyx | 24 ++++-- python/pyarrow/includes/common.pxd | 3 +- python/pyarrow/tests/test_convert_builtin.py | 78 ++++++++++---------- python/pyarrow/tests/test_convert_pandas.py | 4 +- 8 files changed, 72 insertions(+), 57 deletions(-) diff --git a/cpp/src/arrow/python/builtin_convert.cc b/cpp/src/arrow/python/builtin_convert.cc index 6a13fdccdea..25b32ee26a0 100644 --- a/cpp/src/arrow/python/builtin_convert.cc +++ b/cpp/src/arrow/python/builtin_convert.cc @@ -394,7 +394,7 @@ class BytesConverter : public TypedConverter { } else if (PyBytes_Check(item)) { bytes_obj = item; } else { - return Status::TypeError( + return Status::Invalid( "Value that cannot be converted to bytes was encountered"); } // No error checking @@ -429,7 +429,7 @@ class FixedWidthBytesConverter : public TypedConverter { } else if (PyBytes_Check(item)) { bytes_obj = item; } else { - return Status::TypeError( + return Status::Invalid( "Value that cannot be converted to bytes was encountered"); } // No error checking @@ -458,7 +458,7 @@ class UTF8Converter : public TypedConverter { RETURN_NOT_OK(typed_builder_->AppendNull()); continue; } else if (!PyUnicode_Check(item)) { - return Status::TypeError("Non-unicode value encountered"); + return Status::Invalid("Non-unicode value encountered"); } tmp.reset(PyUnicode_AsUTF8String(item)); RETURN_IF_PYERROR(); @@ -585,7 +585,7 @@ Status CheckPythonBytesAreFixedLength(PyObject* obj, Py_ssize_t expected_length) std::stringstream ss; ss << "Found byte string of length " << length << ", expected length is " << expected_length; - return Status::TypeError(ss.str()); + return Status::Invalid(ss.str()); } return Status::OK(); } diff --git a/cpp/src/arrow/python/pandas_convert.cc b/cpp/src/arrow/python/pandas_convert.cc index 9577892a55b..48d3489bf90 100644 --- a/cpp/src/arrow/python/pandas_convert.cc +++ b/cpp/src/arrow/python/pandas_convert.cc @@ -161,7 +161,7 @@ static Status AppendObjectStrings( obj = PyUnicode_AsUTF8String(obj); if (obj == NULL) { PyErr_Clear(); - return Status::TypeError("failed converting unicode to UTF8"); + return Status::Invalid("failed converting unicode to UTF8"); } const int32_t length = static_cast(PyBytes_GET_SIZE(obj)); Status s = builder->Append(PyBytes_AS_STRING(obj), length); @@ -200,7 +200,7 @@ static Status AppendObjectFixedWidthBytes(PyArrayObject* arr, PyArrayObject* mas obj = PyUnicode_AsUTF8String(obj); if (obj == NULL) { PyErr_Clear(); - return Status::TypeError("failed converting unicode to UTF8"); + return Status::Invalid("failed converting unicode to UTF8"); } RETURN_NOT_OK(CheckPythonBytesAreFixedLength(obj, byte_width)); @@ -482,7 +482,7 @@ Status InvalidConversion(PyObject* obj, const std::string& expected_type_name) { std::stringstream ss; ss << "Python object of type " << cpp_type_name << " is not None and is not a " << expected_type_name << " object"; - return Status::TypeError(ss.str()); + return Status::Invalid(ss.str()); } Status PandasConverter::ConvertDates() { diff --git a/cpp/src/arrow/status.h b/cpp/src/arrow/status.h index 05f5b749b60..dd65b753fef 100644 --- a/cpp/src/arrow/status.h +++ b/cpp/src/arrow/status.h @@ -134,7 +134,7 @@ class ARROW_EXPORT Status { bool IsKeyError() const { return code() == StatusCode::KeyError; } bool IsInvalid() const { return code() == StatusCode::Invalid; } bool IsIOError() const { return code() == StatusCode::IOError; } - + bool IsTypeError() const { return code() == StatusCode::TypeError; } bool IsUnknownError() const { return code() == StatusCode::UnknownError; } bool IsNotImplemented() const { return code() == StatusCode::NotImplemented; } diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py index d6c7231abe0..8c520748cf3 100644 --- a/python/pyarrow/__init__.py +++ b/python/pyarrow/__init__.py @@ -40,9 +40,11 @@ from pyarrow.error import (ArrowException, ArrowKeyError, + ArrowInvalid, ArrowIOError, ArrowMemoryError, - ArrowNotImplementedError) + ArrowNotImplementedError, + ArrowTypeError) from pyarrow.filesystem import Filesystem, HdfsClient, LocalFilesystem from pyarrow.io import (HdfsFile, NativeFile, PythonFileInterface, diff --git a/python/pyarrow/error.pyx b/python/pyarrow/error.pyx index 0cf4df6282f..259aeb074e3 100644 --- a/python/pyarrow/error.pyx +++ b/python/pyarrow/error.pyx @@ -20,23 +20,31 @@ from pyarrow.includes.common cimport c_string from pyarrow.compat import frombytes -class ArrowException(ValueError): +class ArrowException(Exception): pass -class ArrowMemoryError(MemoryError): +class ArrowInvalid(ValueError, ArrowException): pass -class ArrowIOError(IOError): +class ArrowMemoryError(MemoryError, ArrowException): pass -class ArrowKeyError(KeyError): +class ArrowIOError(IOError, ArrowException): pass -class ArrowNotImplementedError(NotImplementedError): +class ArrowKeyError(KeyError, ArrowException): + pass + + +class ArrowTypeError(TypeError, ArrowException): + pass + + +class ArrowNotImplementedError(NotImplementedError, ArrowException): pass @@ -46,7 +54,9 @@ cdef int check_status(const CStatus& status) nogil except -1: with gil: message = frombytes(status.ToString()) - if status.IsIOError(): + if status.IsInvalid(): + raise ArrowInvalid(message) + elif status.IsIOError(): raise ArrowIOError(message) elif status.IsOutOfMemory(): raise ArrowMemoryError(message) @@ -54,5 +64,7 @@ cdef int check_status(const CStatus& status) nogil except -1: raise ArrowKeyError(message) elif status.IsNotImplemented(): raise ArrowNotImplementedError(message) + elif status.IsTypeError(): + raise ArrowTypeError(message) else: raise ArrowException(message) diff --git a/python/pyarrow/includes/common.pxd b/python/pyarrow/includes/common.pxd index 8edb5fdb431..ab38ff3084f 100644 --- a/python/pyarrow/includes/common.pxd +++ b/python/pyarrow/includes/common.pxd @@ -45,9 +45,10 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: c_bool ok() c_bool IsIOError() c_bool IsOutOfMemory() + c_bool IsInvalid() c_bool IsKeyError() c_bool IsNotImplemented() - c_bool IsInvalid() + c_bool IsTypeError() cdef inline object PyObject_to_object(PyObject* o): diff --git a/python/pyarrow/tests/test_convert_builtin.py b/python/pyarrow/tests/test_convert_builtin.py index 15fca560c65..e2b03d85ecd 100644 --- a/python/pyarrow/tests/test_convert_builtin.py +++ b/python/pyarrow/tests/test_convert_builtin.py @@ -17,7 +17,7 @@ # under the License. from pyarrow.compat import unittest, u # noqa -import pyarrow +import pyarrow as pa import datetime @@ -26,32 +26,32 @@ class TestConvertList(unittest.TestCase): def test_boolean(self): expected = [True, None, False, None] - arr = pyarrow.from_pylist(expected) + arr = pa.from_pylist(expected) assert len(arr) == 4 assert arr.null_count == 2 - assert arr.type == pyarrow.bool_() + assert arr.type == pa.bool_() assert arr.to_pylist() == expected def test_empty_list(self): - arr = pyarrow.from_pylist([]) + arr = pa.from_pylist([]) assert len(arr) == 0 assert arr.null_count == 0 - assert arr.type == pyarrow.null() + assert arr.type == pa.null() assert arr.to_pylist() == [] def test_all_none(self): - arr = pyarrow.from_pylist([None, None]) + arr = pa.from_pylist([None, None]) assert len(arr) == 2 assert arr.null_count == 2 - assert arr.type == pyarrow.null() + assert arr.type == pa.null() assert arr.to_pylist() == [None, None] def test_integer(self): expected = [1, None, 3, None] - arr = pyarrow.from_pylist(expected) + arr = pa.from_pylist(expected) assert len(arr) == 4 assert arr.null_count == 2 - assert arr.type == pyarrow.int64() + assert arr.type == pa.int64() assert arr.to_pylist() == expected def test_garbage_collection(self): @@ -60,25 +60,25 @@ def test_garbage_collection(self): # Force the cyclic garbage collector to run gc.collect() - bytes_before = pyarrow.total_allocated_bytes() - pyarrow.from_pylist([1, None, 3, None]) + bytes_before = pa.total_allocated_bytes() + pa.from_pylist([1, None, 3, None]) gc.collect() - assert pyarrow.total_allocated_bytes() == bytes_before + assert pa.total_allocated_bytes() == bytes_before def test_double(self): data = [1.5, 1, None, 2.5, None, None] - arr = pyarrow.from_pylist(data) + arr = pa.from_pylist(data) assert len(arr) == 6 assert arr.null_count == 3 - assert arr.type == pyarrow.float64() + assert arr.type == pa.float64() assert arr.to_pylist() == data def test_unicode(self): data = [u'foo', u'bar', None, u'maƱana'] - arr = pyarrow.from_pylist(data) + arr = pa.from_pylist(data) assert len(arr) == 4 assert arr.null_count == 1 - assert arr.type == pyarrow.string() + assert arr.type == pa.string() assert arr.to_pylist() == data def test_bytes(self): @@ -86,31 +86,31 @@ def test_bytes(self): data = [b'foo', u1.decode('utf-8'), # unicode gets encoded, None] - arr = pyarrow.from_pylist(data) + arr = pa.from_pylist(data) assert len(arr) == 3 assert arr.null_count == 1 - assert arr.type == pyarrow.binary() + assert arr.type == pa.binary() assert arr.to_pylist() == [b'foo', u1, None] def test_fixed_size_bytes(self): data = [b'foof', None, b'barb', b'2346'] - arr = pyarrow.from_pylist(data, type=pyarrow.binary(4)) + arr = pa.from_pylist(data, type=pa.binary(4)) assert len(arr) == 4 assert arr.null_count == 1 - assert arr.type == pyarrow.binary(4) + assert arr.type == pa.binary(4) assert arr.to_pylist() == data def test_fixed_size_bytes_does_not_accept_varying_lengths(self): data = [b'foo', None, b'barb', b'2346'] - with self.assertRaises(pyarrow.error.ArrowException): - pyarrow.from_pylist(data, type=pyarrow.binary(4)) + with self.assertRaises(pa.ArrowInvalid): + pa.from_pylist(data, type=pa.binary(4)) def test_date(self): data = [datetime.date(2000, 1, 1), None, datetime.date(1970, 1, 1), datetime.date(2040, 2, 26)] - arr = pyarrow.from_pylist(data) + arr = pa.from_pylist(data) assert len(arr) == 4 - assert arr.type == pyarrow.date64() + assert arr.type == pa.date64() assert arr.null_count == 1 assert arr[0].as_py() == datetime.date(2000, 1, 1) assert arr[1].as_py() is None @@ -124,9 +124,9 @@ def test_timestamp(self): datetime.datetime(2006, 1, 13, 12, 34, 56, 432539), datetime.datetime(2010, 8, 13, 5, 46, 57, 437699) ] - arr = pyarrow.from_pylist(data) + arr = pa.from_pylist(data) assert len(arr) == 4 - assert arr.type == pyarrow.timestamp('us') + assert arr.type == pa.timestamp('us') assert arr.null_count == 1 assert arr[0].as_py() == datetime.datetime(2007, 7, 13, 1, 23, 34, 123456) @@ -137,28 +137,28 @@ def test_timestamp(self): 46, 57, 437699) def test_mixed_nesting_levels(self): - pyarrow.from_pylist([1, 2, None]) - pyarrow.from_pylist([[1], [2], None]) - pyarrow.from_pylist([[1], [2], [None]]) + pa.from_pylist([1, 2, None]) + pa.from_pylist([[1], [2], None]) + pa.from_pylist([[1], [2], [None]]) - with self.assertRaises(pyarrow.ArrowException): - pyarrow.from_pylist([1, 2, [1]]) + with self.assertRaises(pa.ArrowInvalid): + pa.from_pylist([1, 2, [1]]) - with self.assertRaises(pyarrow.ArrowException): - pyarrow.from_pylist([1, 2, []]) + with self.assertRaises(pa.ArrowInvalid): + pa.from_pylist([1, 2, []]) - with self.assertRaises(pyarrow.ArrowException): - pyarrow.from_pylist([[1], [2], [None, [1]]]) + with self.assertRaises(pa.ArrowInvalid): + pa.from_pylist([[1], [2], [None, [1]]]) def test_list_of_int(self): data = [[1, 2, 3], [], None, [1, 2]] - arr = pyarrow.from_pylist(data) + arr = pa.from_pylist(data) assert len(arr) == 4 assert arr.null_count == 1 - assert arr.type == pyarrow.list_(pyarrow.int64()) + assert arr.type == pa.list_(pa.int64()) assert arr.to_pylist() == data def test_mixed_types_fails(self): data = ['a', 1, 2.0] - with self.assertRaises(pyarrow.error.ArrowException): - pyarrow.from_pylist(data) + with self.assertRaises(pa.ArrowException): + pa.from_pylist(data) diff --git a/python/pyarrow/tests/test_convert_pandas.py b/python/pyarrow/tests/test_convert_pandas.py index 56830a88f2e..87c9c03d7da 100644 --- a/python/pyarrow/tests/test_convert_pandas.py +++ b/python/pyarrow/tests/test_convert_pandas.py @@ -266,7 +266,7 @@ def test_fixed_size_bytes_does_not_accept_varying_lengths(self): values = [b'foo', None, b'ba', None, None, b'hey'] df = pd.DataFrame({'strings': values}) schema = A.Schema.from_fields([A.field('strings', A.binary(3))]) - with self.assertRaises(A.error.ArrowException): + with self.assertRaises(A.ArrowInvalid): A.Table.from_pandas(df, schema=schema) def test_timestamps_notimezone_no_nulls(self): @@ -409,7 +409,7 @@ def test_category(self): def test_mixed_types_fails(self): data = pd.DataFrame({'a': ['a', 1, 2.0]}) - with self.assertRaises(A.error.ArrowException): + with self.assertRaises(A.ArrowException): A.Table.from_pandas(data) def test_strided_data_import(self):