From 11c7ed3b9630d5367596f0b7e07bf4c2b53fba88 Mon Sep 17 00:00:00 2001 From: Philipp Moritz Date: Fri, 13 Oct 2017 16:06:29 -0700 Subject: [PATCH 1/4] add support for numpy 'bool' type --- cpp/src/arrow/ipc/metadata-internal.cc | 4 ++++ cpp/src/arrow/python/numpy_convert.cc | 3 ++- python/pyarrow/tests/test_serialization.py | 2 +- python/pyarrow/tests/test_tensor.py | 1 + 4 files changed, 8 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/ipc/metadata-internal.cc b/cpp/src/arrow/ipc/metadata-internal.cc index ad00cfb6c09..b3961ddaa73 100644 --- a/cpp/src/arrow/ipc/metadata-internal.cc +++ b/cpp/src/arrow/ipc/metadata-internal.cc @@ -462,6 +462,10 @@ static Status TypeToFlatbuffer(FBB& fbb, const DataType& type, static Status TensorTypeToFlatbuffer(FBB& fbb, const DataType& type, flatbuf::Type* out_type, Offset* offset) { switch (type.id()) { + case Type::BOOL: + *out_type = flatbuf::Type_Bool; + *offset = flatbuf::CreateBool(fbb).Union(); + break; case Type::UINT8: INT_TO_FB_CASE(8, false); case Type::INT8: diff --git a/cpp/src/arrow/python/numpy_convert.cc b/cpp/src/arrow/python/numpy_convert.cc index 9ed2d73d42b..5bd17079791 100644 --- a/cpp/src/arrow/python/numpy_convert.cc +++ b/cpp/src/arrow/python/numpy_convert.cc @@ -88,7 +88,7 @@ Status GetTensorType(PyObject* dtype, std::shared_ptr* out) { int type_num = cast_npy_type_compat(descr->type_num); switch (type_num) { - TO_ARROW_TYPE_CASE(BOOL, uint8); + TO_ARROW_TYPE_CASE(BOOL, boolean); TO_ARROW_TYPE_CASE(INT8, int8); TO_ARROW_TYPE_CASE(INT16, int16); TO_ARROW_TYPE_CASE(INT32, int32); @@ -122,6 +122,7 @@ Status GetNumPyType(const DataType& type, int* type_num) { break; switch (type.id()) { + NUMPY_TYPE_CASE(BOOL, BOOL); NUMPY_TYPE_CASE(UINT8, UINT8); NUMPY_TYPE_CASE(INT8, INT8); NUMPY_TYPE_CASE(UINT16, UINT16); diff --git a/python/pyarrow/tests/test_serialization.py b/python/pyarrow/tests/test_serialization.py index 7e8060b71d4..460a11ba2f8 100644 --- a/python/pyarrow/tests/test_serialization.py +++ b/python/pyarrow/tests/test_serialization.py @@ -257,7 +257,7 @@ def test_default_dict_serialization(large_memory_map): def test_numpy_serialization(large_memory_map): with pa.memory_map(large_memory_map, mode="r+") as mmap: - for t in ["int8", "uint8", "int16", "uint16", "int32", "uint32", + for t in ["bool", "int8", "uint8", "int16", "uint16", "int32", "uint32", "float16", "float32", "float64"]: obj = np.random.randint(0, 10, size=(100, 100)).astype(t) serialization_roundtrip(obj, mmap) diff --git a/python/pyarrow/tests/test_tensor.py b/python/pyarrow/tests/test_tensor.py index 1d45dc743b5..2864c04481c 100644 --- a/python/pyarrow/tests/test_tensor.py +++ b/python/pyarrow/tests/test_tensor.py @@ -51,6 +51,7 @@ def test_tensor_base_object(): @pytest.mark.parametrize('dtype_str,arrow_type', [ + ('bool', pa.bool_()), ('i1', pa.int8()), ('i2', pa.int16()), ('i4', pa.int32()), From ad4c6b97399fb34e6752457e9d271d6f2cc02ec3 Mon Sep 17 00:00:00 2001 From: Philipp Moritz Date: Fri, 13 Oct 2017 16:24:40 -0700 Subject: [PATCH 2/4] change bool width to 1 byte --- cpp/src/arrow/type.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h index 443828423e7..bf92b1762f0 100644 --- a/cpp/src/arrow/type.h +++ b/cpp/src/arrow/type.h @@ -328,7 +328,7 @@ class ARROW_EXPORT BooleanType : public FixedWidthType, public NoExtraMeta { Status Accept(TypeVisitor* visitor) const override; std::string ToString() const override; - int bit_width() const override { return 1; } + int bit_width() const override { return 8; } std::string name() const override { return "bool"; } }; From 8fce724c6a2642aa64d63bf21d6620c9f9c3d872 Mon Sep 17 00:00:00 2001 From: Philipp Moritz Date: Fri, 13 Oct 2017 16:32:15 -0700 Subject: [PATCH 3/4] update --- cpp/src/arrow/type.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h index bf92b1762f0..e6fd85d97d5 100644 --- a/cpp/src/arrow/type.h +++ b/cpp/src/arrow/type.h @@ -328,7 +328,7 @@ class ARROW_EXPORT BooleanType : public FixedWidthType, public NoExtraMeta { Status Accept(TypeVisitor* visitor) const override; std::string ToString() const override; - int bit_width() const override { return 8; } + int bit_width() const override { return CHAR_BIT; } std::string name() const override { return "bool"; } }; From 14943a09720b0c92462309cfbba13d7ff1604ce8 Mon Sep 17 00:00:00 2001 From: Philipp Moritz Date: Tue, 17 Oct 2017 15:47:47 -0700 Subject: [PATCH 4/4] deploy workaround --- cpp/src/arrow/ipc/metadata-internal.cc | 4 ---- cpp/src/arrow/python/numpy_convert.cc | 3 +-- cpp/src/arrow/python/python_to_arrow.cc | 1 - cpp/src/arrow/type.h | 2 +- python/pyarrow/tests/test_tensor.py | 1 - 5 files changed, 2 insertions(+), 9 deletions(-) diff --git a/cpp/src/arrow/ipc/metadata-internal.cc b/cpp/src/arrow/ipc/metadata-internal.cc index b3961ddaa73..ad00cfb6c09 100644 --- a/cpp/src/arrow/ipc/metadata-internal.cc +++ b/cpp/src/arrow/ipc/metadata-internal.cc @@ -462,10 +462,6 @@ static Status TypeToFlatbuffer(FBB& fbb, const DataType& type, static Status TensorTypeToFlatbuffer(FBB& fbb, const DataType& type, flatbuf::Type* out_type, Offset* offset) { switch (type.id()) { - case Type::BOOL: - *out_type = flatbuf::Type_Bool; - *offset = flatbuf::CreateBool(fbb).Union(); - break; case Type::UINT8: INT_TO_FB_CASE(8, false); case Type::INT8: diff --git a/cpp/src/arrow/python/numpy_convert.cc b/cpp/src/arrow/python/numpy_convert.cc index 5bd17079791..9ed2d73d42b 100644 --- a/cpp/src/arrow/python/numpy_convert.cc +++ b/cpp/src/arrow/python/numpy_convert.cc @@ -88,7 +88,7 @@ Status GetTensorType(PyObject* dtype, std::shared_ptr* out) { int type_num = cast_npy_type_compat(descr->type_num); switch (type_num) { - TO_ARROW_TYPE_CASE(BOOL, boolean); + TO_ARROW_TYPE_CASE(BOOL, uint8); TO_ARROW_TYPE_CASE(INT8, int8); TO_ARROW_TYPE_CASE(INT16, int16); TO_ARROW_TYPE_CASE(INT32, int32); @@ -122,7 +122,6 @@ Status GetNumPyType(const DataType& type, int* type_num) { break; switch (type.id()) { - NUMPY_TYPE_CASE(BOOL, BOOL); NUMPY_TYPE_CASE(UINT8, UINT8); NUMPY_TYPE_CASE(INT8, INT8); NUMPY_TYPE_CASE(UINT16, UINT16); diff --git a/cpp/src/arrow/python/python_to_arrow.cc b/cpp/src/arrow/python/python_to_arrow.cc index ab444f280f6..47d48d7a11c 100644 --- a/cpp/src/arrow/python/python_to_arrow.cc +++ b/cpp/src/arrow/python/python_to_arrow.cc @@ -528,7 +528,6 @@ Status SerializeArray(PyObject* context, PyArrayObject* array, SequenceBuilder* std::vector* tensors_out) { int dtype = PyArray_TYPE(array); switch (dtype) { - case NPY_BOOL: case NPY_UINT8: case NPY_INT8: case NPY_UINT16: diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h index e6fd85d97d5..443828423e7 100644 --- a/cpp/src/arrow/type.h +++ b/cpp/src/arrow/type.h @@ -328,7 +328,7 @@ class ARROW_EXPORT BooleanType : public FixedWidthType, public NoExtraMeta { Status Accept(TypeVisitor* visitor) const override; std::string ToString() const override; - int bit_width() const override { return CHAR_BIT; } + int bit_width() const override { return 1; } std::string name() const override { return "bool"; } }; diff --git a/python/pyarrow/tests/test_tensor.py b/python/pyarrow/tests/test_tensor.py index 2864c04481c..1d45dc743b5 100644 --- a/python/pyarrow/tests/test_tensor.py +++ b/python/pyarrow/tests/test_tensor.py @@ -51,7 +51,6 @@ def test_tensor_base_object(): @pytest.mark.parametrize('dtype_str,arrow_type', [ - ('bool', pa.bool_()), ('i1', pa.int8()), ('i2', pa.int16()), ('i4', pa.int32()),