From f8f430f31ea72ab400bd6195ce31c379f2ecb1f0 Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Tue, 17 Oct 2023 09:54:18 +0200 Subject: [PATCH 01/73] Initial commit - add _dlpack.pxi --- python/pyarrow/_dlpack.pxi | 123 +++++++++++++++++++++++++++++++++++++ python/pyarrow/array.pxi | 5 ++ 2 files changed, 128 insertions(+) create mode 100644 python/pyarrow/_dlpack.pxi diff --git a/python/pyarrow/_dlpack.pxi b/python/pyarrow/_dlpack.pxi new file mode 100644 index 00000000000..c43888e2b5c --- /dev/null +++ b/python/pyarrow/_dlpack.pxi @@ -0,0 +1,123 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from libc.stdlib cimport malloc + +cimport cpython +from cpython.pycapsule cimport PyCapsule_New + + +ctypedef enum DLDeviceType: + kDLCPU = 1 + kDLCUDA = 2 + kDLCUDAHost = 3 + kDLOpenCL = 4 + kDLVulkan = 7 + kDLMetal = 8 + kDLVPI = 9 + kDLROCM = 10 + kDLROCMHost = 11 + kDLExtDev = 12 + kDLCUDAManaged = 13 + kDLOneAPI = 14 + kDLWebGPU = 15 + kDLHexagon = 16 + +ctypedef struct DLDevice: + DLDeviceType device_type + int32_t device_id + +ctypedef enum DLDataTypeCode: + kDLInt = 0 + kDLUInt = 1 + kDLFloat = 2 + kDLOpaqueHandle = 3 + kDLBfloat = 4 + kDLComplex = 5 + kDLBool = 6 + +ctypedef struct DLDataType: + uint8_t code + uint8_t bits + uint16_t lanes + +ctypedef struct DLTensor: + void *data + DLDevice device + int32_t ndim + DLDataType dtype + int64_t *shape + int64_t *strides + uint64_t byte_offset + +ctypedef struct DLManagedTensor: + DLTensor dl_tensor + void *manager_ctx + void (*deleter)(DLManagedTensor *) + + +cdef void pycapsule_deleter(object dltensor): + cdef DLManagedTensor* dlm_tensor + if cpython.PyCapsule_IsValid(dltensor, 'dltensor'): + dlm_tensor = cpython.PyCapsule_GetPointer( + dltensor, 'dltensor') + dlm_tensor.deleter(dlm_tensor) + + +cdef void deleter(DLManagedTensor* tensor) with gil: + if tensor.manager_ctx is NULL: + return + free(tensor.dl_tensor.shape) + cpython.Py_DECREF(tensor.manager_ctx) + tensor.manager_ctx = NULL + free(tensor) + + +cpdef object to_dlpack(Array arr) except +: + cdef DLManagedTensor* dlm_tensor = malloc(sizeof(DLManagedTensor)) + + cdef size_t ndim = 0 + cdef DLTensor* dl_tensor = &dlm_tensor.dl_tensor + dl_tensor.data = arr.ap + dl_tensor.ndim = ndim + dl_tensor.shape = NULL + dl_tensor.strides = NULL + dl_tensor.byte_offset = 0 + + cdef DLDevice* device = &dl_tensor.device + device.device_type = kDLCPU + device.device_id = 0 + + cdef DLDataType* dtype = &dl_tensor.dtype + if arr.type in [uint8(), uint16(), uint32(), uint64()]: + dtype.code = kDLUInt + elif arr.type in [int8(), int16(), int32(), int64()]: + 
dtype.code = kDLInt + elif arr.type in [float16(), float32(), float64()]: + dtype.code = kDLFloat + elif arr.type == bool_(): + dtype.code = kDLBool + else: + raise ValueError(f'Unsupported dtype {arr.type}') + dtype.lanes = 1 + dtype.bits = arr.nbytes * 8 + + dlm_tensor.manager_ctx = arr + cpython.Py_INCREF(arr) + dlm_tensor.deleter = deleter + + return PyCapsule_New(dlm_tensor, 'dltensor', pycapsule_deleter) diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index 2e975038227..4fdaf42ed20 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -21,6 +21,8 @@ import os import warnings from cython import sizeof +include "_dlpack.pxi" + cdef _sequence_to_array(object sequence, object mask, object size, DataType type, CMemoryPool* pool, c_bool from_pandas): @@ -1778,6 +1780,9 @@ cdef class Array(_PandasConvertible): return pyarrow_wrap_array(array) + def __dlpack__(self, stream=None): + return to_dlpack(self) + cdef _array_like_to_pandas(obj, options, types_mapper): cdef: From cf143cd23cc7110720bfb222fe70da5b97d78089 Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Tue, 17 Oct 2023 11:41:03 +0200 Subject: [PATCH 02/73] Initial test --- python/pyarrow/tests/test_array.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index 2f9727922b4..e7b66c613a2 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -18,6 +18,7 @@ from collections.abc import Iterable import datetime import decimal +import ctypes import hypothesis as h import hypothesis.strategies as st import itertools @@ -3546,3 +3547,18 @@ def test_run_end_encoded_from_buffers(): with pytest.raises(ValueError): pa.RunEndEncodedArray.from_buffers(ree_type, length, buffers, 1, offset, children) + + +def PyCapsule_IsValid(capsule, name): + return ctypes.pythonapi.PyCapsule_IsValid(ctypes.py_object(capsule), name) == 1 + + +def PyCapsule_GetPointer(capsule, name): 
+ return ctypes.pythonapi.PyCapsule_GetPointer(ctypes.py_object(capsule), name) + + +def test_dlpack_spec(): + arr = pa.array([1, 2, 3]) + + DLTensor = arr.__dlpack__() + assert PyCapsule_IsValid(DLTensor, b"dltensor") == True From d43367f042122c172ca08ecd02ebb31b5d9929a0 Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Wed, 18 Oct 2023 10:34:23 +0200 Subject: [PATCH 03/73] Add to test --- python/pyarrow/_dlpack.pxi | 56 +++++++++++----------- python/pyarrow/tests/test_array.py | 75 ++++++++++++++++++++++++++++++ 2 files changed, 103 insertions(+), 28 deletions(-) diff --git a/python/pyarrow/_dlpack.pxi b/python/pyarrow/_dlpack.pxi index c43888e2b5c..cff8b4575fd 100644 --- a/python/pyarrow/_dlpack.pxi +++ b/python/pyarrow/_dlpack.pxi @@ -15,40 +15,41 @@ # specific language governing permissions and limitations # under the License. -from libc.stdlib cimport malloc +from libc.stdlib cimport malloc, free cimport cpython from cpython.pycapsule cimport PyCapsule_New +from cython import sizeof ctypedef enum DLDeviceType: - kDLCPU = 1 - kDLCUDA = 2 - kDLCUDAHost = 3 - kDLOpenCL = 4 - kDLVulkan = 7 - kDLMetal = 8 - kDLVPI = 9 - kDLROCM = 10 - kDLROCMHost = 11 - kDLExtDev = 12 - kDLCUDAManaged = 13 - kDLOneAPI = 14 - kDLWebGPU = 15 - kDLHexagon = 16 + kDLCPU + kDLCUDA + kDLCUDAHost + kDLOpenCL + kDLVulkan + kDLMetal + kDLVPI + kDLROCM + kDLROCMHost + kDLExtDev + kDLCUDAManaged + kDLOneAPI + kDLWebGPU + kDLHexagon ctypedef struct DLDevice: DLDeviceType device_type int32_t device_id ctypedef enum DLDataTypeCode: - kDLInt = 0 - kDLUInt = 1 - kDLFloat = 2 - kDLOpaqueHandle = 3 - kDLBfloat = 4 - kDLComplex = 5 - kDLBool = 6 + kDLInt + kDLUInt + kDLFloat + kDLOpaqueHandle + kDLBfloat + kDLComplex + kDLBool ctypedef struct DLDataType: uint8_t code @@ -90,13 +91,12 @@ cdef void deleter(DLManagedTensor* tensor) with gil: cpdef object to_dlpack(Array arr) except +: cdef DLManagedTensor* dlm_tensor = malloc(sizeof(DLManagedTensor)) - cdef size_t ndim = 0 cdef DLTensor* dl_tensor = 
&dlm_tensor.dl_tensor - dl_tensor.data = arr.ap - dl_tensor.ndim = ndim - dl_tensor.shape = NULL - dl_tensor.strides = NULL - dl_tensor.byte_offset = 0 + #dl_tensor.data = arr.ap + #dl_tensor.ndim = 0 + #dl_tensor.shape = NULL + #dl_tensor.strides = NULL + #dl_tensor.byte_offset = 0 cdef DLDevice* device = &dl_tensor.device device.device_type = kDLCPU diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index e7b66c613a2..4e2317fc163 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -3549,6 +3549,70 @@ def test_run_end_encoded_from_buffers(): 1, offset, children) + +from enum import IntEnum + +class DLDeviceType(IntEnum): + kDLCPU = 1 + kDLCUDA = 2 + kDLCUDAHost = 3 + kDLOpenCL = 4 + kDLVulkan = 7 + kDLMetal = 8 + kDLVPI = 9 + kDLROCM = 10 + kDLROCMHost = 11 + kDLExtDev = 12 + kDLCUDAManaged = 13 + kDLOneAPI = 14 + kDLWebGPU = 15 + kDLHexagon = 16 + +class DLDevice(ctypes.Structure): + _fields_ = [ + ("device_type", ctypes.c_uint), + ("device_id", ctypes.c_int32), + ] + +class DLDataTypeCode(IntEnum): + kDLInt = 0 + kDLUInt = 1 + kDLFloat = 2 + kDLOpaqueHandle = 3 + kDLBfloat = 4 + kDLComplex = 5 + kDLBool = 6 + +class DLDataType(ctypes.Structure): + _fields_ = [ + ("code", ctypes.c_uint8), + ("bits", ctypes.c_uint8), + ("lanes", ctypes.c_uint16), + ] + +class DLTensor(ctypes.Structure): + _fields_ = [ + ("data", ctypes.c_void_p), + ("device", DLDevice), + ("ndim", ctypes.c_int32), + ("dtype", DLDataType), + ("shape", ctypes.POINTER(ctypes.c_int64)), + ("strides", ctypes.POINTER(ctypes.c_int64)), + ("byte_offset", ctypes.c_uint64), + ] + +DLManagedTensorDeleter = ctypes.CFUNCTYPE(None, ctypes.c_void_p) + +class DLManagedTensor(ctypes.Structure): + _fields_ = [ + ("dl_tensor", DLTensor), + ("manager_ctx", ctypes.c_void_p), + ("deleter", DLManagedTensorDeleter), +] + +DLManagedTensor_p = ctypes.POINTER(DLManagedTensor) + + def PyCapsule_IsValid(capsule, name): return 
ctypes.pythonapi.PyCapsule_IsValid(ctypes.py_object(capsule), name) == 1 @@ -3557,8 +3621,19 @@ def PyCapsule_GetPointer(capsule, name): return ctypes.pythonapi.PyCapsule_GetPointer(ctypes.py_object(capsule), name) +def PyCapsule_New(pointer, name, destructor): + return ctypes.pythonapi.PyCapsule_New(ctypes.c_void_p(pointer), + name) + + def test_dlpack_spec(): arr = pa.array([1, 2, 3]) DLTensor = arr.__dlpack__() assert PyCapsule_IsValid(DLTensor, b"dltensor") == True + + pointer = PyCapsule_GetPointer(DLTensor, b"dltensor") + tensor = ctypes.cast(pointer, DLManagedTensor_p) + breakpoint() + tensor.contents.dl_tensor.ndim + tensor.contents.dl_tensor.ndim From 5778a334d5a45f5e7da1d6b8626632138ea98a1c Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Thu, 26 Oct 2023 09:42:13 +0200 Subject: [PATCH 04/73] New update --- python/pyarrow/_dlpack.pxi | 65 ++++++++++++++++-------------- python/pyarrow/tests/test_array.py | 4 +- 2 files changed, 35 insertions(+), 34 deletions(-) diff --git a/python/pyarrow/_dlpack.pxi b/python/pyarrow/_dlpack.pxi index cff8b4575fd..7e37adb3ed5 100644 --- a/python/pyarrow/_dlpack.pxi +++ b/python/pyarrow/_dlpack.pxi @@ -23,33 +23,33 @@ from cython import sizeof ctypedef enum DLDeviceType: - kDLCPU - kDLCUDA - kDLCUDAHost - kDLOpenCL - kDLVulkan - kDLMetal - kDLVPI - kDLROCM - kDLROCMHost - kDLExtDev - kDLCUDAManaged - kDLOneAPI - kDLWebGPU - kDLHexagon + kDLCPU = 1 + kDLCUDA = 2 + kDLCUDAHost = 3 + kDLOpenCL = 4 + kDLVulkan = 7 + kDLMetal = 8 + kDLVPI = 9 + kDLROCM = 10 + kDLROCMHost = 11 + kDLExtDev = 12 + kDLCUDAManaged = 13 + kDLOneAPI = 14 + kDLWebGPU = 15 + kDLHexagon = 16 ctypedef struct DLDevice: DLDeviceType device_type int32_t device_id ctypedef enum DLDataTypeCode: - kDLInt - kDLUInt - kDLFloat - kDLOpaqueHandle - kDLBfloat - kDLComplex - kDLBool + kDLInt = 0 + kDLUInt = 1 + kDLFloat = 2 + kDLOpaqueHandle = 3 + kDLBfloat = 4 + kDLComplex = 5 + kDLBool = 6 ctypedef struct DLDataType: uint8_t code @@ -92,11 +92,14 @@ cpdef object 
to_dlpack(Array arr) except +: cdef DLManagedTensor* dlm_tensor = malloc(sizeof(DLManagedTensor)) cdef DLTensor* dl_tensor = &dlm_tensor.dl_tensor - #dl_tensor.data = arr.ap - #dl_tensor.ndim = 0 - #dl_tensor.shape = NULL - #dl_tensor.strides = NULL - #dl_tensor.byte_offset = 0 + dl_tensor.data = arr.buffers()[1].address + dl_tensor.ndim = 1 + cdef int64_t* shape = malloc(sizeof(int64_t)) + shape[0] = len(arr) + dl_tensor.shape = shape + + dl_tensor.strides = NULL + dl_tensor.byte_offset = 0 cdef DLDevice* device = &dl_tensor.device device.device_type = kDLCPU @@ -104,17 +107,17 @@ cpdef object to_dlpack(Array arr) except +: cdef DLDataType* dtype = &dl_tensor.dtype if arr.type in [uint8(), uint16(), uint32(), uint64()]: - dtype.code = kDLUInt + dtype.code = kDLUInt elif arr.type in [int8(), int16(), int32(), int64()]: - dtype.code = kDLInt + dtype.code = kDLInt elif arr.type in [float16(), float32(), float64()]: - dtype.code = kDLFloat + dtype.code = kDLFloat elif arr.type == bool_(): - dtype.code = kDLBool + dtype.code = kDLBool else: raise ValueError(f'Unsupported dtype {arr.type}') dtype.lanes = 1 - dtype.bits = arr.nbytes * 8 + dtype.bits = arr.type.bit_width dlm_tensor.manager_ctx = arr cpython.Py_INCREF(arr) diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index 4e2317fc163..cdbc7e4b2f4 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -3605,7 +3605,7 @@ class DLTensor(ctypes.Structure): class DLManagedTensor(ctypes.Structure): _fields_ = [ - ("dl_tensor", DLTensor), + ("dl_tensor", (DLTensor)), ("manager_ctx", ctypes.c_void_p), ("deleter", DLManagedTensorDeleter), ] @@ -3634,6 +3634,4 @@ def test_dlpack_spec(): pointer = PyCapsule_GetPointer(DLTensor, b"dltensor") tensor = ctypes.cast(pointer, DLManagedTensor_p) - breakpoint() - tensor.contents.dl_tensor.ndim tensor.contents.dl_tensor.ndim From 1f0e1005e5f392dad0be6849f1150da459c0efee Mon Sep 17 00:00:00 2001 From: AlenkaF Date: 
Thu, 26 Oct 2023 11:36:44 +0200 Subject: [PATCH 05/73] Correct data pointer cast and add numpy test --- python/pyarrow/_dlpack.pxi | 3 +- python/pyarrow/tests/test_array.py | 92 +++++------------------------- 2 files changed, 15 insertions(+), 80 deletions(-) diff --git a/python/pyarrow/_dlpack.pxi b/python/pyarrow/_dlpack.pxi index 7e37adb3ed5..ca7c130efbf 100644 --- a/python/pyarrow/_dlpack.pxi +++ b/python/pyarrow/_dlpack.pxi @@ -92,7 +92,8 @@ cpdef object to_dlpack(Array arr) except +: cdef DLManagedTensor* dlm_tensor = malloc(sizeof(DLManagedTensor)) cdef DLTensor* dl_tensor = &dlm_tensor.dl_tensor - dl_tensor.data = arr.buffers()[1].address + cdef intptr_t data_ptr = arr.buffers()[1].address + dl_tensor.data = data_ptr dl_tensor.ndim = 1 cdef int64_t* shape = malloc(sizeof(int64_t)) shape[0] = len(arr) diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index cdbc7e4b2f4..8e1a59497f8 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -32,6 +32,7 @@ import pyarrow as pa import pyarrow.tests.strategies as past +from pyarrow.vendored.version import Version def test_total_bytes_allocated(): @@ -3549,89 +3550,22 @@ def test_run_end_encoded_from_buffers(): 1, offset, children) - -from enum import IntEnum - -class DLDeviceType(IntEnum): - kDLCPU = 1 - kDLCUDA = 2 - kDLCUDAHost = 3 - kDLOpenCL = 4 - kDLVulkan = 7 - kDLMetal = 8 - kDLVPI = 9 - kDLROCM = 10 - kDLROCMHost = 11 - kDLExtDev = 12 - kDLCUDAManaged = 13 - kDLOneAPI = 14 - kDLWebGPU = 15 - kDLHexagon = 16 - -class DLDevice(ctypes.Structure): - _fields_ = [ - ("device_type", ctypes.c_uint), - ("device_id", ctypes.c_int32), - ] - -class DLDataTypeCode(IntEnum): - kDLInt = 0 - kDLUInt = 1 - kDLFloat = 2 - kDLOpaqueHandle = 3 - kDLBfloat = 4 - kDLComplex = 5 - kDLBool = 6 - -class DLDataType(ctypes.Structure): - _fields_ = [ - ("code", ctypes.c_uint8), - ("bits", ctypes.c_uint8), - ("lanes", ctypes.c_uint16), - ] - -class 
DLTensor(ctypes.Structure): - _fields_ = [ - ("data", ctypes.c_void_p), - ("device", DLDevice), - ("ndim", ctypes.c_int32), - ("dtype", DLDataType), - ("shape", ctypes.POINTER(ctypes.c_int64)), - ("strides", ctypes.POINTER(ctypes.c_int64)), - ("byte_offset", ctypes.c_uint64), - ] - -DLManagedTensorDeleter = ctypes.CFUNCTYPE(None, ctypes.c_void_p) - -class DLManagedTensor(ctypes.Structure): - _fields_ = [ - ("dl_tensor", (DLTensor)), - ("manager_ctx", ctypes.c_void_p), - ("deleter", DLManagedTensorDeleter), -] - -DLManagedTensor_p = ctypes.POINTER(DLManagedTensor) - - def PyCapsule_IsValid(capsule, name): return ctypes.pythonapi.PyCapsule_IsValid(ctypes.py_object(capsule), name) == 1 -def PyCapsule_GetPointer(capsule, name): - return ctypes.pythonapi.PyCapsule_GetPointer(ctypes.py_object(capsule), name) - - -def PyCapsule_New(pointer, name, destructor): - return ctypes.pythonapi.PyCapsule_New(ctypes.c_void_p(pointer), - name) - - -def test_dlpack_spec(): - arr = pa.array([1, 2, 3]) +@pytest.mark.parametrize( + 'tensor_type', + [pa.uint8(), pa.uint32(), pa.int16(), pa.float32()] +) +def test_dlpack_spec(tensor_type): + if Version(np.__version__) < Version("1.22.0"): + pytest.skip("No dlpack support in numpy versions older than 1.22.0.") + arr = pa.array([1, 2, 3], type=tensor_type) DLTensor = arr.__dlpack__() - assert PyCapsule_IsValid(DLTensor, b"dltensor") == True + assert PyCapsule_IsValid(DLTensor, b"dltensor") is True - pointer = PyCapsule_GetPointer(DLTensor, b"dltensor") - tensor = ctypes.cast(pointer, DLManagedTensor_p) - tensor.contents.dl_tensor.ndim + expected = np.array([1, 2, 3]) + result = np.from_dlpack(arr) + np.testing.assert_array_equal(result, expected) From e23249ee67b2042ad9c728df8967f08862b7bb52 Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Thu, 26 Oct 2023 12:08:23 +0200 Subject: [PATCH 06/73] Add buffer check and tests --- python/pyarrow/array.pxi | 2 ++ python/pyarrow/tests/test_array.py | 17 ++++++++++++++++- 2 files changed, 18 
insertions(+), 1 deletion(-) diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index 4fdaf42ed20..85b2ab43659 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -1781,6 +1781,8 @@ cdef class Array(_PandasConvertible): return pyarrow_wrap_array(array) def __dlpack__(self, stream=None): + if len(self.buffers()) > 2 or self.buffers()[0]: + raise ArrowTypeError("Can only use __dlpack__ on primitive types with no validity buffer.") return to_dlpack(self) diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index 8e1a59497f8..64f97e97bc1 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -3558,7 +3558,7 @@ def PyCapsule_IsValid(capsule, name): 'tensor_type', [pa.uint8(), pa.uint32(), pa.int16(), pa.float32()] ) -def test_dlpack_spec(tensor_type): +def test_dlpack(tensor_type): if Version(np.__version__) < Version("1.22.0"): pytest.skip("No dlpack support in numpy versions older than 1.22.0.") arr = pa.array([1, 2, 3], type=tensor_type) @@ -3569,3 +3569,18 @@ def test_dlpack_spec(tensor_type): expected = np.array([1, 2, 3]) result = np.from_dlpack(arr) np.testing.assert_array_equal(result, expected) + + +def test_dlpack_not_supported(): + if Version(np.__version__) < Version("1.22.0"): + pytest.skip("No dlpack support in numpy versions older than 1.22.0.") + with pytest.raises(pa.ArrowTypeError): + arr = pa.array([1, None, 3]) + np.from_dlpack(arr) + + with pytest.raises(pa.ArrowTypeError): + arr = pa.array( + [[0, 1], [3, 4]], + type=pa.list_(pa.int32()) + ) + np.from_dlpack(arr) From 3d8c581bc5c13f3901dd7689524feee265315bf6 Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Thu, 26 Oct 2023 15:43:17 +0200 Subject: [PATCH 07/73] Fix linter errors and try to update code for Cython 3 --- python/pyarrow/_dlpack.pxi | 7 +++---- python/pyarrow/array.pxi | 3 ++- python/pyarrow/tests/test_array.py | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git 
a/python/pyarrow/_dlpack.pxi b/python/pyarrow/_dlpack.pxi index ca7c130efbf..e2bfeea3adb 100644 --- a/python/pyarrow/_dlpack.pxi +++ b/python/pyarrow/_dlpack.pxi @@ -71,7 +71,7 @@ ctypedef struct DLManagedTensor: void (*deleter)(DLManagedTensor *) -cdef void pycapsule_deleter(object dltensor): +cdef void pycapsule_deleter(object dltensor) noexcept: cdef DLManagedTensor* dlm_tensor if cpython.PyCapsule_IsValid(dltensor, 'dltensor'): dlm_tensor = cpython.PyCapsule_GetPointer( @@ -88,7 +88,7 @@ cdef void deleter(DLManagedTensor* tensor) with gil: free(tensor) -cpdef object to_dlpack(Array arr) except +: +cpdef object to_dlpack(Array arr) except *: cdef DLManagedTensor* dlm_tensor = malloc(sizeof(DLManagedTensor)) cdef DLTensor* dl_tensor = &dlm_tensor.dl_tensor @@ -115,8 +115,7 @@ cpdef object to_dlpack(Array arr) except +: dtype.code = kDLFloat elif arr.type == bool_(): dtype.code = kDLBool - else: - raise ValueError(f'Unsupported dtype {arr.type}') + dtype.lanes = 1 dtype.bits = arr.type.bit_width diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index 85b2ab43659..5989c69999c 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -1782,7 +1782,8 @@ cdef class Array(_PandasConvertible): def __dlpack__(self, stream=None): if len(self.buffers()) > 2 or self.buffers()[0]: - raise ArrowTypeError("Can only use __dlpack__ on primitive types with no validity buffer.") + raise ArrowTypeError( + "Can only use __dlpack__ on primitive types with no validity buffer.") return to_dlpack(self) diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index 64f97e97bc1..b178bbac87c 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -3577,7 +3577,7 @@ def test_dlpack_not_supported(): with pytest.raises(pa.ArrowTypeError): arr = pa.array([1, None, 3]) np.from_dlpack(arr) - + with pytest.raises(pa.ArrowTypeError): arr = pa.array( [[0, 1], [3, 4]], From 
82a270f6d536bfc0c114c9ded56608cc9ffa6653 Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Wed, 8 Nov 2023 15:31:35 +0100 Subject: [PATCH 08/73] Add implementation example for fixed shape tensor before moving the code to C++ --- python/pyarrow/_dlpack.pxi | 43 ++++++++++++++++++++++++++++++ python/pyarrow/array.pxi | 6 +++++ python/pyarrow/tests/test_array.py | 26 +++++++++++++++--- 3 files changed, 72 insertions(+), 3 deletions(-) diff --git a/python/pyarrow/_dlpack.pxi b/python/pyarrow/_dlpack.pxi index e2bfeea3adb..2def92a4c61 100644 --- a/python/pyarrow/_dlpack.pxi +++ b/python/pyarrow/_dlpack.pxi @@ -124,3 +124,46 @@ cpdef object to_dlpack(Array arr) except *: dlm_tensor.deleter = deleter return PyCapsule_New(dlm_tensor, 'dltensor', pycapsule_deleter) + + +cpdef object fixed_shape_tensor_to_dlpack(Array arr) except *: + cdef DLManagedTensor* dlm_tensor = malloc(sizeof(DLManagedTensor)) + + cdef DLTensor* dl_tensor = &dlm_tensor.dl_tensor + cdef intptr_t data_ptr = arr.buffers()[2].address + dl_tensor.data = data_ptr + + cdef size_t ndim = len(arr) + dl_tensor.ndim = ndim + + cdef int64_t* shape = malloc(ndim * sizeof(int64_t) * 2) + shape[0] = ndim + for n in range(ndim-1): + shape[n+1] = arr.type.shape[n] + dl_tensor.shape = shape + + dl_tensor.strides = NULL + dl_tensor.byte_offset = 0 + + cdef DLDevice* device = &dl_tensor.device + device.device_type = kDLCPU + device.device_id = 0 + + cdef DLDataType* dtype = &dl_tensor.dtype + if arr.type.value_type in [uint8(), uint16(), uint32(), uint64()]: + dtype.code = kDLUInt + elif arr.type.value_type in [int8(), int16(), int32(), int64()]: + dtype.code = kDLInt + elif arr.type.value_type in [float16(), float32(), float64()]: + dtype.code = kDLFloat + elif arr.type.value_type == bool_(): + dtype.code = kDLBool + + dtype.lanes = 1 + dtype.bits = arr.type.value_type.bit_width + + dlm_tensor.manager_ctx = arr + cpython.Py_INCREF(arr) + dlm_tensor.deleter = deleter + + return PyCapsule_New(dlm_tensor, 'dltensor', 
pycapsule_deleter) \ No newline at end of file diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index 5989c69999c..8f8df9a5356 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -3593,6 +3593,12 @@ class FixedShapeTensorArray(ExtensionArray): FixedSizeListArray.from_arrays(np.ravel(obj, order='C'), size) ) + def __dlpack__(self, stream=None): + if len(self.buffers()) > 3 or self.buffers()[0] or self.buffers()[1]: + raise ArrowTypeError( + "Can only use __dlpack__ on fixed shape tensor array with no validity buffers.") + return fixed_shape_tensor_to_dlpack(self) + cdef dict _array_classes = { _Type_NA: NullArray, diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index b178bbac87c..8c3d1b5fa40 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -3555,13 +3555,13 @@ def PyCapsule_IsValid(capsule, name): @pytest.mark.parametrize( - 'tensor_type', + 'value_type', [pa.uint8(), pa.uint32(), pa.int16(), pa.float32()] ) -def test_dlpack(tensor_type): +def test_dlpack(value_type): if Version(np.__version__) < Version("1.22.0"): pytest.skip("No dlpack support in numpy versions older than 1.22.0.") - arr = pa.array([1, 2, 3], type=tensor_type) + arr = pa.array([1, 2, 3], type=value_type) DLTensor = arr.__dlpack__() assert PyCapsule_IsValid(DLTensor, b"dltensor") is True @@ -3571,6 +3571,26 @@ def test_dlpack(tensor_type): np.testing.assert_array_equal(result, expected) +@pytest.mark.parametrize( + 'value_type', + [pa.uint8(), pa.uint32(), pa.int16(), pa.float32()] +) +def test_dlpack_on_fixed_shape_tensor(value_type): + if Version(np.__version__) < Version("1.22.0"): + pytest.skip("No dlpack support in numpy versions older than 1.22.0.") + tensor_type = pa.fixed_shape_tensor(value_type, [2, 2]) + arr = [[1, 2, 3, 4], [10, 20, 30, 40], [1, 2, 3, 4]] + storage = pa.array(arr, pa.list_(value_type, 4)) + arr = pa.ExtensionArray.from_storage(tensor_type, storage) + + 
DLTensor = arr.__dlpack__() + assert PyCapsule_IsValid(DLTensor, b"dltensor") is True + + expected = arr.to_numpy_ndarray() + result = np.from_dlpack(arr) + np.testing.assert_array_equal(result, expected) + + def test_dlpack_not_supported(): if Version(np.__version__) < Version("1.22.0"): pytest.skip("No dlpack support in numpy versions older than 1.22.0.") From 00ac266bb5465a0f6e63f2303c76f902b2fdc974 Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Wed, 8 Nov 2023 15:33:14 +0100 Subject: [PATCH 09/73] Fix linter error --- python/pyarrow/_dlpack.pxi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyarrow/_dlpack.pxi b/python/pyarrow/_dlpack.pxi index 2def92a4c61..9a5aad883ed 100644 --- a/python/pyarrow/_dlpack.pxi +++ b/python/pyarrow/_dlpack.pxi @@ -166,4 +166,4 @@ cpdef object fixed_shape_tensor_to_dlpack(Array arr) except *: cpython.Py_INCREF(arr) dlm_tensor.deleter = deleter - return PyCapsule_New(dlm_tensor, 'dltensor', pycapsule_deleter) \ No newline at end of file + return PyCapsule_New(dlm_tensor, 'dltensor', pycapsule_deleter) From aeb20a6a7f9d6911a9bbd04c4e7955d2d108ad32 Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Wed, 15 Nov 2023 08:12:40 +0100 Subject: [PATCH 10/73] Initial move to the Arrow C++ --- cpp/src/arrow/CMakeLists.txt | 1 + cpp/src/arrow/dlpack.cc | 95 ++++++++ cpp/src/arrow/dlpack.h | 32 +++ cpp/src/arrow/dlpack_structure.h | 318 +++++++++++++++++++++++++++ python/pyarrow/_dlpack.pxi | 136 +----------- python/pyarrow/array.pxi | 3 +- python/pyarrow/includes/libarrow.pxd | 54 +++++ python/pyarrow/tests/test_array.py | 66 +++--- 8 files changed, 536 insertions(+), 169 deletions(-) create mode 100644 cpp/src/arrow/dlpack.cc create mode 100644 cpp/src/arrow/dlpack.h create mode 100644 cpp/src/arrow/dlpack_structure.h diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index 46a7aa91063..f2e38634ad4 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -174,6 +174,7 @@ 
set(ARROW_SRCS config.cc datum.cc device.cc + dlpack.cc extension_type.cc memory_pool.cc pretty_print.cc diff --git a/cpp/src/arrow/dlpack.cc b/cpp/src/arrow/dlpack.cc new file mode 100644 index 00000000000..71c9527913e --- /dev/null +++ b/cpp/src/arrow/dlpack.cc @@ -0,0 +1,95 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "arrow/array/array_base.h" +#include "arrow/dlpack_structure.h" +#include "arrow/type.h" + +namespace arrow { + +DLDataType getDLDataType(const Array& arr) { + DLDataType dtype; + dtype.lanes = 1; + dtype.bits = arr.type()->bit_width(); + switch (arr.type()->id()) { + case Type::INT8: + case Type::INT16: + case Type::INT32: + case Type::INT64: + dtype.code = DLDataTypeCode::kDLInt; + break; + case Type::UINT8: + case Type::UINT16: + case Type::UINT32: + case Type::UINT64: + dtype.code = DLDataTypeCode::kDLUInt; + break; + case Type::HALF_FLOAT: + case Type::FLOAT: + case Type::DOUBLE: + dtype.code = DLDataTypeCode::kDLFloat; + break; + case Type::BOOL: + dtype.code = DLDataTypeCode::kDLBool; + break; + default: + // TODO + break; + } + return dtype; +} + +struct DLMTensorCtx { + std::shared_ptr ref; + std::vector shape; + DLManagedTensor tensor; +}; + +static void deleter(DLManagedTensor* arg) { + delete static_cast(arg->manager_ctx); +} + +DLManagedTensor* toDLPack(const Array& arr) { + std::shared_ptr array_ref = arr.data(); + DLMTensorCtx* DLMTensor = new DLMTensorCtx; + DLMTensor->ref = array_ref; + + DLManagedTensor* dlm_tensor = &DLMTensor->tensor; + dlm_tensor->manager_ctx = DLMTensor; + dlm_tensor->deleter = &deleter; + dlm_tensor->dl_tensor.data = + const_cast(reinterpret_cast(array_ref->buffers[1]->address())); + + DLDevice ctx; + ctx.device_id = 0; + ctx.device_type = DLDeviceType::kDLCPU; + dlm_tensor->dl_tensor.device = ctx; + + dlm_tensor->dl_tensor.ndim = 1; + dlm_tensor->dl_tensor.dtype = getDLDataType(arr); + + std::vector* shape_arr = &DLMTensor->shape; + shape_arr->resize(1); + (*shape_arr)[0] = arr.length(); + dlm_tensor->dl_tensor.shape = shape_arr->data(); + dlm_tensor->dl_tensor.strides = NULL; + dlm_tensor->dl_tensor.byte_offset = 0; + + return dlm_tensor; +} + +} // namespace arrow diff --git a/cpp/src/arrow/dlpack.h b/cpp/src/arrow/dlpack.h new file mode 100644 index 00000000000..7be5d9b7da9 --- /dev/null +++ 
b/cpp/src/arrow/dlpack.h @@ -0,0 +1,32 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "arrow/array/array_base.h" +#include "arrow/dlpack_structure.h" + +namespace arrow { + +/// \brief DLPack protocol for producing DLManagedTensor +/// +/// Returns pointer to the DLManagedTensor class defined by +// the DLPack protocol +ARROW_EXPORT +DLManagedTensor* toDLPack(const Array& arr); + +} // namespace arrow diff --git a/cpp/src/arrow/dlpack_structure.h b/cpp/src/arrow/dlpack_structure.h new file mode 100644 index 00000000000..0bdc1fe0ea3 --- /dev/null +++ b/cpp/src/arrow/dlpack_structure.h @@ -0,0 +1,318 @@ +/*! + * Copyright (c) 2017 by Contributors + * \file dlpack.h + * \brief The common header of DLPack. + */ +#ifndef DLPACK_DLPACK_H_ +#define DLPACK_DLPACK_H_ + +/** + * \brief Compatibility with C++ + */ +#ifdef __cplusplus +#define DLPACK_EXTERN_C extern "C" +#else +#define DLPACK_EXTERN_C +#endif + +/*! \brief The current major version of dlpack */ +#define DLPACK_MAJOR_VERSION 1 + +/*! \brief The current minor version of dlpack */ +#define DLPACK_MINOR_VERSION 0 + +/*! 
\brief DLPACK_DLL prefix for windows */ +#ifdef _WIN32 +#ifdef DLPACK_EXPORTS +#define DLPACK_DLL __declspec(dllexport) +#else +#define DLPACK_DLL __declspec(dllimport) +#endif +#else +#define DLPACK_DLL +#endif + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/*! + * \brief The DLPack version. + * + * A change in major version indicates that we have changed the + * data layout of the ABI - DLManagedTensorVersioned. + * + * A change in minor version indicates that we have added new + * code, such as a new device type, but the ABI is kept the same. + * + * If an obtained DLPack tensor has a major version that disagrees + * with the version number specified in this header file + * (i.e. major != DLPACK_MAJOR_VERSION), the consumer must call the deleter + * (and it is safe to do so). It is not safe to access any other fields + * as the memory layout will have changed. + * + * In the case of a minor version mismatch, the tensor can be safely used as + * long as the consumer knows how to interpret all fields. Minor version + * updates indicate the addition of enumeration values. + */ +typedef struct { + /*! \brief DLPack major version. */ + uint32_t major; + /*! \brief DLPack minor version. */ + uint32_t minor; +} DLPackVersion; + +/*! + * \brief The device type in DLDevice. + */ +#ifdef __cplusplus +typedef enum : int32_t { +#else +typedef enum { +#endif + /*! \brief CPU device */ + kDLCPU = 1, + /*! \brief CUDA GPU device */ + kDLCUDA = 2, + /*! + * \brief Pinned CUDA CPU memory by cudaMallocHost + */ + kDLCUDAHost = 3, + /*! \brief OpenCL devices. */ + kDLOpenCL = 4, + /*! \brief Vulkan buffer for next generation graphics. */ + kDLVulkan = 7, + /*! \brief Metal for Apple GPU. */ + kDLMetal = 8, + /*! \brief Verilog simulator buffer */ + kDLVPI = 9, + /*! \brief ROCm GPUs for AMD GPUs */ + kDLROCM = 10, + /*! + * \brief Pinned ROCm CPU memory allocated by hipMallocHost + */ + kDLROCMHost = 11, + /*! 
+ * \brief Reserved extension device type, + * used for quickly test extension device + * The semantics can differ depending on the implementation. + */ + kDLExtDev = 12, + /*! + * \brief CUDA managed/unified memory allocated by cudaMallocManaged + */ + kDLCUDAManaged = 13, + /*! + * \brief Unified shared memory allocated on a oneAPI non-partititioned + * device. Call to oneAPI runtime is required to determine the device + * type, the USM allocation type and the sycl context it is bound to. + * + */ + kDLOneAPI = 14, + /*! \brief GPU support for next generation WebGPU standard. */ + kDLWebGPU = 15, + /*! \brief Qualcomm Hexagon DSP */ + kDLHexagon = 16, +} DLDeviceType; + +/*! + * \brief A Device for Tensor and operator. + */ +typedef struct { + /*! \brief The device type used in the device. */ + DLDeviceType device_type; + /*! + * \brief The device index. + * For vanilla CPU memory, pinned memory, or managed memory, this is set to 0. + */ + int32_t device_id; +} DLDevice; + +/*! + * \brief The type code options DLDataType. + */ +typedef enum { + /*! \brief signed integer */ + kDLInt = 0U, + /*! \brief unsigned integer */ + kDLUInt = 1U, + /*! \brief IEEE floating point */ + kDLFloat = 2U, + /*! + * \brief Opaque handle type, reserved for testing purposes. + * Frameworks need to agree on the handle data type for the exchange to be well-defined. + */ + kDLOpaqueHandle = 3U, + /*! \brief bfloat16 */ + kDLBfloat = 4U, + /*! + * \brief complex number + * (C/C++/Python layout: compact struct per complex number) + */ + kDLComplex = 5U, + /*! \brief boolean */ + kDLBool = 6U, +} DLDataTypeCode; + +/*! + * \brief The data type the tensor can hold. The data type is assumed to follow the + * native endian-ness. 
An explicit error message should be raised when attempting to + * export an array with non-native endianness + * + * Examples + * - float: type_code = 2, bits = 32, lanes = 1 + * - float4(vectorized 4 float): type_code = 2, bits = 32, lanes = 4 + * - int8: type_code = 0, bits = 8, lanes = 1 + * - std::complex: type_code = 5, bits = 64, lanes = 1 + * - bool: type_code = 6, bits = 8, lanes = 1 (as per common array library convention, the underlying storage size of bool is 8 bits) + */ +typedef struct { + /*! + * \brief Type code of base types. + * We keep it uint8_t instead of DLDataTypeCode for minimal memory + * footprint, but the value should be one of DLDataTypeCode enum values. + * */ + uint8_t code; + /*! + * \brief Number of bits, common choices are 8, 16, 32. + */ + uint8_t bits; + /*! \brief Number of lanes in the type, used for vector types. */ + uint16_t lanes; +} DLDataType; + +/*! + * \brief Plain C Tensor object, does not manage memory. + */ +typedef struct { + /*! + * \brief The data pointer points to the allocated data. This will be CUDA + * device pointer or cl_mem handle in OpenCL. It may be opaque on some device + * types. This pointer is always aligned to 256 bytes as in CUDA. The + * `byte_offset` field should be used to point to the beginning of the data. + * + * Note that as of Nov 2021, multiply libraries (CuPy, PyTorch, TensorFlow, + * TVM, perhaps others) do not adhere to this 256 byte aligment requirement + * on CPU/CUDA/ROCm, and always use `byte_offset=0`. This must be fixed + * (after which this note will be updated); at the moment it is recommended + * to not rely on the data pointer being correctly aligned. 
+ * + * For given DLTensor, the size of memory required to store the contents of + * data is calculated as follows: + * + * \code{.c} + * static inline size_t GetDataSize(const DLTensor* t) { + * size_t size = 1; + * for (tvm_index_t i = 0; i < t->ndim; ++i) { + * size *= t->shape[i]; + * } + * size *= (t->dtype.bits * t->dtype.lanes + 7) / 8; + * return size; + * } + * \endcode + */ + void* data; + /*! \brief The device of the tensor */ + DLDevice device; + /*! \brief Number of dimensions */ + int32_t ndim; + /*! \brief The data type of the pointer*/ + DLDataType dtype; + /*! \brief The shape of the tensor */ + int64_t* shape; + /*! + * \brief strides of the tensor (in number of elements, not bytes) + * can be NULL, indicating tensor is compact and row-majored. + */ + int64_t* strides; + /*! \brief The offset in bytes to the beginning pointer to data */ + uint64_t byte_offset; +} DLTensor; + +/*! + * \brief C Tensor object, manage memory of DLTensor. This data structure is + * intended to facilitate the borrowing of DLTensor by another framework. It is + * not meant to transfer the tensor. When the borrowing framework doesn't need + * the tensor, it should call the deleter to notify the host that the resource + * is no longer needed. + * + * \note This data structure is used as Legacy DLManagedTensor + * in DLPack exchange and is deprecated after DLPack v0.8 + * Use DLManagedTensorVersioned instead. + * This data structure may get renamed or deleted in future versions. + * + * \sa DLManagedTensorVersioned + */ +typedef struct DLManagedTensor { + /*! \brief DLTensor which is being memory managed */ + DLTensor dl_tensor; + /*! \brief the context of the original host framework of DLManagedTensor in + * which DLManagedTensor is used in the framework. It can also be NULL. + */ + void * manager_ctx; + /*! + * \brief Destructor - this should be called + * to destruct the manager_ctx which backs the DLManagedTensor. 
It can be + * NULL if there is no way for the caller to provide a reasonable destructor. + * The destructors deletes the argument self as well. + */ + void (*deleter)(struct DLManagedTensor * self); +} DLManagedTensor; + +// bit masks used in in the DLManagedTensorVersioned + +/*! \brief bit mask to indicate that the tensor is read only. */ +#define DLPACK_FLAG_BITMASK_READ_ONLY (1UL << 0UL) + +/*! + * \brief A versioned and managed C Tensor object, manage memory of DLTensor. + * + * This data structure is intended to facilitate the borrowing of DLTensor by + * another framework. It is not meant to transfer the tensor. When the borrowing + * framework doesn't need the tensor, it should call the deleter to notify the + * host that the resource is no longer needed. + * + * \note This is the current standard DLPack exchange data structure. + */ +struct DLManagedTensorVersioned { + /*! + * \brief The API and ABI version of the current managed Tensor + */ + DLPackVersion version; + /*! + * \brief the context of the original host framework. + * + * Stores DLManagedTensorVersioned is used in the + * framework. It can also be NULL. + */ + void *manager_ctx; + /*! + * \brief Destructor. + * + * This should be called to destruct manager_ctx which holds the DLManagedTensorVersioned. + * It can be NULL if there is no way for the caller to provide a reasonable + * destructor. The destructors deletes the argument self as well. + */ + void (*deleter)(struct DLManagedTensorVersioned *self); + /*! + * \brief Additional bitmask flags information about the tensor. + * + * By default the flags should be set to 0. + * + * \note Future ABI changes should keep everything until this field + * stable, to ensure that deleter can be correctly called. + * + * \sa DLPACK_FLAG_BITMASK_READ_ONLY + */ + uint64_t flags; + /*! 
\brief DLTensor which is being memory managed */ + DLTensor dl_tensor; +}; + +#ifdef __cplusplus +} // DLPACK_EXTERN_C +#endif +#endif // DLPACK_DLPACK_H_ diff --git a/python/pyarrow/_dlpack.pxi b/python/pyarrow/_dlpack.pxi index 9a5aad883ed..a7144c5de13 100644 --- a/python/pyarrow/_dlpack.pxi +++ b/python/pyarrow/_dlpack.pxi @@ -22,55 +22,6 @@ from cpython.pycapsule cimport PyCapsule_New from cython import sizeof -ctypedef enum DLDeviceType: - kDLCPU = 1 - kDLCUDA = 2 - kDLCUDAHost = 3 - kDLOpenCL = 4 - kDLVulkan = 7 - kDLMetal = 8 - kDLVPI = 9 - kDLROCM = 10 - kDLROCMHost = 11 - kDLExtDev = 12 - kDLCUDAManaged = 13 - kDLOneAPI = 14 - kDLWebGPU = 15 - kDLHexagon = 16 - -ctypedef struct DLDevice: - DLDeviceType device_type - int32_t device_id - -ctypedef enum DLDataTypeCode: - kDLInt = 0 - kDLUInt = 1 - kDLFloat = 2 - kDLOpaqueHandle = 3 - kDLBfloat = 4 - kDLComplex = 5 - kDLBool = 6 - -ctypedef struct DLDataType: - uint8_t code - uint8_t bits - uint16_t lanes - -ctypedef struct DLTensor: - void *data - DLDevice device - int32_t ndim - DLDataType dtype - int64_t *shape - int64_t *strides - uint64_t byte_offset - -ctypedef struct DLManagedTensor: - DLTensor dl_tensor - void *manager_ctx - void (*deleter)(DLManagedTensor *) - - cdef void pycapsule_deleter(object dltensor) noexcept: cdef DLManagedTensor* dlm_tensor if cpython.PyCapsule_IsValid(dltensor, 'dltensor'): @@ -79,91 +30,6 @@ cdef void pycapsule_deleter(object dltensor) noexcept: dlm_tensor.deleter(dlm_tensor) -cdef void deleter(DLManagedTensor* tensor) with gil: - if tensor.manager_ctx is NULL: - return - free(tensor.dl_tensor.shape) - cpython.Py_DECREF(tensor.manager_ctx) - tensor.manager_ctx = NULL - free(tensor) - - cpdef object to_dlpack(Array arr) except *: - cdef DLManagedTensor* dlm_tensor = malloc(sizeof(DLManagedTensor)) - - cdef DLTensor* dl_tensor = &dlm_tensor.dl_tensor - cdef intptr_t data_ptr = arr.buffers()[1].address - dl_tensor.data = data_ptr - dl_tensor.ndim = 1 - cdef int64_t* shape = 
malloc(sizeof(int64_t)) - shape[0] = len(arr) - dl_tensor.shape = shape - - dl_tensor.strides = NULL - dl_tensor.byte_offset = 0 - - cdef DLDevice* device = &dl_tensor.device - device.device_type = kDLCPU - device.device_id = 0 - - cdef DLDataType* dtype = &dl_tensor.dtype - if arr.type in [uint8(), uint16(), uint32(), uint64()]: - dtype.code = kDLUInt - elif arr.type in [int8(), int16(), int32(), int64()]: - dtype.code = kDLInt - elif arr.type in [float16(), float32(), float64()]: - dtype.code = kDLFloat - elif arr.type == bool_(): - dtype.code = kDLBool - - dtype.lanes = 1 - dtype.bits = arr.type.bit_width - - dlm_tensor.manager_ctx = arr - cpython.Py_INCREF(arr) - dlm_tensor.deleter = deleter - - return PyCapsule_New(dlm_tensor, 'dltensor', pycapsule_deleter) - - -cpdef object fixed_shape_tensor_to_dlpack(Array arr) except *: - cdef DLManagedTensor* dlm_tensor = malloc(sizeof(DLManagedTensor)) - - cdef DLTensor* dl_tensor = &dlm_tensor.dl_tensor - cdef intptr_t data_ptr = arr.buffers()[2].address - dl_tensor.data = data_ptr - - cdef size_t ndim = len(arr) - dl_tensor.ndim = ndim - - cdef int64_t* shape = malloc(ndim * sizeof(int64_t) * 2) - shape[0] = ndim - for n in range(ndim-1): - shape[n+1] = arr.type.shape[n] - dl_tensor.shape = shape - - dl_tensor.strides = NULL - dl_tensor.byte_offset = 0 - - cdef DLDevice* device = &dl_tensor.device - device.device_type = kDLCPU - device.device_id = 0 - - cdef DLDataType* dtype = &dl_tensor.dtype - if arr.type.value_type in [uint8(), uint16(), uint32(), uint64()]: - dtype.code = kDLUInt - elif arr.type.value_type in [int8(), int16(), int32(), int64()]: - dtype.code = kDLInt - elif arr.type.value_type in [float16(), float32(), float64()]: - dtype.code = kDLFloat - elif arr.type.value_type == bool_(): - dtype.code = kDLBool - - dtype.lanes = 1 - dtype.bits = arr.type.value_type.bit_width - - dlm_tensor.manager_ctx = arr - cpython.Py_INCREF(arr) - dlm_tensor.deleter = deleter - + dlm_tensor = 
toDLPack(deref(pyarrow_unwrap_array(arr).get())) return PyCapsule_New(dlm_tensor, 'dltensor', pycapsule_deleter) diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index 8f8df9a5356..2367aacf98f 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -3597,7 +3597,8 @@ class FixedShapeTensorArray(ExtensionArray): if len(self.buffers()) > 3 or self.buffers()[0] or self.buffers()[1]: raise ArrowTypeError( "Can only use __dlpack__ on fixed shape tensor array with no validity buffers.") - return fixed_shape_tensor_to_dlpack(self) + #return fixed_shape_tensor_to_dlpack(self) + return None cdef dict _array_classes = { diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index 59b63b5fb79..8ce2e763d67 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -1199,6 +1199,60 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: shared_ptr[CScalar] MakeNullScalar(shared_ptr[CDataType] type) +cdef extern from "arrow/dlpack_structure.h" nogil: + cdef enum DLDeviceType: + kDLCPU = 1 + kDLCUDA = 2 + kDLCUDAHost = 3 + kDLOpenCL = 4 + kDLVulkan = 7 + kDLMetal = 8 + kDLVPI = 9 + kDLROCM = 10 + kDLROCMHost = 11 + kDLExtDev = 12 + kDLCUDAManaged = 13 + kDLOneAPI = 14 + kDLWebGPU = 15 + kDLHexagon = 16 + + ctypedef struct DLDevice: + DLDeviceType device_type + int32_t device_id + + cdef enum DLDataTypeCode: + kDLInt = 0 + kDLUInt = 1 + kDLFloat = 2 + kDLOpaqueHandle = 3 + kDLBfloat = 4 + kDLComplex = 5 + kDLBool = 6 + + ctypedef struct DLDataType: + uint8_t code + uint8_t bits + uint16_t lanes + + ctypedef struct DLTensor: + void* data + DLDevice device + int32_t ndim + DLDataType dtype + int64_t* shape + int64_t* strides + uint64_t byte_offset + + ctypedef struct DLManagedTensor: + DLTensor dl_tensor + void* manager_ctx + void (*deleter)(DLManagedTensor*) + + +cdef extern from "arrow/dlpack.h" namespace "arrow" nogil: + DLManagedTensor* toDLPack(const CArray& arr) + + cdef 
extern from "arrow/builder.h" namespace "arrow" nogil: cdef cppclass CArrayBuilder" arrow::ArrayBuilder": diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index 8c3d1b5fa40..f1b12797b83 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -3571,36 +3571,36 @@ def test_dlpack(value_type): np.testing.assert_array_equal(result, expected) -@pytest.mark.parametrize( - 'value_type', - [pa.uint8(), pa.uint32(), pa.int16(), pa.float32()] -) -def test_dlpack_on_fixed_shape_tensor(value_type): - if Version(np.__version__) < Version("1.22.0"): - pytest.skip("No dlpack support in numpy versions older than 1.22.0.") - tensor_type = pa.fixed_shape_tensor(value_type, [2, 2]) - arr = [[1, 2, 3, 4], [10, 20, 30, 40], [1, 2, 3, 4]] - storage = pa.array(arr, pa.list_(value_type, 4)) - arr = pa.ExtensionArray.from_storage(tensor_type, storage) - - DLTensor = arr.__dlpack__() - assert PyCapsule_IsValid(DLTensor, b"dltensor") is True - - expected = arr.to_numpy_ndarray() - result = np.from_dlpack(arr) - np.testing.assert_array_equal(result, expected) - - -def test_dlpack_not_supported(): - if Version(np.__version__) < Version("1.22.0"): - pytest.skip("No dlpack support in numpy versions older than 1.22.0.") - with pytest.raises(pa.ArrowTypeError): - arr = pa.array([1, None, 3]) - np.from_dlpack(arr) - - with pytest.raises(pa.ArrowTypeError): - arr = pa.array( - [[0, 1], [3, 4]], - type=pa.list_(pa.int32()) - ) - np.from_dlpack(arr) +# @pytest.mark.parametrize( +# 'value_type', +# [pa.uint8(), pa.uint32(), pa.int16(), pa.float32()] +# ) +# def test_dlpack_on_fixed_shape_tensor(value_type): +# if Version(np.__version__) < Version("1.22.0"): +# pytest.skip("No dlpack support in numpy versions older than 1.22.0.") +# tensor_type = pa.fixed_shape_tensor(value_type, [2, 2]) +# arr = [[1, 2, 3, 4], [10, 20, 30, 40], [1, 2, 3, 4]] +# storage = pa.array(arr, pa.list_(value_type, 4)) +# arr = 
pa.ExtensionArray.from_storage(tensor_type, storage) + +# DLTensor = arr.__dlpack__() +# assert PyCapsule_IsValid(DLTensor, b"dltensor") is True + +# expected = arr.to_numpy_ndarray() +# result = np.from_dlpack(arr) +# np.testing.assert_array_equal(result, expected) + + +# def test_dlpack_not_supported(): +# if Version(np.__version__) < Version("1.22.0"): +# pytest.skip("No dlpack support in numpy versions older than 1.22.0.") +# with pytest.raises(pa.ArrowTypeError): +# arr = pa.array([1, None, 3]) +# np.from_dlpack(arr) + +# with pytest.raises(pa.ArrowTypeError): +# arr = pa.array( +# [[0, 1], [3, 4]], +# type=pa.list_(pa.int32()) +# ) +# np.from_dlpack(arr) From fadf1f921159a08d10fad0ecc8175b64f904b84b Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Thu, 16 Nov 2023 12:20:04 +0100 Subject: [PATCH 11/73] Cython lint fix --- python/pyarrow/array.pxi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index 2367aacf98f..abc16769a96 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -3597,7 +3597,7 @@ class FixedShapeTensorArray(ExtensionArray): if len(self.buffers()) > 3 or self.buffers()[0] or self.buffers()[1]: raise ArrowTypeError( "Can only use __dlpack__ on fixed shape tensor array with no validity buffers.") - #return fixed_shape_tensor_to_dlpack(self) + # return fixed_shape_tensor_to_dlpack(self) return None From 72ffcf3061d7a68f87df8d84a16998ddba06dd55 Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Thu, 16 Nov 2023 12:31:20 +0100 Subject: [PATCH 12/73] Run linter on dlpack header file --- cpp/src/arrow/dlpack_structure.h | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/cpp/src/arrow/dlpack_structure.h b/cpp/src/arrow/dlpack_structure.h index 0bdc1fe0ea3..152bc2a9c86 100644 --- a/cpp/src/arrow/dlpack_structure.h +++ b/cpp/src/arrow/dlpack_structure.h @@ -32,8 +32,8 @@ #define DLPACK_DLL #endif -#include #include +#include #ifdef __cplusplus 
extern "C" { @@ -167,7 +167,8 @@ typedef enum { * - float4(vectorized 4 float): type_code = 2, bits = 32, lanes = 4 * - int8: type_code = 0, bits = 8, lanes = 1 * - std::complex: type_code = 5, bits = 64, lanes = 1 - * - bool: type_code = 6, bits = 8, lanes = 1 (as per common array library convention, the underlying storage size of bool is 8 bits) + * - bool: type_code = 6, bits = 8, lanes = 1 (as per common array library convention, + * the underlying storage size of bool is 8 bits) */ typedef struct { /*! @@ -252,14 +253,14 @@ typedef struct DLManagedTensor { /*! \brief the context of the original host framework of DLManagedTensor in * which DLManagedTensor is used in the framework. It can also be NULL. */ - void * manager_ctx; + void* manager_ctx; /*! * \brief Destructor - this should be called * to destruct the manager_ctx which backs the DLManagedTensor. It can be * NULL if there is no way for the caller to provide a reasonable destructor. * The destructors deletes the argument self as well. */ - void (*deleter)(struct DLManagedTensor * self); + void (*deleter)(struct DLManagedTensor* self); } DLManagedTensor; // bit masks used in in the DLManagedTensorVersioned @@ -288,15 +289,15 @@ struct DLManagedTensorVersioned { * Stores DLManagedTensorVersioned is used in the * framework. It can also be NULL. */ - void *manager_ctx; + void* manager_ctx; /*! * \brief Destructor. * - * This should be called to destruct manager_ctx which holds the DLManagedTensorVersioned. - * It can be NULL if there is no way for the caller to provide a reasonable - * destructor. The destructors deletes the argument self as well. + * This should be called to destruct manager_ctx which holds the + * DLManagedTensorVersioned. It can be NULL if there is no way for the caller to provide + * a reasonable destructor. The destructors deletes the argument self as well. */ - void (*deleter)(struct DLManagedTensorVersioned *self); + void (*deleter)(struct DLManagedTensorVersioned* self); /*! 
* \brief Additional bitmask flags information about the tensor. * From c1ec84e3643ab07914c39331718677beaee9556a Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Thu, 16 Nov 2023 13:00:12 +0100 Subject: [PATCH 13/73] Update pycapsule_deleter --- python/pyarrow/_dlpack.pxi | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/python/pyarrow/_dlpack.pxi b/python/pyarrow/_dlpack.pxi index a7144c5de13..8b44905d69c 100644 --- a/python/pyarrow/_dlpack.pxi +++ b/python/pyarrow/_dlpack.pxi @@ -24,10 +24,23 @@ from cython import sizeof cdef void pycapsule_deleter(object dltensor) noexcept: cdef DLManagedTensor* dlm_tensor + cdef PyObject* err_type + cdef PyObject* err_value + cdef PyObject* err_traceback + + if cpython.PyCapsule_IsValid(dltensor, "used_dltensor"): + return + + cpython.PyErr_Fetch(&err_type, &err_value, &err_traceback) + if cpython.PyCapsule_IsValid(dltensor, 'dltensor'): dlm_tensor = cpython.PyCapsule_GetPointer( dltensor, 'dltensor') dlm_tensor.deleter(dlm_tensor) + else: + cpython.PyErr_WriteUnraisable(dltensor) + + cpython.PyErr_Restore(err_type, err_value, err_traceback) cpdef object to_dlpack(Array arr) except *: From eabc58f7b5b7eed3ee972a9786bbd5ebe921efdc Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Thu, 16 Nov 2023 13:03:31 +0100 Subject: [PATCH 14/73] Add dlpack.h to dlpack.cc --- cpp/src/arrow/dlpack.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cpp/src/arrow/dlpack.cc b/cpp/src/arrow/dlpack.cc index 71c9527913e..69619f879b2 100644 --- a/cpp/src/arrow/dlpack.cc +++ b/cpp/src/arrow/dlpack.cc @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. 
+#include "arrow/dlpack.h" + #include "arrow/array/array_base.h" #include "arrow/dlpack_structure.h" #include "arrow/type.h" From f834d27adbefcb902a9851f38a76071ae1b122ec Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Tue, 21 Nov 2023 09:38:18 +0100 Subject: [PATCH 15/73] Add checks for zero length array and unsupported data types --- cpp/src/arrow/dlpack.cc | 41 +++++++++++++++++++++---- python/pyarrow/_dlpack.pxi | 5 +++ python/pyarrow/array.pxi | 3 -- python/pyarrow/tests/test_array.py | 49 ++++++++++++++++++++---------- 4 files changed, 73 insertions(+), 25 deletions(-) diff --git a/cpp/src/arrow/dlpack.cc b/cpp/src/arrow/dlpack.cc index 69619f879b2..f0a751f6a37 100644 --- a/cpp/src/arrow/dlpack.cc +++ b/cpp/src/arrow/dlpack.cc @@ -23,7 +23,7 @@ namespace arrow { -DLDataType getDLDataType(const Array& arr) { +DLDataType getDLDataType(const Array& arr, Status* status) { DLDataType dtype; dtype.lanes = 1; dtype.bits = arr.type()->bit_width(); @@ -49,7 +49,7 @@ DLDataType getDLDataType(const Array& arr) { dtype.code = DLDataTypeCode::kDLBool; break; default: - // TODO + *status = Status::TypeError("Can only use __dlpack__ on primitive arrays."); break; } return dtype; @@ -66,24 +66,53 @@ static void deleter(DLManagedTensor* arg) { } DLManagedTensor* toDLPack(const Array& arr) { + Status status = Status::OK(); + + // Return null pointer if the array has a validity bitmap + if (arr.null_bitmap() != NULLPTR) { + status = + Status::TypeError("Can only use __dlpack__ on arrays with no validity buffer."); + return NULLPTR; + } + + // Define the DLDataType struct + // Return null pointer if the data type is not supported + // by the protocol. 
Supported data types: int, uint, float + // and bool + DLDataType arr_type = getDLDataType(arr, &status); + if (!status.ok()) { + return NULLPTR; + } + + // Create DLMTensorCtx struct with the reference to + // the data of the array std::shared_ptr array_ref = arr.data(); DLMTensorCtx* DLMTensor = new DLMTensorCtx; DLMTensor->ref = array_ref; + // Define DLManagedTensor struct defined by + // DLPack (dlpack_structure.h) DLManagedTensor* dlm_tensor = &DLMTensor->tensor; dlm_tensor->manager_ctx = DLMTensor; dlm_tensor->deleter = &deleter; - dlm_tensor->dl_tensor.data = - const_cast(reinterpret_cast(array_ref->buffers[1]->address())); + // Define the data pointer to the DLTensor + // If array is of length 0, data pointer should be NULL + if (arr.length() == 0) { + dlm_tensor->dl_tensor.data = NULL; + } else { + dlm_tensor->dl_tensor.data = const_cast( + reinterpret_cast(array_ref->buffers[1]->address())); + } + + // Define DLDevice struct DLDevice ctx; ctx.device_id = 0; ctx.device_type = DLDeviceType::kDLCPU; dlm_tensor->dl_tensor.device = ctx; dlm_tensor->dl_tensor.ndim = 1; - dlm_tensor->dl_tensor.dtype = getDLDataType(arr); - + dlm_tensor->dl_tensor.dtype = arr_type; std::vector* shape_arr = &DLMTensor->shape; shape_arr->resize(1); (*shape_arr)[0] = arr.length(); diff --git a/python/pyarrow/_dlpack.pxi b/python/pyarrow/_dlpack.pxi index 8b44905d69c..09626b34c22 100644 --- a/python/pyarrow/_dlpack.pxi +++ b/python/pyarrow/_dlpack.pxi @@ -45,4 +45,9 @@ cdef void pycapsule_deleter(object dltensor) noexcept: cpdef object to_dlpack(Array arr) except *: dlm_tensor = toDLPack(deref(pyarrow_unwrap_array(arr).get())) + + if dlm_tensor == nullptr: + raise TypeError( + "Can only use __dlpack__ on primitive types with no validity buffer.") + return PyCapsule_New(dlm_tensor, 'dltensor', pycapsule_deleter) diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index abc16769a96..5bb36f0b366 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ 
-1781,9 +1781,6 @@ cdef class Array(_PandasConvertible): return pyarrow_wrap_array(array) def __dlpack__(self, stream=None): - if len(self.buffers()) > 2 or self.buffers()[0]: - raise ArrowTypeError( - "Can only use __dlpack__ on primitive types with no validity buffer.") return to_dlpack(self) diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index f1b12797b83..738a9433cf0 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -3555,10 +3555,14 @@ def PyCapsule_IsValid(capsule, name): @pytest.mark.parametrize( - 'value_type', - [pa.uint8(), pa.uint32(), pa.int16(), pa.float32()] + ('value_type', 'np_type'), + [ + (pa.uint8(), np.int8), + (pa.uint32(), np.uint32), + (pa.float32(), np.float32) + ] ) -def test_dlpack(value_type): +def test_dlpack(value_type, np_type): if Version(np.__version__) < Version("1.22.0"): pytest.skip("No dlpack support in numpy versions older than 1.22.0.") arr = pa.array([1, 2, 3], type=value_type) @@ -3570,6 +3574,15 @@ def test_dlpack(value_type): result = np.from_dlpack(arr) np.testing.assert_array_equal(result, expected) + arr = pa.array([], type=value_type) + + DLTensor = arr.__dlpack__() + assert PyCapsule_IsValid(DLTensor, b"dltensor") is True + + expected = np.array([], dtype=np_type) + result = np.from_dlpack(arr) + np.testing.assert_array_equal(result, expected) + # @pytest.mark.parametrize( # 'value_type', @@ -3591,16 +3604,20 @@ def test_dlpack(value_type): # np.testing.assert_array_equal(result, expected) -# def test_dlpack_not_supported(): -# if Version(np.__version__) < Version("1.22.0"): -# pytest.skip("No dlpack support in numpy versions older than 1.22.0.") -# with pytest.raises(pa.ArrowTypeError): -# arr = pa.array([1, None, 3]) -# np.from_dlpack(arr) - -# with pytest.raises(pa.ArrowTypeError): -# arr = pa.array( -# [[0, 1], [3, 4]], -# type=pa.list_(pa.int32()) -# ) -# np.from_dlpack(arr) +def test_dlpack_not_supported(): + if 
Version(np.__version__) < Version("1.22.0"): + pytest.skip("No dlpack support in numpy versions older than 1.22.0.") + with pytest.raises(TypeError): + arr = pa.array([1, None, 3]) + np.from_dlpack(arr) + + with pytest.raises(TypeError): + arr = pa.array( + [[0, 1], [3, 4]], + type=pa.list_(pa.int32()) + ) + np.from_dlpack(arr) + + with pytest.raises(TypeError): + arr = pa.array([]) + np.from_dlpack(arr) From 3556b07aa36a030ec63604446050712a1406b626 Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Wed, 22 Nov 2023 15:41:27 +0100 Subject: [PATCH 16/73] Update toDLPack signature and remove FixedShapeTensorArray implementation from this PR --- cpp/src/arrow/dlpack.cc | 16 ++++----- cpp/src/arrow/dlpack.h | 2 +- python/pyarrow/_dlpack.pxi | 2 +- python/pyarrow/array.pxi | 7 ---- python/pyarrow/includes/libarrow.pxd | 2 +- python/pyarrow/tests/test_array.py | 49 ++++++++++++---------------- 6 files changed, 31 insertions(+), 47 deletions(-) diff --git a/cpp/src/arrow/dlpack.cc b/cpp/src/arrow/dlpack.cc index f0a751f6a37..6faaef91b6f 100644 --- a/cpp/src/arrow/dlpack.cc +++ b/cpp/src/arrow/dlpack.cc @@ -23,11 +23,11 @@ namespace arrow { -DLDataType getDLDataType(const Array& arr, Status* status) { +DLDataType getDLDataType(const std::shared_ptr& arr, Status* status) { DLDataType dtype; dtype.lanes = 1; - dtype.bits = arr.type()->bit_width(); - switch (arr.type()->id()) { + dtype.bits = arr->type()->bit_width(); + switch (arr->type()->id()) { case Type::INT8: case Type::INT16: case Type::INT32: @@ -65,11 +65,11 @@ static void deleter(DLManagedTensor* arg) { delete static_cast(arg->manager_ctx); } -DLManagedTensor* toDLPack(const Array& arr) { +DLManagedTensor* toDLPack(const std::shared_ptr& arr) { Status status = Status::OK(); // Return null pointer if the array has a validity bitmap - if (arr.null_bitmap() != NULLPTR) { + if (arr->null_bitmap() != NULLPTR) { status = Status::TypeError("Can only use __dlpack__ on arrays with no validity buffer."); return NULLPTR; @@ -86,7 
+86,7 @@ DLManagedTensor* toDLPack(const Array& arr) { // Create DLMTensorCtx struct with the reference to // the data of the array - std::shared_ptr array_ref = arr.data(); + std::shared_ptr array_ref = arr->data(); DLMTensorCtx* DLMTensor = new DLMTensorCtx; DLMTensor->ref = array_ref; @@ -98,7 +98,7 @@ DLManagedTensor* toDLPack(const Array& arr) { // Define the data pointer to the DLTensor // If array is of length 0, data pointer should be NULL - if (arr.length() == 0) { + if (arr->length() == 0) { dlm_tensor->dl_tensor.data = NULL; } else { dlm_tensor->dl_tensor.data = const_cast( @@ -115,7 +115,7 @@ DLManagedTensor* toDLPack(const Array& arr) { dlm_tensor->dl_tensor.dtype = arr_type; std::vector* shape_arr = &DLMTensor->shape; shape_arr->resize(1); - (*shape_arr)[0] = arr.length(); + (*shape_arr)[0] = arr->length(); dlm_tensor->dl_tensor.shape = shape_arr->data(); dlm_tensor->dl_tensor.strides = NULL; dlm_tensor->dl_tensor.byte_offset = 0; diff --git a/cpp/src/arrow/dlpack.h b/cpp/src/arrow/dlpack.h index 7be5d9b7da9..4287c291c69 100644 --- a/cpp/src/arrow/dlpack.h +++ b/cpp/src/arrow/dlpack.h @@ -27,6 +27,6 @@ namespace arrow { /// Returns pointer to the DLManagedTensor class defined by // the DLPack protocol ARROW_EXPORT -DLManagedTensor* toDLPack(const Array& arr); +DLManagedTensor* toDLPack(const std::shared_ptr& arr); } // namespace arrow diff --git a/python/pyarrow/_dlpack.pxi b/python/pyarrow/_dlpack.pxi index 09626b34c22..a147ef1b1dc 100644 --- a/python/pyarrow/_dlpack.pxi +++ b/python/pyarrow/_dlpack.pxi @@ -44,7 +44,7 @@ cdef void pycapsule_deleter(object dltensor) noexcept: cpdef object to_dlpack(Array arr) except *: - dlm_tensor = toDLPack(deref(pyarrow_unwrap_array(arr).get())) + dlm_tensor = toDLPack(pyarrow_unwrap_array(arr)) if dlm_tensor == nullptr: raise TypeError( diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index 5bb36f0b366..4fdaf42ed20 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -3590,13 
+3590,6 @@ class FixedShapeTensorArray(ExtensionArray): FixedSizeListArray.from_arrays(np.ravel(obj, order='C'), size) ) - def __dlpack__(self, stream=None): - if len(self.buffers()) > 3 or self.buffers()[0] or self.buffers()[1]: - raise ArrowTypeError( - "Can only use __dlpack__ on fixed shape tensor array with no validity buffers.") - # return fixed_shape_tensor_to_dlpack(self) - return None - cdef dict _array_classes = { _Type_NA: NullArray, diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index 8ce2e763d67..7bb7ef92642 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -1250,7 +1250,7 @@ cdef extern from "arrow/dlpack_structure.h" nogil: cdef extern from "arrow/dlpack.h" namespace "arrow" nogil: - DLManagedTensor* toDLPack(const CArray& arr) + DLManagedTensor* toDLPack(const shared_ptr[CArray]& arr) cdef extern from "arrow/builder.h" namespace "arrow" nogil: diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index 738a9433cf0..87fdd444c35 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -3565,59 +3565,50 @@ def PyCapsule_IsValid(capsule, name): def test_dlpack(value_type, np_type): if Version(np.__version__) < Version("1.22.0"): pytest.skip("No dlpack support in numpy versions older than 1.22.0.") - arr = pa.array([1, 2, 3], type=value_type) + arr = pa.array([1, 2, 3], type=value_type) DLTensor = arr.__dlpack__() assert PyCapsule_IsValid(DLTensor, b"dltensor") is True - expected = np.array([1, 2, 3]) result = np.from_dlpack(arr) np.testing.assert_array_equal(result, expected) - arr = pa.array([], type=value_type) + # arr_sliced = arr.slice(1, 1) + # DLTensor = arr_sliced.__dlpack__() + # assert PyCapsule_IsValid(DLTensor, b"dltensor") is True + # expected = np.array([2], dtype=np_type) + # result = np.from_dlpack(arr_sliced) + # np.testing.assert_array_equal(result, expected) - DLTensor = 
arr.__dlpack__() + arr_zero = pa.array([], type=value_type) + DLTensor = arr_zero.__dlpack__() assert PyCapsule_IsValid(DLTensor, b"dltensor") is True - expected = np.array([], dtype=np_type) - result = np.from_dlpack(arr) + result = np.from_dlpack(arr_zero) np.testing.assert_array_equal(result, expected) -# @pytest.mark.parametrize( -# 'value_type', -# [pa.uint8(), pa.uint32(), pa.int16(), pa.float32()] -# ) -# def test_dlpack_on_fixed_shape_tensor(value_type): -# if Version(np.__version__) < Version("1.22.0"): -# pytest.skip("No dlpack support in numpy versions older than 1.22.0.") -# tensor_type = pa.fixed_shape_tensor(value_type, [2, 2]) -# arr = [[1, 2, 3, 4], [10, 20, 30, 40], [1, 2, 3, 4]] -# storage = pa.array(arr, pa.list_(value_type, 4)) -# arr = pa.ExtensionArray.from_storage(tensor_type, storage) - -# DLTensor = arr.__dlpack__() -# assert PyCapsule_IsValid(DLTensor, b"dltensor") is True - -# expected = arr.to_numpy_ndarray() -# result = np.from_dlpack(arr) -# np.testing.assert_array_equal(result, expected) - - def test_dlpack_not_supported(): if Version(np.__version__) < Version("1.22.0"): pytest.skip("No dlpack support in numpy versions older than 1.22.0.") - with pytest.raises(TypeError): + + with pytest.raises(TypeError, match="Can only use __dlpack__ on primitive"): arr = pa.array([1, None, 3]) np.from_dlpack(arr) - with pytest.raises(TypeError): + with pytest.raises(TypeError, match="Can only use __dlpack__ on primitive"): arr = pa.array( [[0, 1], [3, 4]], type=pa.list_(pa.int32()) ) np.from_dlpack(arr) - with pytest.raises(TypeError): + with pytest.raises(TypeError, match="Can only use __dlpack__ on primitive"): arr = pa.array([]) np.from_dlpack(arr) + + # DLPack doesn't support bit-packed boolean values + # Should we cast to uint8? 
+ with pytest.raises(RuntimeError, match="Unsupported dtype in DLTensor"): + arr = pa.array([True, False, True]) + np.from_dlpack(arr) From 57ceee0b32d481438402aec9f3986b7306f77ceb Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Wed, 22 Nov 2023 16:56:11 +0100 Subject: [PATCH 17/73] Rename toDLPack to ExportToDLPack --- cpp/src/arrow/dlpack.cc | 2 +- cpp/src/arrow/dlpack.h | 2 +- python/pyarrow/_dlpack.pxi | 2 +- python/pyarrow/includes/libarrow.pxd | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/src/arrow/dlpack.cc b/cpp/src/arrow/dlpack.cc index 6faaef91b6f..7b7b355836d 100644 --- a/cpp/src/arrow/dlpack.cc +++ b/cpp/src/arrow/dlpack.cc @@ -65,7 +65,7 @@ static void deleter(DLManagedTensor* arg) { delete static_cast(arg->manager_ctx); } -DLManagedTensor* toDLPack(const std::shared_ptr& arr) { +DLManagedTensor* ExportToDLPack(const std::shared_ptr& arr) { Status status = Status::OK(); // Return null pointer if the array has a validity bitmap diff --git a/cpp/src/arrow/dlpack.h b/cpp/src/arrow/dlpack.h index 4287c291c69..99576ddf370 100644 --- a/cpp/src/arrow/dlpack.h +++ b/cpp/src/arrow/dlpack.h @@ -27,6 +27,6 @@ namespace arrow { /// Returns pointer to the DLManagedTensor class defined by // the DLPack protocol ARROW_EXPORT -DLManagedTensor* toDLPack(const std::shared_ptr& arr); +DLManagedTensor* ExportToDLPack(const std::shared_ptr& arr); } // namespace arrow diff --git a/python/pyarrow/_dlpack.pxi b/python/pyarrow/_dlpack.pxi index a147ef1b1dc..854987b7714 100644 --- a/python/pyarrow/_dlpack.pxi +++ b/python/pyarrow/_dlpack.pxi @@ -44,7 +44,7 @@ cdef void pycapsule_deleter(object dltensor) noexcept: cpdef object to_dlpack(Array arr) except *: - dlm_tensor = toDLPack(pyarrow_unwrap_array(arr)) + dlm_tensor = ExportToDLPack(pyarrow_unwrap_array(arr)) if dlm_tensor == nullptr: raise TypeError( diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index 7bb7ef92642..14bd979efe6 100644 --- 
a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -1250,7 +1250,7 @@ cdef extern from "arrow/dlpack_structure.h" nogil: cdef extern from "arrow/dlpack.h" namespace "arrow" nogil: - DLManagedTensor* toDLPack(const shared_ptr[CArray]& arr) + DLManagedTensor* ExportToDLPack(const shared_ptr[CArray]& arr) cdef extern from "arrow/builder.h" namespace "arrow" nogil: From 78363df1d1e0763b4c8f4230500270383771bad3 Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Wed, 22 Nov 2023 17:12:00 +0100 Subject: [PATCH 18/73] Update getDLDataType --- cpp/src/arrow/dlpack.cc | 13 ++++++++----- python/pyarrow/_dlpack.pxi | 2 +- python/pyarrow/tests/test_array.py | 10 ++++++---- 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/cpp/src/arrow/dlpack.cc b/cpp/src/arrow/dlpack.cc index 7b7b355836d..ee7bb2909cd 100644 --- a/cpp/src/arrow/dlpack.cc +++ b/cpp/src/arrow/dlpack.cc @@ -23,11 +23,11 @@ namespace arrow { -DLDataType getDLDataType(const std::shared_ptr& arr, Status* status) { +DLDataType getDLDataType(const std::shared_ptr& type, Status* status) { DLDataType dtype; dtype.lanes = 1; - dtype.bits = arr->type()->bit_width(); - switch (arr->type()->id()) { + dtype.bits = type->bit_width(); + switch (type->id()) { case Type::INT8: case Type::INT16: case Type::INT32: @@ -46,7 +46,10 @@ DLDataType getDLDataType(const std::shared_ptr& arr, Status* status) { dtype.code = DLDataTypeCode::kDLFloat; break; case Type::BOOL: - dtype.code = DLDataTypeCode::kDLBool; + // DLPack supports byte-packed boolean values + // dtype.code = DLDataTypeCode::kDLBool; + *status = + Status::TypeError("Bit-packed boolean data type not supported by DLPack."); break; default: *status = Status::TypeError("Can only use __dlpack__ on primitive arrays."); @@ -79,7 +82,7 @@ DLManagedTensor* ExportToDLPack(const std::shared_ptr& arr) { // Return null pointer if the data type is not supported // by the protocol. 
Supported data types: int, uint, float // and bool - DLDataType arr_type = getDLDataType(arr, &status); + DLDataType arr_type = getDLDataType(arr->type(), &status); if (!status.ok()) { return NULLPTR; } diff --git a/python/pyarrow/_dlpack.pxi b/python/pyarrow/_dlpack.pxi index 854987b7714..e2d74c86ea9 100644 --- a/python/pyarrow/_dlpack.pxi +++ b/python/pyarrow/_dlpack.pxi @@ -48,6 +48,6 @@ cpdef object to_dlpack(Array arr) except *: if dlm_tensor == nullptr: raise TypeError( - "Can only use __dlpack__ on primitive types with no validity buffer.") + "Can only use __dlpack__ on primitive types (byte-packed booleans) with no validity buffer.") return PyCapsule_New(dlm_tensor, 'dltensor', pycapsule_deleter) diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index 87fdd444c35..39e08d8c4f6 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -3592,23 +3592,25 @@ def test_dlpack_not_supported(): if Version(np.__version__) < Version("1.22.0"): pytest.skip("No dlpack support in numpy versions older than 1.22.0.") - with pytest.raises(TypeError, match="Can only use __dlpack__ on primitive"): + msg = ("use __dlpack__ on primitive types \\(byte-packed booleans\\) " + "with no validity buffer") + with pytest.raises(TypeError, match=msg): arr = pa.array([1, None, 3]) np.from_dlpack(arr) - with pytest.raises(TypeError, match="Can only use __dlpack__ on primitive"): + with pytest.raises(TypeError, match=msg): arr = pa.array( [[0, 1], [3, 4]], type=pa.list_(pa.int32()) ) np.from_dlpack(arr) - with pytest.raises(TypeError, match="Can only use __dlpack__ on primitive"): + with pytest.raises(TypeError, match=msg): arr = pa.array([]) np.from_dlpack(arr) # DLPack doesn't support bit-packed boolean values # Should we cast to uint8? 
- with pytest.raises(RuntimeError, match="Unsupported dtype in DLTensor"): + with pytest.raises(TypeError, match=msg): arr = pa.array([True, False, True]) np.from_dlpack(arr) From 22739e826bcb1c0151dabae8f259e1abbf516c12 Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Wed, 22 Nov 2023 17:15:28 +0100 Subject: [PATCH 19/73] Move _dlpack.pxi include to lib.pyx --- python/pyarrow/array.pxi | 2 -- python/pyarrow/lib.pyx | 3 +++ 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index 4fdaf42ed20..0d94b4a82fd 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -21,8 +21,6 @@ import os import warnings from cython import sizeof -include "_dlpack.pxi" - cdef _sequence_to_array(object sequence, object mask, object size, DataType type, CMemoryPool* pool, c_bool from_pandas): diff --git a/python/pyarrow/lib.pyx b/python/pyarrow/lib.pyx index 57fb0f42e38..29a0bed5594 100644 --- a/python/pyarrow/lib.pyx +++ b/python/pyarrow/lib.pyx @@ -176,6 +176,9 @@ include "table.pxi" # Tensors include "tensor.pxi" +# DLPack +include "_dlpack.pxi" + # File IO include "io.pxi" From 1bcf161ea7c5216ae4cae94168d4c84744222d5d Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Thu, 23 Nov 2023 17:22:17 +0100 Subject: [PATCH 20/73] Move dlpack files to arrow/c and add dlpack namespace --- cpp/src/arrow/CMakeLists.txt | 2 +- cpp/src/arrow/{ => c}/dlpack.cc | 10 +++++++--- cpp/src/arrow/{ => c}/dlpack.h | 8 ++++++-- cpp/src/arrow/{ => c}/dlpack_structure.h | 0 python/pyarrow/includes/libarrow.pxd | 6 +++--- 5 files changed, 17 insertions(+), 9 deletions(-) rename cpp/src/arrow/{ => c}/dlpack.cc (95%) rename cpp/src/arrow/{ => c}/dlpack.h (88%) rename cpp/src/arrow/{ => c}/dlpack_structure.h (100%) diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index f2e38634ad4..00947c62756 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -174,7 +174,6 @@ set(ARROW_SRCS config.cc datum.cc 
device.cc - dlpack.cc extension_type.cc memory_pool.cc pretty_print.cc @@ -193,6 +192,7 @@ set(ARROW_SRCS type_traits.cc visitor.cc c/bridge.cc + c/dlpack.cc io/buffered.cc io/caching.cc io/compressed.cc diff --git a/cpp/src/arrow/dlpack.cc b/cpp/src/arrow/c/dlpack.cc similarity index 95% rename from cpp/src/arrow/dlpack.cc rename to cpp/src/arrow/c/dlpack.cc index ee7bb2909cd..d023dab9a8b 100644 --- a/cpp/src/arrow/dlpack.cc +++ b/cpp/src/arrow/c/dlpack.cc @@ -15,14 +15,16 @@ // specific language governing permissions and limitations // under the License. -#include "arrow/dlpack.h" +#include "arrow/c/dlpack.h" #include "arrow/array/array_base.h" -#include "arrow/dlpack_structure.h" +#include "arrow/c/dlpack_structure.h" #include "arrow/type.h" namespace arrow { +namespace dlpack { + DLDataType getDLDataType(const std::shared_ptr& type, Status* status) { DLDataType dtype; dtype.lanes = 1; @@ -68,7 +70,7 @@ static void deleter(DLManagedTensor* arg) { delete static_cast(arg->manager_ctx); } -DLManagedTensor* ExportToDLPack(const std::shared_ptr& arr) { +DLManagedTensor* Export(const std::shared_ptr& arr) { Status status = Status::OK(); // Return null pointer if the array has a validity bitmap @@ -126,4 +128,6 @@ DLManagedTensor* ExportToDLPack(const std::shared_ptr& arr) { return dlm_tensor; } +} // namespace dlpack + } // namespace arrow diff --git a/cpp/src/arrow/dlpack.h b/cpp/src/arrow/c/dlpack.h similarity index 88% rename from cpp/src/arrow/dlpack.h rename to cpp/src/arrow/c/dlpack.h index 99576ddf370..80a5edb5865 100644 --- a/cpp/src/arrow/dlpack.h +++ b/cpp/src/arrow/c/dlpack.h @@ -18,15 +18,19 @@ #pragma once #include "arrow/array/array_base.h" -#include "arrow/dlpack_structure.h" +#include "arrow/c/dlpack_structure.h" namespace arrow { +namespace dlpack { + /// \brief DLPack protocol for producing DLManagedTensor /// /// Returns pointer to the DLManagedTensor class defined by // the DLPack protocol ARROW_EXPORT -DLManagedTensor* ExportToDLPack(const 
std::shared_ptr& arr); +DLManagedTensor* Export(const std::shared_ptr& arr); + +} // namespace dlpack } // namespace arrow diff --git a/cpp/src/arrow/dlpack_structure.h b/cpp/src/arrow/c/dlpack_structure.h similarity index 100% rename from cpp/src/arrow/dlpack_structure.h rename to cpp/src/arrow/c/dlpack_structure.h diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index 14bd979efe6..968b3c5127f 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -1199,7 +1199,7 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: shared_ptr[CScalar] MakeNullScalar(shared_ptr[CDataType] type) -cdef extern from "arrow/dlpack_structure.h" nogil: +cdef extern from "arrow/c/dlpack_structure.h" nogil: cdef enum DLDeviceType: kDLCPU = 1 kDLCUDA = 2 @@ -1249,8 +1249,8 @@ cdef extern from "arrow/dlpack_structure.h" nogil: void (*deleter)(DLManagedTensor*) -cdef extern from "arrow/dlpack.h" namespace "arrow" nogil: - DLManagedTensor* ExportToDLPack(const shared_ptr[CArray]& arr) +cdef extern from "arrow/c/dlpack.h" namespace "arrow::dlpack" nogil: + DLManagedTensor* ExportToDLPack" arrow::dlpack::Export"(const shared_ptr[CArray]& arr) cdef extern from "arrow/builder.h" namespace "arrow" nogil: From c8d8799a7210ecb22ea81900186c015fcaebe1cf Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Thu, 23 Nov 2023 17:57:36 +0100 Subject: [PATCH 21/73] Update test parametrisation and numpy assert --- python/pyarrow/tests/test_array.py | 31 ++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index 39e08d8c4f6..7ba59212159 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -3557,9 +3557,16 @@ def PyCapsule_IsValid(capsule, name): @pytest.mark.parametrize( ('value_type', 'np_type'), [ - (pa.uint8(), np.int8), + (pa.uint8(), np.uint8), + (pa.uint16(), np.uint16), 
(pa.uint32(), np.uint32), - (pa.float32(), np.float32) + (pa.uint64(), np.uint64), + (pa.int8(), np.int8), + (pa.int16(), np.int16), + (pa.int32(), np.int32), + (pa.int64(), np.int64), + (pa.float32(), np.float32), + (pa.float64(), np.float64), ] ) def test_dlpack(value_type, np_type): @@ -3569,23 +3576,35 @@ def test_dlpack(value_type, np_type): arr = pa.array([1, 2, 3], type=value_type) DLTensor = arr.__dlpack__() assert PyCapsule_IsValid(DLTensor, b"dltensor") is True - expected = np.array([1, 2, 3]) + expected = np.array([1, 2, 3], dtype=np_type) result = np.from_dlpack(arr) - np.testing.assert_array_equal(result, expected) + np.testing.assert_array_equal(result, expected, strict=True) # arr_sliced = arr.slice(1, 1) # DLTensor = arr_sliced.__dlpack__() # assert PyCapsule_IsValid(DLTensor, b"dltensor") is True # expected = np.array([2], dtype=np_type) # result = np.from_dlpack(arr_sliced) - # np.testing.assert_array_equal(result, expected) + # np.testing.assert_array_equal(result, expected, strict=True) arr_zero = pa.array([], type=value_type) DLTensor = arr_zero.__dlpack__() assert PyCapsule_IsValid(DLTensor, b"dltensor") is True expected = np.array([], dtype=np_type) result = np.from_dlpack(arr_zero) - np.testing.assert_array_equal(result, expected) + np.testing.assert_array_equal(result, expected, strict=True) + + +def test_dlpack_float_16(): + if Version(np.__version__) < Version("1.22.0"): + pytest.skip("No dlpack support in numpy versions older than 1.22.0.") + + expected = np.array([1, 2, 3], dtype=np.float16) + arr = pa.array(expected, type=pa.float16()) + DLTensor = arr.__dlpack__() + assert PyCapsule_IsValid(DLTensor, b"dltensor") is True + result = np.from_dlpack(arr) + np.testing.assert_array_equal(result, expected, strict=True) def test_dlpack_not_supported(): From 2a5bf42989f21b05211e63513721a070098d16a2 Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Mon, 27 Nov 2023 12:47:14 +0100 Subject: [PATCH 22/73] Update C++ method and expand the docs note --- 
cpp/src/arrow/c/dlpack.cc | 45 ++++++++++++---------------- cpp/src/arrow/c/dlpack.h | 18 +++++++++-- python/pyarrow/_dlpack.pxi | 6 ++-- python/pyarrow/array.pxi | 21 ++++++++++++- python/pyarrow/includes/libarrow.pxd | 3 +- python/pyarrow/tests/test_array.py | 13 ++++---- 6 files changed, 64 insertions(+), 42 deletions(-) diff --git a/cpp/src/arrow/c/dlpack.cc b/cpp/src/arrow/c/dlpack.cc index d023dab9a8b..4d17d0799c0 100644 --- a/cpp/src/arrow/c/dlpack.cc +++ b/cpp/src/arrow/c/dlpack.cc @@ -25,7 +25,7 @@ namespace arrow { namespace dlpack { -DLDataType getDLDataType(const std::shared_ptr& type, Status* status) { +Status getDLDataType(const std::shared_ptr& type, DLDataType* out) { DLDataType dtype; dtype.lanes = 1; dtype.bits = type->bit_width(); @@ -35,29 +35,29 @@ DLDataType getDLDataType(const std::shared_ptr& type, Status* status) case Type::INT32: case Type::INT64: dtype.code = DLDataTypeCode::kDLInt; - break; + *out = dtype; + return Status::OK(); case Type::UINT8: case Type::UINT16: case Type::UINT32: case Type::UINT64: dtype.code = DLDataTypeCode::kDLUInt; - break; + *out = dtype; + return Status::OK(); case Type::HALF_FLOAT: case Type::FLOAT: case Type::DOUBLE: dtype.code = DLDataTypeCode::kDLFloat; - break; + *out = dtype; + return Status::OK(); case Type::BOOL: // DLPack supports byte-packed boolean values - // dtype.code = DLDataTypeCode::kDLBool; - *status = - Status::TypeError("Bit-packed boolean data type not supported by DLPack."); - break; + return Status::TypeError("Bit-packed boolean data type not supported by DLPack."); default: - *status = Status::TypeError("Can only use __dlpack__ on primitive arrays."); - break; + return Status::TypeError( + "Can only use __dlpack__ on primitive arrays without NullType and Decimal " + "types."); } - return dtype; } struct DLMTensorCtx { @@ -70,24 +70,16 @@ static void deleter(DLManagedTensor* arg) { delete static_cast(arg->manager_ctx); } -DLManagedTensor* Export(const std::shared_ptr& arr) { - Status 
status = Status::OK(); - - // Return null pointer if the array has a validity bitmap +Status ExportArray(const std::shared_ptr& arr, DLManagedTensor** out) { if (arr->null_bitmap() != NULLPTR) { - status = - Status::TypeError("Can only use __dlpack__ on arrays with no validity buffer."); - return NULLPTR; + return Status::TypeError( + "Can only use __dlpack__ on arrays with no validity buffer."); } // Define the DLDataType struct - // Return null pointer if the data type is not supported - // by the protocol. Supported data types: int, uint, float - // and bool - DLDataType arr_type = getDLDataType(arr->type(), &status); - if (!status.ok()) { - return NULLPTR; - } + // Supported data types: int, uint, float + DLDataType arr_type; + RETURN_NOT_OK(getDLDataType(arr->type(), &arr_type)); // Create DLMTensorCtx struct with the reference to // the data of the array @@ -125,7 +117,8 @@ DLManagedTensor* Export(const std::shared_ptr& arr) { dlm_tensor->dl_tensor.strides = NULL; dlm_tensor->dl_tensor.byte_offset = 0; - return dlm_tensor; + *out = dlm_tensor; + return Status::OK(); } } // namespace dlpack diff --git a/cpp/src/arrow/c/dlpack.h b/cpp/src/arrow/c/dlpack.h index 80a5edb5865..db2fb3c16dd 100644 --- a/cpp/src/arrow/c/dlpack.h +++ b/cpp/src/arrow/c/dlpack.h @@ -26,10 +26,22 @@ namespace dlpack { /// \brief DLPack protocol for producing DLManagedTensor /// -/// Returns pointer to the DLManagedTensor class defined by -// the DLPack protocol +/// DLMangedTensor is produced from an array as defined by +/// the DLPack protocol, see https://dmlc.github.io/dlpack/latest/. +/// +/// Data types for which the protocol is supported are +/// primitive data types without NullType, BooleanType and +/// Decimal types. +/// +/// DLPack protocol only supports arrays with one contiguous +/// memory region which means Arrow Arrays with validity buffers +/// are not supported. 
+/// +/// \param[in] arr Arrow array +/// \param[out] out DLManagedTensor struct +/// \return Status ARROW_EXPORT -DLManagedTensor* Export(const std::shared_ptr& arr); +Status ExportArray(const std::shared_ptr& arr, DLManagedTensor** out); } // namespace dlpack diff --git a/python/pyarrow/_dlpack.pxi b/python/pyarrow/_dlpack.pxi index e2d74c86ea9..8c7c228752e 100644 --- a/python/pyarrow/_dlpack.pxi +++ b/python/pyarrow/_dlpack.pxi @@ -44,10 +44,8 @@ cdef void pycapsule_deleter(object dltensor) noexcept: cpdef object to_dlpack(Array arr) except *: - dlm_tensor = ExportToDLPack(pyarrow_unwrap_array(arr)) - if dlm_tensor == nullptr: - raise TypeError( - "Can only use __dlpack__ on primitive types (byte-packed booleans) with no validity buffer.") + cdef DLManagedTensor* dlm_tensor + check_status(ExportToDLPack(pyarrow_unwrap_array(arr), &dlm_tensor)) return PyCapsule_New(dlm_tensor, 'dltensor', pycapsule_deleter) diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index 0d94b4a82fd..dced7a981e3 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -1779,7 +1779,26 @@ cdef class Array(_PandasConvertible): return pyarrow_wrap_array(array) def __dlpack__(self, stream=None): - return to_dlpack(self) + """Export a primitive array as a DLPack capsule. + + Parameters + ---------- + stream : int, optional + A Python integer representing a pointer to a stream. Currently not supported. + Stream is provided by the consumer to the producer to instruct the producer + to ensure that operations can safely be performed on the array. + + Returns + ------- + capsule : PyCapsule + A DLPack capsule for the array, containing a DLPackManagedTensor. + """ + if stream is None: + return to_dlpack(self) + else: + raise NotImplementedError( + "Only stream=None is supported." 
+ ) cdef _array_like_to_pandas(obj, options, types_mapper): diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index 968b3c5127f..c3baab8bf46 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -1250,7 +1250,8 @@ cdef extern from "arrow/c/dlpack_structure.h" nogil: cdef extern from "arrow/c/dlpack.h" namespace "arrow::dlpack" nogil: - DLManagedTensor* ExportToDLPack" arrow::dlpack::Export"(const shared_ptr[CArray]& arr) + CStatus ExportToDLPack" arrow::dlpack::ExportArray"(const shared_ptr[CArray]& arr, + DLManagedTensor** out) cdef extern from "arrow/builder.h" namespace "arrow" nogil: diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index 7ba59212159..28a5d53bfb0 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -3611,25 +3611,24 @@ def test_dlpack_not_supported(): if Version(np.__version__) < Version("1.22.0"): pytest.skip("No dlpack support in numpy versions older than 1.22.0.") - msg = ("use __dlpack__ on primitive types \\(byte-packed booleans\\) " - "with no validity buffer") - with pytest.raises(TypeError, match=msg): + with pytest.raises(TypeError, match="Can only use __dlpack__ " + "on arrays with no validity buffer."): arr = pa.array([1, None, 3]) np.from_dlpack(arr) - with pytest.raises(TypeError, match=msg): + with pytest.raises(TypeError, match="Can only use __dlpack__ on primitive arrays"): arr = pa.array( [[0, 1], [3, 4]], type=pa.list_(pa.int32()) ) np.from_dlpack(arr) - with pytest.raises(TypeError, match=msg): + with pytest.raises(TypeError, match="Can only use __dlpack__ on primitive arrays"): arr = pa.array([]) np.from_dlpack(arr) # DLPack doesn't support bit-packed boolean values - # Should we cast to uint8? 
- with pytest.raises(TypeError, match=msg): + with pytest.raises(TypeError, match="Bit-packed boolean data type " + "not supported by DLPack."): arr = pa.array([True, False, True]) np.from_dlpack(arr) From 21b95d8688098137cd981925f8c99eb46c1d55de Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Mon, 27 Nov 2023 12:59:23 +0100 Subject: [PATCH 23/73] Rename dlpack_structure.h to dlpack_abi.h and add commit info --- cpp/src/arrow/c/{dlpack_structure.h => dlpack_abi.h} | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) rename cpp/src/arrow/c/{dlpack_structure.h => dlpack_abi.h} (98%) diff --git a/cpp/src/arrow/c/dlpack_structure.h b/cpp/src/arrow/c/dlpack_abi.h similarity index 98% rename from cpp/src/arrow/c/dlpack_structure.h rename to cpp/src/arrow/c/dlpack_abi.h index 152bc2a9c86..2587eef6d30 100644 --- a/cpp/src/arrow/c/dlpack_structure.h +++ b/cpp/src/arrow/c/dlpack_abi.h @@ -1,3 +1,5 @@ +// Taken from: +// https://github.com/dmlc/dlpack/blob/ca4d00ad3e2e0f410eeab3264d21b8a39397f362/include/dlpack/dlpack.h /*! 
* Copyright (c) 2017 by Contributors * \file dlpack.h @@ -316,4 +318,4 @@ struct DLManagedTensorVersioned { #ifdef __cplusplus } // DLPACK_EXTERN_C #endif -#endif // DLPACK_DLPACK_H_ +#endif // DLPACK_DLPACK_H_ \ No newline at end of file From 176254428c77aacfac62744df6fda7fe288bd721 Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Mon, 27 Nov 2023 13:01:56 +0100 Subject: [PATCH 24/73] Empty line --- cpp/src/arrow/c/dlpack_abi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/c/dlpack_abi.h b/cpp/src/arrow/c/dlpack_abi.h index 2587eef6d30..4af557a7ed5 100644 --- a/cpp/src/arrow/c/dlpack_abi.h +++ b/cpp/src/arrow/c/dlpack_abi.h @@ -318,4 +318,4 @@ struct DLManagedTensorVersioned { #ifdef __cplusplus } // DLPACK_EXTERN_C #endif -#endif // DLPACK_DLPACK_H_ \ No newline at end of file +#endif // DLPACK_DLPACK_H_ From fedd464ba82798b5415b70abb04ee6ea6b04e32b Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Mon, 27 Nov 2023 13:13:46 +0100 Subject: [PATCH 25/73] Add comments to pycapsule_deleter --- python/pyarrow/_dlpack.pxi | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/python/pyarrow/_dlpack.pxi b/python/pyarrow/_dlpack.pxi index 8c7c228752e..a6f605aa2ef 100644 --- a/python/pyarrow/_dlpack.pxi +++ b/python/pyarrow/_dlpack.pxi @@ -28,9 +28,12 @@ cdef void pycapsule_deleter(object dltensor) noexcept: cdef PyObject* err_value cdef PyObject* err_traceback + # Do nothing if the capsule has been consumed if cpython.PyCapsule_IsValid(dltensor, "used_dltensor"): return + # An exception may be in-flight, we must save it in case + # we create another one cpython.PyErr_Fetch(&err_type, &err_value, &err_traceback) if cpython.PyCapsule_IsValid(dltensor, 'dltensor'): @@ -40,6 +43,7 @@ cdef void pycapsule_deleter(object dltensor) noexcept: else: cpython.PyErr_WriteUnraisable(dltensor) + # Set the error indicator from err_type, err_value, err_traceback cpython.PyErr_Restore(err_type, err_value, err_traceback) From 01997a6da43d85d1004846d01cb5677f2828b9cc 
Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Mon, 27 Nov 2023 13:18:12 +0100 Subject: [PATCH 26/73] Update includes --- cpp/src/arrow/c/dlpack.cc | 2 +- cpp/src/arrow/c/dlpack.h | 2 +- python/pyarrow/includes/libarrow.pxd | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/src/arrow/c/dlpack.cc b/cpp/src/arrow/c/dlpack.cc index 4d17d0799c0..40a9a1a86cf 100644 --- a/cpp/src/arrow/c/dlpack.cc +++ b/cpp/src/arrow/c/dlpack.cc @@ -18,7 +18,7 @@ #include "arrow/c/dlpack.h" #include "arrow/array/array_base.h" -#include "arrow/c/dlpack_structure.h" +#include "arrow/c/dlpack_abi.h" #include "arrow/type.h" namespace arrow { diff --git a/cpp/src/arrow/c/dlpack.h b/cpp/src/arrow/c/dlpack.h index db2fb3c16dd..8988ed63c1a 100644 --- a/cpp/src/arrow/c/dlpack.h +++ b/cpp/src/arrow/c/dlpack.h @@ -18,7 +18,7 @@ #pragma once #include "arrow/array/array_base.h" -#include "arrow/c/dlpack_structure.h" +#include "arrow/c/dlpack_abi.h" namespace arrow { diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index c3baab8bf46..0b9c6d4cb35 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -1199,7 +1199,7 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: shared_ptr[CScalar] MakeNullScalar(shared_ptr[CDataType] type) -cdef extern from "arrow/c/dlpack_structure.h" nogil: +cdef extern from "arrow/c/dlpack_abi.h" nogil: cdef enum DLDeviceType: kDLCPU = 1 kDLCUDA = 2 From f8dbb0b7f8ad0bd72c1c0a30bb9df0ef7c21eb81 Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Mon, 27 Nov 2023 14:48:00 +0100 Subject: [PATCH 27/73] Skip test on numpy < 1.24.0 --- python/pyarrow/tests/test_array.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index 28a5d53bfb0..9c21457515e 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -3571,7 +3571,9 @@ def PyCapsule_IsValid(capsule, 
name): ) def test_dlpack(value_type, np_type): if Version(np.__version__) < Version("1.22.0"): - pytest.skip("No dlpack support in numpy versions older than 1.22.0.") + pytest.skip("No dlpack support in numpy versions older than 1.22.0, " + "strict keyward in assert_array_equal added in numpy version " + "1.24.0") arr = pa.array([1, 2, 3], type=value_type) DLTensor = arr.__dlpack__() @@ -3597,7 +3599,9 @@ def test_dlpack(value_type, np_type): def test_dlpack_float_16(): if Version(np.__version__) < Version("1.22.0"): - pytest.skip("No dlpack support in numpy versions older than 1.22.0.") + pytest.skip("No dlpack support in numpy versions older than 1.22.0, " + "strict keyward in assert_array_equal added in numpy version " + "1.24.0") expected = np.array([1, 2, 3], dtype=np.float16) arr = pa.array(expected, type=pa.float16()) From c6ee1bb3f654a9bd00eac6a114d8ac35b7ce4aae Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Mon, 27 Nov 2023 18:45:30 +0100 Subject: [PATCH 28/73] Skip the tests for real --- cpp/src/arrow/c/CMakeLists.txt | 1 + python/pyarrow/tests/test_array.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/c/CMakeLists.txt b/cpp/src/arrow/c/CMakeLists.txt index 3765477ba09..81a81cd3f11 100644 --- a/cpp/src/arrow/c/CMakeLists.txt +++ b/cpp/src/arrow/c/CMakeLists.txt @@ -16,6 +16,7 @@ # under the License. 
add_arrow_test(bridge_test PREFIX "arrow-c") +add_arrow_test(dlpack_test) add_arrow_benchmark(bridge_benchmark) diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index 9c21457515e..24d3d970901 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -3570,7 +3570,7 @@ def PyCapsule_IsValid(capsule, name): ] ) def test_dlpack(value_type, np_type): - if Version(np.__version__) < Version("1.22.0"): + if Version(np.__version__) < Version("1.24.0"): pytest.skip("No dlpack support in numpy versions older than 1.22.0, " "strict keyward in assert_array_equal added in numpy version " "1.24.0") @@ -3598,7 +3598,7 @@ def test_dlpack(value_type, np_type): def test_dlpack_float_16(): - if Version(np.__version__) < Version("1.22.0"): + if Version(np.__version__) < Version("1.24.0"): pytest.skip("No dlpack support in numpy versions older than 1.22.0, " "strict keyward in assert_array_equal added in numpy version " "1.24.0") From bcd05ea3dc650bda15b7a0cc82d471b91a918ad7 Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Tue, 28 Nov 2023 09:01:17 +0100 Subject: [PATCH 29/73] Remove leftover from the CMakeLists --- cpp/src/arrow/c/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/src/arrow/c/CMakeLists.txt b/cpp/src/arrow/c/CMakeLists.txt index 81a81cd3f11..3765477ba09 100644 --- a/cpp/src/arrow/c/CMakeLists.txt +++ b/cpp/src/arrow/c/CMakeLists.txt @@ -16,7 +16,6 @@ # under the License. 
add_arrow_test(bridge_test PREFIX "arrow-c") -add_arrow_test(dlpack_test) add_arrow_benchmark(bridge_benchmark) From 6626a9161191be30164a025785c8f5f4fa435b57 Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Tue, 28 Nov 2023 09:54:10 +0100 Subject: [PATCH 30/73] Add a CPU device check and pyarrow test for cuda (trial) --- cpp/src/arrow/c/dlpack.cc | 10 ++++++++-- python/pyarrow/tests/test_array.py | 16 ++++++++++++++++ 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/c/dlpack.cc b/cpp/src/arrow/c/dlpack.cc index 40a9a1a86cf..cb9fcc55176 100644 --- a/cpp/src/arrow/c/dlpack.cc +++ b/cpp/src/arrow/c/dlpack.cc @@ -19,6 +19,7 @@ #include "arrow/array/array_base.h" #include "arrow/c/dlpack_abi.h" +#include "arrow/device.h" #include "arrow/type.h" namespace arrow { @@ -104,8 +105,13 @@ Status ExportArray(const std::shared_ptr& arr, DLManagedTensor** out) { // Define DLDevice struct DLDevice ctx; - ctx.device_id = 0; - ctx.device_type = DLDeviceType::kDLCPU; + if (array_ref->buffers[1]->device_type() == DeviceAllocationType::kCPU) { + ctx.device_id = 0; + ctx.device_type = DLDeviceType::kDLCPU; + } else { + return Status::NotImplemented( + "DLPack support is implemented only for buffers on CPU device."); + } dlm_tensor->dl_tensor.device = ctx; dlm_tensor->dl_tensor.ndim = 1; diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index 24d3d970901..8483abf410c 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -3636,3 +3636,19 @@ def test_dlpack_not_supported(): "not supported by DLPack."): arr = pa.array([True, False, True]) np.from_dlpack(arr) + +def test_dlpack_cuda_not_supported(): + cuda = pytest.importorskip("pyarrow.cuda") + + schema = pa.schema([pa.field('f0', pa.int16())]) + a0 = pa.array([1, 2, 3], type = pa.int16()) + batch = pa.record_batch([a0], schema=schema) + + cbuf = cuda.serialize_record_batch(batch, cuda.Context(0)) + cbatch = cuda.read_record_batch(cbuf, 
batch.schema) + carr = cbatch["a0"] + + # CudaBuffers not yet supported + with pytest.raises(NotImplementedError, match="DLPack support is implemented " + "only for buffers on CPU device."): + np.from_dlpack(carr) From d169d4c398906ea8bbcd3d4e64f91b564e0921e5 Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Tue, 28 Nov 2023 15:50:12 +0100 Subject: [PATCH 31/73] Handle offsets --- cpp/src/arrow/c/dlpack.cc | 10 +++++++++- python/pyarrow/tests/test_array.py | 26 ++++++++++++++++++++------ 2 files changed, 29 insertions(+), 7 deletions(-) diff --git a/cpp/src/arrow/c/dlpack.cc b/cpp/src/arrow/c/dlpack.cc index cb9fcc55176..67a7e66d65c 100644 --- a/cpp/src/arrow/c/dlpack.cc +++ b/cpp/src/arrow/c/dlpack.cc @@ -95,9 +95,17 @@ Status ExportArray(const std::shared_ptr& arr, DLManagedTensor** out) { dlm_tensor->deleter = &deleter; // Define the data pointer to the DLTensor - // If array is of length 0, data pointer should be NULL + // If array is of length 0, data poin ter should be NULL if (arr->length() == 0) { dlm_tensor->dl_tensor.data = NULL; + } else if (arr->offset() > 0) { + const auto byte_width = arr->type()->byte_width(); + const auto start = arr->offset() * byte_width; + ARROW_ASSIGN_OR_RAISE( + auto sliced_buffer, + SliceBufferSafe(array_ref->buffers[1], start)); + dlm_tensor->dl_tensor.data = const_cast( + reinterpret_cast(sliced_buffer->address())); } else { dlm_tensor->dl_tensor.data = const_cast( reinterpret_cast(array_ref->buffers[1]->address())); diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index 8483abf410c..0417f859f8c 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -3582,12 +3582,26 @@ def test_dlpack(value_type, np_type): result = np.from_dlpack(arr) np.testing.assert_array_equal(result, expected, strict=True) - # arr_sliced = arr.slice(1, 1) - # DLTensor = arr_sliced.__dlpack__() - # assert PyCapsule_IsValid(DLTensor, b"dltensor") is True - # expected = np.array([2], 
dtype=np_type) - # result = np.from_dlpack(arr_sliced) - # np.testing.assert_array_equal(result, expected, strict=True) + arr_sliced = arr.slice(1, 1) + DLTensor = arr_sliced.__dlpack__() + assert PyCapsule_IsValid(DLTensor, b"dltensor") is True + expected = np.array([2], dtype=np_type) + result = np.from_dlpack(arr_sliced) + np.testing.assert_array_equal(result, expected, strict=True) + + arr_sliced = arr.slice(0, 1) + DLTensor = arr_sliced.__dlpack__() + assert PyCapsule_IsValid(DLTensor, b"dltensor") is True + expected = np.array([1], dtype=np_type) + result = np.from_dlpack(arr_sliced) + np.testing.assert_array_equal(result, expected, strict=True) + + arr_sliced = arr.slice(1) + DLTensor = arr_sliced.__dlpack__() + assert PyCapsule_IsValid(DLTensor, b"dltensor") is True + expected = np.array([2, 3], dtype=np_type) + result = np.from_dlpack(arr_sliced) + np.testing.assert_array_equal(result, expected, strict=True) arr_zero = pa.array([], type=value_type) DLTensor = arr_zero.__dlpack__() From 11d48af1e21d84c45f5d629795c03ab7163bd3d9 Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Tue, 28 Nov 2023 16:01:04 +0100 Subject: [PATCH 32/73] Linter --- cpp/src/arrow/c/dlpack.cc | 11 +++++------ python/pyarrow/tests/test_array.py | 3 ++- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/cpp/src/arrow/c/dlpack.cc b/cpp/src/arrow/c/dlpack.cc index 67a7e66d65c..4709f6a7443 100644 --- a/cpp/src/arrow/c/dlpack.cc +++ b/cpp/src/arrow/c/dlpack.cc @@ -95,17 +95,16 @@ Status ExportArray(const std::shared_ptr& arr, DLManagedTensor** out) { dlm_tensor->deleter = &deleter; // Define the data pointer to the DLTensor - // If array is of length 0, data poin ter should be NULL + // If array is of length 0, data pointer should be NULL if (arr->length() == 0) { dlm_tensor->dl_tensor.data = NULL; } else if (arr->offset() > 0) { const auto byte_width = arr->type()->byte_width(); const auto start = arr->offset() * byte_width; - ARROW_ASSIGN_OR_RAISE( - auto sliced_buffer, - 
SliceBufferSafe(array_ref->buffers[1], start)); - dlm_tensor->dl_tensor.data = const_cast( - reinterpret_cast(sliced_buffer->address())); + ARROW_ASSIGN_OR_RAISE(auto sliced_buffer, + SliceBufferSafe(array_ref->buffers[1], start)); + dlm_tensor->dl_tensor.data = + const_cast(reinterpret_cast(sliced_buffer->address())); } else { dlm_tensor->dl_tensor.data = const_cast( reinterpret_cast(array_ref->buffers[1]->address())); diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index 0417f859f8c..fc9bf1c93bb 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -3651,11 +3651,12 @@ def test_dlpack_not_supported(): arr = pa.array([True, False, True]) np.from_dlpack(arr) + def test_dlpack_cuda_not_supported(): cuda = pytest.importorskip("pyarrow.cuda") schema = pa.schema([pa.field('f0', pa.int16())]) - a0 = pa.array([1, 2, 3], type = pa.int16()) + a0 = pa.array([1, 2, 3], type=pa.int16()) batch = pa.record_batch([a0], schema=schema) cbuf = cuda.serialize_record_batch(batch, cuda.Context(0)) From 5742e1d1149e8835d6fef24975d38ad7b8ca20d8 Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Tue, 28 Nov 2023 16:06:17 +0100 Subject: [PATCH 33/73] Relax null_bitmap check to null_count check --- cpp/src/arrow/c/dlpack.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/c/dlpack.cc b/cpp/src/arrow/c/dlpack.cc index 4709f6a7443..14a187e3a89 100644 --- a/cpp/src/arrow/c/dlpack.cc +++ b/cpp/src/arrow/c/dlpack.cc @@ -72,7 +72,7 @@ static void deleter(DLManagedTensor* arg) { } Status ExportArray(const std::shared_ptr& arr, DLManagedTensor** out) { - if (arr->null_bitmap() != NULLPTR) { + if (arr->null_count() > 0) { return Status::TypeError( "Can only use __dlpack__ on arrays with no validity buffer."); } From 024f53587bbd9af31008cb6a2c542bbcb7021224 Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Wed, 29 Nov 2023 10:19:25 +0100 Subject: [PATCH 34/73] Update python tests --- 
python/pyarrow/tests/test_array.py | 33 +++++++++--------------------- 1 file changed, 10 insertions(+), 23 deletions(-) diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index fc9bf1c93bb..46a3f34fd6c 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -3565,6 +3565,7 @@ def PyCapsule_IsValid(capsule, name): (pa.int16(), np.int16), (pa.int32(), np.int32), (pa.int64(), np.int64), + (pa.float16(), np.float16), (pa.float32(), np.float32), (pa.float64(), np.float64), ] @@ -3575,10 +3576,10 @@ def test_dlpack(value_type, np_type): "strict keyward in assert_array_equal added in numpy version " "1.24.0") - arr = pa.array([1, 2, 3], type=value_type) + expected = np.array([1, 2, 3], dtype=np_type) + arr = pa.array(expected, type=value_type) DLTensor = arr.__dlpack__() assert PyCapsule_IsValid(DLTensor, b"dltensor") is True - expected = np.array([1, 2, 3], dtype=np_type) result = np.from_dlpack(arr) np.testing.assert_array_equal(result, expected, strict=True) @@ -3611,44 +3612,30 @@ def test_dlpack(value_type, np_type): np.testing.assert_array_equal(result, expected, strict=True) -def test_dlpack_float_16(): - if Version(np.__version__) < Version("1.24.0"): - pytest.skip("No dlpack support in numpy versions older than 1.22.0, " - "strict keyward in assert_array_equal added in numpy version " - "1.24.0") - - expected = np.array([1, 2, 3], dtype=np.float16) - arr = pa.array(expected, type=pa.float16()) - DLTensor = arr.__dlpack__() - assert PyCapsule_IsValid(DLTensor, b"dltensor") is True - result = np.from_dlpack(arr) - np.testing.assert_array_equal(result, expected, strict=True) - - def test_dlpack_not_supported(): if Version(np.__version__) < Version("1.22.0"): pytest.skip("No dlpack support in numpy versions older than 1.22.0.") + arr = pa.array([1, None, 3]) with pytest.raises(TypeError, match="Can only use __dlpack__ " "on arrays with no validity buffer."): - arr = pa.array([1, None, 3]) 
np.from_dlpack(arr) + arr = pa.array( + [[0, 1], [3, 4]], + type=pa.list_(pa.int32()) + ) with pytest.raises(TypeError, match="Can only use __dlpack__ on primitive arrays"): - arr = pa.array( - [[0, 1], [3, 4]], - type=pa.list_(pa.int32()) - ) np.from_dlpack(arr) + arr = pa.array([]) with pytest.raises(TypeError, match="Can only use __dlpack__ on primitive arrays"): - arr = pa.array([]) np.from_dlpack(arr) # DLPack doesn't support bit-packed boolean values + arr = pa.array([True, False, True]) with pytest.raises(TypeError, match="Bit-packed boolean data type " "not supported by DLPack."): - arr = pa.array([True, False, True]) np.from_dlpack(arr) From e6d927c16bf7b6fbeb74073a6565add5646b2702 Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Wed, 29 Nov 2023 10:20:51 +0100 Subject: [PATCH 35/73] Update dlpack.cc --- cpp/src/arrow/c/dlpack.cc | 2 +- python/pyarrow/tests/test_array.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/c/dlpack.cc b/cpp/src/arrow/c/dlpack.cc index 14a187e3a89..ae37f67238d 100644 --- a/cpp/src/arrow/c/dlpack.cc +++ b/cpp/src/arrow/c/dlpack.cc @@ -74,7 +74,7 @@ static void deleter(DLManagedTensor* arg) { Status ExportArray(const std::shared_ptr& arr, DLManagedTensor** out) { if (arr->null_count() > 0) { return Status::TypeError( - "Can only use __dlpack__ on arrays with no validity buffer."); + "Can only use __dlpack__ on arrays with no nulls."); } // Define the DLDataType struct diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index 46a3f34fd6c..3c0c63753c7 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -3618,7 +3618,7 @@ def test_dlpack_not_supported(): arr = pa.array([1, None, 3]) with pytest.raises(TypeError, match="Can only use __dlpack__ " - "on arrays with no validity buffer."): + "on arrays with no nulls."): np.from_dlpack(arr) arr = pa.array( From da9baf2201c705080f2cd36e676c91d4ada301b6 Mon Sep 17 00:00:00 2001 From: 
AlenkaF Date: Wed, 29 Nov 2023 10:34:11 +0100 Subject: [PATCH 36/73] Update libarrow.pxd --- python/pyarrow/includes/libarrow.pxd | 9 --------- 1 file changed, 9 deletions(-) diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index 0b9c6d4cb35..a8b07015e5e 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -1220,15 +1220,6 @@ cdef extern from "arrow/c/dlpack_abi.h" nogil: DLDeviceType device_type int32_t device_id - cdef enum DLDataTypeCode: - kDLInt = 0 - kDLUInt = 1 - kDLFloat = 2 - kDLOpaqueHandle = 3 - kDLBfloat = 4 - kDLComplex = 5 - kDLBool = 6 - ctypedef struct DLDataType: uint8_t code uint8_t bits From 5e2bb80724eeeacec96121526ae2218dbfebc902 Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Wed, 29 Nov 2023 10:37:56 +0100 Subject: [PATCH 37/73] Fix typo in cuda test --- python/pyarrow/tests/test_array.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index 3c0c63753c7..724b0c3ea21 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -3648,7 +3648,7 @@ def test_dlpack_cuda_not_supported(): cbuf = cuda.serialize_record_batch(batch, cuda.Context(0)) cbatch = cuda.read_record_batch(cbuf, batch.schema) - carr = cbatch["a0"] + carr = cbatch["f0"] # CudaBuffers not yet supported with pytest.raises(NotImplementedError, match="DLPack support is implemented " From 3af44e15d8c271d2e62927f6ebd6619cb97eba2d Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Wed, 29 Nov 2023 16:09:05 +0100 Subject: [PATCH 38/73] Add C++ tests --- cpp/src/arrow/c/CMakeLists.txt | 1 + cpp/src/arrow/c/dlpack_test.cc | 135 +++++++++++++++++++++++++++++++++ 2 files changed, 136 insertions(+) create mode 100644 cpp/src/arrow/c/dlpack_test.cc diff --git a/cpp/src/arrow/c/CMakeLists.txt b/cpp/src/arrow/c/CMakeLists.txt index 3765477ba09..81a81cd3f11 100644 --- a/cpp/src/arrow/c/CMakeLists.txt +++ 
b/cpp/src/arrow/c/CMakeLists.txt @@ -16,6 +16,7 @@ # under the License. add_arrow_test(bridge_test PREFIX "arrow-c") +add_arrow_test(dlpack_test) add_arrow_benchmark(bridge_benchmark) diff --git a/cpp/src/arrow/c/dlpack_test.cc b/cpp/src/arrow/c/dlpack_test.cc new file mode 100644 index 00000000000..a93cfcfc03b --- /dev/null +++ b/cpp/src/arrow/c/dlpack_test.cc @@ -0,0 +1,135 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include + +#include "arrow/array/array_base.h" +#include "arrow/c/dlpack.h" +#include "arrow/c/dlpack_abi.h" +#include "arrow/testing/gtest_util.h" +#include "arrow/testing/random.h" + +namespace arrow { + +namespace dlpack { + +// using ExportArray = arrow::dlpack::ExportArray; + +class TestExportArray : public ::testing::Test { + public: + void SetUp() {} +}; + +static std::vector> TestExportArrayAgainstTheseTypes() { + return { + int8(), uint8(), int16(), uint16(), int32(), uint32(), + int64(), uint64(), float16(), float32(), float64(), + }; +} + +Result getDLDataType(const std::shared_ptr& type) { + switch (type->id()) { + case Type::INT8: + case Type::INT16: + case Type::INT32: + case Type::INT64: + return DLDataTypeCode::kDLInt; + case Type::UINT8: + case Type::UINT16: + case Type::UINT32: + case Type::UINT64: + return DLDataTypeCode::kDLUInt; + case Type::HALF_FLOAT: + case Type::FLOAT: + case Type::DOUBLE: + return DLDataTypeCode::kDLFloat; + case Type::BOOL: + return Status::TypeError("Bit-packed boolean data type not supported by DLPack."); + default: + return Status::TypeError( + "Can only use __dlpack__ on primitive arrays without NullType and Decimal " + "types."); + } +} + +auto check_dlptensor = [](const std::shared_ptr& arr, std::shared_ptr t, + int64_t length) { + DLManagedTensor* dlmtensor; + ASSERT_OK(arrow::dlpack::ExportArray(arr, &dlmtensor)); + auto dltensor = dlmtensor->dl_tensor; + + const auto byte_width = arr->type()->byte_width(); + const auto start = arr->offset() * byte_width; + ASSERT_OK_AND_ASSIGN(auto sliced_buffer, + SliceBufferSafe(arr->data()->buffers[1], start)); + ASSERT_EQ(sliced_buffer->data(), dltensor.data); + + ASSERT_EQ(0, dltensor.byte_offset); + ASSERT_EQ(NULL, dltensor.strides); + ASSERT_EQ(length, dltensor.shape[0]); + ASSERT_EQ(1, dltensor.ndim); + + ASSERT_OK_AND_ASSIGN(auto code, getDLDataType(t)); + ASSERT_EQ(code, dltensor.dtype.code); + + ASSERT_EQ(t->bit_width(), dltensor.dtype.bits); + ASSERT_EQ(1, 
dltensor.dtype.lanes); + ASSERT_EQ(DLDeviceType::kDLCPU, dltensor.device.device_type); + ASSERT_EQ(0, dltensor.device.device_id); +}; + +TEST_F(TestExportArray, TestSupportedArray) { + random::RandomArrayGenerator gen(0); + + for (auto type : TestExportArrayAgainstTheseTypes()) { + const std::shared_ptr array = gen.ArrayOf(type, 10, 0); + check_dlptensor(array, type, 10); + ASSERT_OK_AND_ASSIGN(auto sliced_1, array->SliceSafe(1, 5)); + check_dlptensor(sliced_1, type, 5); + ASSERT_OK_AND_ASSIGN(auto sliced_2, array->SliceSafe(0, 5)); + check_dlptensor(sliced_2, type, 5); + ASSERT_OK_AND_ASSIGN(auto sliced_3, array->SliceSafe(3)); + check_dlptensor(sliced_3, type, 7); + }; +} + +TEST_F(TestExportArray, TestUnSupportedArray) { + random::RandomArrayGenerator gen(0); + + const std::shared_ptr array_with_null = gen.Int8(10, 1, 100, 1); + DLManagedTensor* dlmtensor_1; + ASSERT_RAISES_WITH_MESSAGE( + TypeError, "Type error: Can only use __dlpack__ on arrays with no nulls.", + arrow::dlpack::ExportArray(array_with_null, &dlmtensor_1)); + + const std::shared_ptr array_string = gen.String(10, 0, 10, 0); + DLManagedTensor* dlmtensor_2; + ASSERT_RAISES_WITH_MESSAGE(TypeError, + "Type error: Can only use __dlpack__ on primitive arrays " + "without NullType and Decimal types.", + arrow::dlpack::ExportArray(array_string, &dlmtensor_2)); + + const std::shared_ptr array_boolean = gen.Boolean(10, 0.5, 0); + DLManagedTensor* dlmtensor_3; + ASSERT_RAISES_WITH_MESSAGE( + TypeError, "Type error: Bit-packed boolean data type not supported by DLPack.", + arrow::dlpack::ExportArray(array_boolean, &dlmtensor_3)); +} + +} // namespace dlpack + +} // namespace arrow From 1f812776ea802771350e46655aa1efe12695a6cc Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Thu, 30 Nov 2023 11:40:42 +0100 Subject: [PATCH 39/73] Update C++ tests --- cpp/src/arrow/c/dlpack_test.cc | 53 ++++++++++++---------------------- 1 file changed, 18 insertions(+), 35 deletions(-) diff --git a/cpp/src/arrow/c/dlpack_test.cc 
b/cpp/src/arrow/c/dlpack_test.cc index a93cfcfc03b..cdf95e6fe5f 100644 --- a/cpp/src/arrow/c/dlpack_test.cc +++ b/cpp/src/arrow/c/dlpack_test.cc @@ -41,33 +41,17 @@ static std::vector> TestExportArrayAgainstTheseTypes() }; } -Result getDLDataType(const std::shared_ptr& type) { - switch (type->id()) { - case Type::INT8: - case Type::INT16: - case Type::INT32: - case Type::INT64: - return DLDataTypeCode::kDLInt; - case Type::UINT8: - case Type::UINT16: - case Type::UINT32: - case Type::UINT64: - return DLDataTypeCode::kDLUInt; - case Type::HALF_FLOAT: - case Type::FLOAT: - case Type::DOUBLE: - return DLDataTypeCode::kDLFloat; - case Type::BOOL: - return Status::TypeError("Bit-packed boolean data type not supported by DLPack."); - default: - return Status::TypeError( - "Can only use __dlpack__ on primitive arrays without NullType and Decimal " - "types."); - } +static std::vector TestExpectedDLPackDataTypes() { + return { + DLDataTypeCode::kDLInt, DLDataTypeCode::kDLUInt, DLDataTypeCode::kDLInt, + DLDataTypeCode::kDLUInt, DLDataTypeCode::kDLInt, DLDataTypeCode::kDLUInt, + DLDataTypeCode::kDLInt, DLDataTypeCode::kDLUInt, DLDataTypeCode::kDLFloat, + DLDataTypeCode::kDLFloat, DLDataTypeCode::kDLFloat, + }; } -auto check_dlptensor = [](const std::shared_ptr& arr, std::shared_ptr t, - int64_t length) { +auto check_dlptensor = [](const std::shared_ptr& arr, std::shared_ptr arrow_type, + DLDataTypeCode dlpack_type, int64_t length) { DLManagedTensor* dlmtensor; ASSERT_OK(arrow::dlpack::ExportArray(arr, &dlmtensor)); auto dltensor = dlmtensor->dl_tensor; @@ -83,10 +67,9 @@ auto check_dlptensor = [](const std::shared_ptr& arr, std::shared_ptrbit_width(), dltensor.dtype.bits); + ASSERT_EQ(arrow_type->bit_width(), dltensor.dtype.bits); ASSERT_EQ(1, dltensor.dtype.lanes); ASSERT_EQ(DLDeviceType::kDLCPU, dltensor.device.device_type); ASSERT_EQ(0, dltensor.device.device_id); @@ -95,16 +78,16 @@ auto check_dlptensor = [](const std::shared_ptr& arr, std::shared_ptr array = 
gen.ArrayOf(type, 10, 0); - check_dlptensor(array, type, 10); + for (int64_t i = 0; i < 11; ++i) { + const std::shared_ptr array = gen.ArrayOf(TestExportArrayAgainstTheseTypes()[i], 10, 0); + check_dlptensor(array, TestExportArrayAgainstTheseTypes()[i], TestExpectedDLPackDataTypes()[i], 10); ASSERT_OK_AND_ASSIGN(auto sliced_1, array->SliceSafe(1, 5)); - check_dlptensor(sliced_1, type, 5); + check_dlptensor(sliced_1, TestExportArrayAgainstTheseTypes()[i], TestExpectedDLPackDataTypes()[i], 5); ASSERT_OK_AND_ASSIGN(auto sliced_2, array->SliceSafe(0, 5)); - check_dlptensor(sliced_2, type, 5); + check_dlptensor(sliced_2, TestExportArrayAgainstTheseTypes()[i], TestExpectedDLPackDataTypes()[i], 5); ASSERT_OK_AND_ASSIGN(auto sliced_3, array->SliceSafe(3)); - check_dlptensor(sliced_3, type, 7); - }; + check_dlptensor(sliced_3, TestExportArrayAgainstTheseTypes()[i], TestExpectedDLPackDataTypes()[i], 7); + } } TEST_F(TestExportArray, TestUnSupportedArray) { From cb0a942528a9307f7cb00e9844acf87fc6997d02 Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Thu, 30 Nov 2023 11:41:11 +0100 Subject: [PATCH 40/73] Linter --- cpp/src/arrow/c/dlpack_test.cc | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/cpp/src/arrow/c/dlpack_test.cc b/cpp/src/arrow/c/dlpack_test.cc index cdf95e6fe5f..cd1877ce0f1 100644 --- a/cpp/src/arrow/c/dlpack_test.cc +++ b/cpp/src/arrow/c/dlpack_test.cc @@ -50,7 +50,8 @@ static std::vector TestExpectedDLPackDataTypes() { }; } -auto check_dlptensor = [](const std::shared_ptr& arr, std::shared_ptr arrow_type, +auto check_dlptensor = [](const std::shared_ptr& arr, + std::shared_ptr arrow_type, DLDataTypeCode dlpack_type, int64_t length) { DLManagedTensor* dlmtensor; ASSERT_OK(arrow::dlpack::ExportArray(arr, &dlmtensor)); @@ -79,14 +80,19 @@ TEST_F(TestExportArray, TestSupportedArray) { random::RandomArrayGenerator gen(0); for (int64_t i = 0; i < 11; ++i) { - const std::shared_ptr array = 
gen.ArrayOf(TestExportArrayAgainstTheseTypes()[i], 10, 0); - check_dlptensor(array, TestExportArrayAgainstTheseTypes()[i], TestExpectedDLPackDataTypes()[i], 10); + const std::shared_ptr array = + gen.ArrayOf(TestExportArrayAgainstTheseTypes()[i], 10, 0); + check_dlptensor(array, TestExportArrayAgainstTheseTypes()[i], + TestExpectedDLPackDataTypes()[i], 10); ASSERT_OK_AND_ASSIGN(auto sliced_1, array->SliceSafe(1, 5)); - check_dlptensor(sliced_1, TestExportArrayAgainstTheseTypes()[i], TestExpectedDLPackDataTypes()[i], 5); + check_dlptensor(sliced_1, TestExportArrayAgainstTheseTypes()[i], + TestExpectedDLPackDataTypes()[i], 5); ASSERT_OK_AND_ASSIGN(auto sliced_2, array->SliceSafe(0, 5)); - check_dlptensor(sliced_2, TestExportArrayAgainstTheseTypes()[i], TestExpectedDLPackDataTypes()[i], 5); + check_dlptensor(sliced_2, TestExportArrayAgainstTheseTypes()[i], + TestExpectedDLPackDataTypes()[i], 5); ASSERT_OK_AND_ASSIGN(auto sliced_3, array->SliceSafe(3)); - check_dlptensor(sliced_3, TestExportArrayAgainstTheseTypes()[i], TestExpectedDLPackDataTypes()[i], 7); + check_dlptensor(sliced_3, TestExportArrayAgainstTheseTypes()[i], + TestExpectedDLPackDataTypes()[i], 7); } } From 46206eca5945967c43d135b68d58aded0df29c89 Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Thu, 30 Nov 2023 11:44:28 +0100 Subject: [PATCH 41/73] Remove redundant code in libarrow.pxd --- python/pyarrow/includes/libarrow.pxd | 36 ---------------------------- 1 file changed, 36 deletions(-) diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index a8b07015e5e..ffc46d91c4c 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -1200,43 +1200,7 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: cdef extern from "arrow/c/dlpack_abi.h" nogil: - cdef enum DLDeviceType: - kDLCPU = 1 - kDLCUDA = 2 - kDLCUDAHost = 3 - kDLOpenCL = 4 - kDLVulkan = 7 - kDLMetal = 8 - kDLVPI = 9 - kDLROCM = 10 - kDLROCMHost = 11 - kDLExtDev = 12 - 
kDLCUDAManaged = 13 - kDLOneAPI = 14 - kDLWebGPU = 15 - kDLHexagon = 16 - - ctypedef struct DLDevice: - DLDeviceType device_type - int32_t device_id - - ctypedef struct DLDataType: - uint8_t code - uint8_t bits - uint16_t lanes - - ctypedef struct DLTensor: - void* data - DLDevice device - int32_t ndim - DLDataType dtype - int64_t* shape - int64_t* strides - uint64_t byte_offset - ctypedef struct DLManagedTensor: - DLTensor dl_tensor - void* manager_ctx void (*deleter)(DLManagedTensor*) From 0619c358d031935067f20886e75abd635beae04f Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Thu, 30 Nov 2023 15:57:23 +0100 Subject: [PATCH 42/73] Add __dlpack_device__ implementation --- cpp/src/arrow/c/dlpack.cc | 12 ++++++++++-- cpp/src/arrow/c/dlpack.h | 8 ++++++++ cpp/src/arrow/c/dlpack_test.cc | 3 +++ python/pyarrow/_dlpack.pxi | 5 +++++ python/pyarrow/array.pxi | 6 ++++++ python/pyarrow/includes/libarrow.pxd | 18 ++++++++++++++++++ python/pyarrow/tests/test_array.py | 14 ++++++++++++++ 7 files changed, 64 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/c/dlpack.cc b/cpp/src/arrow/c/dlpack.cc index ae37f67238d..a5e01d70bee 100644 --- a/cpp/src/arrow/c/dlpack.cc +++ b/cpp/src/arrow/c/dlpack.cc @@ -73,8 +73,7 @@ static void deleter(DLManagedTensor* arg) { Status ExportArray(const std::shared_ptr& arr, DLManagedTensor** out) { if (arr->null_count() > 0) { - return Status::TypeError( - "Can only use __dlpack__ on arrays with no nulls."); + return Status::TypeError("Can only use __dlpack__ on arrays with no nulls."); } // Define the DLDataType struct @@ -134,6 +133,15 @@ Status ExportArray(const std::shared_ptr& arr, DLManagedTensor** out) { return Status::OK(); } +Result ExportDeviceType(const std::shared_ptr& arr) { + if (arr->data()->buffers[1]->device_type() == DeviceAllocationType::kCPU) { + return DLDeviceType::kDLCPU; + } else { + return Status::NotImplemented( + "DLPack support is implemented only for buffers on CPU device."); + } +} + } // namespace dlpack } // 
namespace arrow diff --git a/cpp/src/arrow/c/dlpack.h b/cpp/src/arrow/c/dlpack.h index 8988ed63c1a..93468544f68 100644 --- a/cpp/src/arrow/c/dlpack.h +++ b/cpp/src/arrow/c/dlpack.h @@ -43,6 +43,14 @@ namespace dlpack { ARROW_EXPORT Status ExportArray(const std::shared_ptr& arr, DLManagedTensor** out); +/// \brief Get DLDeviceType enumerator specifying the +/// type of the device data is stored on. +/// +/// \param[in] arr Arrow array +/// \return DLDeviceType +ARROW_EXPORT +Result ExportDeviceType(const std::shared_ptr& arr); + } // namespace dlpack } // namespace arrow diff --git a/cpp/src/arrow/c/dlpack_test.cc b/cpp/src/arrow/c/dlpack_test.cc index cd1877ce0f1..5f28f5dad70 100644 --- a/cpp/src/arrow/c/dlpack_test.cc +++ b/cpp/src/arrow/c/dlpack_test.cc @@ -74,6 +74,9 @@ auto check_dlptensor = [](const std::shared_ptr& arr, ASSERT_EQ(1, dltensor.dtype.lanes); ASSERT_EQ(DLDeviceType::kDLCPU, dltensor.device.device_type); ASSERT_EQ(0, dltensor.device.device_id); + + ASSERT_OK_AND_ASSIGN(auto device_type, arrow::dlpack::ExportDeviceType(arr)); + ASSERT_EQ(DLDeviceType::kDLCPU, device_type); }; TEST_F(TestExportArray, TestSupportedArray) { diff --git a/python/pyarrow/_dlpack.pxi b/python/pyarrow/_dlpack.pxi index a6f605aa2ef..5fda8c82e8f 100644 --- a/python/pyarrow/_dlpack.pxi +++ b/python/pyarrow/_dlpack.pxi @@ -53,3 +53,8 @@ cpdef object to_dlpack(Array arr) except *: check_status(ExportToDLPack(pyarrow_unwrap_array(arr), &dlm_tensor)) return PyCapsule_New(dlm_tensor, 'dltensor', pycapsule_deleter) + +cpdef object to_dlpack(Array arr) except *: + + cdef DLDeviceType device_type + return GetResultValue(ExportDeviceType(pyarrow_unwrap_array(arr))) diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index dced7a981e3..2c086621ae0 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -1800,6 +1800,12 @@ cdef class Array(_PandasConvertible): "Only stream=None is supported." 
) + def __dlpack_device__(self): + """ + Performs the operation __dlpack_device__. + """ + return dlpack_device(self) + cdef _array_like_to_pandas(obj, options, types_mapper): cdef: diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index ffc46d91c4c..5d127e5d093 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -1200,6 +1200,22 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: cdef extern from "arrow/c/dlpack_abi.h" nogil: + cdef enum DLDeviceType: + kDLCPU = 1 + kDLCUDA = 2 + kDLCUDAHost = 3 + kDLOpenCL = 4 + kDLVulkan = 7 + kDLMetal = 8 + kDLVPI = 9 + kDLROCM = 10 + kDLROCMHost = 11 + kDLExtDev = 12 + kDLCUDAManaged = 13 + kDLOneAPI = 14 + kDLWebGPU = 15 + kDLHexagon = 16 + ctypedef struct DLManagedTensor: void (*deleter)(DLManagedTensor*) @@ -1208,6 +1224,8 @@ cdef extern from "arrow/c/dlpack.h" namespace "arrow::dlpack" nogil: CStatus ExportToDLPack" arrow::dlpack::ExportArray"(const shared_ptr[CArray]& arr, DLManagedTensor** out) + CResult[DLDeviceType] ExportDeviceType(const shared_ptr[CArray]& arr) + cdef extern from "arrow/builder.h" namespace "arrow" nogil: diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index 724b0c3ea21..8be0916fcd8 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -3583,6 +3583,8 @@ def test_dlpack(value_type, np_type): result = np.from_dlpack(arr) np.testing.assert_array_equal(result, expected, strict=True) + assert arr.__dlpack_device__() == 1 + arr_sliced = arr.slice(1, 1) DLTensor = arr_sliced.__dlpack__() assert PyCapsule_IsValid(DLTensor, b"dltensor") is True @@ -3590,6 +3592,8 @@ def test_dlpack(value_type, np_type): result = np.from_dlpack(arr_sliced) np.testing.assert_array_equal(result, expected, strict=True) + assert arr.__dlpack_device__() == 1 + arr_sliced = arr.slice(0, 1) DLTensor = arr_sliced.__dlpack__() assert PyCapsule_IsValid(DLTensor, 
b"dltensor") is True @@ -3597,6 +3601,8 @@ def test_dlpack(value_type, np_type): result = np.from_dlpack(arr_sliced) np.testing.assert_array_equal(result, expected, strict=True) + assert arr.__dlpack_device__() == 1 + arr_sliced = arr.slice(1) DLTensor = arr_sliced.__dlpack__() assert PyCapsule_IsValid(DLTensor, b"dltensor") is True @@ -3604,6 +3610,8 @@ def test_dlpack(value_type, np_type): result = np.from_dlpack(arr_sliced) np.testing.assert_array_equal(result, expected, strict=True) + assert arr.__dlpack_device__() == 1 + arr_zero = pa.array([], type=value_type) DLTensor = arr_zero.__dlpack__() assert PyCapsule_IsValid(DLTensor, b"dltensor") is True @@ -3611,6 +3619,8 @@ def test_dlpack(value_type, np_type): result = np.from_dlpack(arr_zero) np.testing.assert_array_equal(result, expected, strict=True) + assert arr.__dlpack_device__() == 1 + def test_dlpack_not_supported(): if Version(np.__version__) < Version("1.22.0"): @@ -3654,3 +3664,7 @@ def test_dlpack_cuda_not_supported(): with pytest.raises(NotImplementedError, match="DLPack support is implemented " "only for buffers on CPU device."): np.from_dlpack(carr) + + with pytest.raises(NotImplementedError, match="DLPack support is implemented " + "only for buffers on CPU device."): + carr.__dlpack_device__() From 55246ea9c4d52212d43a507705f741b9f7dbb1ec Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Thu, 30 Nov 2023 18:24:04 +0100 Subject: [PATCH 43/73] Update __dlpack_device__ - ExportDevice --- cpp/src/arrow/c/dlpack.cc | 8 ++++++-- cpp/src/arrow/c/dlpack.h | 9 +++++---- cpp/src/arrow/c/dlpack_test.cc | 6 ++++-- python/pyarrow/_dlpack.pxi | 8 +++++--- python/pyarrow/array.pxi | 6 ++++++ python/pyarrow/includes/libarrow.pxd | 21 ++++++--------------- python/pyarrow/tests/test_array.py | 10 +++++----- 7 files changed, 37 insertions(+), 31 deletions(-) diff --git a/cpp/src/arrow/c/dlpack.cc b/cpp/src/arrow/c/dlpack.cc index a5e01d70bee..7b52b6090cd 100644 --- a/cpp/src/arrow/c/dlpack.cc +++ 
b/cpp/src/arrow/c/dlpack.cc @@ -133,9 +133,13 @@ Status ExportArray(const std::shared_ptr& arr, DLManagedTensor** out) { return Status::OK(); } -Result ExportDeviceType(const std::shared_ptr& arr) { +Status ExportDevice(const std::shared_ptr& arr, DLDevice* out) { + DLDevice device; if (arr->data()->buffers[1]->device_type() == DeviceAllocationType::kCPU) { - return DLDeviceType::kDLCPU; + device.device_id = 0; + device.device_type = DLDeviceType::kDLCPU; + *out = device; + return Status::OK(); } else { return Status::NotImplemented( "DLPack support is implemented only for buffers on CPU device."); diff --git a/cpp/src/arrow/c/dlpack.h b/cpp/src/arrow/c/dlpack.h index 93468544f68..fc15b6c58cd 100644 --- a/cpp/src/arrow/c/dlpack.h +++ b/cpp/src/arrow/c/dlpack.h @@ -43,13 +43,14 @@ namespace dlpack { ARROW_EXPORT Status ExportArray(const std::shared_ptr& arr, DLManagedTensor** out); -/// \brief Get DLDeviceType enumerator specifying the -/// type of the device data is stored on. +/// \brief Get DLDevice with enumerator specifying the +/// type of the device data is stored on and index of the +/// device which is 0 by default for CPU. 
/// /// \param[in] arr Arrow array -/// \return DLDeviceType +/// \return DLDevice ARROW_EXPORT -Result ExportDeviceType(const std::shared_ptr& arr); +Status ExportDevice(const std::shared_ptr& arr, DLDevice* out); } // namespace dlpack diff --git a/cpp/src/arrow/c/dlpack_test.cc b/cpp/src/arrow/c/dlpack_test.cc index 5f28f5dad70..614fba17df9 100644 --- a/cpp/src/arrow/c/dlpack_test.cc +++ b/cpp/src/arrow/c/dlpack_test.cc @@ -75,8 +75,10 @@ auto check_dlptensor = [](const std::shared_ptr& arr, ASSERT_EQ(DLDeviceType::kDLCPU, dltensor.device.device_type); ASSERT_EQ(0, dltensor.device.device_id); - ASSERT_OK_AND_ASSIGN(auto device_type, arrow::dlpack::ExportDeviceType(arr)); - ASSERT_EQ(DLDeviceType::kDLCPU, device_type); + DLDevice device; + ASSERT_OK(arrow::dlpack::ExportDevice(arr, &device)); + ASSERT_EQ(DLDeviceType::kDLCPU, device.device_type); + ASSERT_EQ(0, device.device_id); }; TEST_F(TestExportArray, TestSupportedArray) { diff --git a/python/pyarrow/_dlpack.pxi b/python/pyarrow/_dlpack.pxi index 5fda8c82e8f..43559f574ea 100644 --- a/python/pyarrow/_dlpack.pxi +++ b/python/pyarrow/_dlpack.pxi @@ -54,7 +54,9 @@ cpdef object to_dlpack(Array arr) except *: return PyCapsule_New(dlm_tensor, 'dltensor', pycapsule_deleter) -cpdef object to_dlpack(Array arr) except *: +cpdef object dlpack_device(Array arr) except *: + + cdef DLDevice device + check_status(ExportDevice(pyarrow_unwrap_array(arr), &device)) - cdef DLDeviceType device_type - return GetResultValue(ExportDeviceType(pyarrow_unwrap_array(arr))) + return (device.device_type, device.device_id) diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index 2c086621ae0..e0391090bf1 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -1803,6 +1803,12 @@ cdef class Array(_PandasConvertible): def __dlpack_device__(self): """ Performs the operation __dlpack_device__. 
+ + Returns + ------- + tuple : Tuple[DLDeviceType, int] + Tuple with enumerator specifying the type of the device + and index of the device which is 0 by default for CPU. """ return dlpack_device(self) diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index 5d127e5d093..8210a78ee07 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -1200,21 +1200,12 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: cdef extern from "arrow/c/dlpack_abi.h" nogil: - cdef enum DLDeviceType: + ctypedef enum DLDeviceType: kDLCPU = 1 - kDLCUDA = 2 - kDLCUDAHost = 3 - kDLOpenCL = 4 - kDLVulkan = 7 - kDLMetal = 8 - kDLVPI = 9 - kDLROCM = 10 - kDLROCMHost = 11 - kDLExtDev = 12 - kDLCUDAManaged = 13 - kDLOneAPI = 14 - kDLWebGPU = 15 - kDLHexagon = 16 + + ctypedef struct DLDevice: + DLDeviceType device_type + int32_t device_id ctypedef struct DLManagedTensor: void (*deleter)(DLManagedTensor*) @@ -1224,7 +1215,7 @@ cdef extern from "arrow/c/dlpack.h" namespace "arrow::dlpack" nogil: CStatus ExportToDLPack" arrow::dlpack::ExportArray"(const shared_ptr[CArray]& arr, DLManagedTensor** out) - CResult[DLDeviceType] ExportDeviceType(const shared_ptr[CArray]& arr) + CStatus ExportDevice(const shared_ptr[CArray]& arr, DLDevice* out) cdef extern from "arrow/builder.h" namespace "arrow" nogil: diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index 8be0916fcd8..3150eb006e9 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -3583,7 +3583,7 @@ def test_dlpack(value_type, np_type): result = np.from_dlpack(arr) np.testing.assert_array_equal(result, expected, strict=True) - assert arr.__dlpack_device__() == 1 + assert arr.__dlpack_device__() == (1, 0) arr_sliced = arr.slice(1, 1) DLTensor = arr_sliced.__dlpack__() @@ -3592,7 +3592,7 @@ def test_dlpack(value_type, np_type): result = np.from_dlpack(arr_sliced) 
np.testing.assert_array_equal(result, expected, strict=True) - assert arr.__dlpack_device__() == 1 + assert arr.__dlpack_device__() == (1, 0) arr_sliced = arr.slice(0, 1) DLTensor = arr_sliced.__dlpack__() @@ -3601,7 +3601,7 @@ def test_dlpack(value_type, np_type): result = np.from_dlpack(arr_sliced) np.testing.assert_array_equal(result, expected, strict=True) - assert arr.__dlpack_device__() == 1 + assert arr.__dlpack_device__() == (1, 0) arr_sliced = arr.slice(1) DLTensor = arr_sliced.__dlpack__() @@ -3610,7 +3610,7 @@ def test_dlpack(value_type, np_type): result = np.from_dlpack(arr_sliced) np.testing.assert_array_equal(result, expected, strict=True) - assert arr.__dlpack_device__() == 1 + assert arr.__dlpack_device__() == (1, 0) arr_zero = pa.array([], type=value_type) DLTensor = arr_zero.__dlpack__() @@ -3619,7 +3619,7 @@ def test_dlpack(value_type, np_type): result = np.from_dlpack(arr_zero) np.testing.assert_array_equal(result, expected, strict=True) - assert arr.__dlpack_device__() == 1 + assert arr.__dlpack_device__() == (1, 0) def test_dlpack_not_supported(): From 0fa84de17ad8fedb76adaa6c7257c1bf921ececb Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Thu, 30 Nov 2023 18:53:23 +0100 Subject: [PATCH 44/73] Add documentation on the python side --- cpp/src/arrow/c/dlpack.h | 1 + docs/source/python/dlpack.rst | 82 +++++++++++++++++++++ docs/source/python/index.rst | 1 + docs/source/python/interchange_protocol.rst | 6 +- 4 files changed, 87 insertions(+), 3 deletions(-) create mode 100644 docs/source/python/dlpack.rst diff --git a/cpp/src/arrow/c/dlpack.h b/cpp/src/arrow/c/dlpack.h index fc15b6c58cd..2ed0a296879 100644 --- a/cpp/src/arrow/c/dlpack.h +++ b/cpp/src/arrow/c/dlpack.h @@ -48,6 +48,7 @@ Status ExportArray(const std::shared_ptr& arr, DLManagedTensor** out); /// device which is 0 by default for CPU. 
/// /// \param[in] arr Arrow array +/// \param[out] out DLDevice struct /// \return DLDevice ARROW_EXPORT Status ExportDevice(const std::shared_ptr& arr, DLDevice* out); diff --git a/docs/source/python/dlpack.rst b/docs/source/python/dlpack.rst new file mode 100644 index 00000000000..3e70190e8e6 --- /dev/null +++ b/docs/source/python/dlpack.rst @@ -0,0 +1,82 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. _pyarrow-dlpack: + +The DLPack Protocol +=================== + +Producing side of the DLPack Protocol is implemented for ``pa.Array`` +and can be used to interchange data between PyArrow and other tensor +libraries. The data structures that are supported in the implementation +of the protocol are integer, unsigned integer and float arrays. The +protocol has no missing data support meaning PyArrow arrays with +validity mask can not be used to transfer data through the DLPack +protocol. Currently Arrow implementation of the protocol only supports +data on a CPU device. + +The DLPack Protocol is +`selected as the Python array API standard `_ +by the +`Consortium for Python Data API Standards `_ +in order to enable device aware data interchange between array/tensor +libraries in the Python ecosystem. 
Being device aware allows exchange +of data on devices other than the CPU (e.g. GPU). See more about the standard +in the +`protocol documentation `_ +and more about the DLPack in the +`Python Specification for DLPack `_. + +Data interchange syntax of the protocol includes + +1. ``from_dlpack(x)``: consuming an array object that implements a ``__dlpack__`Ã¥` method + and creating a new array while sharing the memory. + +2. ``__dlpack__(self, stream=None)`` and ``__dlpack_device__``: producing a PyCapsule with + the DLPack struct which is called from within ``from_dlpack(x)``. + +PyArrow implements the second part of the protocol (``__dlpack__(self, stream=None)`` and +``__dlpack_device__``). + +Example +------- + +Convert a PyArrow CPU array to NumPy array: + +.. code-block:: + + >>> import pyarrow as pa + >>> array = pa.array([2, 0, 2, 4]) + + [ + 2, + 0, + 2, + 4 + ] + + >>> import numpy as np + >>> np.from_dlpack(array) + array([2, 0, 2, 4]) + +Convert a PyArrow CPU array to PyTorch tensor: + +.. code-block:: + + >>> import torch + >>> torch.from_dlpack(array) + tensor([2, 0, 2, 4]) diff --git a/docs/source/python/index.rst b/docs/source/python/index.rst index 6a3de3d42b1..08939bc760d 100644 --- a/docs/source/python/index.rst +++ b/docs/source/python/index.rst @@ -53,6 +53,7 @@ files into Arrow structures. numpy pandas interchange_protocol + dlpack timestamps orc csv diff --git a/docs/source/python/interchange_protocol.rst b/docs/source/python/interchange_protocol.rst index e293699220c..de45de444eb 100644 --- a/docs/source/python/interchange_protocol.rst +++ b/docs/source/python/interchange_protocol.rst @@ -37,7 +37,7 @@ libraries in the Python ecosystem. See more about the standard in the `protocol documentation `_. 
-From pyarrow to other libraries: ``__dataframe__()`` method +From PyArrow to other libraries: ``__dataframe__()`` method ----------------------------------------------------------- The ``__dataframe__()`` method creates a new exchange object that @@ -54,7 +54,7 @@ This is meant to be used by the consumer library when calling the ``from_dataframe()`` function and is not meant to be used manually by the user. -From other libraries to pyarrow: ``from_dataframe()`` +From other libraries to PyArrow: ``from_dataframe()`` ----------------------------------------------------- With the ``from_dataframe()`` function, we can construct a :class:`pyarrow.Table` @@ -63,7 +63,7 @@ from any dataframe object that implements the protocol. We can for example take a pandas dataframe and construct a -pyarrow table with the use of the interchange protocol: +PyArrow table with the use of the interchange protocol: .. code-block:: From 010f28e8600024879a9b6ccb33d225973192cacf Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Thu, 30 Nov 2023 18:59:19 +0100 Subject: [PATCH 45/73] Update C++ tests to use vector instead of function for expected data --- cpp/src/arrow/c/dlpack_test.cc | 43 ++++++++++++++-------------------- 1 file changed, 17 insertions(+), 26 deletions(-) diff --git a/cpp/src/arrow/c/dlpack_test.cc b/cpp/src/arrow/c/dlpack_test.cc index 614fba17df9..8147758b307 100644 --- a/cpp/src/arrow/c/dlpack_test.cc +++ b/cpp/src/arrow/c/dlpack_test.cc @@ -34,22 +34,6 @@ class TestExportArray : public ::testing::Test { void SetUp() {} }; -static std::vector> TestExportArrayAgainstTheseTypes() { - return { - int8(), uint8(), int16(), uint16(), int32(), uint32(), - int64(), uint64(), float16(), float32(), float64(), - }; -} - -static std::vector TestExpectedDLPackDataTypes() { - return { - DLDataTypeCode::kDLInt, DLDataTypeCode::kDLUInt, DLDataTypeCode::kDLInt, - DLDataTypeCode::kDLUInt, DLDataTypeCode::kDLInt, DLDataTypeCode::kDLUInt, - DLDataTypeCode::kDLInt, DLDataTypeCode::kDLUInt, 
DLDataTypeCode::kDLFloat, - DLDataTypeCode::kDLFloat, DLDataTypeCode::kDLFloat, - }; -} - auto check_dlptensor = [](const std::shared_ptr& arr, std::shared_ptr arrow_type, DLDataTypeCode dlpack_type, int64_t length) { @@ -84,20 +68,27 @@ auto check_dlptensor = [](const std::shared_ptr& arr, TEST_F(TestExportArray, TestSupportedArray) { random::RandomArrayGenerator gen(0); + std::vector> arrow_types = { + int8(), uint8(), int16(), uint16(), int32(), uint32(), + int64(), uint64(), float16(), float32(), float64(), + }; + + std::vector dlpack_types = { + DLDataTypeCode::kDLInt, DLDataTypeCode::kDLUInt, DLDataTypeCode::kDLInt, + DLDataTypeCode::kDLUInt, DLDataTypeCode::kDLInt, DLDataTypeCode::kDLUInt, + DLDataTypeCode::kDLInt, DLDataTypeCode::kDLUInt, DLDataTypeCode::kDLFloat, + DLDataTypeCode::kDLFloat, DLDataTypeCode::kDLFloat, + }; + for (int64_t i = 0; i < 11; ++i) { - const std::shared_ptr array = - gen.ArrayOf(TestExportArrayAgainstTheseTypes()[i], 10, 0); - check_dlptensor(array, TestExportArrayAgainstTheseTypes()[i], - TestExpectedDLPackDataTypes()[i], 10); + const std::shared_ptr array = gen.ArrayOf(arrow_types[i], 10, 0); + check_dlptensor(array, arrow_types[i], dlpack_types[i], 10); ASSERT_OK_AND_ASSIGN(auto sliced_1, array->SliceSafe(1, 5)); - check_dlptensor(sliced_1, TestExportArrayAgainstTheseTypes()[i], - TestExpectedDLPackDataTypes()[i], 5); + check_dlptensor(sliced_1, arrow_types[i], dlpack_types[i], 5); ASSERT_OK_AND_ASSIGN(auto sliced_2, array->SliceSafe(0, 5)); - check_dlptensor(sliced_2, TestExportArrayAgainstTheseTypes()[i], - TestExpectedDLPackDataTypes()[i], 5); + check_dlptensor(sliced_2, arrow_types[i], dlpack_types[i], 5); ASSERT_OK_AND_ASSIGN(auto sliced_3, array->SliceSafe(3)); - check_dlptensor(sliced_3, TestExportArrayAgainstTheseTypes()[i], - TestExpectedDLPackDataTypes()[i], 7); + check_dlptensor(sliced_3, arrow_types[i], dlpack_types[i], 7); } } From 53f2867fcb9662c681c6b7b7be96536dcba411cd Mon Sep 17 00:00:00 2001 From: AlenkaF Date: 
Fri, 1 Dec 2023 08:59:21 +0100 Subject: [PATCH 46/73] Add a deleter to the test --- cpp/src/arrow/c/dlpack_test.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cpp/src/arrow/c/dlpack_test.cc b/cpp/src/arrow/c/dlpack_test.cc index 8147758b307..199a943347f 100644 --- a/cpp/src/arrow/c/dlpack_test.cc +++ b/cpp/src/arrow/c/dlpack_test.cc @@ -63,6 +63,8 @@ auto check_dlptensor = [](const std::shared_ptr& arr, ASSERT_OK(arrow::dlpack::ExportDevice(arr, &device)); ASSERT_EQ(DLDeviceType::kDLCPU, device.device_type); ASSERT_EQ(0, device.device_id); + + dlmtensor->deleter(dlmtensor); }; TEST_F(TestExportArray, TestSupportedArray) { From f9fbf2c204af0fdb9b04e987edfe81d54ffb0614 Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Tue, 5 Dec 2023 12:25:07 +0100 Subject: [PATCH 47/73] Include suggested changes from Joris --- docs/source/python/dlpack.rst | 49 +++++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 19 deletions(-) diff --git a/docs/source/python/dlpack.rst b/docs/source/python/dlpack.rst index 3e70190e8e6..8beb76dd3ed 100644 --- a/docs/source/python/dlpack.rst +++ b/docs/source/python/dlpack.rst @@ -20,37 +20,48 @@ The DLPack Protocol =================== -Producing side of the DLPack Protocol is implemented for ``pa.Array`` -and can be used to interchange data between PyArrow and other tensor -libraries. The data structures that are supported in the implementation -of the protocol are integer, unsigned integer and float arrays. The -protocol has no missing data support meaning PyArrow arrays with -validity mask can not be used to transfer data through the DLPack -protocol. Currently Arrow implementation of the protocol only supports -data on a CPU device. - -The DLPack Protocol is -`selected as the Python array API standard `_ +The DLPack Protocol is a stable in-memory data structure +that allows exchange between major frameworks working +with multidimensional arrays or tensors. 
It is +designed for cross hardware support meaning it allows exchange +of data on devices other than the CPU (e.g. GPU). + +DLPack protocol had been +`selected as the Python array API standard `_ by the `Consortium for Python Data API Standards `_ in order to enable device aware data interchange between array/tensor -libraries in the Python ecosystem. Being device aware allows exchange -of data on devices other than the CPU (e.g. GPU). See more about the standard +libraries in the Python ecosystem. See more about the standard in the `protocol documentation `_ and more about the DLPack in the `Python Specification for DLPack `_. +Implementation of DLPack in PyArrow +----------------------------------- + +Producing side of the DLPack Protocol is implemented for ``pa.Array`` +and can be used to interchange data between PyArrow and other tensor +libraries. The data structures that are supported in the implementation +of the protocol are integer, unsigned integer and float arrays. The +protocol has no missing data support meaning PyArrow arrays with +missing values cannot be used to transfer data through the DLPack +protocol. Currently Arrow implementation of the protocol only supports +data on a CPU device. + Data interchange syntax of the protocol includes -1. ``from_dlpack(x)``: consuming an array object that implements a ``__dlpack__`Ã¥` method - and creating a new array while sharing the memory. +1. ``from_dlpack(x)``: consuming an array object that implements a + ``__dlpack__`` method and creating a new array while sharing the + memory. -2. ``__dlpack__(self, stream=None)`` and ``__dlpack_device__``: producing a PyCapsule with - the DLPack struct which is called from within ``from_dlpack(x)``. +2. ``__dlpack__(self, stream=None)`` and ``__dlpack_device__``: + producing a PyCapsule with the DLPack struct which is called from + within ``from_dlpack(x)``. 
-PyArrow implements the second part of the protocol (``__dlpack__(self, stream=None)`` and -``__dlpack_device__``). +PyArrow implements the second part of the protocol +(``__dlpack__(self, stream=None)`` and ``__dlpack_device__``) and can +thus be consumed by libraries implementing ``from_dlpack``. Example ------- From ab885499c8dc0dd92ecc6514c6563fcdeffde4c6 Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Tue, 5 Dec 2023 13:16:17 +0100 Subject: [PATCH 48/73] Use C++17 nested namespace declarations --- cpp/src/arrow/c/dlpack.cc | 8 ++------ cpp/src/arrow/c/dlpack.h | 8 ++------ cpp/src/arrow/c/dlpack_test.cc | 8 ++------ 3 files changed, 6 insertions(+), 18 deletions(-) diff --git a/cpp/src/arrow/c/dlpack.cc b/cpp/src/arrow/c/dlpack.cc index 7b52b6090cd..5b71d5bbe7c 100644 --- a/cpp/src/arrow/c/dlpack.cc +++ b/cpp/src/arrow/c/dlpack.cc @@ -22,9 +22,7 @@ #include "arrow/device.h" #include "arrow/type.h" -namespace arrow { - -namespace dlpack { +namespace arrow::dlpack { Status getDLDataType(const std::shared_ptr& type, DLDataType* out) { DLDataType dtype; @@ -146,6 +144,4 @@ Status ExportDevice(const std::shared_ptr& arr, DLDevice* out) { } } -} // namespace dlpack - -} // namespace arrow +} // namespace arrow::dlpack diff --git a/cpp/src/arrow/c/dlpack.h b/cpp/src/arrow/c/dlpack.h index 2ed0a296879..28dccd8aae7 100644 --- a/cpp/src/arrow/c/dlpack.h +++ b/cpp/src/arrow/c/dlpack.h @@ -20,9 +20,7 @@ #include "arrow/array/array_base.h" #include "arrow/c/dlpack_abi.h" -namespace arrow { - -namespace dlpack { +namespace arrow::dlpack { /// \brief DLPack protocol for producing DLManagedTensor /// @@ -53,6 +51,4 @@ Status ExportArray(const std::shared_ptr& arr, DLManagedTensor** out); ARROW_EXPORT Status ExportDevice(const std::shared_ptr& arr, DLDevice* out); -} // namespace dlpack - -} // namespace arrow +} // namespace arrow::dlpack diff --git a/cpp/src/arrow/c/dlpack_test.cc b/cpp/src/arrow/c/dlpack_test.cc index 199a943347f..9854096e488 100644 --- 
a/cpp/src/arrow/c/dlpack_test.cc +++ b/cpp/src/arrow/c/dlpack_test.cc @@ -23,9 +23,7 @@ #include "arrow/testing/gtest_util.h" #include "arrow/testing/random.h" -namespace arrow { - -namespace dlpack { +namespace arrow::dlpack { // using ExportArray = arrow::dlpack::ExportArray; @@ -117,6 +115,4 @@ TEST_F(TestExportArray, TestUnSupportedArray) { arrow::dlpack::ExportArray(array_boolean, &dlmtensor_3)); } -} // namespace dlpack - -} // namespace arrow +} // namespace arrow::dlpack From c832eddd38a7dae32a1a87fc3de3ade698559f88 Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Tue, 5 Dec 2023 13:57:29 +0100 Subject: [PATCH 49/73] Return Result instead of Status --- cpp/src/arrow/c/dlpack.cc | 10 ++++------ cpp/src/arrow/c/dlpack.h | 10 ++++------ cpp/src/arrow/c/dlpack_test.cc | 16 ++++++---------- python/pyarrow/_dlpack.pxi | 10 ++++++---- python/pyarrow/includes/libarrow.pxd | 6 +++--- 5 files changed, 23 insertions(+), 29 deletions(-) diff --git a/cpp/src/arrow/c/dlpack.cc b/cpp/src/arrow/c/dlpack.cc index 5b71d5bbe7c..38abf86cb26 100644 --- a/cpp/src/arrow/c/dlpack.cc +++ b/cpp/src/arrow/c/dlpack.cc @@ -69,7 +69,7 @@ static void deleter(DLManagedTensor* arg) { delete static_cast(arg->manager_ctx); } -Status ExportArray(const std::shared_ptr& arr, DLManagedTensor** out) { +Result ExportArray(const std::shared_ptr& arr) { if (arr->null_count() > 0) { return Status::TypeError("Can only use __dlpack__ on arrays with no nulls."); } @@ -127,17 +127,15 @@ Status ExportArray(const std::shared_ptr& arr, DLManagedTensor** out) { dlm_tensor->dl_tensor.strides = NULL; dlm_tensor->dl_tensor.byte_offset = 0; - *out = dlm_tensor; - return Status::OK(); + return dlm_tensor; } -Status ExportDevice(const std::shared_ptr& arr, DLDevice* out) { +Result ExportDevice(const std::shared_ptr& arr) { DLDevice device; if (arr->data()->buffers[1]->device_type() == DeviceAllocationType::kCPU) { device.device_id = 0; device.device_type = DLDeviceType::kDLCPU; - *out = device; - return Status::OK(); 
+ return device; } else { return Status::NotImplemented( "DLPack support is implemented only for buffers on CPU device."); diff --git a/cpp/src/arrow/c/dlpack.h b/cpp/src/arrow/c/dlpack.h index 28dccd8aae7..03b6c9c6120 100644 --- a/cpp/src/arrow/c/dlpack.h +++ b/cpp/src/arrow/c/dlpack.h @@ -36,19 +36,17 @@ namespace arrow::dlpack { /// are not supported. /// /// \param[in] arr Arrow array -/// \param[out] out DLManagedTensor struct -/// \return Status +/// \return DLManagedTensor struct ARROW_EXPORT -Status ExportArray(const std::shared_ptr& arr, DLManagedTensor** out); +Result ExportArray(const std::shared_ptr& arr); /// \brief Get DLDevice with enumerator specifying the /// type of the device data is stored on and index of the /// device which is 0 by default for CPU. /// /// \param[in] arr Arrow array -/// \param[out] out DLDevice struct -/// \return DLDevice +/// \return DLDevice struct ARROW_EXPORT -Status ExportDevice(const std::shared_ptr& arr, DLDevice* out); +Result ExportDevice(const std::shared_ptr& arr); } // namespace arrow::dlpack diff --git a/cpp/src/arrow/c/dlpack_test.cc b/cpp/src/arrow/c/dlpack_test.cc index 9854096e488..44058fbaa2d 100644 --- a/cpp/src/arrow/c/dlpack_test.cc +++ b/cpp/src/arrow/c/dlpack_test.cc @@ -35,8 +35,8 @@ class TestExportArray : public ::testing::Test { auto check_dlptensor = [](const std::shared_ptr& arr, std::shared_ptr arrow_type, DLDataTypeCode dlpack_type, int64_t length) { - DLManagedTensor* dlmtensor; - ASSERT_OK(arrow::dlpack::ExportArray(arr, &dlmtensor)); + ASSERT_OK_AND_ASSIGN(auto dlmtensor, + arrow::dlpack::ExportArray(arr)); auto dltensor = dlmtensor->dl_tensor; const auto byte_width = arr->type()->byte_width(); @@ -57,8 +57,7 @@ auto check_dlptensor = [](const std::shared_ptr& arr, ASSERT_EQ(DLDeviceType::kDLCPU, dltensor.device.device_type); ASSERT_EQ(0, dltensor.device.device_id); - DLDevice device; - ASSERT_OK(arrow::dlpack::ExportDevice(arr, &device)); + ASSERT_OK_AND_ASSIGN(auto device, 
arrow::dlpack::ExportDevice(arr)); ASSERT_EQ(DLDeviceType::kDLCPU, device.device_type); ASSERT_EQ(0, device.device_id); @@ -96,23 +95,20 @@ TEST_F(TestExportArray, TestUnSupportedArray) { random::RandomArrayGenerator gen(0); const std::shared_ptr array_with_null = gen.Int8(10, 1, 100, 1); - DLManagedTensor* dlmtensor_1; ASSERT_RAISES_WITH_MESSAGE( TypeError, "Type error: Can only use __dlpack__ on arrays with no nulls.", - arrow::dlpack::ExportArray(array_with_null, &dlmtensor_1)); + arrow::dlpack::ExportArray(array_with_null)); const std::shared_ptr array_string = gen.String(10, 0, 10, 0); - DLManagedTensor* dlmtensor_2; ASSERT_RAISES_WITH_MESSAGE(TypeError, "Type error: Can only use __dlpack__ on primitive arrays " "without NullType and Decimal types.", - arrow::dlpack::ExportArray(array_string, &dlmtensor_2)); + arrow::dlpack::ExportArray(array_string)); const std::shared_ptr array_boolean = gen.Boolean(10, 0.5, 0); - DLManagedTensor* dlmtensor_3; ASSERT_RAISES_WITH_MESSAGE( TypeError, "Type error: Bit-packed boolean data type not supported by DLPack.", - arrow::dlpack::ExportArray(array_boolean, &dlmtensor_3)); + arrow::dlpack::ExportArray(array_boolean)); } } // namespace arrow::dlpack diff --git a/python/pyarrow/_dlpack.pxi b/python/pyarrow/_dlpack.pxi index 43559f574ea..93dc26df175 100644 --- a/python/pyarrow/_dlpack.pxi +++ b/python/pyarrow/_dlpack.pxi @@ -49,14 +49,16 @@ cdef void pycapsule_deleter(object dltensor) noexcept: cpdef object to_dlpack(Array arr) except *: - cdef DLManagedTensor* dlm_tensor - check_status(ExportToDLPack(pyarrow_unwrap_array(arr), &dlm_tensor)) + cdef CResult[DLManagedTensor*] c_dlm_tensor + c_dlm_tensor = ExportToDLPack(pyarrow_unwrap_array(arr)) + dlm_tensor = GetResultValue(c_dlm_tensor) return PyCapsule_New(dlm_tensor, 'dltensor', pycapsule_deleter) cpdef object dlpack_device(Array arr) except *: - cdef DLDevice device - check_status(ExportDevice(pyarrow_unwrap_array(arr), &device)) + cdef CResult[DLDevice] c_device + 
c_device = ExportDevice(pyarrow_unwrap_array(arr)) + device = GetResultValue(c_device) return (device.device_type, device.device_id) diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index 8210a78ee07..af9409ee0b4 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -1212,10 +1212,10 @@ cdef extern from "arrow/c/dlpack_abi.h" nogil: cdef extern from "arrow/c/dlpack.h" namespace "arrow::dlpack" nogil: - CStatus ExportToDLPack" arrow::dlpack::ExportArray"(const shared_ptr[CArray]& arr, - DLManagedTensor** out) + CResult[DLManagedTensor*] ExportToDLPack" arrow::dlpack::ExportArray"( + const shared_ptr[CArray]& arr) - CStatus ExportDevice(const shared_ptr[CArray]& arr, DLDevice* out) + CResult[DLDevice] ExportDevice(const shared_ptr[CArray]& arr) cdef extern from "arrow/builder.h" namespace "arrow" nogil: From 0089e23084eef169385957f2d1868eaec0815a28 Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Tue, 5 Dec 2023 14:18:12 +0100 Subject: [PATCH 50/73] Update GetDLDataType --- cpp/src/arrow/c/dlpack.cc | 24 ++++++++++-------------- cpp/src/arrow/c/dlpack_test.cc | 7 +++---- python/pyarrow/tests/test_array.py | 4 ++-- 3 files changed, 15 insertions(+), 20 deletions(-) diff --git a/cpp/src/arrow/c/dlpack.cc b/cpp/src/arrow/c/dlpack.cc index 38abf86cb26..e4a2a8226b4 100644 --- a/cpp/src/arrow/c/dlpack.cc +++ b/cpp/src/arrow/c/dlpack.cc @@ -24,38 +24,34 @@ namespace arrow::dlpack { -Status getDLDataType(const std::shared_ptr& type, DLDataType* out) { +Result GetDLDataType(const DataType& type) { DLDataType dtype; dtype.lanes = 1; - dtype.bits = type->bit_width(); - switch (type->id()) { + dtype.bits = type.bit_width(); + switch (type.id()) { case Type::INT8: case Type::INT16: case Type::INT32: case Type::INT64: dtype.code = DLDataTypeCode::kDLInt; - *out = dtype; - return Status::OK(); + return dtype; case Type::UINT8: case Type::UINT16: case Type::UINT32: case Type::UINT64: dtype.code = 
DLDataTypeCode::kDLUInt; - *out = dtype; - return Status::OK(); + return dtype; case Type::HALF_FLOAT: case Type::FLOAT: case Type::DOUBLE: dtype.code = DLDataTypeCode::kDLFloat; - *out = dtype; - return Status::OK(); + return dtype; case Type::BOOL: // DLPack supports byte-packed boolean values return Status::TypeError("Bit-packed boolean data type not supported by DLPack."); default: return Status::TypeError( - "Can only use __dlpack__ on primitive arrays without NullType and Decimal " - "types."); + "DataType is not compatible with DLPack spec: ", type.ToString()); } } @@ -76,8 +72,8 @@ Result ExportArray(const std::shared_ptr& arr) { // Define the DLDataType struct // Supported data types: int, uint, float - DLDataType arr_type; - RETURN_NOT_OK(getDLDataType(arr->type(), &arr_type)); + const DataType* arrow_type = arr->type().get(); + ARROW_ASSIGN_OR_RAISE(auto dlpack_type, GetDLDataType(*arrow_type)); // Create DLMTensorCtx struct with the reference to // the data of the array @@ -119,7 +115,7 @@ Result ExportArray(const std::shared_ptr& arr) { dlm_tensor->dl_tensor.device = ctx; dlm_tensor->dl_tensor.ndim = 1; - dlm_tensor->dl_tensor.dtype = arr_type; + dlm_tensor->dl_tensor.dtype = dlpack_type; std::vector* shape_arr = &DLMTensor->shape; shape_arr->resize(1); (*shape_arr)[0] = arr->length(); diff --git a/cpp/src/arrow/c/dlpack_test.cc b/cpp/src/arrow/c/dlpack_test.cc index 44058fbaa2d..faf16c0061f 100644 --- a/cpp/src/arrow/c/dlpack_test.cc +++ b/cpp/src/arrow/c/dlpack_test.cc @@ -35,8 +35,7 @@ class TestExportArray : public ::testing::Test { auto check_dlptensor = [](const std::shared_ptr& arr, std::shared_ptr arrow_type, DLDataTypeCode dlpack_type, int64_t length) { - ASSERT_OK_AND_ASSIGN(auto dlmtensor, - arrow::dlpack::ExportArray(arr)); + ASSERT_OK_AND_ASSIGN(auto dlmtensor, arrow::dlpack::ExportArray(arr)); auto dltensor = dlmtensor->dl_tensor; const auto byte_width = arr->type()->byte_width(); @@ -101,8 +100,8 @@ TEST_F(TestExportArray, 
TestUnSupportedArray) { const std::shared_ptr array_string = gen.String(10, 0, 10, 0); ASSERT_RAISES_WITH_MESSAGE(TypeError, - "Type error: Can only use __dlpack__ on primitive arrays " - "without NullType and Decimal types.", + "Type error: DataType is not compatible with DLPack spec: " + + array_string->type()->ToString(), arrow::dlpack::ExportArray(array_string)); const std::shared_ptr array_boolean = gen.Boolean(10, 0.5, 0); diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index 3150eb006e9..cd3d8a54ca4 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -3635,11 +3635,11 @@ def test_dlpack_not_supported(): [[0, 1], [3, 4]], type=pa.list_(pa.int32()) ) - with pytest.raises(TypeError, match="Can only use __dlpack__ on primitive arrays"): + with pytest.raises(TypeError, match="DataType is not compatible with DLPack spec"): np.from_dlpack(arr) arr = pa.array([]) - with pytest.raises(TypeError, match="Can only use __dlpack__ on primitive arrays"): + with pytest.raises(TypeError, match="DataType is not compatible with DLPack spec"): np.from_dlpack(arr) # DLPack doesn't support bit-packed boolean values From 804878fd7ffe869f691f694fb6702a5576246ae5 Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Tue, 5 Dec 2023 14:30:32 +0100 Subject: [PATCH 51/73] Fix Python spelling in C++ --- cpp/src/arrow/c/dlpack.cc | 2 +- cpp/src/arrow/c/dlpack_test.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/c/dlpack.cc b/cpp/src/arrow/c/dlpack.cc index e4a2a8226b4..4fc287bf660 100644 --- a/cpp/src/arrow/c/dlpack.cc +++ b/cpp/src/arrow/c/dlpack.cc @@ -67,7 +67,7 @@ static void deleter(DLManagedTensor* arg) { Result ExportArray(const std::shared_ptr& arr) { if (arr->null_count() > 0) { - return Status::TypeError("Can only use __dlpack__ on arrays with no nulls."); + return Status::TypeError("Can only use DLPack on arrays with no nulls."); } // Define the DLDataType struct diff 
--git a/cpp/src/arrow/c/dlpack_test.cc b/cpp/src/arrow/c/dlpack_test.cc index faf16c0061f..c1b07fdbc84 100644 --- a/cpp/src/arrow/c/dlpack_test.cc +++ b/cpp/src/arrow/c/dlpack_test.cc @@ -95,7 +95,7 @@ TEST_F(TestExportArray, TestUnSupportedArray) { const std::shared_ptr array_with_null = gen.Int8(10, 1, 100, 1); ASSERT_RAISES_WITH_MESSAGE( - TypeError, "Type error: Can only use __dlpack__ on arrays with no nulls.", + TypeError, "Type error: Can only use DLPack on arrays with no nulls.", arrow::dlpack::ExportArray(array_with_null)); const std::shared_ptr array_string = gen.String(10, 0, 10, 0); From fea6fe3b921fead6803d974f435967d97bf1357c Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Wed, 6 Dec 2023 08:39:28 +0100 Subject: [PATCH 52/73] Use ExportDevice to define the device struct in ExportArray --- cpp/src/arrow/c/dlpack.cc | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/cpp/src/arrow/c/dlpack.cc b/cpp/src/arrow/c/dlpack.cc index 4fc287bf660..b456fd49052 100644 --- a/cpp/src/arrow/c/dlpack.cc +++ b/cpp/src/arrow/c/dlpack.cc @@ -104,15 +104,8 @@ Result ExportArray(const std::shared_ptr& arr) { } // Define DLDevice struct - DLDevice ctx; - if (array_ref->buffers[1]->device_type() == DeviceAllocationType::kCPU) { - ctx.device_id = 0; - ctx.device_type = DLDeviceType::kDLCPU; - } else { - return Status::NotImplemented( - "DLPack support is implemented only for buffers on CPU device."); - } - dlm_tensor->dl_tensor.device = ctx; + ARROW_ASSIGN_OR_RAISE(auto device, ExportDevice(arr)) + dlm_tensor->dl_tensor.device = device; dlm_tensor->dl_tensor.ndim = 1; dlm_tensor->dl_tensor.dtype = dlpack_type; From 8071c9bc7ffa45512c2c97b093a49b278fa8f606 Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Wed, 6 Dec 2023 08:41:59 +0100 Subject: [PATCH 53/73] Fix tensor shape --- cpp/src/arrow/c/dlpack.cc | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/cpp/src/arrow/c/dlpack.cc b/cpp/src/arrow/c/dlpack.cc index b456fd49052..69edbda68a9 
100644 --- a/cpp/src/arrow/c/dlpack.cc +++ b/cpp/src/arrow/c/dlpack.cc @@ -109,10 +109,7 @@ Result ExportArray(const std::shared_ptr& arr) { dlm_tensor->dl_tensor.ndim = 1; dlm_tensor->dl_tensor.dtype = dlpack_type; - std::vector* shape_arr = &DLMTensor->shape; - shape_arr->resize(1); - (*shape_arr)[0] = arr->length(); - dlm_tensor->dl_tensor.shape = shape_arr->data(); + dlm_tensor->dl_tensor.shape = const_cast(&array_ref->length); dlm_tensor->dl_tensor.strides = NULL; dlm_tensor->dl_tensor.byte_offset = 0; From 9f212085168814c190964f01fe86d118c8a0dc59 Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Wed, 6 Dec 2023 08:42:31 +0100 Subject: [PATCH 54/73] Linter --- cpp/src/arrow/c/dlpack.cc | 4 ++-- cpp/src/arrow/c/dlpack_test.cc | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/cpp/src/arrow/c/dlpack.cc b/cpp/src/arrow/c/dlpack.cc index 69edbda68a9..70e8be33686 100644 --- a/cpp/src/arrow/c/dlpack.cc +++ b/cpp/src/arrow/c/dlpack.cc @@ -50,8 +50,8 @@ Result GetDLDataType(const DataType& type) { // DLPack supports byte-packed boolean values return Status::TypeError("Bit-packed boolean data type not supported by DLPack."); default: - return Status::TypeError( - "DataType is not compatible with DLPack spec: ", type.ToString()); + return Status::TypeError("DataType is not compatible with DLPack spec: ", + type.ToString()); } } diff --git a/cpp/src/arrow/c/dlpack_test.cc b/cpp/src/arrow/c/dlpack_test.cc index c1b07fdbc84..dc8c282e0c2 100644 --- a/cpp/src/arrow/c/dlpack_test.cc +++ b/cpp/src/arrow/c/dlpack_test.cc @@ -94,9 +94,9 @@ TEST_F(TestExportArray, TestUnSupportedArray) { random::RandomArrayGenerator gen(0); const std::shared_ptr array_with_null = gen.Int8(10, 1, 100, 1); - ASSERT_RAISES_WITH_MESSAGE( - TypeError, "Type error: Can only use DLPack on arrays with no nulls.", - arrow::dlpack::ExportArray(array_with_null)); + ASSERT_RAISES_WITH_MESSAGE(TypeError, + "Type error: Can only use DLPack on arrays with no nulls.", + 
arrow::dlpack::ExportArray(array_with_null)); const std::shared_ptr array_string = gen.String(10, 0, 10, 0); ASSERT_RAISES_WITH_MESSAGE(TypeError, From 8a10e688b92ad6ed9811017ff3a1e9c9ab0daabf Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Wed, 6 Dec 2023 09:28:23 +0100 Subject: [PATCH 55/73] Move the methods from _dlpack.pxi to array.pxi and fix a typo in Python test --- python/pyarrow/_dlpack.pxi | 17 ----------------- python/pyarrow/array.pxi | 13 +++++++++++-- python/pyarrow/tests/test_array.py | 2 +- 3 files changed, 12 insertions(+), 20 deletions(-) diff --git a/python/pyarrow/_dlpack.pxi b/python/pyarrow/_dlpack.pxi index 93dc26df175..2122b516af1 100644 --- a/python/pyarrow/_dlpack.pxi +++ b/python/pyarrow/_dlpack.pxi @@ -45,20 +45,3 @@ cdef void pycapsule_deleter(object dltensor) noexcept: # Set the error indicator from err_type, err_value, err_traceback cpython.PyErr_Restore(err_type, err_value, err_traceback) - - -cpdef object to_dlpack(Array arr) except *: - - cdef CResult[DLManagedTensor*] c_dlm_tensor - c_dlm_tensor = ExportToDLPack(pyarrow_unwrap_array(arr)) - dlm_tensor = GetResultValue(c_dlm_tensor) - - return PyCapsule_New(dlm_tensor, 'dltensor', pycapsule_deleter) - -cpdef object dlpack_device(Array arr) except *: - - cdef CResult[DLDevice] c_device - c_device = ExportDevice(pyarrow_unwrap_array(arr)) - device = GetResultValue(c_device) - - return (device.device_type, device.device_id) diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index e0391090bf1..c08c0b83a3f 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -1793,8 +1793,13 @@ cdef class Array(_PandasConvertible): capsule : PyCapsule A DLPack capsule for the array, containing a DLPackManagedTensor. 
""" + cdef CResult[DLManagedTensor*] c_dlm_tensor + if stream is None: - return to_dlpack(self) + c_dlm_tensor = ExportToDLPack(pyarrow_unwrap_array(self)) + dlm_tensor = GetResultValue(c_dlm_tensor) + + return PyCapsule_New(dlm_tensor, 'dltensor', pycapsule_deleter) else: raise NotImplementedError( "Only stream=None is supported." @@ -1810,7 +1815,11 @@ cdef class Array(_PandasConvertible): Tuple with enumerator specifying the type of the device and index of the device which is 0 by default for CPU. """ - return dlpack_device(self) + cdef CResult[DLDevice] c_device + c_device = ExportDevice(pyarrow_unwrap_array(self)) + device = GetResultValue(c_device) + + return (device.device_type, device.device_id) cdef _array_like_to_pandas(obj, options, types_mapper): diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index cd3d8a54ca4..30e5580e147 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -3627,7 +3627,7 @@ def test_dlpack_not_supported(): pytest.skip("No dlpack support in numpy versions older than 1.22.0.") arr = pa.array([1, None, 3]) - with pytest.raises(TypeError, match="Can only use __dlpack__ " + with pytest.raises(TypeError, match="Can only use DLPack " "on arrays with no nulls."): np.from_dlpack(arr) From 672043b60891a0bae6c784f1c0cfb2a1920371ef Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Wed, 6 Dec 2023 09:30:26 +0100 Subject: [PATCH 56/73] Fix capsule docstring and a typo in tests --- python/pyarrow/array.pxi | 2 +- python/pyarrow/tests/test_array.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index c08c0b83a3f..c067b43033e 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -1791,7 +1791,7 @@ cdef class Array(_PandasConvertible): Returns ------- capsule : PyCapsule - A DLPack capsule for the array, containing a DLPackManagedTensor. 
+ A DLPack capsule for the array, pointing to a DLManagedTensor. """ cdef CResult[DLManagedTensor*] c_dlm_tensor diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index 30e5580e147..2700fe47c54 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -3573,7 +3573,7 @@ def PyCapsule_IsValid(capsule, name): def test_dlpack(value_type, np_type): if Version(np.__version__) < Version("1.24.0"): pytest.skip("No dlpack support in numpy versions older than 1.22.0, " - "strict keyward in assert_array_equal added in numpy version " + "strict keyword in assert_array_equal added in numpy version " "1.24.0") expected = np.array([1, 2, 3], dtype=np_type) From ac85f4ea65578568c8a63528c9e0d5be7ee87bc7 Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Wed, 6 Dec 2023 09:37:29 +0100 Subject: [PATCH 57/73] Add test helper function --- python/pyarrow/tests/test_array.py | 45 ++++++++++-------------------- 1 file changed, 15 insertions(+), 30 deletions(-) diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index 2700fe47c54..75c6fb86d78 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -3554,6 +3554,16 @@ def PyCapsule_IsValid(capsule, name): return ctypes.pythonapi.PyCapsule_IsValid(ctypes.py_object(capsule), name) == 1 +def check_dlpack_export(arr, expected_arr): + DLTensor = arr.__dlpack__() + assert PyCapsule_IsValid(DLTensor, b"dltensor") is True + + result = np.from_dlpack(arr) + np.testing.assert_array_equal(result, expected_arr, strict=True) + + assert arr.__dlpack_device__() == (1, 0) + + @pytest.mark.parametrize( ('value_type', 'np_type'), [ @@ -3578,48 +3588,23 @@ def test_dlpack(value_type, np_type): expected = np.array([1, 2, 3], dtype=np_type) arr = pa.array(expected, type=value_type) - DLTensor = arr.__dlpack__() - assert PyCapsule_IsValid(DLTensor, b"dltensor") is True - result = np.from_dlpack(arr) - 
np.testing.assert_array_equal(result, expected, strict=True) - - assert arr.__dlpack_device__() == (1, 0) + check_dlpack_export(arr, expected) arr_sliced = arr.slice(1, 1) - DLTensor = arr_sliced.__dlpack__() - assert PyCapsule_IsValid(DLTensor, b"dltensor") is True expected = np.array([2], dtype=np_type) - result = np.from_dlpack(arr_sliced) - np.testing.assert_array_equal(result, expected, strict=True) - - assert arr.__dlpack_device__() == (1, 0) + check_dlpack_export(arr_sliced, expected) arr_sliced = arr.slice(0, 1) - DLTensor = arr_sliced.__dlpack__() - assert PyCapsule_IsValid(DLTensor, b"dltensor") is True expected = np.array([1], dtype=np_type) - result = np.from_dlpack(arr_sliced) - np.testing.assert_array_equal(result, expected, strict=True) - - assert arr.__dlpack_device__() == (1, 0) + check_dlpack_export(arr_sliced, expected) arr_sliced = arr.slice(1) - DLTensor = arr_sliced.__dlpack__() - assert PyCapsule_IsValid(DLTensor, b"dltensor") is True expected = np.array([2, 3], dtype=np_type) - result = np.from_dlpack(arr_sliced) - np.testing.assert_array_equal(result, expected, strict=True) - - assert arr.__dlpack_device__() == (1, 0) + check_dlpack_export(arr_sliced, expected) arr_zero = pa.array([], type=value_type) - DLTensor = arr_zero.__dlpack__() - assert PyCapsule_IsValid(DLTensor, b"dltensor") is True expected = np.array([], dtype=np_type) - result = np.from_dlpack(arr_zero) - np.testing.assert_array_equal(result, expected, strict=True) - - assert arr.__dlpack_device__() == (1, 0) + check_dlpack_export(arr_zero, expected) def test_dlpack_not_supported(): From 7e0d5f8d2c7d80dcdb5505759dd65dfb65c1db7a Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Wed, 6 Dec 2023 09:41:19 +0100 Subject: [PATCH 58/73] Move tests to separate test_dlpack.py --- python/pyarrow/tests/test_array.py | 107 ----------------------- python/pyarrow/tests/test_dlpack.py | 129 ++++++++++++++++++++++++++++ 2 files changed, 129 insertions(+), 107 deletions(-) create mode 100644 
python/pyarrow/tests/test_dlpack.py diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index 75c6fb86d78..2f9727922b4 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -18,7 +18,6 @@ from collections.abc import Iterable import datetime import decimal -import ctypes import hypothesis as h import hypothesis.strategies as st import itertools @@ -32,7 +31,6 @@ import pyarrow as pa import pyarrow.tests.strategies as past -from pyarrow.vendored.version import Version def test_total_bytes_allocated(): @@ -3548,108 +3546,3 @@ def test_run_end_encoded_from_buffers(): with pytest.raises(ValueError): pa.RunEndEncodedArray.from_buffers(ree_type, length, buffers, 1, offset, children) - - -def PyCapsule_IsValid(capsule, name): - return ctypes.pythonapi.PyCapsule_IsValid(ctypes.py_object(capsule), name) == 1 - - -def check_dlpack_export(arr, expected_arr): - DLTensor = arr.__dlpack__() - assert PyCapsule_IsValid(DLTensor, b"dltensor") is True - - result = np.from_dlpack(arr) - np.testing.assert_array_equal(result, expected_arr, strict=True) - - assert arr.__dlpack_device__() == (1, 0) - - -@pytest.mark.parametrize( - ('value_type', 'np_type'), - [ - (pa.uint8(), np.uint8), - (pa.uint16(), np.uint16), - (pa.uint32(), np.uint32), - (pa.uint64(), np.uint64), - (pa.int8(), np.int8), - (pa.int16(), np.int16), - (pa.int32(), np.int32), - (pa.int64(), np.int64), - (pa.float16(), np.float16), - (pa.float32(), np.float32), - (pa.float64(), np.float64), - ] -) -def test_dlpack(value_type, np_type): - if Version(np.__version__) < Version("1.24.0"): - pytest.skip("No dlpack support in numpy versions older than 1.22.0, " - "strict keyword in assert_array_equal added in numpy version " - "1.24.0") - - expected = np.array([1, 2, 3], dtype=np_type) - arr = pa.array(expected, type=value_type) - check_dlpack_export(arr, expected) - - arr_sliced = arr.slice(1, 1) - expected = np.array([2], dtype=np_type) - 
check_dlpack_export(arr_sliced, expected) - - arr_sliced = arr.slice(0, 1) - expected = np.array([1], dtype=np_type) - check_dlpack_export(arr_sliced, expected) - - arr_sliced = arr.slice(1) - expected = np.array([2, 3], dtype=np_type) - check_dlpack_export(arr_sliced, expected) - - arr_zero = pa.array([], type=value_type) - expected = np.array([], dtype=np_type) - check_dlpack_export(arr_zero, expected) - - -def test_dlpack_not_supported(): - if Version(np.__version__) < Version("1.22.0"): - pytest.skip("No dlpack support in numpy versions older than 1.22.0.") - - arr = pa.array([1, None, 3]) - with pytest.raises(TypeError, match="Can only use DLPack " - "on arrays with no nulls."): - np.from_dlpack(arr) - - arr = pa.array( - [[0, 1], [3, 4]], - type=pa.list_(pa.int32()) - ) - with pytest.raises(TypeError, match="DataType is not compatible with DLPack spec"): - np.from_dlpack(arr) - - arr = pa.array([]) - with pytest.raises(TypeError, match="DataType is not compatible with DLPack spec"): - np.from_dlpack(arr) - - # DLPack doesn't support bit-packed boolean values - arr = pa.array([True, False, True]) - with pytest.raises(TypeError, match="Bit-packed boolean data type " - "not supported by DLPack."): - np.from_dlpack(arr) - - -def test_dlpack_cuda_not_supported(): - cuda = pytest.importorskip("pyarrow.cuda") - - schema = pa.schema([pa.field('f0', pa.int16())]) - a0 = pa.array([1, 2, 3], type=pa.int16()) - batch = pa.record_batch([a0], schema=schema) - - cbuf = cuda.serialize_record_batch(batch, cuda.Context(0)) - cbatch = cuda.read_record_batch(cbuf, batch.schema) - carr = cbatch["f0"] - - # CudaBuffers not yet supported - with pytest.raises(NotImplementedError, match="DLPack support is implemented " - "only for buffers on CPU device."): - np.from_dlpack(carr) - - with pytest.raises(NotImplementedError, match="DLPack support is implemented " - "only for buffers on CPU device."): - carr.__dlpack_device__() diff --git a/python/pyarrow/tests/test_dlpack.py 
b/python/pyarrow/tests/test_dlpack.py new file mode 100644 index 00000000000..78c4b30f54b --- /dev/null +++ b/python/pyarrow/tests/test_dlpack.py @@ -0,0 +1,129 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import ctypes +import pytest + +import numpy as np + +import pyarrow as pa +from pyarrow.vendored.version import Version + + +def PyCapsule_IsValid(capsule, name): + return ctypes.pythonapi.PyCapsule_IsValid(ctypes.py_object(capsule), name) == 1 + + +def check_dlpack_export(arr, expected_arr): + DLTensor = arr.__dlpack__() + assert PyCapsule_IsValid(DLTensor, b"dltensor") is True + + result = np.from_dlpack(arr) + np.testing.assert_array_equal(result, expected_arr, strict=True) + + assert arr.__dlpack_device__() == (1, 0) + + +@pytest.mark.parametrize( + ('value_type', 'np_type'), + [ + (pa.uint8(), np.uint8), + (pa.uint16(), np.uint16), + (pa.uint32(), np.uint32), + (pa.uint64(), np.uint64), + (pa.int8(), np.int8), + (pa.int16(), np.int16), + (pa.int32(), np.int32), + (pa.int64(), np.int64), + (pa.float16(), np.float16), + (pa.float32(), np.float32), + (pa.float64(), np.float64), + ] +) +def test_dlpack(value_type, np_type): + if Version(np.__version__) < Version("1.24.0"): + pytest.skip("No dlpack support in numpy 
versions older than 1.22.0, " + "strict keyword in assert_array_equal added in numpy version " + "1.24.0") + + expected = np.array([1, 2, 3], dtype=np_type) + arr = pa.array(expected, type=value_type) + check_dlpack_export(arr, expected) + + arr_sliced = arr.slice(1, 1) + expected = np.array([2], dtype=np_type) + check_dlpack_export(arr_sliced, expected) + + arr_sliced = arr.slice(0, 1) + expected = np.array([1], dtype=np_type) + check_dlpack_export(arr_sliced, expected) + + arr_sliced = arr.slice(1) + expected = np.array([2, 3], dtype=np_type) + check_dlpack_export(arr_sliced, expected) + + arr_zero = pa.array([], type=value_type) + expected = np.array([], dtype=np_type) + check_dlpack_export(arr_zero, expected) + + +def test_dlpack_not_supported(): + if Version(np.__version__) < Version("1.22.0"): + pytest.skip("No dlpack support in numpy versions older than 1.22.0.") + + arr = pa.array([1, None, 3]) + with pytest.raises(TypeError, match="Can only use DLPack " + "on arrays with no nulls."): + np.from_dlpack(arr) + + arr = pa.array( + [[0, 1], [3, 4]], + type=pa.list_(pa.int32()) + ) + with pytest.raises(TypeError, match="DataType is not compatible with DLPack spec"): + np.from_dlpack(arr) + + arr = pa.array([]) + with pytest.raises(TypeError, match="DataType is not compatible with DLPack spec"): + np.from_dlpack(arr) + + # DLPack doesn't support bit-packed boolean values + arr = pa.array([True, False, True]) + with pytest.raises(TypeError, match="Bit-packed boolean data type " + "not supported by DLPack."): + np.from_dlpack(arr) + + +def test_dlpack_cuda_not_supported(): + cuda = pytest.importorskip("pyarrow.cuda") + + schema = pa.schema([pa.field('f0', pa.int16())]) + a0 = pa.array([1, 2, 3], type=pa.int16()) + batch = pa.record_batch([a0], schema=schema) + + cbuf = cuda.serialize_record_batch(batch, cuda.Context(0)) + cbatch = cuda.read_record_batch(cbuf, batch.schema) + carr = cbatch["f0"] + + # CudaBuffers not yet supported + with 
pytest.raises(NotImplementedError, match="DLPack support is implemented " + "only for buffers on CPU device."): + np.from_dlpack(carr) + + with pytest.raises(NotImplementedError, match="DLPack support is implemented " + "only for buffers on CPU device."): + carr.__dlpack_device__() From 0fc962ce12bc8861ce08858064ee30a09c42d7b3 Mon Sep 17 00:00:00 2001 From: Alenka Frim Date: Wed, 6 Dec 2023 12:06:38 +0100 Subject: [PATCH 59/73] Apply suggestions from code review Co-authored-by: Joris Van den Bossche --- docs/source/python/dlpack.rst | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/docs/source/python/dlpack.rst b/docs/source/python/dlpack.rst index 8beb76dd3ed..57a386636f9 100644 --- a/docs/source/python/dlpack.rst +++ b/docs/source/python/dlpack.rst @@ -34,19 +34,18 @@ in order to enable device aware data interchange between array/tensor libraries in the Python ecosystem. See more about the standard in the `protocol documentation `_ -and more about the DLPack in the +and more about DLPack in the `Python Specification for DLPack `_. Implementation of DLPack in PyArrow ----------------------------------- -Producing side of the DLPack Protocol is implemented for ``pa.Array`` +The producing side of the DLPack Protocol is implemented for ``pa.Array`` and can be used to interchange data between PyArrow and other tensor -libraries. The data structures that are supported in the implementation -of the protocol are integer, unsigned integer and float arrays. The +libraries. Supported data types are integer, unsigned integer and float. The protocol has no missing data support meaning PyArrow arrays with -missing values cannot be used to transfer data through the DLPack -protocol. Currently Arrow implementation of the protocol only supports +missing values cannot be transferred through the DLPack +protocol. Currently, the Arrow implementation of the protocol only supports data on a CPU device. 
Data interchange syntax of the protocol includes From 0d81ae0fc711f961fc4b0074c39c2456b7723528 Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Wed, 6 Dec 2023 12:13:21 +0100 Subject: [PATCH 60/73] Restructure DLMTensorCtx to ManagerCtx and use unique_ptr --- cpp/src/arrow/c/dlpack.cc | 48 +++++++++++++++++---------------------- 1 file changed, 21 insertions(+), 27 deletions(-) diff --git a/cpp/src/arrow/c/dlpack.cc b/cpp/src/arrow/c/dlpack.cc index 70e8be33686..a43aaaaffe4 100644 --- a/cpp/src/arrow/c/dlpack.cc +++ b/cpp/src/arrow/c/dlpack.cc @@ -55,16 +55,11 @@ Result GetDLDataType(const DataType& type) { } } -struct DLMTensorCtx { +struct ManagerCtx { std::shared_ptr ref; - std::vector shape; DLManagedTensor tensor; }; -static void deleter(DLManagedTensor* arg) { - delete static_cast(arg->manager_ctx); -} - Result ExportArray(const std::shared_ptr& arr) { if (arr->null_count() > 0) { return Status::TypeError("Can only use DLPack on arrays with no nulls."); @@ -75,45 +70,44 @@ Result ExportArray(const std::shared_ptr& arr) { const DataType* arrow_type = arr->type().get(); ARROW_ASSIGN_OR_RAISE(auto dlpack_type, GetDLDataType(*arrow_type)); - // Create DLMTensorCtx struct with the reference to + // Create ManagerCtx with the reference to // the data of the array std::shared_ptr array_ref = arr->data(); - DLMTensorCtx* DLMTensor = new DLMTensorCtx; - DLMTensor->ref = array_ref; - - // Define DLManagedTensor struct defined by - // DLPack (dlpack_structure.h) - DLManagedTensor* dlm_tensor = &DLMTensor->tensor; - dlm_tensor->manager_ctx = DLMTensor; - dlm_tensor->deleter = &deleter; + std::unique_ptr ctx(new ManagerCtx); + ctx->ref = array_ref; // Define the data pointer to the DLTensor // If array is of length 0, data pointer should be NULL if (arr->length() == 0) { - dlm_tensor->dl_tensor.data = NULL; + ctx->tensor.dl_tensor.data = NULL; } else if (arr->offset() > 0) { const auto byte_width = arr->type()->byte_width(); const auto start = arr->offset() * byte_width; 
ARROW_ASSIGN_OR_RAISE(auto sliced_buffer, SliceBufferSafe(array_ref->buffers[1], start)); - dlm_tensor->dl_tensor.data = + ctx->tensor.dl_tensor.data = const_cast(reinterpret_cast(sliced_buffer->address())); } else { - dlm_tensor->dl_tensor.data = const_cast( + ctx->tensor.dl_tensor.data = const_cast( reinterpret_cast(array_ref->buffers[1]->address())); } // Define DLDevice struct ARROW_ASSIGN_OR_RAISE(auto device, ExportDevice(arr)) - dlm_tensor->dl_tensor.device = device; - - dlm_tensor->dl_tensor.ndim = 1; - dlm_tensor->dl_tensor.dtype = dlpack_type; - dlm_tensor->dl_tensor.shape = const_cast(&array_ref->length); - dlm_tensor->dl_tensor.strides = NULL; - dlm_tensor->dl_tensor.byte_offset = 0; - - return dlm_tensor; + ctx->tensor.dl_tensor.device = device; + + ctx->tensor.dl_tensor.ndim = 1; + ctx->tensor.dl_tensor.dtype = dlpack_type; + ctx->tensor.dl_tensor.shape = const_cast(&array_ref->length); + ctx->tensor.dl_tensor.strides = NULL; + ctx->tensor.dl_tensor.byte_offset = 0; + + // return dlm_tensor; + ctx->tensor.manager_ctx = ctx.get(); + ctx->tensor.deleter = [](struct DLManagedTensor* self) { + delete reinterpret_cast(self->manager_ctx); + }; + return &ctx.release()->tensor; } Result ExportDevice(const std::shared_ptr& arr) { From d232de444793a2f9afcb06ccaf4c354b2a969638 Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Thu, 7 Dec 2023 12:00:08 +0100 Subject: [PATCH 61/73] Rename pycapsule_deleter to dlpack_pycapsule_deleter and reorganize the code --- python/pyarrow/_dlpack.pxi | 14 ++++++++------ python/pyarrow/array.pxi | 2 +- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/python/pyarrow/_dlpack.pxi b/python/pyarrow/_dlpack.pxi index 2122b516af1..931b037412b 100644 --- a/python/pyarrow/_dlpack.pxi +++ b/python/pyarrow/_dlpack.pxi @@ -22,7 +22,7 @@ from cpython.pycapsule cimport PyCapsule_New from cython import sizeof -cdef void pycapsule_deleter(object dltensor) noexcept: +cdef void dlpack_pycapsule_deleter(object dltensor) noexcept: cdef 
DLManagedTensor* dlm_tensor cdef PyObject* err_type cdef PyObject* err_value @@ -36,12 +36,14 @@ cdef void pycapsule_deleter(object dltensor) noexcept: # we create another one cpython.PyErr_Fetch(&err_type, &err_value, &err_traceback) - if cpython.PyCapsule_IsValid(dltensor, 'dltensor'): - dlm_tensor = cpython.PyCapsule_GetPointer( - dltensor, 'dltensor') - dlm_tensor.deleter(dlm_tensor) - else: + dlm_tensor = cpython.PyCapsule_GetPointer(dltensor, 'dltensor') + if dlm_tensor == NULL: cpython.PyErr_WriteUnraisable(dltensor) + # The deleter can be NULL if there is no way for the caller + # to provide a reasonable destructor + elif dlm_tensor.deleter: + dlm_tensor.deleter(dlm_tensor) + assert (not cpython.PyErr_Occurred()) # Set the error indicator from err_type, err_value, err_traceback cpython.PyErr_Restore(err_type, err_value, err_traceback) diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index c067b43033e..3c944fc6e55 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -1799,7 +1799,7 @@ cdef class Array(_PandasConvertible): c_dlm_tensor = ExportToDLPack(pyarrow_unwrap_array(self)) dlm_tensor = GetResultValue(c_dlm_tensor) - return PyCapsule_New(dlm_tensor, 'dltensor', pycapsule_deleter) + return PyCapsule_New(dlm_tensor, 'dltensor', dlpack_pycapsule_deleter) else: raise NotImplementedError( "Only stream=None is supported." 
From 602e7b582e0439e7ddf6cb1cdb855c99e3479fb7 Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Thu, 7 Dec 2023 12:07:27 +0100 Subject: [PATCH 62/73] Add link to dlpack GitHub repo --- docs/source/python/dlpack.rst | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/docs/source/python/dlpack.rst b/docs/source/python/dlpack.rst index 57a386636f9..f612ebabde5 100644 --- a/docs/source/python/dlpack.rst +++ b/docs/source/python/dlpack.rst @@ -20,11 +20,12 @@ The DLPack Protocol =================== -The DLPack Protocol is a stable in-memory data structure -that allows exchange between major frameworks working -with multidimensional arrays or tensors. It is -designed for cross hardware support meaning it allows exchange -of data on devices other than the CPU (e.g. GPU). +`The DLPack Protocol `_ +is a stable in-memory data structure that allows exchange +between major frameworks working with multidimensional +arrays or tensors. It is designed for cross hardware +support meaning it allows exchange of data on devices other +than the CPU (e.g. GPU). 
DLPack protocol had been `selected as the Python array API standard `_ From d9b3182123a0178d56468c50199941d94d01a406 Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Thu, 7 Dec 2023 12:33:57 +0100 Subject: [PATCH 63/73] Add check to ExportDevice and restructure ExportArray --- cpp/src/arrow/c/dlpack.cc | 27 ++++++++++++++++++++------- cpp/src/arrow/c/dlpack_test.cc | 4 ++++ 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/cpp/src/arrow/c/dlpack.cc b/cpp/src/arrow/c/dlpack.cc index a43aaaaffe4..a075d5785c4 100644 --- a/cpp/src/arrow/c/dlpack.cc +++ b/cpp/src/arrow/c/dlpack.cc @@ -21,6 +21,7 @@ #include "arrow/c/dlpack_abi.h" #include "arrow/device.h" #include "arrow/type.h" +#include "arrow/type_traits.h" namespace arrow::dlpack { @@ -61,12 +62,12 @@ struct ManagerCtx { }; Result ExportArray(const std::shared_ptr& arr) { - if (arr->null_count() > 0) { - return Status::TypeError("Can only use DLPack on arrays with no nulls."); - } + // Define DLDevice struct nad check if array type is supported + // by the DLPack protocol at the same time. Raise TypeError if not. + // Supported data types: int, uint, float with no validity buffer. + ARROW_ASSIGN_OR_RAISE(auto device, ExportDevice(arr)) // Define the DLDataType struct - // Supported data types: int, uint, float const DataType* arrow_type = arr->type().get(); ARROW_ASSIGN_OR_RAISE(auto dlpack_type, GetDLDataType(*arrow_type)); @@ -92,10 +93,7 @@ Result ExportArray(const std::shared_ptr& arr) { reinterpret_cast(array_ref->buffers[1]->address())); } - // Define DLDevice struct - ARROW_ASSIGN_OR_RAISE(auto device, ExportDevice(arr)) ctx->tensor.dl_tensor.device = device; - ctx->tensor.dl_tensor.ndim = 1; ctx->tensor.dl_tensor.dtype = dlpack_type; ctx->tensor.dl_tensor.shape = const_cast(&array_ref->length); @@ -111,6 +109,21 @@ Result ExportArray(const std::shared_ptr& arr) { } Result ExportDevice(const std::shared_ptr& arr) { + // Check if array is supported by the DLPack protocol. 
+ if (arr->null_count() > 0) { + return Status::TypeError("Can only use DLPack on arrays with no nulls."); + } + const DataType* arrow_type = arr->type().get(); + if (arrow_type->id() == Type::BOOL) { + return Status::TypeError("Bit-packed boolean data type not supported by DLPack."); + } + if (!is_integer(arrow_type->id()) && !is_unsigned_integer(arrow_type->id()) && + !is_floating(arrow_type->id())) { + return Status::TypeError("DataType is not compatible with DLPack spec: ", + arrow_type->ToString()); + } + + // Define DLDevice struct DLDevice device; if (arr->data()->buffers[1]->device_type() == DeviceAllocationType::kCPU) { device.device_id = 0; diff --git a/cpp/src/arrow/c/dlpack_test.cc b/cpp/src/arrow/c/dlpack_test.cc index dc8c282e0c2..2043da30c6a 100644 --- a/cpp/src/arrow/c/dlpack_test.cc +++ b/cpp/src/arrow/c/dlpack_test.cc @@ -108,6 +108,10 @@ TEST_F(TestExportArray, TestUnSupportedArray) { ASSERT_RAISES_WITH_MESSAGE( TypeError, "Type error: Bit-packed boolean data type not supported by DLPack.", arrow::dlpack::ExportArray(array_boolean)); + + ASSERT_RAISES_WITH_MESSAGE( + TypeError, "Type error: Bit-packed boolean data type not supported by DLPack.", + arrow::dlpack::ExportDevice(array_boolean)); } } // namespace arrow::dlpack From 015ac3d64c7aa3385a1dd517f99d2e531dc23937 Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Mon, 11 Dec 2023 06:27:42 +0100 Subject: [PATCH 64/73] Remove old code --- cpp/src/arrow/c/dlpack_test.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/cpp/src/arrow/c/dlpack_test.cc b/cpp/src/arrow/c/dlpack_test.cc index 2043da30c6a..e09ecfee59d 100644 --- a/cpp/src/arrow/c/dlpack_test.cc +++ b/cpp/src/arrow/c/dlpack_test.cc @@ -25,8 +25,6 @@ namespace arrow::dlpack { -// using ExportArray = arrow::dlpack::ExportArray; - class TestExportArray : public ::testing::Test { public: void SetUp() {} From d2eb7c87fb11eb49c8d6b0b2d3d60f508b45db51 Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Mon, 11 Dec 2023 16:08:47 +0100 Subject: [PATCH 
65/73] Add dlpack_abi.h to rat_exclude_files.txt. --- dev/release/rat_exclude_files.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt index ce637bf8392..4f86a12afe4 100644 --- a/dev/release/rat_exclude_files.txt +++ b/dev/release/rat_exclude_files.txt @@ -12,6 +12,7 @@ ci/etc/*.patch ci/vcpkg/*.patch CHANGELOG.md cpp/CHANGELOG_PARQUET.md +cpp/src/arrow/c/dlpack_abi.h cpp/src/arrow/io/mman.h cpp/src/arrow/util/random.h cpp/src/arrow/status.cc From 15ead8fc464035ccc3c103fbfd315507dc8c60d8 Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Tue, 12 Dec 2023 06:17:52 +0100 Subject: [PATCH 66/73] dlpack.cc - Add unnamed namespace and optimize code in ExportDevice --- cpp/src/arrow/c/dlpack.cc | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/cpp/src/arrow/c/dlpack.cc b/cpp/src/arrow/c/dlpack.cc index a075d5785c4..16dfcc57484 100644 --- a/cpp/src/arrow/c/dlpack.cc +++ b/cpp/src/arrow/c/dlpack.cc @@ -25,6 +25,8 @@ namespace arrow::dlpack { +namespace { + Result GetDLDataType(const DataType& type) { DLDataType dtype; dtype.lanes = 1; @@ -56,6 +58,7 @@ Result GetDLDataType(const DataType& type) { } } +} // namespace struct ManagerCtx { std::shared_ptr ref; DLManagedTensor tensor; @@ -100,7 +103,6 @@ Result ExportArray(const std::shared_ptr& arr) { ctx->tensor.dl_tensor.strides = NULL; ctx->tensor.dl_tensor.byte_offset = 0; - // return dlm_tensor; ctx->tensor.manager_ctx = ctx.get(); ctx->tensor.deleter = [](struct DLManagedTensor* self) { delete reinterpret_cast(self->manager_ctx); @@ -117,8 +119,7 @@ Result ExportDevice(const std::shared_ptr& arr) { if (arrow_type->id() == Type::BOOL) { return Status::TypeError("Bit-packed boolean data type not supported by DLPack."); } - if (!is_integer(arrow_type->id()) && !is_unsigned_integer(arrow_type->id()) && - !is_floating(arrow_type->id())) { + if (!is_integer(arrow_type->id()) && !is_floating(arrow_type->id())) { return 
Status::TypeError("DataType is not compatible with DLPack spec: ", arrow_type->ToString()); } From 5e871388d98262c46bbaf42fbf560001e8491dcc Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Tue, 12 Dec 2023 08:23:54 +0100 Subject: [PATCH 67/73] dlpack_test.cc - remove gen, add a check for memory deallocation and change for loop --- cpp/src/arrow/c/dlpack_test.cc | 63 ++++++++++++++++++++-------------- 1 file changed, 38 insertions(+), 25 deletions(-) diff --git a/cpp/src/arrow/c/dlpack_test.cc b/cpp/src/arrow/c/dlpack_test.cc index e09ecfee59d..deaa7d278d1 100644 --- a/cpp/src/arrow/c/dlpack_test.cc +++ b/cpp/src/arrow/c/dlpack_test.cc @@ -20,8 +20,8 @@ #include "arrow/array/array_base.h" #include "arrow/c/dlpack.h" #include "arrow/c/dlpack_abi.h" +#include "arrow/memory_pool.h" #include "arrow/testing/gtest_util.h" -#include "arrow/testing/random.h" namespace arrow::dlpack { @@ -62,47 +62,60 @@ auto check_dlptensor = [](const std::shared_ptr& arr, }; TEST_F(TestExportArray, TestSupportedArray) { - random::RandomArrayGenerator gen(0); - - std::vector> arrow_types = { - int8(), uint8(), int16(), uint16(), int32(), uint32(), - int64(), uint64(), float16(), float32(), float64(), - }; - - std::vector dlpack_types = { - DLDataTypeCode::kDLInt, DLDataTypeCode::kDLUInt, DLDataTypeCode::kDLInt, - DLDataTypeCode::kDLUInt, DLDataTypeCode::kDLInt, DLDataTypeCode::kDLUInt, - DLDataTypeCode::kDLInt, DLDataTypeCode::kDLUInt, DLDataTypeCode::kDLFloat, - DLDataTypeCode::kDLFloat, DLDataTypeCode::kDLFloat, - }; - - for (int64_t i = 0; i < 11; ++i) { - const std::shared_ptr array = gen.ArrayOf(arrow_types[i], 10, 0); - check_dlptensor(array, arrow_types[i], dlpack_types[i], 10); + std::vector, DLDataTypeCode>> cases = { + {int8(), DLDataTypeCode::kDLInt}, + {uint8(), DLDataTypeCode::kDLUInt}, + { + int16(), + DLDataTypeCode::kDLInt, + }, + {uint16(), DLDataTypeCode::kDLUInt}, + { + int32(), + DLDataTypeCode::kDLInt, + }, + {uint32(), DLDataTypeCode::kDLUInt}, + { + int64(), + 
DLDataTypeCode::kDLInt, + }, + {uint64(), DLDataTypeCode::kDLUInt}, + {float16(), DLDataTypeCode::kDLFloat}, + {float32(), DLDataTypeCode::kDLFloat}, + {float64(), DLDataTypeCode::kDLFloat}}; + + const auto allocated_bytes = arrow::default_memory_pool()->bytes_allocated(); + + for (auto [arrow_type, dlpack_type] : cases) { + const std::shared_ptr array = + ArrayFromJSON(arrow_type, "[1, 0, 10, 0, 2, 1, 3, 5, 1, 0]"); + check_dlptensor(array, arrow_type, dlpack_type, 10); ASSERT_OK_AND_ASSIGN(auto sliced_1, array->SliceSafe(1, 5)); - check_dlptensor(sliced_1, arrow_types[i], dlpack_types[i], 5); + check_dlptensor(sliced_1, arrow_type, dlpack_type, 5); ASSERT_OK_AND_ASSIGN(auto sliced_2, array->SliceSafe(0, 5)); - check_dlptensor(sliced_2, arrow_types[i], dlpack_types[i], 5); + check_dlptensor(sliced_2, arrow_type, dlpack_type, 5); ASSERT_OK_AND_ASSIGN(auto sliced_3, array->SliceSafe(3)); - check_dlptensor(sliced_3, arrow_types[i], dlpack_types[i], 7); + check_dlptensor(sliced_3, arrow_type, dlpack_type, 7); } + + ASSERT_EQ(allocated_bytes, arrow::default_memory_pool()->bytes_allocated()); } TEST_F(TestExportArray, TestUnSupportedArray) { - random::RandomArrayGenerator gen(0); + const std::shared_ptr array_with_null = ArrayFromJSON(int8(), "[1, 100, null]"); + const std::shared_ptr array_string = + ArrayFromJSON(utf8(), R"(["itsy", "bitsy", "spider"])"); + const std::shared_ptr array_boolean = ArrayFromJSON(boolean(), "[true, false]"); - const std::shared_ptr array_with_null = gen.Int8(10, 1, 100, 1); ASSERT_RAISES_WITH_MESSAGE(TypeError, "Type error: Can only use DLPack on arrays with no nulls.", arrow::dlpack::ExportArray(array_with_null)); - const std::shared_ptr array_string = gen.String(10, 0, 10, 0); ASSERT_RAISES_WITH_MESSAGE(TypeError, "Type error: DataType is not compatible with DLPack spec: " + array_string->type()->ToString(), arrow::dlpack::ExportArray(array_string)); - const std::shared_ptr array_boolean = gen.Boolean(10, 0.5, 0); 
ASSERT_RAISES_WITH_MESSAGE( TypeError, "Type error: Bit-packed boolean data type not supported by DLPack.", arrow::dlpack::ExportArray(array_boolean)); From 2160ecd1ec7db2f051cc0cea7091b6570731a70a Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Tue, 12 Dec 2023 09:23:34 +0100 Subject: [PATCH 68/73] _dlpack.pxi - remove unused imports --- python/pyarrow/_dlpack.pxi | 3 --- 1 file changed, 3 deletions(-) diff --git a/python/pyarrow/_dlpack.pxi b/python/pyarrow/_dlpack.pxi index 931b037412b..c2f4cff6406 100644 --- a/python/pyarrow/_dlpack.pxi +++ b/python/pyarrow/_dlpack.pxi @@ -15,11 +15,8 @@ # specific language governing permissions and limitations # under the License. -from libc.stdlib cimport malloc, free - cimport cpython from cpython.pycapsule cimport PyCapsule_New -from cython import sizeof cdef void dlpack_pycapsule_deleter(object dltensor) noexcept: From 8bb717333eae8b3301bab299ac699c5ea06c59e4 Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Tue, 12 Dec 2023 09:41:45 +0100 Subject: [PATCH 69/73] array.pxi - simplify the code and change docstrings --- python/pyarrow/array.pxi | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index 3c944fc6e55..051bf4fc28f 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -1793,11 +1793,8 @@ cdef class Array(_PandasConvertible): capsule : PyCapsule A DLPack capsule for the array, pointing to a DLManagedTensor. """ - cdef CResult[DLManagedTensor*] c_dlm_tensor - if stream is None: - c_dlm_tensor = ExportToDLPack(pyarrow_unwrap_array(self)) - dlm_tensor = GetResultValue(c_dlm_tensor) + dlm_tensor = GetResultValue(ExportToDLPack(pyarrow_unwrap_array(self))) return PyCapsule_New(dlm_tensor, 'dltensor', dlpack_pycapsule_deleter) else: @@ -1807,7 +1804,7 @@ cdef class Array(_PandasConvertible): def __dlpack_device__(self): """ - Performs the operation __dlpack_device__. + Returns the DLPack device tuple this arrays resides on.
Returns ------- @@ -1815,10 +1812,7 @@ cdef class Array(_PandasConvertible): Tuple with enumerator specifying the type of the device and index of the device which is 0 by default for CPU. """ - cdef CResult[DLDevice] c_device - c_device = ExportDevice(pyarrow_unwrap_array(self)) - device = GetResultValue(c_device) - + device = GetResultValue(ExportDevice(pyarrow_unwrap_array(self))) return (device.device_type, device.device_id) From 938994181b30855b416f80ffad3f9942880e4c45 Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Tue, 12 Dec 2023 10:20:25 +0100 Subject: [PATCH 70/73] test_dlpack.py - add a check for memory deallocation --- python/pyarrow/tests/test_dlpack.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/python/pyarrow/tests/test_dlpack.py b/python/pyarrow/tests/test_dlpack.py index 78c4b30f54b..7cf3f4acdbd 100644 --- a/python/pyarrow/tests/test_dlpack.py +++ b/python/pyarrow/tests/test_dlpack.py @@ -16,6 +16,7 @@ # under the License. import ctypes +from functools import wraps import pytest import numpy as np @@ -38,6 +39,18 @@ def check_dlpack_export(arr, expected_arr): assert arr.__dlpack_device__() == (1, 0) +def check_bytes_allocated(f): + @wraps(f) + def wrapper(*args, **kwargs): + allocated_bytes = pa.total_allocated_bytes() + try: + return f(*args, **kwargs) + finally: + assert pa.total_allocated_bytes() == allocated_bytes + return wrapper + + +@check_bytes_allocated @pytest.mark.parametrize( ('value_type', 'np_type'), [ From 94dec4baf48b0b8cee276d9c3e0daf48a0b0172a Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Tue, 12 Dec 2023 13:56:15 +0100 Subject: [PATCH 71/73] Change docstrings in __dlpack_device__ --- python/pyarrow/array.pxi | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index 051bf4fc28f..43fb3efd079 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -1808,9 +1808,10 @@ cdef class Array(_PandasConvertible): Returns ------- - tuple : 
Tuple[DLDeviceType, int] - Tuple with enumerator specifying the type of the device - and index of the device which is 0 by default for CPU. + tuple : Tuple[int, int] + Tuple with index specifying the type of the device (where + CPU = 1, see cpp/src/arrow/c/dpack_abi.h) and index of the + device which is 0 by default for CPU. """ device = GetResultValue(ExportDevice(pyarrow_unwrap_array(self))) return (device.device_type, device.device_id) From 811d2b56ffbd5313a31ed96db798daa4a267970a Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Wed, 13 Dec 2023 09:26:00 +0100 Subject: [PATCH 72/73] Add null type empty array to C++ tests --- cpp/src/arrow/c/dlpack_test.cc | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/cpp/src/arrow/c/dlpack_test.cc b/cpp/src/arrow/c/dlpack_test.cc index deaa7d278d1..765d695ba9f 100644 --- a/cpp/src/arrow/c/dlpack_test.cc +++ b/cpp/src/arrow/c/dlpack_test.cc @@ -102,24 +102,25 @@ TEST_F(TestExportArray, TestSupportedArray) { } TEST_F(TestExportArray, TestUnSupportedArray) { - const std::shared_ptr array_with_null = ArrayFromJSON(int8(), "[1, 100, null]"); - const std::shared_ptr array_string = - ArrayFromJSON(utf8(), R"(["itsy", "bitsy", "spider"])"); - const std::shared_ptr array_boolean = ArrayFromJSON(boolean(), "[true, false]"); + const std::shared_ptr array_null = ArrayFromJSON(null(), "[]"); + ASSERT_RAISES_WITH_MESSAGE(TypeError, + "Type error: DataType is not compatible with DLPack spec: " + + array_null->type()->ToString(), + arrow::dlpack::ExportArray(array_null)); + const std::shared_ptr array_with_null = ArrayFromJSON(int8(), "[1, 100, null]"); ASSERT_RAISES_WITH_MESSAGE(TypeError, "Type error: Can only use DLPack on arrays with no nulls.", arrow::dlpack::ExportArray(array_with_null)); + const std::shared_ptr array_string = + ArrayFromJSON(utf8(), R"(["itsy", "bitsy", "spider"])"); ASSERT_RAISES_WITH_MESSAGE(TypeError, "Type error: DataType is not compatible with DLPack spec: " + 
array_string->type()->ToString(), arrow::dlpack::ExportArray(array_string)); - ASSERT_RAISES_WITH_MESSAGE( - TypeError, "Type error: Bit-packed boolean data type not supported by DLPack.", - arrow::dlpack::ExportArray(array_boolean)); - + const std::shared_ptr array_boolean = ArrayFromJSON(boolean(), "[true, false]"); ASSERT_RAISES_WITH_MESSAGE( TypeError, "Type error: Bit-packed boolean data type not supported by DLPack.", arrow::dlpack::ExportDevice(array_boolean)); From 49a978fcef620e6c2f9d68cf3b9ebae03c5a1020 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Tue, 19 Dec 2023 18:37:18 +0100 Subject: [PATCH 73/73] Nits and cleanups --- cpp/src/arrow/c/dlpack.cc | 38 ++++++++++++++-------------------- cpp/src/arrow/c/dlpack.h | 9 ++++---- cpp/src/arrow/c/dlpack_test.cc | 20 +++++++++--------- python/pyarrow/array.pxi | 8 +++---- 4 files changed, 34 insertions(+), 41 deletions(-) diff --git a/cpp/src/arrow/c/dlpack.cc b/cpp/src/arrow/c/dlpack.cc index 16dfcc57484..13ee2761b0c 100644 --- a/cpp/src/arrow/c/dlpack.cc +++ b/cpp/src/arrow/c/dlpack.cc @@ -58,12 +58,13 @@ Result GetDLDataType(const DataType& type) { } } -} // namespace struct ManagerCtx { - std::shared_ptr ref; + std::shared_ptr array; DLManagedTensor tensor; }; +} // namespace + Result ExportArray(const std::shared_ptr& arr) { // Define DLDevice struct nad check if array type is supported // by the DLPack protocol at the same time. Raise TypeError if not. 
@@ -71,38 +72,31 @@ Result ExportArray(const std::shared_ptr& arr) { ARROW_ASSIGN_OR_RAISE(auto device, ExportDevice(arr)) // Define the DLDataType struct - const DataType* arrow_type = arr->type().get(); - ARROW_ASSIGN_OR_RAISE(auto dlpack_type, GetDLDataType(*arrow_type)); + const DataType& type = *arr->type(); + std::shared_ptr data = arr->data(); + ARROW_ASSIGN_OR_RAISE(auto dlpack_type, GetDLDataType(type)); - // Create ManagerCtx with the reference to - // the data of the array - std::shared_ptr array_ref = arr->data(); + // Create ManagerCtx that will serve as the owner of the DLManagedTensor std::unique_ptr ctx(new ManagerCtx); - ctx->ref = array_ref; // Define the data pointer to the DLTensor // If array is of length 0, data pointer should be NULL if (arr->length() == 0) { ctx->tensor.dl_tensor.data = NULL; - } else if (arr->offset() > 0) { - const auto byte_width = arr->type()->byte_width(); - const auto start = arr->offset() * byte_width; - ARROW_ASSIGN_OR_RAISE(auto sliced_buffer, - SliceBufferSafe(array_ref->buffers[1], start)); - ctx->tensor.dl_tensor.data = - const_cast(reinterpret_cast(sliced_buffer->address())); } else { - ctx->tensor.dl_tensor.data = const_cast( - reinterpret_cast(array_ref->buffers[1]->address())); + const auto data_offset = data->offset * type.byte_width(); + ctx->tensor.dl_tensor.data = + const_cast(data->buffers[1]->data() + data_offset); } ctx->tensor.dl_tensor.device = device; ctx->tensor.dl_tensor.ndim = 1; ctx->tensor.dl_tensor.dtype = dlpack_type; - ctx->tensor.dl_tensor.shape = const_cast(&array_ref->length); + ctx->tensor.dl_tensor.shape = const_cast(&data->length); ctx->tensor.dl_tensor.strides = NULL; ctx->tensor.dl_tensor.byte_offset = 0; + ctx->array = std::move(data); ctx->tensor.manager_ctx = ctx.get(); ctx->tensor.deleter = [](struct DLManagedTensor* self) { delete reinterpret_cast(self->manager_ctx); @@ -115,13 +109,13 @@ Result ExportDevice(const std::shared_ptr& arr) { if (arr->null_count() > 0) { return 
Status::TypeError("Can only use DLPack on arrays with no nulls."); } - const DataType* arrow_type = arr->type().get(); - if (arrow_type->id() == Type::BOOL) { + const DataType& type = *arr->type(); + if (type.id() == Type::BOOL) { return Status::TypeError("Bit-packed boolean data type not supported by DLPack."); } - if (!is_integer(arrow_type->id()) && !is_floating(arrow_type->id())) { + if (!is_integer(type.id()) && !is_floating(type.id())) { return Status::TypeError("DataType is not compatible with DLPack spec: ", - arrow_type->ToString()); + type.ToString()); } // Define DLDevice struct diff --git a/cpp/src/arrow/c/dlpack.h b/cpp/src/arrow/c/dlpack.h index 03b6c9c6120..d11ccfc1fd7 100644 --- a/cpp/src/arrow/c/dlpack.h +++ b/cpp/src/arrow/c/dlpack.h @@ -22,14 +22,13 @@ namespace arrow::dlpack { -/// \brief DLPack protocol for producing DLManagedTensor +/// \brief Export Arrow array as DLPack tensor. /// -/// DLMangedTensor is produced from an array as defined by -/// the DLPack protocol, see https://dmlc.github.io/dlpack/latest/. +/// DLManagedTensor is produced as defined by the DLPack protocol, +/// see https://dmlc.github.io/dlpack/latest/. /// /// Data types for which the protocol is supported are -/// primitive data types without NullType, BooleanType and -/// Decimal types. +/// integer and floating-point data types.
/// /// DLPack protocol only supports arrays with one contiguous /// memory region which means Arrow Arrays with validity buffers diff --git a/cpp/src/arrow/c/dlpack_test.cc b/cpp/src/arrow/c/dlpack_test.cc index 765d695ba9f..3136506bf39 100644 --- a/cpp/src/arrow/c/dlpack_test.cc +++ b/cpp/src/arrow/c/dlpack_test.cc @@ -30,9 +30,9 @@ class TestExportArray : public ::testing::Test { void SetUp() {} }; -auto check_dlptensor = [](const std::shared_ptr& arr, - std::shared_ptr arrow_type, - DLDataTypeCode dlpack_type, int64_t length) { +void CheckDLTensor(const std::shared_ptr& arr, + const std::shared_ptr& arrow_type, + DLDataTypeCode dlpack_type, int64_t length) { ASSERT_OK_AND_ASSIGN(auto dlmtensor, arrow::dlpack::ExportArray(arr)); auto dltensor = dlmtensor->dl_tensor; @@ -59,10 +59,10 @@ auto check_dlptensor = [](const std::shared_ptr& arr, ASSERT_EQ(0, device.device_id); dlmtensor->deleter(dlmtensor); -}; +} TEST_F(TestExportArray, TestSupportedArray) { - std::vector, DLDataTypeCode>> cases = { + const std::vector, DLDataTypeCode>> cases = { {int8(), DLDataTypeCode::kDLInt}, {uint8(), DLDataTypeCode::kDLUInt}, { @@ -89,19 +89,19 @@ TEST_F(TestExportArray, TestSupportedArray) { for (auto [arrow_type, dlpack_type] : cases) { const std::shared_ptr array = ArrayFromJSON(arrow_type, "[1, 0, 10, 0, 2, 1, 3, 5, 1, 0]"); - check_dlptensor(array, arrow_type, dlpack_type, 10); + CheckDLTensor(array, arrow_type, dlpack_type, 10); ASSERT_OK_AND_ASSIGN(auto sliced_1, array->SliceSafe(1, 5)); - check_dlptensor(sliced_1, arrow_type, dlpack_type, 5); + CheckDLTensor(sliced_1, arrow_type, dlpack_type, 5); ASSERT_OK_AND_ASSIGN(auto sliced_2, array->SliceSafe(0, 5)); - check_dlptensor(sliced_2, arrow_type, dlpack_type, 5); + CheckDLTensor(sliced_2, arrow_type, dlpack_type, 5); ASSERT_OK_AND_ASSIGN(auto sliced_3, array->SliceSafe(3)); - check_dlptensor(sliced_3, arrow_type, dlpack_type, 7); + CheckDLTensor(sliced_3, arrow_type, dlpack_type, 7); } ASSERT_EQ(allocated_bytes, 
arrow::default_memory_pool()->bytes_allocated()); } -TEST_F(TestExportArray, TestUnSupportedArray) { +TEST_F(TestExportArray, TestErrors) { const std::shared_ptr array_null = ArrayFromJSON(null(), "[]"); ASSERT_RAISES_WITH_MESSAGE(TypeError, "Type error: DataType is not compatible with DLPack spec: " + diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index 43fb3efd079..92b8d650401 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -1794,7 +1794,7 @@ cdef class Array(_PandasConvertible): A DLPack capsule for the array, pointing to a DLManagedTensor. """ if stream is None: - dlm_tensor = GetResultValue(ExportToDLPack(pyarrow_unwrap_array(self))) + dlm_tensor = GetResultValue(ExportToDLPack(self.sp_array)) return PyCapsule_New(dlm_tensor, 'dltensor', dlpack_pycapsule_deleter) else: @@ -1804,7 +1804,7 @@ cdef class Array(_PandasConvertible): def __dlpack_device__(self): """ - Returns the DLPack device tuple this arrays resides on. + Return the DLPack device tuple this array resides on. Returns ------- @@ -1813,8 +1813,8 @@ cdef class Array(_PandasConvertible): CPU = 1, see cpp/src/arrow/c/dpack_abi.h) and index of the device which is 0 by default for CPU. """ - device = GetResultValue(ExportDevice(pyarrow_unwrap_array(self))) - return (device.device_type, device.device_id) + device = GetResultValue(ExportDevice(self.sp_array)) + return device.device_type, device.device_id cdef _array_like_to_pandas(obj, options, types_mapper):