From 489a2bef6ba3ae04de10cf6bd8fbb2cb84d055b9 Mon Sep 17 00:00:00 2001 From: anjakefala Date: Thu, 25 Jul 2024 21:44:37 +0000 Subject: [PATCH 01/11] [Python]: add bindings for additional Buffer class non-CPU methods --- python/pyarrow/device.pxi | 1 - python/pyarrow/includes/libarrow.pxd | 1 + python/pyarrow/io.pxi | 21 +++++++++++++++++++++ python/pyarrow/tests/test_cuda.py | 16 ++++++++++++++++ 4 files changed, 38 insertions(+), 1 deletion(-) diff --git a/python/pyarrow/device.pxi b/python/pyarrow/device.pxi index 26256de6209..73703601188 100644 --- a/python/pyarrow/device.pxi +++ b/python/pyarrow/device.pxi @@ -158,7 +158,6 @@ cdef class MemoryManager(_Weakrefable): """ return self.memory_manager.get().is_cpu() - def default_cpu_memory_manager(): """ Return the default CPU MemoryManager instance. diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index 8e6922a912a..011a819b5a7 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -366,6 +366,7 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: shared_ptr[CDevice] device() const shared_ptr[CMemoryManager] memory_manager() CDeviceAllocationType device_type() + CResult[shared_ptr[CBuffer]] Copy(shared_ptr[CBuffer] source, const shared_ptr[CMemoryManager]& to) const CResult[shared_ptr[CBuffer]] SliceBufferSafe( const shared_ptr[CBuffer]& buffer, int64_t offset) diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi index 1d942e8ccab..9761a1e14f5 100644 --- a/python/pyarrow/io.pxi +++ b/python/pyarrow/io.pxi @@ -1446,6 +1446,27 @@ cdef class Buffer(_Weakrefable): """ return _wrap_device_allocation_type(self.buffer.get().device_type()) + def copy(self, MemoryManager mm): + """ + The buffer contents will be copied into a new buffer allocated by the + given MemoryManager. This function supports cross-device copies. + + Parameters + --------- + MemoryManager + Memory manager used to allocate the new buffer. + + Returns + ------- + Buffer + """ + cdef: + shared_ptr[CBuffer] c_buffer + + c_buffer = GetResultValue(self.buffer.get().Copy(self.buffer, mm.unwrap())) + return pyarrow_wrap_buffer(c_buffer) + + @property def parent(self): cdef shared_ptr[CBuffer] parent_buf = self.buffer.get().parent() diff --git a/python/pyarrow/tests/test_cuda.py b/python/pyarrow/tests/test_cuda.py index a71fa036503..b6979059b7d 100644 --- a/python/pyarrow/tests/test_cuda.py +++ b/python/pyarrow/tests/test_cuda.py @@ -118,6 +118,22 @@ def make_random_buffer(size, target='host'): return arr, dbuf raise ValueError('invalid target value') +def test_copy_from_buffer(): + arr, buf = make_random_buffer(128) + cudabuf = global_context.buffer_from_data(buf) + + mm2 = global_context1.memory_manager + buf2 = cudabuf.copy(mm2) + cudabuf2 = cuda.CudaBuffer.from_buffer(buf2) + cudabuf2.size == cudabuf.size + + arr2 = np.frombuffer(cudabuf2.copy_to_host(), dtype=np.uint8) + np.testing.assert_equal(arr, arr2) + + assert cudabuf2.memory_manager.device == mm2.device + + + @pytest.mark.parametrize("size", [0, 1, 1000]) def test_context_device_buffer(size): From e0b05a3f13b7f25363f346913357122e0d6d2bc1 Mon Sep 17 00:00:00 2001 From: anjakefala Date: Wed, 7 Aug 2024 23:43:58 +0000 Subject: [PATCH 02/11] add test for copy buffer in test_io.py --- python/pyarrow/tests/test_io.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/python/pyarrow/tests/test_io.py b/python/pyarrow/tests/test_io.py index e2df1b1c468..111c75ff1e1 100644 --- a/python/pyarrow/tests/test_io.py +++ b/python/pyarrow/tests/test_io.py @@ -710,6 +710,13 @@ def test_non_cpu_buffer(pickle_module): # Sliced buffers with same address assert buf_on_gpu_sliced.equals(cuda_buf[2:4]) + # Testing copying buffer + mm = ctx.memory_manager + buf2 = cuda_buf.copy(mm) + cuda_buf2 = cuda.CudaBuffer.from_buffer(buf2) + cuda_buf2.size == cuda_buf.size + cuda_buf2.copy_to_host()[:] == b'testing' + # Buffers on different devices msg_device = "Device on which the data resides differs between buffers" with pytest.raises(ValueError, match=msg_device): From 77876f9c3e58f4e2e00b821f8f68dd15f6576415 Mon Sep 17 00:00:00 2001 From: anjakefala Date: Wed, 7 Aug 2024 23:58:04 +0000 Subject: [PATCH 03/11] Run linter --- python/pyarrow/device.pxi | 1 + python/pyarrow/io.pxi | 1 - python/pyarrow/tests/test_cuda.py | 3 +-- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/python/pyarrow/device.pxi b/python/pyarrow/device.pxi index 73703601188..26256de6209 100644 --- a/python/pyarrow/device.pxi +++ b/python/pyarrow/device.pxi @@ -158,6 +158,7 @@ cdef class MemoryManager(_Weakrefable): """ return self.memory_manager.get().is_cpu() + def default_cpu_memory_manager(): """ Return the default CPU MemoryManager instance. diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi index 9761a1e14f5..6fa2d02c919 100644 --- a/python/pyarrow/io.pxi +++ b/python/pyarrow/io.pxi @@ -1466,7 +1466,6 @@ cdef class Buffer(_Weakrefable): c_buffer = GetResultValue(self.buffer.get().Copy(self.buffer, mm.unwrap())) return pyarrow_wrap_buffer(c_buffer) - @property def parent(self): cdef shared_ptr[CBuffer] parent_buf = self.buffer.get().parent() diff --git a/python/pyarrow/tests/test_cuda.py b/python/pyarrow/tests/test_cuda.py index b6979059b7d..c0a2916c034 100644 --- a/python/pyarrow/tests/test_cuda.py +++ b/python/pyarrow/tests/test_cuda.py @@ -118,6 +118,7 @@ def make_random_buffer(size, target='host'): return arr, dbuf raise ValueError('invalid target value') + def test_copy_from_buffer(): arr, buf = make_random_buffer(128) cudabuf = global_context.buffer_from_data(buf) @@ -133,8 +134,6 @@ def test_copy_from_buffer(): assert cudabuf2.memory_manager.device == mm2.device - - @pytest.mark.parametrize("size", [0, 1, 1000]) def test_context_device_buffer(size): # Creating device buffer from host buffer; From af0258809be7f21ad25ad29b9bc8fa15730cf56d Mon Sep 17 00:00:00 2001 From: anjakefala Date: Mon, 12 Aug 2024 20:17:52 +0000 Subject: [PATCH 04/11] Also allow Device argument --- python/pyarrow/io.pxi | 24 ++++++++++++++++++------ python/pyarrow/tests/test_cuda.py | 17 ++++++++++------- python/pyarrow/tests/test_io.py | 10 ++++++---- 3 files changed, 34 insertions(+), 17 deletions(-) diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi index 6fa2d02c919..6cca4977993 100644 --- a/python/pyarrow/io.pxi +++ b/python/pyarrow/io.pxi @@ -1446,15 +1446,16 @@ cdef class Buffer(_Weakrefable): """ return _wrap_device_allocation_type(self.buffer.get().device_type()) - def copy(self, MemoryManager mm): + def copy(self, dest): """ - The buffer contents will be copied into a new buffer allocated by the - given MemoryManager. This function supports cross-device copies. + The buffer contents will be copied into a new buffer allocated onto + the destination MemoryManager or Device. It will return the new Buffer. + This function supports cross-device copies. Parameters --------- - MemoryManager - Memory manager used to allocate the new buffer. + dest + pyarrow.MemoryManager or pyarrow.Device used to allocate the new buffer. Returns ------- @@ -1462,8 +1463,19 @@ cdef class Buffer(_Weakrefable): """ cdef: shared_ptr[CBuffer] c_buffer + shared_ptr[CMemoryManager] c_memory_manager + + if isinstance(dest, Device): + c_memory_manager = (dest).unwrap().get().default_memory_manager() + elif isinstance(dest, MemoryManager): + c_memory_manager = (dest).unwrap() + else: + raise TypeError( + "Argument 'dest' is incorrect type (expected pyarrow.Device or " + f"pyarrow.MemoryManager, got {type(dest)})" + ) - c_buffer = GetResultValue(self.buffer.get().Copy(self.buffer, mm.unwrap())) + c_buffer = GetResultValue(self.buffer.get().Copy(self.buffer, c_memory_manager)) return pyarrow_wrap_buffer(c_buffer) @property diff --git a/python/pyarrow/tests/test_cuda.py b/python/pyarrow/tests/test_cuda.py index c0a2916c034..63ce9a851cf 100644 --- a/python/pyarrow/tests/test_cuda.py +++ b/python/pyarrow/tests/test_cuda.py @@ -124,14 +124,17 @@ def test_copy_from_buffer(): cudabuf = global_context.buffer_from_data(buf) mm2 = global_context1.memory_manager - buf2 = cudabuf.copy(mm2) - cudabuf2 = cuda.CudaBuffer.from_buffer(buf2) - cudabuf2.size == cudabuf.size - - arr2 = np.frombuffer(cudabuf2.copy_to_host(), dtype=np.uint8) - np.testing.assert_equal(arr, arr2) + for dest in [mm2, mm2.device]: + buf2 = cudabuf.copy(dest) + cudabuf2 = cuda.CudaBuffer.from_buffer(buf2) + cudabuf2.size == cudabuf.size + assert not cudabuf2.is_cpu + assert cudabuf2.device_type == pa.DeviceAllocationType.CUDA + + arr2 = np.frombuffer(cudabuf2.copy_to_host(), dtype=np.uint8) + np.testing.assert_equal(arr, arr2) - assert cudabuf2.memory_manager.device == mm2.device + assert cudabuf2.device == mm2.device @pytest.mark.parametrize("size", [0, 1, 1000]) diff --git a/python/pyarrow/tests/test_io.py b/python/pyarrow/tests/test_io.py index 111c75ff1e1..19d68bb674a 100644 --- a/python/pyarrow/tests/test_io.py +++ b/python/pyarrow/tests/test_io.py @@ -712,10 +712,12 @@ def test_non_cpu_buffer(pickle_module): # Testing copying buffer mm = ctx.memory_manager - buf2 = cuda_buf.copy(mm) - cuda_buf2 = cuda.CudaBuffer.from_buffer(buf2) - cuda_buf2.size == cuda_buf.size - cuda_buf2.copy_to_host()[:] == b'testing' + for dest in [mm, mm.device]: + buf2 = cuda_buf.copy(dest) + cuda_buf2 = cuda.CudaBuffer.from_buffer(buf2) + cuda_buf2.size == cuda_buf.size + cuda_buf2.copy_to_host()[:] == b'testing' + assert cuda_buf2.device == mm.device # Buffers on different devices msg_device = "Device on which the data resides differs between buffers" From d7a43bcc1ed59117d0f80c7b05e5cc6047281013 Mon Sep 17 00:00:00 2001 From: anjakefala Date: Fri, 16 Aug 2024 21:30:07 +0000 Subject: [PATCH 05/11] Attempting to fix doc bug --- python/pyarrow/io.pxi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi index 6cca4977993..e09f7816b4f 100644 --- a/python/pyarrow/io.pxi +++ b/python/pyarrow/io.pxi @@ -1453,9 +1453,9 @@ cdef class Buffer(_Weakrefable): This function supports cross-device copies. Parameters - --------- - dest - pyarrow.MemoryManager or pyarrow.Device used to allocate the new buffer. + ---------- + dest : pyarrow.MemoryManager or pyarrow.Device + Used to allocate the new buffer. Returns ------- From 91cbaee1614183d2fa3021356788d8a1a2cbf844 Mon Sep 17 00:00:00 2001 From: Anja Kefala Date: Fri, 6 Sep 2024 13:57:51 -0400 Subject: [PATCH 06/11] Update python/pyarrow/tests/test_cuda.py Co-authored-by: Joris Van den Bossche --- python/pyarrow/tests/test_cuda.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/pyarrow/tests/test_cuda.py b/python/pyarrow/tests/test_cuda.py index 63ce9a851cf..29b322e695a 100644 --- a/python/pyarrow/tests/test_cuda.py +++ b/python/pyarrow/tests/test_cuda.py @@ -126,6 +126,7 @@ def test_copy_from_buffer(): mm2 = global_context1.memory_manager for dest in [mm2, mm2.device]: buf2 = cudabuf.copy(dest) + assert buf2.device_type == pa.DeviceAllocationType.CUDA cudabuf2 = cuda.CudaBuffer.from_buffer(buf2) cudabuf2.size == cudabuf.size assert not cudabuf2.is_cpu From c08398a74025b1500dc376652cbd2bc13123f925 Mon Sep 17 00:00:00 2001 From: anjakefala Date: Fri, 6 Sep 2024 18:01:47 +0000 Subject: [PATCH 07/11] Simiplify buffer equality check --- python/pyarrow/tests/test_cuda.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/pyarrow/tests/test_cuda.py b/python/pyarrow/tests/test_cuda.py index 29b322e695a..c85a36e4de4 100644 --- a/python/pyarrow/tests/test_cuda.py +++ b/python/pyarrow/tests/test_cuda.py @@ -132,8 +132,7 @@ def test_copy_from_buffer(): assert not cudabuf2.is_cpu assert cudabuf2.device_type == pa.DeviceAllocationType.CUDA - arr2 = np.frombuffer(cudabuf2.copy_to_host(), dtype=np.uint8) - np.testing.assert_equal(arr, arr2) + assert cudabuf2.copy_to_host().equals(buf) assert cudabuf2.device == mm2.device From 42253b96dd41bbd2056abbc2cf0308bb413cb871 Mon Sep 17 00:00:00 2001 From: anjakefala Date: Fri, 6 Sep 2024 18:57:12 +0000 Subject: [PATCH 08/11] Add missing assert --- python/pyarrow/tests/test_cuda.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyarrow/tests/test_cuda.py b/python/pyarrow/tests/test_cuda.py index c85a36e4de4..9e9d543ee1c 100644 --- a/python/pyarrow/tests/test_cuda.py +++ b/python/pyarrow/tests/test_cuda.py @@ -128,7 +128,7 @@ def test_copy_from_buffer(): buf2 = cudabuf.copy(dest) assert buf2.device_type == pa.DeviceAllocationType.CUDA cudabuf2 = cuda.CudaBuffer.from_buffer(buf2) - cudabuf2.size == cudabuf.size + assert cudabuf2.size == cudabuf.size assert not cudabuf2.is_cpu assert cudabuf2.device_type == pa.DeviceAllocationType.CUDA From f7ff6823f110afaa5e9c4719c0cf89668c828eb2 Mon Sep 17 00:00:00 2001 From: anjakefala Date: Fri, 13 Sep 2024 17:07:06 +0000 Subject: [PATCH 09/11] Apply PR comments --- python/pyarrow/includes/libarrow.pxd | 4 +++- python/pyarrow/io.pxi | 20 +++++++++++--------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index 011a819b5a7..8bd29101ec6 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -366,7 +366,9 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: shared_ptr[CDevice] device() const shared_ptr[CMemoryManager] memory_manager() CDeviceAllocationType device_type() - CResult[shared_ptr[CBuffer]] Copy(shared_ptr[CBuffer] source, const shared_ptr[CMemoryManager]& to) const + + @staticmethod + CResult[shared_ptr[CBuffer]] Copy(shared_ptr[CBuffer] source, const shared_ptr[CMemoryManager]& to) CResult[shared_ptr[CBuffer]] SliceBufferSafe( const shared_ptr[CBuffer]& buffer, int64_t offset) diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi index e09f7816b4f..646b33b1427 100644 --- a/python/pyarrow/io.pxi +++ b/python/pyarrow/io.pxi @@ -1446,15 +1446,17 @@ cdef class Buffer(_Weakrefable): """ return _wrap_device_allocation_type(self.buffer.get().device_type()) - def copy(self, dest): + def copy(self, destination): """ + Copy the buffer to the destination device. + The buffer contents will be copied into a new buffer allocated onto the destination MemoryManager or Device. It will return the new Buffer. This function supports cross-device copies. Parameters ---------- - dest : pyarrow.MemoryManager or pyarrow.Device + destination : pyarrow.MemoryManager or pyarrow.Device Used to allocate the new buffer. Returns @@ -1465,17 +1467,17 @@ cdef class Buffer(_Weakrefable): shared_ptr[CBuffer] c_buffer shared_ptr[CMemoryManager] c_memory_manager - if isinstance(dest, Device): - c_memory_manager = (dest).unwrap().get().default_memory_manager() - elif isinstance(dest, MemoryManager): - c_memory_manager = (dest).unwrap() + if isinstance(destination, Device): + c_memory_manager = (destination).unwrap().get().default_memory_manager() + elif isinstance(destination, MemoryManager): + c_memory_manager = (destination).unwrap() else: raise TypeError( - "Argument 'dest' is incorrect type (expected pyarrow.Device or " - f"pyarrow.MemoryManager, got {type(dest)})" + "Argument 'destination' is incorrect type (expected pyarrow.Device or " + f"pyarrow.MemoryManager, got {type(destination)})" ) - c_buffer = GetResultValue(self.buffer.get().Copy(self.buffer, c_memory_manager)) + c_buffer = GetResultValue(CBuffer.Copy(self.buffer, c_memory_manager)) return pyarrow_wrap_buffer(c_buffer) @property From 931954349a0a558d983958915a7aaa87cb00186d Mon Sep 17 00:00:00 2001 From: anjakefala Date: Thu, 19 Sep 2024 19:03:52 +0000 Subject: [PATCH 10/11] Antoine review comments --- python/pyarrow/io.pxi | 3 ++- python/pyarrow/tests/test_cuda.py | 1 + python/pyarrow/tests/test_io.py | 4 ++-- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi index 646b33b1427..65c4485e55f 100644 --- a/python/pyarrow/io.pxi +++ b/python/pyarrow/io.pxi @@ -1477,7 +1477,8 @@ cdef class Buffer(_Weakrefable): f"pyarrow.MemoryManager, got {type(destination)})" ) - c_buffer = GetResultValue(CBuffer.Copy(self.buffer, c_memory_manager)) + with nogil: + c_buffer = GetResultValue(CBuffer.Copy(self.buffer, c_memory_manager)) return pyarrow_wrap_buffer(c_buffer) @property diff --git a/python/pyarrow/tests/test_cuda.py b/python/pyarrow/tests/test_cuda.py index 9e9d543ee1c..ec28e4ca731 100644 --- a/python/pyarrow/tests/test_cuda.py +++ b/python/pyarrow/tests/test_cuda.py @@ -127,6 +127,7 @@ def test_copy_from_buffer(): for dest in [mm2, mm2.device]: buf2 = cudabuf.copy(dest) assert buf2.device_type == pa.DeviceAllocationType.CUDA + assert buf2.copy(pa.default_cpu_memory_manager()).equals(buf) cudabuf2 = cuda.CudaBuffer.from_buffer(buf2) assert cudabuf2.size == cudabuf.size assert not cudabuf2.is_cpu diff --git a/python/pyarrow/tests/test_io.py b/python/pyarrow/tests/test_io.py index 19d68bb674a..7889badd074 100644 --- a/python/pyarrow/tests/test_io.py +++ b/python/pyarrow/tests/test_io.py @@ -715,8 +715,8 @@ def test_non_cpu_buffer(pickle_module): for dest in [mm, mm.device]: buf2 = cuda_buf.copy(dest) cuda_buf2 = cuda.CudaBuffer.from_buffer(buf2) - cuda_buf2.size == cuda_buf.size - cuda_buf2.copy_to_host()[:] == b'testing' + assert cuda_buf2.size == cuda_buf.size + assert cuda_buf2.copy_to_host()[:] == b'testing' assert cuda_buf2.device == mm.device # Buffers on different devices From 230abf2e114abc01b90089ab042c3076b258b5b3 Mon Sep 17 00:00:00 2001 From: anjakefala Date: Fri, 20 Sep 2024 21:57:18 +0000 Subject: [PATCH 11/11] Update for testing a Buffer instance on a non-cpu device --- python/pyarrow/tests/test_io.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/pyarrow/tests/test_io.py b/python/pyarrow/tests/test_io.py index 7889badd074..a0fc697fdf0 100644 --- a/python/pyarrow/tests/test_io.py +++ b/python/pyarrow/tests/test_io.py @@ -713,11 +713,11 @@ def test_non_cpu_buffer(pickle_module): # Testing copying buffer mm = ctx.memory_manager for dest in [mm, mm.device]: - buf2 = cuda_buf.copy(dest) + buf2 = buf_on_gpu.copy(dest) cuda_buf2 = cuda.CudaBuffer.from_buffer(buf2) assert cuda_buf2.size == cuda_buf.size - assert cuda_buf2.copy_to_host()[:] == b'testing' - assert cuda_buf2.device == mm.device + assert cuda_buf2.to_pybytes() == b'testing' + assert buf2.device == mm.device # Buffers on different devices msg_device = "Device on which the data resides differs between buffers"