Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions python/pyarrow/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,9 @@ def print_entry(label, value):
RunEndEncodedScalar, ExtensionScalar)

# Buffers, allocation
from pyarrow.lib import (DeviceAllocationType, Device, MemoryManager,
default_cpu_memory_manager)

from pyarrow.lib import (Buffer, ResizableBuffer, foreign_buffer, py_buffer,
Codec, compress, decompress, allocate_buffer)

Expand Down
162 changes: 162 additions & 0 deletions python/pyarrow/device.pxi
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# cython: profile=False
# distutils: language = c++
# cython: embedsignature = True


# Python-visible enum of device allocation types.
# Every member takes the <char>-cast value of the C-level enumerator with the
# matching name (the CDeviceAllocationType_k* names declared for
# arrow::DeviceAllocationType), so Python members and C++ enumerators share
# the same numeric values.
cpdef enum DeviceAllocationType:
    CPU = <char> CDeviceAllocationType_kCPU
    CUDA = <char> CDeviceAllocationType_kCUDA
    CUDA_HOST = <char> CDeviceAllocationType_kCUDA_HOST
    OPENCL = <char> CDeviceAllocationType_kOPENCL
    VULKAN = <char> CDeviceAllocationType_kVULKAN
    METAL = <char> CDeviceAllocationType_kMETAL
    VPI = <char> CDeviceAllocationType_kVPI
    ROCM = <char> CDeviceAllocationType_kROCM
    ROCM_HOST = <char> CDeviceAllocationType_kROCM_HOST
    EXT_DEV = <char> CDeviceAllocationType_kEXT_DEV
    CUDA_MANAGED = <char> CDeviceAllocationType_kCUDA_MANAGED
    ONEAPI = <char> CDeviceAllocationType_kONEAPI
    WEBGPU = <char> CDeviceAllocationType_kWEBGPU
    HEXAGON = <char> CDeviceAllocationType_kHEXAGON


cdef object _wrap_device_allocation_type(CDeviceAllocationType device_type):
    # Turn a C-level device allocation type into the corresponding member of
    # the Python-level DeviceAllocationType enum. The value goes through the
    # same <char> cast that is used when the enum members are defined.
    cdef char raw_value = <char> device_type
    return DeviceAllocationType(raw_value)


cdef class Device(_Weakrefable):
    """
    Abstract interface for hardware devices.

    A Device stands for a piece of hardware that can access some memory
    spaces. When working with a Buffer or a raw memory address, it tells
    in which context that address has to be interpreted (for example
    CPU-accessible memory, or memory embedded on a particular GPU).
    """

    def __init__(self):
        # Instances are only ever created through Device.wrap().
        raise TypeError("Do not call Device's constructor directly, "
                        "use the device attribute of the MemoryManager instead.")

    cdef void init(self, const shared_ptr[CDevice]& device):
        # Keep a reference to the wrapped C++ device.
        self.device = device

    @staticmethod
    cdef wrap(const shared_ptr[CDevice]& device):
        # Bypass __init__ (which always raises) and attach the C++ device.
        cdef Device wrapped = Device.__new__(Device)
        wrapped.init(device)
        return wrapped

    def __eq__(self, other):
        cdef Device other_device
        if isinstance(other, Device):
            other_device = <Device> other
            # Delegate the comparison to the C++ Equals() implementation.
            return self.device.get().Equals(deref(other_device.device.get()))
        # Anything that is not a Device never compares equal.
        return False

    def __repr__(self):
        description = frombytes(self.device.get().ToString())
        return "<pyarrow.Device: {}>".format(description)

    @property
    def type_name(self):
        """
        A shorthand for this device's type.

        Returns
        -------
        str
        """
        c_name = self.device.get().type_name()
        return frombytes(c_name)

    @property
    def device_id(self):
        """
        A device ID to identify this device if there are multiple of this type.

        When there is no "device_id" equivalent (such as for the main CPU
        device on non-NUMA systems), -1 is returned.

        Returns
        -------
        int
        """
        return self.device.get().device_id()

    @property
    def is_cpu(self):
        """
        Whether this device is the main CPU device.

        This shorthand property is very useful when deciding whether a
        memory address is CPU-accessible.

        Returns
        -------
        bool
        """
        return self.device.get().is_cpu()

    @property
    def device_type(self):
        """
        Return the DeviceAllocationType of this device.

        Returns
        -------
        DeviceAllocationType
        """
        cdef CDeviceAllocationType c_type = self.device.get().device_type()
        # Hand back a Python enum member rather than a bare integer.
        return _wrap_device_allocation_type(c_type)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we wrap device_type, but not other properties like is_cpu?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is_cpu is a simple bool, and Cython takes care of returning that as a Python object. For the type here I wanted to return a Python enum (and not just an integer).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the explanation!



cdef class MemoryManager(_Weakrefable):
    """
    An object that provides memory management primitives.

    Every MemoryManager is tied to one particular Device instance.
    It may also carry additional parameters (such as a MemoryPool
    used to allocate CPU memory).

    """

    def __init__(self):
        # Instances are only ever created through MemoryManager.wrap().
        raise TypeError("Do not call MemoryManager's constructor directly, "
                        "use pyarrow.default_cpu_memory_manager() instead.")

    cdef void init(self, const shared_ptr[CMemoryManager]& mm):
        # Keep a reference to the wrapped C++ memory manager.
        self.memory_manager = mm

    @staticmethod
    cdef wrap(const shared_ptr[CMemoryManager]& mm):
        # Bypass __init__ (which always raises) and attach the C++ object.
        cdef MemoryManager wrapped = MemoryManager.__new__(MemoryManager)
        wrapped.init(mm)
        return wrapped

    def __repr__(self):
        device_description = frombytes(
            self.memory_manager.get().device().get().ToString()
        )
        return "<pyarrow.MemoryManager device: {}>".format(device_description)

    @property
    def device(self):
        """
        The device this MemoryManager is tied to.

        Returns
        -------
        Device
        """
        cdef CMemoryManager* c_manager = self.memory_manager.get()
        return Device.wrap(c_manager.device())

    @property
    def is_cpu(self):
        """
        Whether this MemoryManager is tied to the main CPU device.

        This shorthand property is very useful when deciding whether a
        memory address is CPU-accessible.

        Returns
        -------
        bool
        """
        cdef CMemoryManager* c_manager = self.memory_manager.get()
        return c_manager.is_cpu()


def default_cpu_memory_manager():
    """
    Return the default CPU MemoryManager instance.

    The returned singleton instance uses the default MemoryPool.

    Returns
    -------
    MemoryManager
    """
    cdef shared_ptr[CMemoryManager] c_manager = c_default_cpu_memory_manager()
    return MemoryManager.wrap(c_manager)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you plan to later subclass Device with CPUDevice and MemoryManager with CPUMemoryManager, respectively?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not necessarily, for now, if not needed? For CPU I am not sure it is worth it: looking at device.h, the main thing the subclass provides is the ability to specify a memory pool at creation (which we could also expose through default_cpu_memory_manager, if we want to expose this).
For CUDA there might be more CUDA-specific methods/attributes where it might be more worth providing a subclass.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Indeed, we can add them later when desired.

35 changes: 35 additions & 0 deletions python/pyarrow/includes/libarrow.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,38 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
cdef cppclass CProxyMemoryPool" arrow::ProxyMemoryPool"(CMemoryPool):
CProxyMemoryPool(CMemoryPool*)

# Cython declaration of the C++ enum arrow::DeviceAllocationType.
# Each CDeviceAllocationType_k* name is an alias for the C++ enumerator given
# in the quoted string next to it.
ctypedef enum CDeviceAllocationType "arrow::DeviceAllocationType":
    CDeviceAllocationType_kCPU "arrow::DeviceAllocationType::kCPU"
    CDeviceAllocationType_kCUDA "arrow::DeviceAllocationType::kCUDA"
    CDeviceAllocationType_kCUDA_HOST "arrow::DeviceAllocationType::kCUDA_HOST"
    CDeviceAllocationType_kOPENCL "arrow::DeviceAllocationType::kOPENCL"
    CDeviceAllocationType_kVULKAN "arrow::DeviceAllocationType::kVULKAN"
    CDeviceAllocationType_kMETAL "arrow::DeviceAllocationType::kMETAL"
    CDeviceAllocationType_kVPI "arrow::DeviceAllocationType::kVPI"
    CDeviceAllocationType_kROCM "arrow::DeviceAllocationType::kROCM"
    CDeviceAllocationType_kROCM_HOST "arrow::DeviceAllocationType::kROCM_HOST"
    CDeviceAllocationType_kEXT_DEV "arrow::DeviceAllocationType::kEXT_DEV"
    CDeviceAllocationType_kCUDA_MANAGED "arrow::DeviceAllocationType::kCUDA_MANAGED"
    CDeviceAllocationType_kONEAPI "arrow::DeviceAllocationType::kONEAPI"
    CDeviceAllocationType_kWEBGPU "arrow::DeviceAllocationType::kWEBGPU"
    CDeviceAllocationType_kHEXAGON "arrow::DeviceAllocationType::kHEXAGON"

# Cython declaration of the C++ class arrow::Device, exposed as CDevice.
cdef cppclass CDevice" arrow::Device":
    # Name of the device's type, as a C string.
    const char* type_name()
    # String description of the device.
    c_string ToString()
    # Compare this device with another one.
    c_bool Equals(const CDevice& other)
    # Numeric identifier of the device.
    int64_t device_id()
    # Whether this device is the main CPU device.
    c_bool is_cpu() const
    # Memory manager associated with this device by default.
    shared_ptr[CMemoryManager] default_memory_manager()
    # Allocation type of the device, as a CDeviceAllocationType value.
    CDeviceAllocationType device_type()

# Cython declaration of the C++ class arrow::MemoryManager, exposed as
# CMemoryManager.
cdef cppclass CMemoryManager" arrow::MemoryManager":
    # Device this memory manager is tied to.
    const shared_ptr[CDevice] device()
    # Whether this memory manager is tied to the main CPU device.
    c_bool is_cpu() const

# Binding for the C++ free function arrow::default_cpu_memory_manager(),
# made available to Cython code under the name c_default_cpu_memory_manager.
shared_ptr[CMemoryManager] c_default_cpu_memory_manager \
    " arrow::default_cpu_memory_manager"()

cdef cppclass CBuffer" arrow::Buffer":
CBuffer(const uint8_t* data, int64_t size)
const uint8_t* data()
Expand All @@ -328,6 +360,9 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
c_bool is_mutable() const
c_string ToHexString()
c_bool Equals(const CBuffer& other)
shared_ptr[CDevice] device()
const shared_ptr[CMemoryManager] memory_manager()
CDeviceAllocationType device_type()

CResult[shared_ptr[CBuffer]] SliceBufferSafe(
const shared_ptr[CBuffer]& buffer, int64_t offset)
Expand Down
33 changes: 33 additions & 0 deletions python/pyarrow/io.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -1327,6 +1327,39 @@ cdef class Buffer(_Weakrefable):
"""
return self.buffer.get().is_cpu()

@property
def device(self):
    """
    The device on which this buffer's memory resides.

    Returns
    -------
    Device
    """
    cdef shared_ptr[CDevice] c_device = self.buffer.get().device()
    return Device.wrap(c_device)

@property
def memory_manager(self):
    """
    The memory manager this buffer is associated with.

    Returns
    -------
    MemoryManager
    """
    cdef CBuffer* c_buffer = self.buffer.get()
    return MemoryManager.wrap(c_buffer.memory_manager())

@property
def device_type(self):
    """
    The type of the device on which this buffer's memory resides.

    Returns
    -------
    DeviceAllocationType
    """
    cdef CDeviceAllocationType c_type = self.buffer.get().device_type()
    # Hand back a Python enum member rather than a bare integer.
    return _wrap_device_allocation_type(c_type)

@property
def parent(self):
cdef shared_ptr[CBuffer] parent_buf = self.buffer.get().parent()
Expand Down
20 changes: 20 additions & 0 deletions python/pyarrow/lib.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -524,6 +524,26 @@ cdef class RecordBatch(_Tabular):
cdef void init(self, const shared_ptr[CRecordBatch]& table)


# C-level declaration of the Device extension type, which wraps a C++
# CDevice held through a shared_ptr.
cdef class Device(_Weakrefable):
    cdef:
        # Shared pointer to the wrapped C++ device.
        shared_ptr[CDevice] device

    # Attach the given C++ device to this instance.
    cdef void init(self, const shared_ptr[CDevice]& device)

    # Build a Device object around the given C++ device.
    @staticmethod
    cdef wrap(const shared_ptr[CDevice]& device)


# C-level declaration of the MemoryManager extension type, which wraps a C++
# CMemoryManager held through a shared_ptr.
cdef class MemoryManager(_Weakrefable):
    cdef:
        # Shared pointer to the wrapped C++ memory manager.
        shared_ptr[CMemoryManager] memory_manager

    # Attach the given C++ memory manager to this instance.
    cdef void init(self, const shared_ptr[CMemoryManager]& memory_manager)

    # Build a MemoryManager object around the given C++ memory manager.
    @staticmethod
    cdef wrap(const shared_ptr[CMemoryManager]& mm)


cdef class Buffer(_Weakrefable):
cdef:
shared_ptr[CBuffer] buffer
Expand Down
3 changes: 3 additions & 0 deletions python/pyarrow/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,9 @@ include "pandas-shim.pxi"
# Memory pools and allocation
include "memory.pxi"

# Device type and memory manager
include "device.pxi"

# DataType, Field, Schema
include "types.pxi"

Expand Down
43 changes: 43 additions & 0 deletions python/pyarrow/tests/test_device.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

import pyarrow as pa


def test_device_memory_manager():
    """Exercise the default CPU memory manager and the device tied to it."""
    manager = pa.default_cpu_memory_manager()
    assert manager.is_cpu

    cpu_device = manager.device
    assert cpu_device.is_cpu
    assert cpu_device.device_id == -1
    assert cpu_device.device_type == pa.DeviceAllocationType.CPU
    assert cpu_device.type_name == "arrow::CPUDevice"
    # Equality has to be reflexive at the very least.
    assert cpu_device == cpu_device

    # Both reprs embed the string description of the underlying device.
    assert repr(cpu_device) == "<pyarrow.Device: CPUDevice()>"
    assert repr(manager) == "<pyarrow.MemoryManager device: CPUDevice()>"


def test_buffer_device():
    """Check the device-related properties of a buffer of a CPU array."""
    values = pa.array([0, 1, 2])
    data_buffer = values.buffers()[1]

    assert data_buffer.device_type == pa.DeviceAllocationType.CPU
    assert isinstance(data_buffer.device, pa.Device)
    assert isinstance(data_buffer.memory_manager, pa.MemoryManager)

    # The buffer, its device and its memory manager all report being on CPU.
    assert data_buffer.is_cpu
    assert data_buffer.device.is_cpu
    assert data_buffer.device == pa.default_cpu_memory_manager().device
    assert data_buffer.memory_manager.is_cpu
2 changes: 2 additions & 0 deletions python/pyarrow/tests/test_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,8 @@ def test_set_timezone_db_path_non_windows():
pa.MemoryPool,
pa.LoggingMemoryPool,
pa.ProxyMemoryPool,
pa.Device,
pa.MemoryManager,
])
def test_extension_type_constructor_errors(klass):
# ARROW-2638: prevent calling extension class constructors directly
Expand Down