@@ -74,7 +74,6 @@ class VirtualMemoryResourceOptions:
peers: Iterable[int] = field(default_factory=tuple)
self_access: VirtualMemoryAccessTypeT = "rw"
peer_access: VirtualMemoryAccessTypeT = "rw"
win32_handle_metadata: int | None = 0

_a = driver.CUmemAccess_flags
_access_flags = {"rw": _a.CU_MEM_ACCESS_FLAGS_PROT_READWRITE, "r": _a.CU_MEM_ACCESS_FLAGS_PROT_READ, None: 0}
@@ -128,6 +127,8 @@ def _location_type_to_driver(spec: str):

@staticmethod
def _handle_type_to_driver(spec: str):
if spec == "win32":
raise NotImplementedError("win32 is currently not supported, please reach out to the CUDA Python team")
handle_type = VirtualMemoryResourceOptions._handle_types.get(spec)
if handle_type is None:
raise ValueError(f"Unsupported handle_type: {spec!r}")
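
A quick sketch of the caller-visible effect of this change, mirroring the updated tests further down. Only the class and method names visible in this diff are taken from the PR; the top-level import path and the `Device` setup are assumptions.

```python
# Sketch only: a resource configured with a "win32" handle type can still be
# constructed, but allocation now fails, because the handle-type mapping above
# runs when the allocation properties are built.
from cuda.core.experimental import Device, VirtualMemoryResource, VirtualMemoryResourceOptions  # import path assumed

device = Device()
device.set_current()

mr = VirtualMemoryResource(device, config=VirtualMemoryResourceOptions(handle_type="win32"))
try:
    mr.allocate(4096)
except NotImplementedError as exc:
    print(exc)  # "win32 is currently not supported, please reach out to the CUDA Python team"
```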
@@ -151,6 +152,13 @@ class VirtualMemoryResource(MemoryResource):

config : VirtualMemoryResourceOptions
A configuration object for the VirtualMemoryResource


Warning
-------
This is a low-level API that is provided only for convenience. Make sure you fully understand
how CUDA Virtual Memory Management works before using it. The other MemoryResource subclasses
in cuda.core should already cover the common use cases.
"""

def __init__(self, device_id: Device | int, config: VirtualMemoryResourceOptions = None):
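
To make the warning concrete, here is a minimal sketch of the supported path on Linux (`posix_fd` handles), following the shape of the tests below. The import path is an assumption; `allocate`, `Buffer.size`, and `Buffer.device_id` are taken from the diff.

```python
# Minimal usage sketch, assuming the classes are exported from
# cuda.core.experimental and the device supports virtual memory management.
from cuda.core.experimental import Device, VirtualMemoryResource, VirtualMemoryResourceOptions

device = Device()
device.set_current()

options = VirtualMemoryResourceOptions(handle_type="posix_fd")  # Linux shareable handles
mr = VirtualMemoryResource(device, config=options)

buf = mr.allocate(4096)                  # may be rounded up to the allocation granularity
assert buf.size >= 4096
assert buf.device_id == device.device_id
buf.close()
```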
@@ -217,7 +225,7 @@ def modify_allocation(self, buf: Buffer, new_size: int, config: VirtualMemoryRes
prop.location.id = self.device.device_id
prop.allocFlags.gpuDirectRDMACapable = 1 if self.config.gpu_direct_rdma else 0
prop.requestedHandleTypes = VirtualMemoryResourceOptions._handle_type_to_driver(self.config.handle_type)
prop.win32HandleMetaData = self.config.win32_handle_metadata if self.config.win32_handle_metadata else 0
prop.win32HandleMetaData = 0

# Query granularity
gran_flag = VirtualMemoryResourceOptions._granularity_to_driver(self.config.granularity)
@@ -505,7 +513,7 @@ def allocate(self, size: int, stream: Stream | None = None) -> Buffer:
prop.location.id = self.device.device_id if config.location_type == "device" else -1
prop.allocFlags.gpuDirectRDMACapable = 1 if config.gpu_direct_rdma else 0
prop.requestedHandleTypes = VirtualMemoryResourceOptions._handle_type_to_driver(config.handle_type)
prop.win32HandleMetaData = self.config.win32_handle_metadata if self.config.win32_handle_metadata else 0
prop.win32HandleMetaData = 0

# ---- Query and apply granularity ----
# Choose min vs recommended granularity per config
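
For context on the granularity handling above, a rough sketch of the underlying driver query via cuda.bindings. The enum and call names are the standard CUDA driver API ones, not code from this PR, and a current CUDA context is assumed.

```python
# Illustrative only: query the allocation granularity for a pinned device
# allocation with POSIX file-descriptor handles, then round a request up to it.
from cuda.bindings import driver

prop = driver.CUmemAllocationProp()
prop.type = driver.CUmemAllocationType.CU_MEM_ALLOCATION_TYPE_PINNED
prop.location.type = driver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE
prop.location.id = 0  # device ordinal
prop.requestedHandleTypes = driver.CUmemAllocationHandleType.CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR
prop.win32HandleMetaData = 0  # always zero now that win32 metadata is unsupported

err, gran = driver.cuMemGetAllocationGranularity(
    prop, driver.CUmemAllocationGranularity_flags.CU_MEM_ALLOC_GRANULARITY_MINIMUM
)
assert err == driver.CUresult.CUDA_SUCCESS

requested = 4096
padded = ((requested + gran - 1) // gran) * gran  # physical size must be a multiple of the granularity
```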
43 changes: 13 additions & 30 deletions cuda_core/tests/test_memory.py
@@ -3,7 +3,6 @@

import ctypes
import sys
from ctypes import wintypes

try:
from cuda.bindings import driver
@@ -325,23 +324,8 @@ def test_device_memory_resource_initialization(use_device_object):


def get_handle_type():
def get_sa():
class SECURITY_ATTRIBUTES(ctypes.Structure):
_fields_ = [
("nLength", wintypes.DWORD),
("lpSecurityDescriptor", wintypes.LPVOID),
("bInheritHandle", wintypes.BOOL),
]

sa = SECURITY_ATTRIBUTES()
sa.nLength = ctypes.sizeof(sa)
sa.lpSecurityDescriptor = None
sa.bInheritHandle = False # TODO: why?

return sa

if IS_WINDOWS:
return (("win32", get_sa()), ("win32_kmt", None))
return (("win32", None), ("win32_kmt", None))
else:
return (("posix_fd", None),)

@@ -362,17 +346,17 @@ def test_vmm_allocator_basic_allocation(use_device_object, handle_type):
pytest.skip("Virtual memory management is not supported on this device")

handle_type, security_attribute = handle_type # unpack
win32_handle_metadata = ctypes.addressof(security_attribute) if security_attribute else 0
options = VirtualMemoryResourceOptions(
handle_type=handle_type,
win32_handle_metadata=win32_handle_metadata,
)
options = VirtualMemoryResourceOptions(handle_type=handle_type)
# Create VMM allocator with default config
device_arg = device if use_device_object else device.device_id
vmm_mr = VirtualMemoryResource(device_arg, config=options)

# Test basic allocation
buffer = vmm_mr.allocate(4096)
try:
buffer = vmm_mr.allocate(4096)
except NotImplementedError:
assert handle_type == "win32"
return
assert buffer.size >= 4096 # May be aligned up
assert buffer.device_id == device.device_id
assert buffer.memory_resource == vmm_mr
@@ -483,16 +467,15 @@ def test_vmm_allocator_grow_allocation(handle_type):
pytest.skip("Virtual memory management is not supported on this device")

handle_type, security_attribute = handle_type # unpack
win32_handle_metadata = ctypes.addressof(security_attribute) if security_attribute else 0
options = VirtualMemoryResourceOptions(
handle_type=handle_type,
win32_handle_metadata=win32_handle_metadata,
)

options = VirtualMemoryResourceOptions(handle_type=handle_type)
vmm_mr = VirtualMemoryResource(device, config=options)

# Create initial allocation
buffer = vmm_mr.allocate(2 * 1024 * 1024)
try:
buffer = vmm_mr.allocate(2 * 1024 * 1024)
except NotImplementedError:
assert handle_type == "win32"
return
original_size = buffer.size

# Grow the allocation