diff --git a/cuda_core/cuda/core/experimental/_memory/_virtual_memory_resource.py b/cuda_core/cuda/core/experimental/_memory/_virtual_memory_resource.py
index 04f0d33a0b..2806e2d0d5 100644
--- a/cuda_core/cuda/core/experimental/_memory/_virtual_memory_resource.py
+++ b/cuda_core/cuda/core/experimental/_memory/_virtual_memory_resource.py
@@ -74,7 +74,6 @@ class VirtualMemoryResourceOptions:
     peers: Iterable[int] = field(default_factory=tuple)
     self_access: VirtualMemoryAccessTypeT = "rw"
     peer_access: VirtualMemoryAccessTypeT = "rw"
-    win32_handle_metadata: int | None = 0
 
     _a = driver.CUmemAccess_flags
     _access_flags = {"rw": _a.CU_MEM_ACCESS_FLAGS_PROT_READWRITE, "r": _a.CU_MEM_ACCESS_FLAGS_PROT_READ, None: 0}
@@ -128,6 +127,8 @@ def _location_type_to_driver(spec: str):
 
     @staticmethod
     def _handle_type_to_driver(spec: str):
+        if spec == "win32":
+            raise NotImplementedError("win32 is currently not supported, please reach out to the CUDA Python team")
         handle_type = VirtualMemoryResourceOptions._handle_types.get(spec)
         if handle_type is None:
            raise ValueError(f"Unsupported handle_type: {spec!r}")
@@ -151,6 +152,13 @@ class VirtualMemoryResource(MemoryResource):
 
     config : VirtualMemoryResourceOptions
         A configuration object for the VirtualMemoryResource
+
+
+    Warning
+    -------
+    This is a low-level API that is provided only for convenience. Make sure you fully understand
+    how CUDA Virtual Memory Management works before using this. Other MemoryResource subclasses
+    in cuda.core should already meet the common needs.
     """
 
     def __init__(self, device_id: Device | int, config: VirtualMemoryResourceOptions = None):
@@ -217,7 +225,7 @@ def modify_allocation(self, buf: Buffer, new_size: int, config: VirtualMemoryRes
         prop.location.id = self.device.device_id
         prop.allocFlags.gpuDirectRDMACapable = 1 if self.config.gpu_direct_rdma else 0
         prop.requestedHandleTypes = VirtualMemoryResourceOptions._handle_type_to_driver(self.config.handle_type)
-        prop.win32HandleMetaData = self.config.win32_handle_metadata if self.config.win32_handle_metadata else 0
+        prop.win32HandleMetaData = 0
 
         # Query granularity
         gran_flag = VirtualMemoryResourceOptions._granularity_to_driver(self.config.granularity)
@@ -505,7 +513,7 @@ def allocate(self, size: int, stream: Stream | None = None) -> Buffer:
         prop.location.id = self.device.device_id if config.location_type == "device" else -1
         prop.allocFlags.gpuDirectRDMACapable = 1 if config.gpu_direct_rdma else 0
         prop.requestedHandleTypes = VirtualMemoryResourceOptions._handle_type_to_driver(config.handle_type)
-        prop.win32HandleMetaData = self.config.win32_handle_metadata if self.config.win32_handle_metadata else 0
+        prop.win32HandleMetaData = 0
 
         # ---- Query and apply granularity ----
         # Choose min vs recommended granularity per config
diff --git a/cuda_core/tests/test_memory.py b/cuda_core/tests/test_memory.py
index f536cbf78f..10f2d6e5b2 100644
--- a/cuda_core/tests/test_memory.py
+++ b/cuda_core/tests/test_memory.py
@@ -3,7 +3,6 @@
 
 import ctypes
 import sys
-from ctypes import wintypes
 
 try:
     from cuda.bindings import driver
@@ -325,23 +324,8 @@ def test_device_memory_resource_initialization(use_device_object):
 
 
 def get_handle_type():
-    def get_sa():
-        class SECURITY_ATTRIBUTES(ctypes.Structure):
-            _fields_ = [
-                ("nLength", wintypes.DWORD),
-                ("lpSecurityDescriptor", wintypes.LPVOID),
-                ("bInheritHandle", wintypes.BOOL),
-            ]
-
-        sa = SECURITY_ATTRIBUTES()
-        sa.nLength = ctypes.sizeof(sa)
-        sa.lpSecurityDescriptor = None
-        sa.bInheritHandle = False  # TODO: why?
-
-        return sa
-
     if IS_WINDOWS:
-        return (("win32", get_sa()), ("win32_kmt", None))
+        return (("win32", None), ("win32_kmt", None))
     else:
         return (("posix_fd", None),)
 
@@ -362,17 +346,17 @@ def test_vmm_allocator_basic_allocation(use_device_object, handle_type):
         pytest.skip("Virtual memory management is not supported on this device")
 
     handle_type, security_attribute = handle_type  # unpack
-    win32_handle_metadata = ctypes.addressof(security_attribute) if security_attribute else 0
-    options = VirtualMemoryResourceOptions(
-        handle_type=handle_type,
-        win32_handle_metadata=win32_handle_metadata,
-    )
+    options = VirtualMemoryResourceOptions(handle_type=handle_type)
     # Create VMM allocator with default config
     device_arg = device if use_device_object else device.device_id
     vmm_mr = VirtualMemoryResource(device_arg, config=options)
 
     # Test basic allocation
-    buffer = vmm_mr.allocate(4096)
+    try:
+        buffer = vmm_mr.allocate(4096)
+    except NotImplementedError:
+        assert handle_type == "win32"
+        return
     assert buffer.size >= 4096  # May be aligned up
     assert buffer.device_id == device.device_id
     assert buffer.memory_resource == vmm_mr
@@ -483,16 +467,15 @@ def test_vmm_allocator_grow_allocation(handle_type):
         pytest.skip("Virtual memory management is not supported on this device")
 
     handle_type, security_attribute = handle_type  # unpack
-    win32_handle_metadata = ctypes.addressof(security_attribute) if security_attribute else 0
-    options = VirtualMemoryResourceOptions(
-        handle_type=handle_type,
-        win32_handle_metadata=win32_handle_metadata,
-    )
-
+    options = VirtualMemoryResourceOptions(handle_type=handle_type)
    vmm_mr = VirtualMemoryResource(device, config=options)
 
     # Create initial allocation
-    buffer = vmm_mr.allocate(2 * 1024 * 1024)
+    try:
+        buffer = vmm_mr.allocate(2 * 1024 * 1024)
+    except NotImplementedError:
+        assert handle_type == "win32"
+        return
     original_size = buffer.size
 
     # Grow the allocation