From 459f281005dc0d19b0c06f0a74a6cd6c091ebc27 Mon Sep 17 00:00:00 2001
From: Leo Fang <leof@nvidia.com>
Date: Wed, 3 Dec 2025 23:42:20 +0000
Subject: [PATCH 1/2] Purge win32 handle-type support and add low-level API warning

---
 .../_memory/_virtual_memory_resource.py       | 15 +++++--
 cuda_core/tests/test_memory.py                | 42 ++++++-------------
 2 files changed, 25 insertions(+), 32 deletions(-)

diff --git a/cuda_core/cuda/core/experimental/_memory/_virtual_memory_resource.py b/cuda_core/cuda/core/experimental/_memory/_virtual_memory_resource.py
index 04f0d33a0b..667f6245d5 100644
--- a/cuda_core/cuda/core/experimental/_memory/_virtual_memory_resource.py
+++ b/cuda_core/cuda/core/experimental/_memory/_virtual_memory_resource.py
@@ -74,7 +74,6 @@ class VirtualMemoryResourceOptions:
     peers: Iterable[int] = field(default_factory=tuple)
     self_access: VirtualMemoryAccessTypeT = "rw"
     peer_access: VirtualMemoryAccessTypeT = "rw"
-    win32_handle_metadata: int | None = 0
 
     _a = driver.CUmemAccess_flags
     _access_flags = {"rw": _a.CU_MEM_ACCESS_FLAGS_PROT_READWRITE, "r": _a.CU_MEM_ACCESS_FLAGS_PROT_READ, None: 0}
@@ -128,6 +127,9 @@ def _location_type_to_driver(spec: str):
 
     @staticmethod
     def _handle_type_to_driver(spec: str):
+        if spec == "win32":
+            raise NotImplementedError(
+                "win32 is currently not supported, please reach out to the CUDA Python team")
         handle_type = VirtualMemoryResourceOptions._handle_types.get(spec)
         if handle_type is None:
             raise ValueError(f"Unsupported handle_type: {spec!r}")
@@ -151,6 +153,13 @@ class VirtualMemoryResource(MemoryResource):
 
     config : VirtualMemoryResourceOptions
         A configuration object for the VirtualMemoryResource
+
+
+    Warning
+    -------
+        This is a low-level API that is provided only for convenience. Make sure you fully understand
+        how CUDA Virtual Memory Management works before using this. Other MemoryResource subclasses
+        in cuda.core should already meet the common needs.
     """
 
     def __init__(self, device_id: Device | int, config: VirtualMemoryResourceOptions = None):
@@ -217,7 +226,7 @@ def modify_allocation(self, buf: Buffer, new_size: int, config: VirtualMemoryRes
         prop.location.id = self.device.device_id
         prop.allocFlags.gpuDirectRDMACapable = 1 if self.config.gpu_direct_rdma else 0
         prop.requestedHandleTypes = VirtualMemoryResourceOptions._handle_type_to_driver(self.config.handle_type)
-        prop.win32HandleMetaData = self.config.win32_handle_metadata if self.config.win32_handle_metadata else 0
+        prop.win32HandleMetaData = 0
 
         # Query granularity
         gran_flag = VirtualMemoryResourceOptions._granularity_to_driver(self.config.granularity)
@@ -505,7 +514,7 @@ def allocate(self, size: int, stream: Stream | None = None) -> Buffer:
         prop.location.id = self.device.device_id if config.location_type == "device" else -1
         prop.allocFlags.gpuDirectRDMACapable = 1 if config.gpu_direct_rdma else 0
         prop.requestedHandleTypes = VirtualMemoryResourceOptions._handle_type_to_driver(config.handle_type)
-        prop.win32HandleMetaData = self.config.win32_handle_metadata if self.config.win32_handle_metadata else 0
+        prop.win32HandleMetaData = 0
 
         # ---- Query and apply granularity ----
         # Choose min vs recommended granularity per config
diff --git a/cuda_core/tests/test_memory.py b/cuda_core/tests/test_memory.py
index f536cbf78f..d0fac70a42 100644
--- a/cuda_core/tests/test_memory.py
+++ b/cuda_core/tests/test_memory.py
@@ -325,23 +325,8 @@ def test_device_memory_resource_initialization(use_device_object):
 
 
 def get_handle_type():
-    def get_sa():
-        class SECURITY_ATTRIBUTES(ctypes.Structure):
-            _fields_ = [
-                ("nLength", wintypes.DWORD),
-                ("lpSecurityDescriptor", wintypes.LPVOID),
-                ("bInheritHandle", wintypes.BOOL),
-            ]
-
-        sa = SECURITY_ATTRIBUTES()
-        sa.nLength = ctypes.sizeof(sa)
-        sa.lpSecurityDescriptor = None
-        sa.bInheritHandle = False  # TODO: why?
-
-        return sa
-
     if IS_WINDOWS:
-        return (("win32", get_sa()), ("win32_kmt", None))
+        return (("win32", None), ("win32_kmt", None))
     else:
         return (("posix_fd", None),)
 
@@ -362,17 +347,17 @@ def test_vmm_allocator_basic_allocation(use_device_object, handle_type):
         pytest.skip("Virtual memory management is not supported on this device")
 
     handle_type, security_attribute = handle_type  # unpack
-    win32_handle_metadata = ctypes.addressof(security_attribute) if security_attribute else 0
-    options = VirtualMemoryResourceOptions(
-        handle_type=handle_type,
-        win32_handle_metadata=win32_handle_metadata,
-    )
+    options = VirtualMemoryResourceOptions(handle_type=handle_type)
     # Create VMM allocator with default config
     device_arg = device if use_device_object else device.device_id
     vmm_mr = VirtualMemoryResource(device_arg, config=options)
 
     # Test basic allocation
-    buffer = vmm_mr.allocate(4096)
+    try:
+        buffer = vmm_mr.allocate(4096)
+    except NotImplementedError:
+        assert handle_type == "win32"
+        return
     assert buffer.size >= 4096  # May be aligned up
     assert buffer.device_id == device.device_id
     assert buffer.memory_resource == vmm_mr
@@ -483,16 +468,15 @@ def test_vmm_allocator_grow_allocation(handle_type):
         pytest.skip("Virtual memory management is not supported on this device")
 
     handle_type, security_attribute = handle_type  # unpack
-    win32_handle_metadata = ctypes.addressof(security_attribute) if security_attribute else 0
-    options = VirtualMemoryResourceOptions(
-        handle_type=handle_type,
-        win32_handle_metadata=win32_handle_metadata,
-    )
-
+    options = VirtualMemoryResourceOptions(handle_type=handle_type)
     vmm_mr = VirtualMemoryResource(device, config=options)
 
     # Create initial allocation
-    buffer = vmm_mr.allocate(2 * 1024 * 1024)
+    try:
+        buffer = vmm_mr.allocate(2 * 1024 * 1024)
+    except NotImplementedError:
+        assert handle_type == "win32"
+        return
     original_size = buffer.size
 
     # Grow the allocation

From 445cdf0a909609b1eb528bf0bdfd6cceb3ed94ac Mon Sep 17 00:00:00 2001
From: Leo Fang <leof@nvidia.com>
Date: Wed, 3 Dec 2025 23:46:56 +0000
Subject: [PATCH 2/2] fix linter errors

---
 .../cuda/core/experimental/_memory/_virtual_memory_resource.py | 3 +--
 cuda_core/tests/test_memory.py                                 | 1 -
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/cuda_core/cuda/core/experimental/_memory/_virtual_memory_resource.py b/cuda_core/cuda/core/experimental/_memory/_virtual_memory_resource.py
index 667f6245d5..2806e2d0d5 100644
--- a/cuda_core/cuda/core/experimental/_memory/_virtual_memory_resource.py
+++ b/cuda_core/cuda/core/experimental/_memory/_virtual_memory_resource.py
@@ -128,8 +128,7 @@ def _location_type_to_driver(spec: str):
     @staticmethod
     def _handle_type_to_driver(spec: str):
         if spec == "win32":
-            raise NotImplementedError(
-                "win32 is currently not supported, please reach out to the CUDA Python team")
+            raise NotImplementedError("win32 is currently not supported, please reach out to the CUDA Python team")
         handle_type = VirtualMemoryResourceOptions._handle_types.get(spec)
         if handle_type is None:
             raise ValueError(f"Unsupported handle_type: {spec!r}")
diff --git a/cuda_core/tests/test_memory.py b/cuda_core/tests/test_memory.py
index d0fac70a42..10f2d6e5b2 100644
--- a/cuda_core/tests/test_memory.py
+++ b/cuda_core/tests/test_memory.py
@@ -3,7 +3,6 @@
 
 import ctypes
 import sys
-from ctypes import wintypes
 
 try:
     from cuda.bindings import driver