From e75c0967de8dffafdd1973a8078336775b245f2e Mon Sep 17 00:00:00 2001 From: Andy Jost Date: Mon, 30 Mar 2026 10:16:31 -0700 Subject: [PATCH 1/4] Use C++ shared_ptr for IPC file descriptor cleanup Replace Python-level os.close() in IPCAllocationHandle with a C++ shared_ptr custom deleter that calls POSIX close() directly, avoiding unraisable exception errors during late interpreter shutdown. Made-with: Cursor --- cuda_core/cuda/core/_cpp/resource_handles.cpp | 21 +++++++++++++++++++ cuda_core/cuda/core/_cpp/resource_handles.hpp | 20 ++++++++++++++++++ cuda_core/cuda/core/_memory/_ipc.pxd | 5 +++-- cuda_core/cuda/core/_memory/_ipc.pyx | 20 +++++++----------- cuda_core/cuda/core/_resource_handles.pxd | 7 +++++++ cuda_core/cuda/core/_resource_handles.pyx | 6 ++++++ 6 files changed, 65 insertions(+), 14 deletions(-) diff --git a/cuda_core/cuda/core/_cpp/resource_handles.cpp b/cuda_core/cuda/core/_cpp/resource_handles.cpp index b3ba00b238..03ef964b0d 100644 --- a/cuda_core/cuda/core/_cpp/resource_handles.cpp +++ b/cuda_core/cuda/core/_cpp/resource_handles.cpp @@ -12,6 +12,10 @@ #include #include +#ifndef _WIN32 +#include +#endif + namespace cuda_core { // ============================================================================ @@ -1116,4 +1120,21 @@ CuLinkHandle create_culink_handle_ref(CUlinkState state) { return CuLinkHandle(box, &box->resource); } +// ============================================================================ +// File Descriptor Handles +// ============================================================================ + +#ifndef _WIN32 +FileDescriptorHandle create_fd_handle(int fd) { + return FileDescriptorHandle( + new int(fd), + [](const int* p) { ::close(*p); delete p; } + ); +} +#endif + +FileDescriptorHandle create_fd_handle_ref(int fd) { + return std::make_shared(fd); +} + } // namespace cuda_core diff --git a/cuda_core/cuda/core/_cpp/resource_handles.hpp b/cuda_core/cuda/core/_cpp/resource_handles.hpp index 6d3598d916..92d3cd4669 100644 --- a/cuda_core/cuda/core/_cpp/resource_handles.hpp +++ b/cuda_core/cuda/core/_cpp/resource_handles.hpp @@ -154,6 +154,7 @@ using NvrtcProgramHandle = std::shared_ptr; using NvvmProgramHandle = std::shared_ptr; using NvJitLinkHandle = std::shared_ptr; using CuLinkHandle = std::shared_ptr; +using FileDescriptorHandle = std::shared_ptr; // ============================================================================ @@ -477,6 +478,17 @@ CuLinkHandle create_culink_handle(CUlinkState state); // The handle will NOT be destroyed when the last reference is released. CuLinkHandle create_culink_handle_ref(CUlinkState state); +// ============================================================================ +// File descriptor handle functions +// ============================================================================ + +// Create an owning file descriptor handle. +// When the last reference is released, POSIX close() is called. +FileDescriptorHandle create_fd_handle(int fd); + +// Create a non-owning file descriptor handle (caller manages the fd). +FileDescriptorHandle create_fd_handle_ref(int fd); + // ============================================================================ // Overloaded helper functions to extract raw resources from handles // ============================================================================ @@ -596,6 +608,10 @@ inline std::intptr_t as_intptr(const CuLinkHandle& h) noexcept { return reinterpret_cast(as_cu(h)); } +inline std::intptr_t as_intptr(const FileDescriptorHandle& h) noexcept { + return h ? static_cast(*h) : -1; +} + // as_py() - convert handle to Python wrapper object (returns new reference) #if PY_VERSION_HEX < 0x030D0000 extern "C" int _Py_IsFinalizing(void); @@ -687,4 +703,8 @@ inline PyObject* as_py(const GraphicsResourceHandle& h) noexcept { return detail::make_py("cuda.bindings.driver", "CUgraphicsResource", as_intptr(h)); } +inline PyObject* as_py(const FileDescriptorHandle& h) noexcept { + return PyLong_FromSsize_t(as_intptr(h)); +} + } // namespace cuda_core diff --git a/cuda_core/cuda/core/_memory/_ipc.pxd b/cuda_core/cuda/core/_memory/_ipc.pxd index 5166aa8748..1c08fb6a03 100644 --- a/cuda_core/cuda/core/_memory/_ipc.pxd +++ b/cuda_core/cuda/core/_memory/_ipc.pxd @@ -5,6 +5,7 @@ from cuda.bindings cimport cydriver from cuda.core._memory._buffer cimport Buffer from cuda.core._memory._memory_pool cimport _MemPool +from cuda.core._resource_handles cimport FileDescriptorHandle # Holds _MemPool objects imported by this process. This enables @@ -46,8 +47,8 @@ cdef class IPCBufferDescriptor: cdef class IPCAllocationHandle: cdef: - int _handle - object _uuid + FileDescriptorHandle _h_fd + object _uuid cpdef close(self) diff --git a/cuda_core/cuda/core/_memory/_ipc.pyx b/cuda_core/cuda/core/_memory/_ipc.pyx index e1174937a2..88a1d9c169 100644 --- a/cuda_core/cuda/core/_memory/_ipc.pyx +++ b/cuda_core/cuda/core/_memory/_ipc.pyx @@ -10,10 +10,13 @@ from cuda.core._memory._memory_pool cimport _MemPool from cuda.core._stream cimport Stream from cuda.core._resource_handles cimport ( DevicePtrHandle, + create_fd_handle, create_mempool_handle_ipc, deviceptr_import_ipc, get_last_error, as_cu, + as_intptr, + as_py, ) from cuda.core._stream cimport default_stream @@ -110,31 +113,24 @@ cdef class IPCAllocationHandle: def _init(cls, handle: int, uuid): # no-cython-lint cdef IPCAllocationHandle self = IPCAllocationHandle.__new__(cls) assert handle >= 0 - self._handle = handle + self._h_fd = create_fd_handle(handle) self._uuid = uuid return self cpdef close(self): """Close the handle.""" - if self._handle >= 0: - try: - os.close(self._handle) - finally: - self._handle = -1 - - def __dealloc__(self): - self.close() + self._h_fd.reset() def __int__(self) -> int: - if self._handle < 0: + if not self._h_fd or as_intptr(self._h_fd) < 0: raise ValueError( f"Cannot convert IPCAllocationHandle to int: the handle (id={id(self)}) is closed." ) - return self._handle + return as_py(self._h_fd) @property def handle(self) -> int: - return self._handle + return as_py(self._h_fd) @property def uuid(self) -> uuid.UUID: diff --git a/cuda_core/cuda/core/_resource_handles.pxd b/cuda_core/cuda/core/_resource_handles.pxd index 1cec3bc5cb..419106f04a 100644 --- a/cuda_core/cuda/core/_resource_handles.pxd +++ b/cuda_core/cuda/core/_resource_handles.pxd @@ -41,6 +41,7 @@ cdef extern from "_cpp/resource_handles.hpp" namespace "cuda_core": ctypedef shared_ptr[const NvJitLinkValue] NvJitLinkHandle ctypedef shared_ptr[const cydriver.CUlinkState] CuLinkHandle + ctypedef shared_ptr[const int] FileDescriptorHandle # as_cu() - extract the raw CUDA handle (inline C++) cydriver.CUcontext as_cu(ContextHandle h) noexcept nogil @@ -73,6 +74,7 @@ cdef extern from "_cpp/resource_handles.hpp" namespace "cuda_core": intptr_t as_intptr(NvvmProgramHandle h) noexcept nogil intptr_t as_intptr(NvJitLinkHandle h) noexcept nogil intptr_t as_intptr(CuLinkHandle h) noexcept nogil + intptr_t as_intptr(FileDescriptorHandle h) noexcept nogil # as_py() - convert handle to Python wrapper object (inline C++; requires GIL) object as_py(ContextHandle h) @@ -89,6 +91,7 @@ cdef extern from "_cpp/resource_handles.hpp" namespace "cuda_core": object as_py(NvvmProgramHandle h) object as_py(NvJitLinkHandle h) object as_py(CuLinkHandle h) + object as_py(FileDescriptorHandle h) # ============================================================================= @@ -203,3 +206,7 @@ cdef NvJitLinkHandle create_nvjitlink_handle_ref(cynvjitlink.nvJitLinkHandle han # cuLink handles cdef CuLinkHandle create_culink_handle(cydriver.CUlinkState state) except+ nogil cdef CuLinkHandle create_culink_handle_ref(cydriver.CUlinkState state) except+ nogil + +# File descriptor handles +cdef FileDescriptorHandle create_fd_handle(int fd) except+ nogil +cdef FileDescriptorHandle create_fd_handle_ref(int fd) except+ nogil diff --git a/cuda_core/cuda/core/_resource_handles.pyx b/cuda_core/cuda/core/_resource_handles.pyx index 0215aaf976..8a2c17b280 100644 --- a/cuda_core/cuda/core/_resource_handles.pyx +++ b/cuda_core/cuda/core/_resource_handles.pyx @@ -191,6 +191,12 @@ cdef extern from "_cpp/resource_handles.hpp" namespace "cuda_core": CuLinkHandle create_culink_handle_ref "cuda_core::create_culink_handle_ref" ( cydriver.CUlinkState state) except+ nogil + # File descriptor handles + FileDescriptorHandle create_fd_handle "cuda_core::create_fd_handle" ( + int fd) except+ nogil + FileDescriptorHandle create_fd_handle_ref "cuda_core::create_fd_handle_ref" ( + int fd) except+ nogil + # ============================================================================= # CUDA Driver API capsule From 740039e7c62f6eeaff8f73489d0548559339976f Mon Sep 17 00:00:00 2001 From: Andy Jost Date: Mon, 30 Mar 2026 11:03:17 -0700 Subject: [PATCH 2/4] Fix Windows build: use _close() on Win32 for fd cleanup Made-with: Cursor --- cuda_core/cuda/core/_cpp/resource_handles.cpp | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/cuda_core/cuda/core/_cpp/resource_handles.cpp b/cuda_core/cuda/core/_cpp/resource_handles.cpp index 03ef964b0d..a60de12100 100644 --- a/cuda_core/cuda/core/_cpp/resource_handles.cpp +++ b/cuda_core/cuda/core/_cpp/resource_handles.cpp @@ -12,7 +12,9 @@ #include #include -#ifndef _WIN32 +#ifdef _WIN32 +#include +#else #include #endif @@ -1124,14 +1126,19 @@ CuLinkHandle create_culink_handle_ref(CUlinkState state) { // File Descriptor Handles // ============================================================================ -#ifndef _WIN32 FileDescriptorHandle create_fd_handle(int fd) { return FileDescriptorHandle( new int(fd), - [](const int* p) { ::close(*p); delete p; } + [](const int* p) { +#ifdef _WIN32 + ::_close(*p); +#else + ::close(*p); +#endif + delete p; + } ); } -#endif FileDescriptorHandle create_fd_handle_ref(int fd) { return std::make_shared(fd); From c5ba8f2c6fda333bb23af61d099400eb6bc3012e Mon Sep 17 00:00:00 2001 From: Andy Jost Date: Mon, 30 Mar 2026 11:10:42 -0700 Subject: [PATCH 3/4] Windows: no-op deleter for fd handle (IPC is Linux-only) Made-with: Cursor --- cuda_core/cuda/core/_cpp/resource_handles.cpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/cuda_core/cuda/core/_cpp/resource_handles.cpp b/cuda_core/cuda/core/_cpp/resource_handles.cpp index a60de12100..f53cc66402 100644 --- a/cuda_core/cuda/core/_cpp/resource_handles.cpp +++ b/cuda_core/cuda/core/_cpp/resource_handles.cpp @@ -12,9 +12,7 @@ #include #include -#ifdef _WIN32 -#include -#else +#ifndef _WIN32 #include #endif @@ -1130,9 +1128,7 @@ FileDescriptorHandle create_fd_handle(int fd) { return FileDescriptorHandle( new int(fd), [](const int* p) { -#ifdef _WIN32 - ::_close(*p); -#else +#ifndef _WIN32 ::close(*p); #endif delete p; From 19abcf0c8a49cf27ffa13889dc95a854a0612757 Mon Sep 17 00:00:00 2001 From: Andy Jost Date: Mon, 30 Mar 2026 11:13:49 -0700 Subject: [PATCH 4/4] Fix Windows build: no-op fd handle with runtime_error guard create_fd_handle and create_fd_handle_ref throw std::runtime_error on Windows since POSIX file descriptors are Linux-only. Made-with: Cursor --- cuda_core/cuda/core/_cpp/resource_handles.cpp | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/cuda_core/cuda/core/_cpp/resource_handles.cpp b/cuda_core/cuda/core/_cpp/resource_handles.cpp index f53cc66402..0e3d2d7857 100644 --- a/cuda_core/cuda/core/_cpp/resource_handles.cpp +++ b/cuda_core/cuda/core/_cpp/resource_handles.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -1125,19 +1126,22 @@ CuLinkHandle create_culink_handle_ref(CUlinkState state) { // ============================================================================ FileDescriptorHandle create_fd_handle(int fd) { +#ifdef _WIN32 + throw std::runtime_error("create_fd_handle is not supported on Windows"); +#else return FileDescriptorHandle( new int(fd), - [](const int* p) { -#ifndef _WIN32 - ::close(*p); -#endif - delete p; - } + [](const int* p) { ::close(*p); delete p; } ); +#endif } FileDescriptorHandle create_fd_handle_ref(int fd) { +#ifdef _WIN32 + throw std::runtime_error("create_fd_handle_ref is not supported on Windows"); +#else return std::make_shared(fd); +#endif } } // namespace cuda_core