diff --git a/cuda_bindings/cuda/bindings/_lib/windll.pxd b/cuda_bindings/cuda/bindings/_lib/windll.pxd index e3f86285e0..7b190f3595 100644 --- a/cuda_bindings/cuda/bindings/_lib/windll.pxd +++ b/cuda_bindings/cuda/bindings/_lib/windll.pxd @@ -12,6 +12,7 @@ cdef extern from "windows.h" nogil: ctypedef unsigned long DWORD ctypedef const wchar_t *LPCWSTR ctypedef const char *LPCSTR + ctypedef int BOOL cdef DWORD LOAD_LIBRARY_SEARCH_SYSTEM32 = 0x00000800 @@ -23,6 +24,8 @@ cdef extern from "windows.h" nogil: FARPROC _GetProcAddress "GetProcAddress"(HMODULE hModule, LPCSTR lpProcName) + BOOL _FreeLibrary "FreeLibrary"(HMODULE hLibModule) + cdef inline uintptr_t LoadLibraryExW(str path, HANDLE hFile, DWORD dwFlags): cdef uintptr_t result cdef wchar_t* wpath = PyUnicode_AsWideCharString(path, NULL) @@ -37,3 +40,6 @@ cdef inline uintptr_t LoadLibraryExW(str path, HANDLE hFile, DWORD dwFlags): cdef inline FARPROC GetProcAddress(uintptr_t hModule, const char* lpProcName) nogil: return _GetProcAddress(hModule, lpProcName) + +cdef inline BOOL FreeLibrary(uintptr_t hLibModule) nogil: + return _FreeLibrary(hLibModule) diff --git a/cuda_bindings/cuda/bindings/cyruntime.pyx.in b/cuda_bindings/cuda/bindings/cyruntime.pyx.in index 7f5c96e05c..950e106c53 100644 --- a/cuda_bindings/cuda/bindings/cyruntime.pyx.in +++ b/cuda_bindings/cuda/bindings/cyruntime.pyx.in @@ -1885,35 +1885,46 @@ cdef cudaError_t cudaGraphicsVDPAURegisterOutputSurface(cudaGraphicsResource** r {{if True}} -{{if 'Windows' != platform.system()}} +from libc.stdint cimport uintptr_t +from cuda.pathfinder import load_nvidia_dynamic_lib +{{if 'Windows' == platform.system()}} +cimport cuda.bindings._lib.windll as windll +{{else}} cimport cuda.bindings._lib.dlfcn as dlfcn {{endif}} cdef cudaError_t getLocalRuntimeVersion(int* runtimeVersion) except ?cudaErrorCallRequiresNewerDriver nogil: - {{if 'Windows' == platform.system()}} - with gil: - raise NotImplementedError('"getLocalRuntimeVersion" is unsupported on Windows') - {{else}} + # Load - handle = dlfcn.dlopen('libcudart.so.13', dlfcn.RTLD_NOW) - if handle == NULL: - with gil: - raise RuntimeError(f'Failed to dlopen libcudart.so.13') + with gil: + loaded_dl = load_nvidia_dynamic_lib("cudart") + {{if 'Windows' == platform.system()}} + handle = loaded_dl._handle_uint + {{else}} + handle = loaded_dl._handle_uint + {{endif}} + {{if 'Windows' == platform.system()}} + __cudaRuntimeGetVersion = windll.GetProcAddress(handle, b'cudaRuntimeGetVersion') + {{else}} __cudaRuntimeGetVersion = dlfcn.dlsym(handle, 'cudaRuntimeGetVersion') + {{endif}} if __cudaRuntimeGetVersion == NULL: with gil: - raise RuntimeError(f'Function "cudaRuntimeGetVersion" not found in libcudart.so.13') + raise RuntimeError(f'Function "cudaRuntimeGetVersion" not found in {loaded_dl.abs_path}') # Call cdef cudaError_t err = cudaSuccess err = ( __cudaRuntimeGetVersion)(runtimeVersion) # Unload + {{if 'Windows' == platform.system()}} + windll.FreeLibrary(handle) + {{else}} dlfcn.dlclose(handle) + {{endif}} # Return return err - {{endif}} {{endif}} diff --git a/cuda_bindings/docs/source/release/12.9.X-notes.rst b/cuda_bindings/docs/source/release/12.9.X-notes.rst index 76de5d795f..7a4713a89b 100644 --- a/cuda_bindings/docs/source/release/12.9.X-notes.rst +++ b/cuda_bindings/docs/source/release/12.9.X-notes.rst @@ -15,6 +15,7 @@ Highlights * Automatic CUDA library path detection based on ``CUDA_HOME``, eliminating the need to manually set ``LIBRARY_PATH`` environment variables for installation. * The Python overhead of calling functions in CUDA bindings in ``driver``, ``runtime`` and ``nvrtc`` has been reduced by approximately 30%. * Updated the ``cuda.bindings.runtime`` module to statically link against the CUDA Runtime library from CUDA Toolkit 12.9.1. +* ``cyruntime.getLocalRuntimeVersion`` now uses pathfinder to find the CUDA runtime. Known issues diff --git a/cuda_bindings/docs/source/release/13.X.Y-notes.rst b/cuda_bindings/docs/source/release/13.X.Y-notes.rst index 9e57410ff0..2f29a9dc08 100644 --- a/cuda_bindings/docs/source/release/13.X.Y-notes.rst +++ b/cuda_bindings/docs/source/release/13.X.Y-notes.rst @@ -18,6 +18,7 @@ Highlights * The Python overhead of calling functions in CUDA bindings in ``driver``, ``runtime`` and ``nvrtc`` has been reduced by approximately 30%. * On Windows, the ``pywin32`` dependency has been removed. The necessary Windows API functions are now accessed directly. * Updated the ``cuda.bindings.runtime`` module to statically link against the CUDA Runtime library from CUDA Toolkit 13.0.1. +* ``cyruntime.getLocalRuntimeVersion`` now uses pathfinder to find the CUDA runtime. Known issues diff --git a/cuda_bindings/tests/test_cudart.py b/cuda_bindings/tests/test_cudart.py index 21e902733f..6f8fc009eb 100644 --- a/cuda_bindings/tests/test_cudart.py +++ b/cuda_bindings/tests/test_cudart.py @@ -9,6 +9,7 @@ import cuda.bindings.driver as cuda import cuda.bindings.runtime as cudart +from cuda import pathfinder from cuda.bindings import runtime @@ -1400,3 +1401,13 @@ def test_struct_pointer_comparison(target): c = target(456) assert a != c assert hash(a) != hash(c) + + +def test_getLocalRuntimeVersion(): + try: + err, version = cudart.getLocalRuntimeVersion() + except pathfinder.DynamicLibNotFoundError: + pytest.skip("cudart dynamic lib not available") + else: + assertSuccess(err) + assert version >= 12000 # CUDA 12.0