diff --git a/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in b/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in index d2bb0b63b0..caf36d40e8 100644 --- a/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in +++ b/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in @@ -9,12 +9,13 @@ # This code was automatically generated with version 12.8.0. Do not modify it directly. {{if 'Windows' == platform.system()}} import os +import site +import struct import win32api +from pywintypes import error {{else}} cimport cuda.bindings._lib.dlfcn as dlfcn -from libc.stdint cimport uintptr_t {{endif}} -from cuda.bindings import path_finder cdef bint __cuPythonInit = False {{if 'nvrtcGetErrorString' in found_functions}}cdef void *__nvrtcGetErrorString = NULL{{endif}} @@ -45,18 +46,65 @@ cdef bint __cuPythonInit = False {{if 'nvrtcSetFlowCallback' in found_functions}}cdef void *__nvrtcSetFlowCallback = NULL{{endif}} cdef int cuPythonInit() except -1 nogil: - {{if 'Windows' != platform.system()}} - cdef void* handle = NULL - {{endif}} - global __cuPythonInit if __cuPythonInit: return 0 __cuPythonInit = True + # Load library + {{if 'Windows' == platform.system()}} + with gil: + # First check if the DLL has been loaded by 3rd parties + try: + handle = win32api.GetModuleHandle("nvrtc64_120_0.dll") + except: + handle = None + + # Check if DLLs can be found within pip installations + if not handle: + LOAD_LIBRARY_SEARCH_DEFAULT_DIRS = 0x00001000 + LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR = 0x00000100 + site_packages = [site.getusersitepackages()] + site.getsitepackages() + for sp in site_packages: + mod_path = os.path.join(sp, "nvidia", "cuda_nvrtc", "bin") + if os.path.isdir(mod_path): + os.add_dll_directory(mod_path) + try: + handle = win32api.LoadLibraryEx( + # Note: LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR needs an abs path... 
+ os.path.join(mod_path, "nvrtc64_120_0.dll"), + 0, LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR) + + # Note: nvrtc64_120_0.dll calls into nvrtc-builtins64_*.dll which is + # located in the same mod_path. + # Update PATH environ so that the two dlls can find each other + os.environ["PATH"] = os.pathsep.join((os.environ.get("PATH", ""), mod_path)) + except: + pass + else: + break + else: + # Else try default search + # Only reached if DLL wasn't found in any site-package path + LOAD_LIBRARY_SAFE_CURRENT_DIRS = 0x00002000 + try: + handle = win32api.LoadLibraryEx("nvrtc64_120_0.dll", 0, LOAD_LIBRARY_SAFE_CURRENT_DIRS) + except: + pass + + if not handle: + raise RuntimeError('Failed to LoadLibraryEx nvrtc64_120_0.dll') + {{else}} + handle = dlfcn.dlopen('libnvrtc.so.12', dlfcn.RTLD_NOW) + if handle == NULL: + with gil: + raise RuntimeError('Failed to dlopen libnvrtc.so.12') + {{endif}} + + + # Load function {{if 'Windows' == platform.system()}} with gil: - handle = path_finder.load_nvidia_dynamic_library("nvrtc").handle {{if 'nvrtcGetErrorString' in found_functions}} try: global __nvrtcGetErrorString @@ -241,8 +289,6 @@ cdef int cuPythonInit() except -1 nogil: {{endif}} {{else}} - with gil: - handle = path_finder.load_nvidia_dynamic_library("nvrtc").handle {{if 'nvrtcGetErrorString' in found_functions}} global __nvrtcGetErrorString __nvrtcGetErrorString = dlfcn.dlsym(handle, 'nvrtcGetErrorString') diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx index 78b4d802b3..9961a21050 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx @@ -4,11 +4,11 @@ # # This code was automatically generated across versions from 12.0.1 to 12.8.0. Do not modify it directly. 
-from libc.stdint cimport intptr_t, uintptr_t +from libc.stdint cimport intptr_t -from .utils import FunctionNotFoundError, NotSupportedError +from .utils cimport get_nvjitlink_dso_version_suffix -from cuda.bindings import path_finder +from .utils import FunctionNotFoundError, NotSupportedError ############################################################################### # Extern @@ -52,9 +52,17 @@ cdef void* __nvJitLinkGetInfoLog = NULL cdef void* __nvJitLinkVersion = NULL -cdef void* load_library(int driver_ver) except* with gil: - cdef uintptr_t handle = path_finder.load_nvidia_dynamic_library("nvJitLink").handle - return handle +cdef void* load_library(const int driver_ver) except* with gil: + cdef void* handle + for suffix in get_nvjitlink_dso_version_suffix(driver_ver): + so_name = "libnvJitLink.so" + (f".{suffix}" if suffix else suffix) + handle = dlopen(so_name.encode(), RTLD_NOW | RTLD_GLOBAL) + if handle != NULL: + break + else: + err_msg = dlerror() + raise RuntimeError(f'Failed to dlopen libnvJitLink ({err_msg.decode()})') + return handle cdef int _check_or_init_nvjitlink() except -1 nogil: diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx index b306a30017..9798204424 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx @@ -6,9 +6,12 @@ from libc.stdint cimport intptr_t +from .utils cimport get_nvjitlink_dso_version_suffix + from .utils import FunctionNotFoundError, NotSupportedError -from cuda.bindings import path_finder +import os +import site import win32api @@ -39,9 +42,44 @@ cdef void* __nvJitLinkGetInfoLog = NULL cdef void* __nvJitLinkVersion = NULL -cdef void* load_library(int driver_ver) except* with gil: - cdef intptr_t handle = path_finder.load_nvidia_dynamic_library("nvJitLink").handle - return handle +cdef inline list get_site_packages(): + return 
[site.getusersitepackages()] + site.getsitepackages() + + +cdef load_library(const int driver_ver): + handle = 0 + + for suffix in get_nvjitlink_dso_version_suffix(driver_ver): + if len(suffix) == 0: + continue + dll_name = f"nvJitLink_{suffix}0_0.dll" + + # First check if the DLL has been loaded by 3rd parties + try: + return win32api.GetModuleHandle(dll_name) + except: + pass + + # Next, check if DLLs are installed via pip + for sp in get_site_packages(): + mod_path = os.path.join(sp, "nvidia", "nvJitLink", "bin") + if os.path.isdir(mod_path): + os.add_dll_directory(mod_path) + try: + return win32api.LoadLibraryEx( + # Note: LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR needs an abs path... + os.path.join(mod_path, dll_name), + 0, LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR) + except: + pass + # Finally, try default search + # Only reached if DLL wasn't found in any site-package path + try: + return win32api.LoadLibrary(dll_name) + except: + pass + + raise RuntimeError('Failed to load nvJitLink') cdef int _check_or_init_nvjitlink() except -1 nogil: @@ -50,16 +88,15 @@ cdef int _check_or_init_nvjitlink() except -1 nogil: return 0 cdef int err, driver_ver - cdef intptr_t handle with gil: # Load driver to check version try: - nvcuda_handle = win32api.LoadLibraryEx("nvcuda.dll", 0, LOAD_LIBRARY_SEARCH_SYSTEM32) + handle = win32api.LoadLibraryEx("nvcuda.dll", 0, LOAD_LIBRARY_SEARCH_SYSTEM32) except Exception as e: raise NotSupportedError(f'CUDA driver is not found ({e})') global __cuDriverGetVersion if __cuDriverGetVersion == NULL: - __cuDriverGetVersion = win32api.GetProcAddress(nvcuda_handle, 'cuDriverGetVersion') + __cuDriverGetVersion = win32api.GetProcAddress(handle, 'cuDriverGetVersion') if __cuDriverGetVersion == NULL: raise RuntimeError('something went wrong') err = (__cuDriverGetVersion)(&driver_ver) @@ -67,7 +104,7 @@ cdef int _check_or_init_nvjitlink() except -1 nogil: raise RuntimeError('something went wrong') # Load library - handle = 
load_library(driver_ver) + handle = load_library(driver_ver) # Load function global __nvJitLinkCreate diff --git a/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx index 82335508be..64e78e75a9 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx @@ -4,11 +4,11 @@ # # This code was automatically generated across versions from 11.0.3 to 12.8.0. Do not modify it directly. -from libc.stdint cimport intptr_t, uintptr_t +from libc.stdint cimport intptr_t -from .utils import FunctionNotFoundError, NotSupportedError +from .utils cimport get_nvvm_dso_version_suffix -from cuda.bindings import path_finder +from .utils import FunctionNotFoundError, NotSupportedError ############################################################################### # Extern @@ -51,8 +51,16 @@ cdef void* __nvvmGetProgramLog = NULL cdef void* load_library(const int driver_ver) except* with gil: - cdef uintptr_t handle = path_finder.load_nvidia_dynamic_library("nvvm").handle - return handle + cdef void* handle + for suffix in get_nvvm_dso_version_suffix(driver_ver): + so_name = "libnvvm.so" + (f".{suffix}" if suffix else suffix) + handle = dlopen(so_name.encode(), RTLD_NOW | RTLD_GLOBAL) + if handle != NULL: + break + else: + err_msg = dlerror() + raise RuntimeError(f'Failed to dlopen libnvvm ({err_msg.decode()})') + return handle cdef int _check_or_init_nvvm() except -1 nogil: diff --git a/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx index 21b4d94180..9f507e8e1b 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx @@ -6,9 +6,12 @@ from libc.stdint cimport intptr_t +from .utils cimport get_nvvm_dso_version_suffix + from .utils import FunctionNotFoundError, NotSupportedError -from cuda.bindings import path_finder +import os +import 
site import win32api @@ -37,9 +40,52 @@ cdef void* __nvvmGetProgramLogSize = NULL cdef void* __nvvmGetProgramLog = NULL -cdef void* load_library(int driver_ver) except* with gil: - cdef intptr_t handle = path_finder.load_nvidia_dynamic_library("nvvm").handle - return handle +cdef inline list get_site_packages(): + return [site.getusersitepackages()] + site.getsitepackages() + ["conda"] + + +cdef load_library(const int driver_ver): + handle = 0 + + for suffix in get_nvvm_dso_version_suffix(driver_ver): + if len(suffix) == 0: + continue + dll_name = "nvvm64_40_0.dll" + + # First check if the DLL has been loaded by 3rd parties + try: + return win32api.GetModuleHandle(dll_name) + except: + pass + + # Next, check if DLLs are installed via pip or conda + for sp in get_site_packages(): + if sp == "conda": + # nvvm is not under $CONDA_PREFIX/lib, so it's not in the default search path + conda_prefix = os.environ.get("CONDA_PREFIX") + if conda_prefix is None: + continue + mod_path = os.path.join(conda_prefix, "Library", "nvvm", "bin") + else: + mod_path = os.path.join(sp, "nvidia", "cuda_nvcc", "nvvm", "bin") + if os.path.isdir(mod_path): + os.add_dll_directory(mod_path) + try: + return win32api.LoadLibraryEx( + # Note: LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR needs an abs path... 
+ os.path.join(mod_path, dll_name), + 0, LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR) + except: + pass + + # Finally, try default search + # Only reached if DLL wasn't found in any site-package path + try: + return win32api.LoadLibrary(dll_name) + except: + pass + + raise RuntimeError('Failed to load nvvm') cdef int _check_or_init_nvvm() except -1 nogil: @@ -48,16 +94,15 @@ cdef int _check_or_init_nvvm() except -1 nogil: return 0 cdef int err, driver_ver - cdef intptr_t handle with gil: # Load driver to check version try: - nvcuda_handle = win32api.LoadLibraryEx("nvcuda.dll", 0, LOAD_LIBRARY_SEARCH_SYSTEM32) + handle = win32api.LoadLibraryEx("nvcuda.dll", 0, LOAD_LIBRARY_SEARCH_SYSTEM32) except Exception as e: raise NotSupportedError(f'CUDA driver is not found ({e})') global __cuDriverGetVersion if __cuDriverGetVersion == NULL: - __cuDriverGetVersion = win32api.GetProcAddress(nvcuda_handle, 'cuDriverGetVersion') + __cuDriverGetVersion = win32api.GetProcAddress(handle, 'cuDriverGetVersion') if __cuDriverGetVersion == NULL: raise RuntimeError('something went wrong') err = (__cuDriverGetVersion)(&driver_ver) @@ -65,7 +110,7 @@ cdef int _check_or_init_nvvm() except -1 nogil: raise RuntimeError('something went wrong') # Load library - handle = load_library(driver_ver) + handle = load_library(driver_ver) # Load function global __nvvmVersion diff --git a/cuda_bindings/cuda/bindings/_internal/utils.pxd b/cuda_bindings/cuda/bindings/_internal/utils.pxd index a4b71c5314..cac7846ff7 100644 --- a/cuda_bindings/cuda/bindings/_internal/utils.pxd +++ b/cuda_bindings/cuda/bindings/_internal/utils.pxd @@ -165,3 +165,6 @@ cdef int get_nested_resource_ptr(nested_resource[ResT] &in_out_ptr, object obj, cdef bint is_nested_sequence(data) cdef void* get_buffer_pointer(buf, Py_ssize_t size, readonly=*) except* + +cdef tuple get_nvjitlink_dso_version_suffix(int driver_ver) +cdef tuple get_nvvm_dso_version_suffix(int driver_ver) diff --git 
a/cuda_bindings/cuda/bindings/_internal/utils.pyx b/cuda_bindings/cuda/bindings/_internal/utils.pyx index 7fc77b22c2..0a693c052a 100644 --- a/cuda_bindings/cuda/bindings/_internal/utils.pyx +++ b/cuda_bindings/cuda/bindings/_internal/utils.pyx @@ -127,3 +127,17 @@ cdef int get_nested_resource_ptr(nested_resource[ResT] &in_out_ptr, object obj, class FunctionNotFoundError(RuntimeError): pass class NotSupportedError(RuntimeError): pass + + +cdef tuple get_nvjitlink_dso_version_suffix(int driver_ver): + if 12000 <= driver_ver < 13000: + return ('12', '') + raise NotSupportedError(f'CUDA driver version {driver_ver} is not supported') + + +cdef tuple get_nvvm_dso_version_suffix(int driver_ver): + if 11000 <= driver_ver < 11020: + return ('3', '') + if 11020 <= driver_ver < 13000: + return ('4', '') + raise NotSupportedError(f'CUDA driver version {driver_ver} is not supported') diff --git a/cuda_bindings/cuda/bindings/_path_finder/README.md b/cuda_bindings/cuda/bindings/_path_finder/README.md new file mode 100644 index 0000000000..94b80499ff --- /dev/null +++ b/cuda_bindings/cuda/bindings/_path_finder/README.md @@ -0,0 +1,80 @@ +# `cuda.bindings.path_finder` Module + +## Public API (Work in Progress) + +Currently exposes two primary interfaces: + +``` +cuda.bindings.path_finder._SUPPORTED_LIBNAMES # ('nvJitLink', 'nvrtc', 'nvvm') +cuda.bindings.path_finder._load_nvidia_dynamic_library(libname: str) -> LoadedDL +``` + +**Note:** +These APIs are prefixed with an underscore because they are considered +experimental while undergoing active development, although already +reasonably well-tested through CI pipelines. + +## Library Loading Search Priority + +The `_load_nvidia_dynamic_library()` function implements a hierarchical search +strategy for locating NVIDIA shared libraries: + +0. **Check if a library was loaded into the process already by some other means.** + - If so, the rest of the search logic is necessarily skipped.
+ The absolute path of the already loaded library will be returned, along + with the handle to the library. + +1. **Python Package Ecosystem** + - Scans `sys.path` to find libraries installed via NVIDIA Python wheels. + +2. **Conda Environments** + - Leverages Conda-specific paths through our fork of `get_cuda_paths()` + from numba-cuda. + +3. **Environment Variables** + - Relies on `CUDA_HOME`/`CUDA_PATH` environment variables if set. + +4. **System Installations** + - Checks traditional system locations through these paths: + - Linux: `/usr/local/cuda/lib64` + - Windows: `C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\vX.Y\bin` + (where X.Y is the CTK version) + - **Notably does NOT search**: + - Versioned CUDA directories like `/usr/local/cuda-12.3` + - Distribution-specific packages (RPM/DEB) + EXCEPT Debian's `nvidia-cuda-toolkit` + +5. **OS Default Mechanisms** + - Falls back to the native loader: + - `dlopen()` on Linux + - `LoadLibraryW()` on Windows + +Note that the search is done on a per-library basis. There is no centralized +mechanism that ensures all libraries are found in the same way. + +## Implementation Philosophy + +The current implementation balances stability and evolution: + +- **Baseline Foundation:** Uses a fork of numba-cuda's `cuda_paths.py` that has been + battle-tested in production environments.
+ +- **Validation Infrastructure:** Comprehensive CI testing matrix being developed to cover: + - Various Linux/Windows environments + - Python packaging formats (wheels, conda) + - CUDA Toolkit versions + +- **Roadmap:** Planned refactoring to: + - Unify library discovery logic + - Improve maintainability + - Better enforce search priority + - Expand platform support + +## Maintenance Requirements + +These key components must be updated for new CUDA Toolkit releases: + +- `supported_libs.SUPPORTED_LIBNAMES` +- `supported_libs.SUPPORTED_WINDOWS_DLLS` +- `supported_libs.SUPPORTED_LINUX_SONAMES` +- `supported_libs.EXPECTED_LIB_SYMBOLS` diff --git a/cuda_bindings/cuda/bindings/_path_finder/cuda_paths.py b/cuda_bindings/cuda/bindings/_path_finder/cuda_paths.py index e27e6f54bc..80f4e0149f 100644 --- a/cuda_bindings/cuda/bindings/_path_finder/cuda_paths.py +++ b/cuda_bindings/cuda/bindings/_path_finder/cuda_paths.py @@ -1,3 +1,40 @@ +# Copyright 2025 NVIDIA Corporation. All rights reserved. +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + +# Forked from: +# https://github.com/NVIDIA/numba-cuda/blob/8c9c9d0cb901c06774a9abea6d12b6a4b0287e5e/numba_cuda/numba/cuda/cuda_paths.py + +# The numba-cuda version in turn was forked from: +# https://github.com/numba/numba/blob/6c8a71ffc3eaa1c68e1bac927b80ee7469002b3f/numba/cuda/cuda_paths.py +# SPDX-License-Identifier: BSD-2-Clause +# +# Original Numba LICENSE: +# Copyright (c) 2012, Anaconda, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. 
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import ctypes import os import platform import re @@ -8,7 +45,7 @@ from collections import namedtuple from pathlib import Path -from .findlib import find_file, find_lib +from cuda.bindings._path_finder.findlib import find_lib IS_WIN32 = sys.platform.startswith("win32") @@ -48,6 +85,37 @@ def _readenv(name, ctor, default): config_CUDA_INCLUDE_PATH = _get_numba_CUDA_INCLUDE_PATH() +SEARCH_PRIORITY = [ + "Conda environment", + "Conda environment (NVIDIA package)", + "NVIDIA NVCC Wheel", + "CUDA_HOME", + "System", + "Debian package", +] + + +def _priority_index(label): + if label in SEARCH_PRIORITY: + return SEARCH_PRIORITY.index(label) + else: + raise ValueError(f"Can't determine search priority for {label}") + + +def _find_first_valid_lazy(options): + sorted_options = sorted(options, key=lambda x: _priority_index(x[0])) + for label, fn in sorted_options: + value = fn() + if value: + return label, value + return "", None + + +def _build_options(pairs): + """Sorts and returns a list of (label, value) tuples according to SEARCH_PRIORITY.""" + priority_index = {label: i for i, label in enumerate(SEARCH_PRIORITY)} + return sorted(pairs, key=lambda pair: priority_index.get(pair[0], float("inf"))) 
+ def _find_valid_path(options): """Find valid path from *options*, which is a list of 2-tuple of @@ -62,19 +130,17 @@ def _find_valid_path(options): def _get_libdevice_path_decision(): - options = [ - ("Conda environment", get_conda_ctk()), - ("Conda environment (NVIDIA package)", get_nvidia_libdevice_ctk()), - ("CUDA_HOME", get_cuda_home("nvvm", "libdevice")), - ("Debian package", get_debian_pkg_libdevice()), - ("NVIDIA NVCC Wheel", get_libdevice_wheel()), - ] - libdevice_ctk_dir = get_system_ctk("nvvm", "libdevice") - if libdevice_ctk_dir and os.path.exists(libdevice_ctk_dir): - options.append(("System", libdevice_ctk_dir)) - - by, libdir = _find_valid_path(options) - return by, libdir + options = _build_options( + [ + ("Conda environment", get_conda_ctk), + ("Conda environment (NVIDIA package)", get_nvidia_libdevice_ctk), + ("CUDA_HOME", lambda: get_cuda_home("nvvm", "libdevice")), + ("NVIDIA NVCC Wheel", get_libdevice_wheel), + ("System", lambda: get_system_ctk("nvvm", "libdevice")), + ("Debian package", get_debian_pkg_libdevice), + ] + ) + return _find_first_valid_lazy(options) def _nvvm_lib_dir(): @@ -86,53 +152,113 @@ def _nvvm_lib_dir(): def _get_nvvm_path_decision(): options = [ - ("Conda environment", get_conda_ctk()), - ("Conda environment (NVIDIA package)", get_nvidia_nvvm_ctk()), - ("CUDA_HOME", get_cuda_home(*_nvvm_lib_dir())), - ("NVIDIA NVCC Wheel", _get_nvvm_wheel()), + ("Conda environment", get_conda_ctk), + ("Conda environment (NVIDIA package)", get_nvidia_nvvm_ctk), + ("NVIDIA NVCC Wheel", _get_nvvm_wheel), + ("CUDA_HOME", lambda: get_cuda_home(*_nvvm_lib_dir())), + ("System", lambda: get_system_ctk(*_nvvm_lib_dir())), ] - # need to ensure nvvm dir actually exists - nvvm_ctk_dir = get_system_ctk(*_nvvm_lib_dir()) - if nvvm_ctk_dir and os.path.exists(nvvm_ctk_dir): - options.append(("System", nvvm_ctk_dir)) + return _find_first_valid_lazy(options) - by, path = _find_valid_path(options) - return by, path + +def _get_nvrtc_system_ctk(): + sys_path 
= get_system_ctk("bin" if IS_WIN32 else "lib64") + candidates = find_lib("nvrtc", sys_path) + if candidates: + return max(candidates) + + +def _get_nvrtc_path_decision(): + options = _build_options( + [ + ("CUDA_HOME", lambda: get_cuda_home("nvrtc")), + ("Conda environment", get_conda_ctk), + ("Conda environment (NVIDIA package)", get_nvidia_cudalib_ctk), + ("NVIDIA NVCC Wheel", _get_nvrtc_wheel), + ("System", _get_nvrtc_system_ctk), + ] + ) + return _find_first_valid_lazy(options) def _get_nvvm_wheel(): - site_paths = [site.getusersitepackages()] + site.getsitepackages() + ["conda", None] + platform_map = { + "linux": ("lib64", "libnvvm.so"), + "win32": ("bin", "nvvm64_40_0.dll"), + } + + for plat, (dso_dir, dso_path) in platform_map.items(): + if sys.platform.startswith(plat): + break + else: + raise NotImplementedError("Unsupported platform") + + site_paths = [site.getusersitepackages()] + site.getsitepackages() + + for sp in filter(None, site_paths): + nvvm_path = Path(sp, "nvidia", "cuda_nvcc", "nvvm", dso_dir, dso_path) + if nvvm_path.exists(): + return str(nvvm_path.parent) + + return None + + +def get_nvrtc_dso_path(): + site_paths = [site.getusersitepackages()] + site.getsitepackages() for sp in site_paths: - # The SONAME is taken based on public CTK 12.x releases - if sys.platform.startswith("linux"): - dso_dir = "lib64" - # Hack: libnvvm from Linux wheel - # does not have any soname (CUDAINST-3183) - dso_path = "libnvvm.so" - elif sys.platform.startswith("win32"): - dso_dir = "bin" - dso_path = "nvvm64_40_0.dll" + lib_dir = os.path.join( + sp, + "nvidia", + "cuda_nvrtc", + ("bin" if IS_WIN32 else "lib") if sp else None, + ) + if lib_dir and os.path.exists(lib_dir): + for major in (12, 11): + if major == 11: + cu_ver = "112" if IS_WIN32 else "11.2" + elif major == 12: + cu_ver = "120" if IS_WIN32 else "12" + else: + raise NotImplementedError(f"CUDA {major} is not supported") + + dso_path = os.path.join( + lib_dir, + f"nvrtc64_{cu_ver}_0.dll" if IS_WIN32 
else f"libnvrtc.so.{cu_ver}", + ) + if os.path.isfile(dso_path): + return dso_path + return None + + +def _get_nvrtc_wheel(): + dso_path = get_nvrtc_dso_path() + if dso_path: + try: + result = ctypes.CDLL(dso_path, mode=ctypes.RTLD_GLOBAL) + except OSError: + pass else: - raise AssertionError() - - if sp is not None: - dso_dir = os.path.join(sp, "nvidia", "cuda_nvcc", "nvvm", dso_dir) - dso_path = os.path.join(dso_dir, dso_path) - if os.path.exists(dso_path): - return str(Path(dso_path).parent) + if IS_WIN32: + import win32api + + # This absolute path will + # always be correct regardless of the package source + nvrtc_path = win32api.GetModuleFileNameW(result._handle) + dso_dir = os.path.dirname(nvrtc_path) + builtins_path = os.path.join( + dso_dir, + [f for f in os.listdir(dso_dir) if re.match("^nvrtc-builtins.*.dll$", f)][0], + ) + if not os.path.exists(builtins_path): + raise RuntimeError(f'Path does not exist: "{builtins_path}"') + return Path(dso_path) def _get_libdevice_paths(): by, libdir = _get_libdevice_path_decision() - if by == "NVIDIA NVCC Wheel": - # The NVVM path is a directory, not a file - out = os.path.join(libdir, "libdevice.10.bc") - else: - # Search for pattern - pat = r"libdevice(\.\d+)*\.bc$" - candidates = find_file(re.compile(pat), libdir) - # Keep only the max (most recent version) of the bitcode files. 
- out = max(candidates, default=None) + if not libdir: + return _env_path_tuple(by, None) + out = os.path.join(libdir, "libdevice.10.bc") return _env_path_tuple(by, out) @@ -150,26 +276,46 @@ def _cuda_home_static_cudalib_path(): return ("lib64",) +def _get_cudalib_wheel(): + """Get the cudalib path from the NVCC wheel.""" + site_paths = [site.getusersitepackages()] + site.getsitepackages() + libdir = "bin" if IS_WIN32 else "lib" + for sp in filter(None, site_paths): + cudalib_path = Path(sp, "nvidia", "cuda_runtime", libdir) + if cudalib_path.exists(): + return str(cudalib_path) + return None + + def _get_cudalib_dir_path_decision(): - options = [ - ("Conda environment", get_conda_ctk()), - ("Conda environment (NVIDIA package)", get_nvidia_cudalib_ctk()), - ("CUDA_HOME", get_cuda_home(_cudalib_path())), - ("System", get_system_ctk(_cudalib_path())), - ] - by, libdir = _find_valid_path(options) - return by, libdir + options = _build_options( + [ + ("Conda environment", get_conda_ctk), + ("Conda environment (NVIDIA package)", get_nvidia_cudalib_ctk), + ("NVIDIA NVCC Wheel", _get_cudalib_wheel), + ("CUDA_HOME", lambda: get_cuda_home(_cudalib_path())), + ("System", lambda: get_system_ctk(_cudalib_path())), + ] + ) + return _find_first_valid_lazy(options) def _get_static_cudalib_dir_path_decision(): - options = [ - ("Conda environment", get_conda_ctk()), - ("Conda environment (NVIDIA package)", get_nvidia_static_cudalib_ctk()), - ("CUDA_HOME", get_cuda_home(*_cuda_home_static_cudalib_path())), - ("System", get_system_ctk(_cudalib_path())), - ] - by, libdir = _find_valid_path(options) - return by, libdir + options = _build_options( + [ + ("Conda environment", get_conda_ctk), + ( + "Conda environment (NVIDIA package)", + get_nvidia_static_cudalib_ctk, + ), + ( + "CUDA_HOME", + lambda: get_cuda_home(*_cuda_home_static_cudalib_path()), + ), + ("System", lambda: get_system_ctk(_cudalib_path())), + ] + ) + return _find_first_valid_lazy(options) def _get_cudalib_dir(): @@ 
-185,12 +331,12 @@ def _get_static_cudalib_dir(): def get_system_ctk(*subdirs): """Return path to system-wide cudatoolkit; or, None if it doesn't exist.""" # Linux? - if sys.platform.startswith("linux"): + if not IS_WIN32: # Is cuda alias to /usr/local/cuda? # We are intentionally not getting versioned cuda installation. - base = "/usr/local/cuda" - if os.path.exists(base): - return os.path.join(base, *subdirs) + result = os.path.join("/usr/local/cuda", *subdirs) + if os.path.exists(result): + return result def get_conda_ctk(): @@ -283,15 +429,38 @@ def get_cuda_home(*subdirs): def _get_nvvm_path(): by, path = _get_nvvm_path_decision() + if by == "NVIDIA NVCC Wheel": - # The NVVM path is a directory, not a file - path = os.path.join(path, "libnvvm.so") + platform_map = { + "linux": "libnvvm.so", + "win32": "nvvm64_40_0.dll", + } + + for plat, dso_name in platform_map.items(): + if sys.platform.startswith(plat): + break + else: + raise NotImplementedError("Unsupported platform") + + path = os.path.join(path, dso_name) else: candidates = find_lib("nvvm", path) path = max(candidates) if candidates else None return _env_path_tuple(by, path) +def _get_nvrtc_path(): + by, path = _get_nvrtc_path_decision() + if by == "NVIDIA NVCC Wheel": + path = str(path) + elif by == "System": + return _env_path_tuple(by, path) + else: + candidates = find_lib("nvrtc", path) + path = max(candidates) if candidates else None + return _env_path_tuple(by, path) + + def get_cuda_paths(): """Returns a dictionary mapping component names to a 2-tuple of (source_variable, info). 
@@ -310,6 +479,7 @@ def get_cuda_paths(): # Not in cache d = { "nvvm": _get_nvvm_path(), + "nvrtc": _get_nvrtc_path(), "libdevice": _get_libdevice_paths(), "cudalib_dir": _get_cudalib_dir(), "static_cudalib_dir": _get_static_cudalib_dir(), diff --git a/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py b/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py index e60154aa5a..af9f42fbf8 100644 --- a/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py +++ b/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py @@ -1,19 +1,19 @@ # Copyright 2024-2025 NVIDIA Corporation. All rights reserved. -# # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE import functools import glob import os +import sys -from .cuda_paths import IS_WIN32, get_cuda_paths -from .supported_libs import is_suppressed_dll_file -from .sys_path_find_sub_dirs import sys_path_find_sub_dirs +from cuda.bindings._path_finder.cuda_paths import get_cuda_paths +from cuda.bindings._path_finder.find_sub_dirs import find_sub_dirs_all_sitepackages +from cuda.bindings._path_finder.supported_libs import is_suppressed_dll_file def _no_such_file_in_sub_dirs(sub_dirs, file_wild, error_messages, attachments): error_messages.append(f"No such file: {file_wild}") - for sub_dir in sys_path_find_sub_dirs(sub_dirs): + for sub_dir in find_sub_dirs_all_sitepackages(sub_dirs): attachments.append(f' listdir("{sub_dir}"):') for node in sorted(os.listdir(sub_dir)): attachments.append(f" {node}") @@ -25,7 +25,7 @@ def _find_so_using_nvidia_lib_dirs(libname, so_basename, error_messages, attachm else: nvidia_sub_dirs = ("nvidia", "*", "lib") file_wild = so_basename + "*" - for lib_dir in sys_path_find_sub_dirs(nvidia_sub_dirs): + for lib_dir in find_sub_dirs_all_sitepackages(nvidia_sub_dirs): # First look for an exact match so_name = os.path.join(lib_dir, so_basename) if os.path.isfile(so_name): @@ -48,17 +48,16 @@ def _find_dll_under_dir(dirpath, 
file_wild): return None -def _find_dll_using_nvidia_bin_dirs(libname, error_messages, attachments): +def _find_dll_using_nvidia_bin_dirs(libname, lib_searched_for, error_messages, attachments): if libname == "nvvm": # noqa: SIM108 nvidia_sub_dirs = ("nvidia", "*", "nvvm", "bin") else: nvidia_sub_dirs = ("nvidia", "*", "bin") - file_wild = libname + "*.dll" - for bin_dir in sys_path_find_sub_dirs(nvidia_sub_dirs): - dll_name = _find_dll_under_dir(bin_dir, file_wild) + for bin_dir in find_sub_dirs_all_sitepackages(nvidia_sub_dirs): + dll_name = _find_dll_under_dir(bin_dir, lib_searched_for) if dll_name is not None: return dll_name - _no_such_file_in_sub_dirs(nvidia_sub_dirs, file_wild, error_messages, attachments) + _no_such_file_in_sub_dirs(nvidia_sub_dirs, lib_searched_for, error_messages, attachments) return None @@ -122,14 +121,16 @@ def __init__(self, libname: str): self.attachments = [] self.abs_path = None - if IS_WIN32: - self.abs_path = _find_dll_using_nvidia_bin_dirs(libname, self.error_messages, self.attachments) + if sys.platform == "win32": + self.lib_searched_for = f"{libname}*.dll" + self.abs_path = _find_dll_using_nvidia_bin_dirs( + libname, self.lib_searched_for, self.error_messages, self.attachments + ) if self.abs_path is None: if libname == "nvvm": self.abs_path = _get_cuda_paths_info("nvvm", self.error_messages) else: self.abs_path = _find_dll_using_cudalib_dir(libname, self.error_messages, self.attachments) - self.lib_searched_for = f"{libname}*.dll" else: self.lib_searched_for = f"lib{libname}.so" self.abs_path = _find_so_using_nvidia_lib_dirs( diff --git a/cuda_bindings/cuda/bindings/_path_finder/sys_path_find_sub_dirs.py b/cuda_bindings/cuda/bindings/_path_finder/find_sub_dirs.py similarity index 67% rename from cuda_bindings/cuda/bindings/_path_finder/sys_path_find_sub_dirs.py rename to cuda_bindings/cuda/bindings/_path_finder/find_sub_dirs.py index d2da726c94..8101326252 100644 --- 
a/cuda_bindings/cuda/bindings/_path_finder/sys_path_find_sub_dirs.py +++ b/cuda_bindings/cuda/bindings/_path_finder/find_sub_dirs.py @@ -1,16 +1,15 @@ # Copyright 2024-2025 NVIDIA Corporation. All rights reserved. -# # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE import functools import os +import site import sys -@functools.cache -def _impl(sys_path, sub_dirs): +def find_sub_dirs_no_cache(parent_dirs, sub_dirs): results = [] - for base in sys_path: + for base in parent_dirs: stack = [(base, 0)] # (current_path, index into sub_dirs) while stack: current_path, idx = stack.pop() @@ -36,5 +35,18 @@ def _impl(sys_path, sub_dirs): return results -def sys_path_find_sub_dirs(sub_dirs): - return _impl(tuple(sys.path), tuple(sub_dirs)) +@functools.cache +def find_sub_dirs_cached(parent_dirs, sub_dirs): + return find_sub_dirs_no_cache(parent_dirs, sub_dirs) + + +def find_sub_dirs(parent_dirs, sub_dirs): + return find_sub_dirs_cached(tuple(parent_dirs), tuple(sub_dirs)) + + +def find_sub_dirs_sys_path(sub_dirs): + return find_sub_dirs(sys.path, sub_dirs) + + +def find_sub_dirs_all_sitepackages(sub_dirs): + return find_sub_dirs((site.getusersitepackages(),) + tuple(site.getsitepackages()), sub_dirs) diff --git a/cuda_bindings/cuda/bindings/_path_finder/findlib.py b/cuda_bindings/cuda/bindings/_path_finder/findlib.py index 4de57c9053..992a3940e5 100644 --- a/cuda_bindings/cuda/bindings/_path_finder/findlib.py +++ b/cuda_bindings/cuda/bindings/_path_finder/findlib.py @@ -1,5 +1,33 @@ +# SPDX-License-Identifier: BSD-2-Clause +# # Forked from: # https://github.com/numba/numba/blob/f0d24824fcd6a454827e3c108882395d00befc04/numba/misc/findlib.py +# +# Original LICENSE: +# Copyright (c) 2012, Anaconda, Inc. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import os import re diff --git a/cuda_bindings/cuda/bindings/_path_finder/load_dl_common.py b/cuda_bindings/cuda/bindings/_path_finder/load_dl_common.py new file mode 100644 index 0000000000..4592f6c335 --- /dev/null +++ b/cuda_bindings/cuda/bindings/_path_finder/load_dl_common.py @@ -0,0 +1,40 @@ +# Copyright 2025 NVIDIA Corporation. All rights reserved. +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + +from dataclasses import dataclass +from typing import Callable, Optional + +from cuda.bindings._path_finder.supported_libs import DIRECT_DEPENDENCIES + + +@dataclass +class LoadedDL: + """Represents a loaded dynamic library. 
+ + Attributes: + handle: The library handle (can be converted to void* in Cython) + abs_path: The absolute path to the library file + was_already_loaded_from_elsewhere: Whether the library was already loaded + """ + + # ATTENTION: To convert `handle` back to `void*` in cython: + # Linux: `cdef void* ptr = <void*><uintptr_t>` + # Windows: `cdef void* ptr = <void*><intptr_t>` + handle: int + abs_path: Optional[str] + was_already_loaded_from_elsewhere: bool + + +def load_dependencies(libname: str, load_func: Callable[[str], LoadedDL]) -> None: + """Load all dependencies for a given library. + + Args: + libname: The name of the library whose dependencies should be loaded + load_func: The function to use for loading libraries (e.g. load_nvidia_dynamic_library) + + Example: + >>> load_dependencies("cudart", load_nvidia_dynamic_library) + # This will load all dependencies of cudart using the provided loading function + """ + for dep in DIRECT_DEPENDENCIES.get(libname, ()): + load_func(dep) diff --git a/cuda_bindings/cuda/bindings/_path_finder/load_dl_linux.py b/cuda_bindings/cuda/bindings/_path_finder/load_dl_linux.py new file mode 100644 index 0000000000..b9f3839e1a --- /dev/null +++ b/cuda_bindings/cuda/bindings/_path_finder/load_dl_linux.py @@ -0,0 +1,125 @@ +# Copyright 2025 NVIDIA Corporation. All rights reserved. 
+# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + +import ctypes +import ctypes.util +import os +from typing import Optional + +from cuda.bindings._path_finder.load_dl_common import LoadedDL + +CDLL_MODE = os.RTLD_NOW | os.RTLD_GLOBAL + +LIBDL_PATH = ctypes.util.find_library("dl") or "libdl.so.2" +LIBDL = ctypes.CDLL(LIBDL_PATH) +LIBDL.dladdr.argtypes = [ctypes.c_void_p, ctypes.c_void_p] +LIBDL.dladdr.restype = ctypes.c_int + + +class Dl_info(ctypes.Structure): + """Structure used by dladdr to return information about a loaded symbol.""" + + _fields_ = [ + ("dli_fname", ctypes.c_char_p), # path to .so + ("dli_fbase", ctypes.c_void_p), + ("dli_sname", ctypes.c_char_p), + ("dli_saddr", ctypes.c_void_p), + ] + + +def abs_path_for_dynamic_library(libname: str, handle: ctypes.CDLL) -> Optional[str]: + """Get the absolute path of a loaded dynamic library on Linux. + + Args: + libname: The name of the library + handle: The library handle + + Returns: + The absolute path to the library file, or None if no expected symbol is found + + Raises: + OSError: If dladdr fails to get information about the symbol + """ + from cuda.bindings._path_finder.supported_libs import EXPECTED_LIB_SYMBOLS + + for symbol_name in EXPECTED_LIB_SYMBOLS[libname]: + symbol = getattr(handle, symbol_name, None) + if symbol is not None: + break + else: + return None + + addr = ctypes.cast(symbol, ctypes.c_void_p) + info = Dl_info() + if LIBDL.dladdr(addr, ctypes.byref(info)) == 0: + raise OSError(f"dladdr failed for {libname=!r}") + return info.dli_fname.decode() + + +def check_if_already_loaded_from_elsewhere(libname: str) -> Optional[LoadedDL]: + """Check if the library is already loaded in the process. + + Args: + libname: The name of the library to check + + Returns: + A LoadedDL object if the library is already loaded, None otherwise + + Example: + >>> loaded = check_if_already_loaded_from_elsewhere("cudart") + >>> if loaded is not None: + ... 
print(f"Library already loaded from {loaded.abs_path}") + """ + from cuda.bindings._path_finder.supported_libs import SUPPORTED_LINUX_SONAMES + + for soname in SUPPORTED_LINUX_SONAMES.get(libname, ()): + try: + handle = ctypes.CDLL(soname, mode=os.RTLD_NOLOAD) + except OSError: + continue + else: + return LoadedDL(handle._handle, abs_path_for_dynamic_library(libname, handle), True) + return None + + +def load_with_system_search(libname: str, soname: str) -> Optional[LoadedDL]: + """Try to load a library using system search paths. + + Args: + libname: The name of the library to load + soname: The soname to search for + + Returns: + A LoadedDL object if successful, None if the library cannot be loaded + + Raises: + RuntimeError: If the library is loaded but no expected symbol is found + """ + try: + handle = ctypes.CDLL(soname, CDLL_MODE) + abs_path = abs_path_for_dynamic_library(libname, handle) + if abs_path is None: + raise RuntimeError(f"No expected symbol for {libname=!r}") + return LoadedDL(handle._handle, abs_path, False) + except OSError: + return None + + +def load_with_abs_path(libname: str, found_path: str) -> LoadedDL: + """Load a dynamic library from the given path. + + Args: + libname: The name of the library to load + found_path: The absolute path to the library file + + Returns: + A LoadedDL object representing the loaded library + + Raises: + RuntimeError: If the library cannot be loaded + """ + try: + handle = ctypes.CDLL(found_path, CDLL_MODE) + except OSError as e: + raise RuntimeError(f"Failed to dlopen {found_path}: {e}") from e + return LoadedDL(handle._handle, found_path, False) diff --git a/cuda_bindings/cuda/bindings/_path_finder/load_dl_windows.py b/cuda_bindings/cuda/bindings/_path_finder/load_dl_windows.py new file mode 100644 index 0000000000..1f0c9c7e22 --- /dev/null +++ b/cuda_bindings/cuda/bindings/_path_finder/load_dl_windows.py @@ -0,0 +1,149 @@ +# Copyright 2025 NVIDIA Corporation. All rights reserved. 
+# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + +import ctypes +import ctypes.wintypes +from typing import Optional + +import pywintypes +import win32api + +from cuda.bindings._path_finder.load_dl_common import LoadedDL + +# Mirrors WinBase.h (unfortunately not defined already elsewhere) +WINBASE_LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR = 0x00000100 +WINBASE_LOAD_LIBRARY_SEARCH_DEFAULT_DIRS = 0x00001000 + + +def add_dll_directory(dll_abs_path: str) -> None: + """Add a DLL directory to the search path and update PATH environment variable. + + Args: + dll_abs_path: Absolute path to the DLL file + + Raises: + AssertionError: If the directory containing the DLL does not exist + """ + import os + + dirpath = os.path.dirname(dll_abs_path) + assert os.path.isdir(dirpath), dll_abs_path + # Add the DLL directory to the search path + os.add_dll_directory(dirpath) + # Update PATH as a fallback for dependent DLL resolution + curr_path = os.environ.get("PATH") + os.environ["PATH"] = dirpath if curr_path is None else os.pathsep.join((curr_path, dirpath)) + + +def abs_path_for_dynamic_library(handle: int) -> str: + """Get the absolute path of a loaded dynamic library on Windows. + + Args: + handle: The library handle + + Returns: + The absolute path to the DLL file + + Raises: + OSError: If GetModuleFileNameW fails + RuntimeError: If the required path length is unreasonably long + """ + MAX_ITERATIONS = 10 # Allows for extremely long paths (up to ~266,000 chars) + buf_size = 260 # Start with traditional MAX_PATH + + for _ in range(MAX_ITERATIONS): + buf = ctypes.create_unicode_buffer(buf_size) + n_chars = ctypes.windll.kernel32.GetModuleFileNameW(ctypes.wintypes.HMODULE(handle), buf, buf_size) + + if n_chars == 0: + raise OSError( + "GetModuleFileNameW failed. Long paths may require enabling the " + "Windows 10+ long path registry setting. 
See: " + "https://docs.python.org/3/using/windows.html#removing-the-max-path-limitation" + ) + if n_chars < buf_size - 1: + return buf.value + + buf_size *= 2 # Double the buffer size and try again + + raise RuntimeError( + f"Failed to retrieve the full path after {MAX_ITERATIONS} attempts " + f"(final buffer size: {buf_size} characters). " + "This may indicate:\n" + " 1. An extremely long path requiring Windows long path support, or\n" + " 2. An invalid or corrupt library handle, or\n" + " 3. An unexpected system error.\n" + "See: https://docs.python.org/3/using/windows.html#removing-the-max-path-limitation" + ) + + +def check_if_already_loaded_from_elsewhere(libname: str) -> Optional[LoadedDL]: + """Check if the library is already loaded in the process. + + Args: + libname: The name of the library to check + + Returns: + A LoadedDL object if the library is already loaded, None otherwise + + Example: + >>> loaded = check_if_already_loaded_from_elsewhere("cudart") + >>> if loaded is not None: + ... print(f"Library already loaded from {loaded.abs_path}") + """ + from cuda.bindings._path_finder.supported_libs import SUPPORTED_WINDOWS_DLLS + + for dll_name in SUPPORTED_WINDOWS_DLLS.get(libname, ()): + try: + handle = win32api.GetModuleHandle(dll_name) + except pywintypes.error: + continue + else: + return LoadedDL(handle, abs_path_for_dynamic_library(handle), True) + return None + + +def load_with_system_search(libname: str, _unused: str) -> Optional[LoadedDL]: + """Try to load a DLL using system search paths. 
+ + Args: + libname: The name of the library to load + _unused: Unused parameter (kept for interface consistency) + + Returns: + A LoadedDL object if successful, None if the library cannot be loaded + """ + from cuda.bindings._path_finder.supported_libs import SUPPORTED_WINDOWS_DLLS + + for dll_name in SUPPORTED_WINDOWS_DLLS.get(libname, ()): + handle = ctypes.windll.kernel32.LoadLibraryW(ctypes.c_wchar_p(dll_name)) + if handle: + return LoadedDL(handle, abs_path_for_dynamic_library(handle), False) + + return None + + +def load_with_abs_path(libname: str, found_path: str) -> LoadedDL: + """Load a dynamic library from the given path. + + Args: + libname: The name of the library to load + found_path: The absolute path to the DLL file + + Returns: + A LoadedDL object representing the loaded library + + Raises: + RuntimeError: If the DLL cannot be loaded + """ + from cuda.bindings._path_finder.supported_libs import LIBNAMES_REQUIRING_OS_ADD_DLL_DIRECTORY + + if libname in LIBNAMES_REQUIRING_OS_ADD_DLL_DIRECTORY: + add_dll_directory(found_path) + + flags = WINBASE_LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | WINBASE_LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR + try: + handle = win32api.LoadLibraryEx(found_path, 0, flags) + except pywintypes.error as e: + raise RuntimeError(f"Failed to load DLL at {found_path}: {e}") from e + return LoadedDL(handle, found_path, False) diff --git a/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py b/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py index c770de67d0..015c4cdf85 100644 --- a/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py +++ b/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py @@ -1,193 +1,60 @@ # Copyright 2025 NVIDIA Corporation. All rights reserved. 
-# # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE -import ctypes import functools -import os import sys -from dataclasses import dataclass -from typing import Optional, Tuple -if sys.platform == "win32": - import ctypes.wintypes - - import pywintypes - import win32api - - # Mirrors WinBase.h (unfortunately not defined already elsewhere) - _WINBASE_LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR = 0x00000100 - _WINBASE_LOAD_LIBRARY_SEARCH_DEFAULT_DIRS = 0x00001000 +from cuda.bindings._path_finder.find_nvidia_dynamic_library import _find_nvidia_dynamic_library +from cuda.bindings._path_finder.load_dl_common import LoadedDL, load_dependencies +if sys.platform == "win32": + from cuda.bindings._path_finder.load_dl_windows import ( + check_if_already_loaded_from_elsewhere, + load_with_abs_path, + load_with_system_search, + ) else: - import ctypes.util - - _LINUX_CDLL_MODE = os.RTLD_NOW | os.RTLD_GLOBAL - - _LIBDL_PATH = ctypes.util.find_library("dl") or "libdl.so.2" - _LIBDL = ctypes.CDLL(_LIBDL_PATH) - _LIBDL.dladdr.argtypes = [ctypes.c_void_p, ctypes.c_void_p] - _LIBDL.dladdr.restype = ctypes.c_int - - class Dl_info(ctypes.Structure): - _fields_ = [ - ("dli_fname", ctypes.c_char_p), # path to .so - ("dli_fbase", ctypes.c_void_p), - ("dli_sname", ctypes.c_char_p), - ("dli_saddr", ctypes.c_void_p), - ] - - -from .find_nvidia_dynamic_library import _find_nvidia_dynamic_library -from .supported_libs import ( - DIRECT_DEPENDENCIES, - EXPECTED_LIB_SYMBOLS, - LIBNAMES_REQUIRING_OS_ADD_DLL_DIRECTORY, - SUPPORTED_LINUX_SONAMES, - SUPPORTED_WINDOWS_DLLS, -) - - -def _add_dll_directory(dll_abs_path): - dirpath = os.path.dirname(dll_abs_path) - assert os.path.isdir(dirpath), dll_abs_path - # Add the DLL directory to the search path - os.add_dll_directory(dirpath) - # Update PATH as a fallback for dependent DLL resolution - curr_path = os.environ.get("PATH") - os.environ["PATH"] = dirpath if curr_path is None else os.pathsep.join((curr_path, dirpath)) - - -@functools.cache -def 
_windows_cuDriverGetVersion() -> int: - handle = win32api.LoadLibrary("nvcuda.dll") - - kernel32 = ctypes.WinDLL("kernel32", use_last_error=True) - GetProcAddress = kernel32.GetProcAddress - GetProcAddress.argtypes = [ctypes.wintypes.HMODULE, ctypes.wintypes.LPCSTR] - GetProcAddress.restype = ctypes.c_void_p - cuDriverGetVersion = GetProcAddress(handle, b"cuDriverGetVersion") - assert cuDriverGetVersion - - FUNC_TYPE = ctypes.CFUNCTYPE(ctypes.c_int, ctypes.POINTER(ctypes.c_int)) - cuDriverGetVersion_fn = FUNC_TYPE(cuDriverGetVersion) - driver_ver = ctypes.c_int() - err = cuDriverGetVersion_fn(ctypes.byref(driver_ver)) - assert err == 0 - return driver_ver.value - - -def _abs_path_for_dynamic_library_windows(handle: int) -> str: - buf = ctypes.create_unicode_buffer(260) - n_chars = ctypes.windll.kernel32.GetModuleFileNameW(ctypes.wintypes.HMODULE(handle), buf, len(buf)) - if n_chars == 0: - raise OSError("GetModuleFileNameW failed") - return buf.value - - -@functools.cache -def _windows_load_with_dll_basename(name: str) -> Tuple[Optional[int], Optional[str]]: - driver_ver = _windows_cuDriverGetVersion() - del driver_ver # Keeping this here because it will probably be needed in the future. 
- - dll_names = SUPPORTED_WINDOWS_DLLS.get(name) - if dll_names is None: - return None - - for dll_name in dll_names: - handle = ctypes.windll.kernel32.LoadLibraryW(ctypes.c_wchar_p(dll_name)) - if handle: - return handle, _abs_path_for_dynamic_library_windows(handle) - - return None, None - - -def _abs_path_for_dynamic_library_linux(libname: str, handle: int) -> str: - for symbol_name in EXPECTED_LIB_SYMBOLS[libname]: - symbol = getattr(handle, symbol_name, None) - if symbol is not None: - break - else: - return None - addr = ctypes.cast(symbol, ctypes.c_void_p) - info = Dl_info() - if _LIBDL.dladdr(addr, ctypes.byref(info)) == 0: - raise OSError(f"dladdr failed for {libname=!r}") - return info.dli_fname.decode() - - -def _load_and_report_path_linux(libname: str, soname: str) -> Tuple[int, str]: - handle = ctypes.CDLL(soname, _LINUX_CDLL_MODE) - abs_path = _abs_path_for_dynamic_library_linux(libname, handle) - if abs_path is None: - raise RuntimeError(f"No expected symbol for {libname=!r}") - return handle, abs_path - - -@dataclass -class LoadedDL: - # ATTENTION: To convert `handle` back to `void*` in cython: - # Linux: `cdef void* ptr = <void*><uintptr_t>` - # Windows: `cdef void* ptr = <void*><intptr_t>` - handle: int - abs_path: Optional[str] - was_already_loaded_from_elsewhere: bool + from cuda.bindings._path_finder.load_dl_linux import ( + check_if_already_loaded_from_elsewhere, + load_with_abs_path, + load_with_system_search, + ) def _load_nvidia_dynamic_library_no_cache(libname: str) -> LoadedDL: - # Detect if the library was loaded already in some other way (i.e. not via this function). 
- if sys.platform == "win32": - for dll_name in SUPPORTED_WINDOWS_DLLS.get(libname, ()): - try: - handle = win32api.GetModuleHandle(dll_name) - except pywintypes.error: - pass - else: - return LoadedDL(handle, _abs_path_for_dynamic_library_windows(handle), True) - else: - for soname in SUPPORTED_LINUX_SONAMES.get(libname, ()): - try: - handle = ctypes.CDLL(soname, mode=os.RTLD_NOLOAD) - except OSError: - pass - else: - return LoadedDL(handle, _abs_path_for_dynamic_library_linux(libname, handle), True) + # Check whether the library is already loaded into the current process by + # some other component. This check uses OS-level mechanisms (e.g., + # dlopen on Linux, GetModuleHandle on Windows). + loaded = check_if_already_loaded_from_elsewhere(libname) + if loaded is not None: + return loaded - for dep in DIRECT_DEPENDENCIES.get(libname, ()): - load_nvidia_dynamic_library(dep) + # Load dependencies first + load_dependencies(libname, load_nvidia_dynamic_library) + # Find the library path found = _find_nvidia_dynamic_library(libname) if found.abs_path is None: - if sys.platform == "win32": - handle, abs_path = _windows_load_with_dll_basename(libname) - if handle: - return LoadedDL(handle, abs_path, False) - else: - try: - handle, abs_path = _load_and_report_path_linux(libname, found.lib_searched_for) - except OSError: - pass - else: - return LoadedDL(handle._handle, abs_path, False) + loaded = load_with_system_search(libname, found.lib_searched_for) + if loaded is not None: + return loaded found.raise_if_abs_path_is_None() - if sys.platform == "win32": - if libname in LIBNAMES_REQUIRING_OS_ADD_DLL_DIRECTORY: - _add_dll_directory(found.abs_path) - flags = _WINBASE_LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | _WINBASE_LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR - try: - handle = win32api.LoadLibraryEx(found.abs_path, 0, flags) - except pywintypes.error as e: - raise RuntimeError(f"Failed to load DLL at {found.abs_path}: {e}") from e - return LoadedDL(handle, found.abs_path, False) - else: - 
try: - handle = ctypes.CDLL(found.abs_path, _LINUX_CDLL_MODE) - except OSError as e: - raise RuntimeError(f"Failed to dlopen {found.abs_path}: {e}") from e - return LoadedDL(handle._handle, found.abs_path, False) + # Load the library from the found path + return load_with_abs_path(libname, found.abs_path) @functools.cache def load_nvidia_dynamic_library(libname: str) -> LoadedDL: + """Load a NVIDIA dynamic library by name. + + Args: + libname: The name of the library to load (e.g. "cudart", "nvvm", etc.) + + Returns: + A LoadedDL object containing the library handle and path + + Raises: + RuntimeError: If the library cannot be found or loaded + """ return _load_nvidia_dynamic_library_no_cache(libname) diff --git a/cuda_bindings/cuda/bindings/_path_finder/supported_libs.py b/cuda_bindings/cuda/bindings/_path_finder/supported_libs.py index ee62b92b8a..6852c7fcea 100644 --- a/cuda_bindings/cuda/bindings/_path_finder/supported_libs.py +++ b/cuda_bindings/cuda/bindings/_path_finder/supported_libs.py @@ -1,9 +1,10 @@ # Copyright 2025 NVIDIA Corporation. All rights reserved. -# # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE # THIS FILE NEEDS TO BE REVIEWED/UPDATED FOR EACH CTK RELEASE +import sys + SUPPORTED_LIBNAMES = ( # Core CUDA Runtime and Compiler "nvJitLink", @@ -11,7 +12,7 @@ "nvvm", ) -PARTIALLY_SUPPORTED_LIBNAMES = ( +PARTIALLY_SUPPORTED_LIBNAMES_COMMON = ( # Core CUDA Runtime and Compiler "cudart", "nvfatbin", @@ -37,11 +38,38 @@ "npps", "nvblas", # Other + "nvjpeg", +) + +# Note: The `cufile_rdma` information is intentionally retained (commented out) +# despite not being actively used in the current build. It took a nontrivial +# amount of effort to determine the SONAME, dependencies, and expected symbols +# for this special-case library, especially given its RDMA/MLX5 dependencies +# and limited availability. Keeping this as a reference avoids having to +# reconstruct the information from scratch in the future. 
+ +PARTIALLY_SUPPORTED_LIBNAMES_LINUX_ONLY = ( "cufile", # "cufile_rdma", # Requires libmlx5.so - "nvjpeg", ) +PARTIALLY_SUPPORTED_LIBNAMES_LINUX = PARTIALLY_SUPPORTED_LIBNAMES_COMMON + PARTIALLY_SUPPORTED_LIBNAMES_LINUX_ONLY + +PARTIALLY_SUPPORTED_LIBNAMES_WINDOWS_ONLY = () + +PARTIALLY_SUPPORTED_LIBNAMES_WINDOWS = PARTIALLY_SUPPORTED_LIBNAMES_COMMON + PARTIALLY_SUPPORTED_LIBNAMES_WINDOWS_ONLY + +PARTIALLY_SUPPORTED_LIBNAMES_ALL = ( + PARTIALLY_SUPPORTED_LIBNAMES_COMMON + + PARTIALLY_SUPPORTED_LIBNAMES_LINUX_ONLY + + PARTIALLY_SUPPORTED_LIBNAMES_WINDOWS_ONLY +) + +if sys.platform == "win32": + PARTIALLY_SUPPORTED_LIBNAMES = PARTIALLY_SUPPORTED_LIBNAMES_WINDOWS +else: + PARTIALLY_SUPPORTED_LIBNAMES = PARTIALLY_SUPPORTED_LIBNAMES_LINUX + # Based on ldd output for Linux x86_64 nvidia-*-cu12 wheels (12.8.1) DIRECT_DEPENDENCIES = { "cublas": ("cublasLt",), @@ -231,8 +259,6 @@ "cufftw64_10.dll", "cufftw64_11.dll", ), - "cufile": (), - # "cufile_rdma": (), "curand": ("curand64_10.dll",), "cusolver": ( "cusolver64_10.dll", @@ -333,7 +359,10 @@ def is_suppressed_dll_file(path_basename: str) -> bool: # Based on nm output for Linux x86_64 /usr/local/cuda (12.8.1) EXPECTED_LIB_SYMBOLS = { - "nvJitLink": ("nvJitLinkVersion",), + "nvJitLink": ( + "__nvJitLinkCreate_12_0", # 12.0 through 12.8 (at least) + "nvJitLinkVersion", # 12.3 and up + ), "nvrtc": ("nvrtcVersion",), "nvvm": ("nvvmVersion",), "cudart": ("cudaRuntimeGetVersion",), diff --git a/cuda_bindings/cuda/bindings/path_finder.py b/cuda_bindings/cuda/bindings/path_finder.py index 9c08bdc258..28badd0255 100644 --- a/cuda_bindings/cuda/bindings/path_finder.py +++ b/cuda_bindings/cuda/bindings/path_finder.py @@ -2,10 +2,12 @@ # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE -from cuda.bindings._path_finder.load_nvidia_dynamic_library import load_nvidia_dynamic_library -from cuda.bindings._path_finder.supported_libs import SUPPORTED_LIBNAMES +from cuda.bindings._path_finder.load_nvidia_dynamic_library import 
( + load_nvidia_dynamic_library as _load_nvidia_dynamic_library, +) +from cuda.bindings._path_finder.supported_libs import SUPPORTED_LIBNAMES as _SUPPORTED_LIBNAMES __all__ = [ - "load_nvidia_dynamic_library", - "SUPPORTED_LIBNAMES", + "_load_nvidia_dynamic_library", + "_SUPPORTED_LIBNAMES", ] diff --git a/cuda_bindings/tests/test_path_finder.py b/cuda_bindings/tests/test_path_finder_find_load.py similarity index 58% rename from cuda_bindings/tests/test_path_finder.py rename to cuda_bindings/tests/test_path_finder_find_load.py index cb659026fc..2a5f887fdc 100644 --- a/cuda_bindings/tests/test_path_finder.py +++ b/cuda_bindings/tests/test_path_finder_find_load.py @@ -1,3 +1,6 @@ +# Copyright 2025 NVIDIA Corporation. All rights reserved. +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + import os import subprocess # nosec B404 import sys @@ -7,35 +10,34 @@ from cuda.bindings import path_finder from cuda.bindings._path_finder import supported_libs -ALL_LIBNAMES = path_finder.SUPPORTED_LIBNAMES + supported_libs.PARTIALLY_SUPPORTED_LIBNAMES +ALL_LIBNAMES = path_finder._SUPPORTED_LIBNAMES + supported_libs.PARTIALLY_SUPPORTED_LIBNAMES_ALL +ALL_LIBNAMES_LINUX = path_finder._SUPPORTED_LIBNAMES + supported_libs.PARTIALLY_SUPPORTED_LIBNAMES_LINUX +ALL_LIBNAMES_WINDOWS = path_finder._SUPPORTED_LIBNAMES + supported_libs.PARTIALLY_SUPPORTED_LIBNAMES_WINDOWS if os.environ.get("CUDA_BINDINGS_PATH_FINDER_TEST_ALL_LIBNAMES", False): - TEST_LIBNAMES = ALL_LIBNAMES + if sys.platform == "win32": + TEST_FIND_OR_LOAD_LIBNAMES = ALL_LIBNAMES_WINDOWS + else: + TEST_FIND_OR_LOAD_LIBNAMES = ALL_LIBNAMES_LINUX else: - TEST_LIBNAMES = path_finder.SUPPORTED_LIBNAMES + TEST_FIND_OR_LOAD_LIBNAMES = path_finder._SUPPORTED_LIBNAMES def test_all_libnames_linux_sonames_consistency(): - assert tuple(sorted(ALL_LIBNAMES)) == tuple(sorted(supported_libs.SUPPORTED_LINUX_SONAMES.keys())) + assert tuple(sorted(ALL_LIBNAMES_LINUX)) == 
tuple(sorted(supported_libs.SUPPORTED_LINUX_SONAMES.keys())) def test_all_libnames_windows_dlls_consistency(): - assert tuple(sorted(ALL_LIBNAMES)) == tuple(sorted(supported_libs.SUPPORTED_WINDOWS_DLLS.keys())) + assert tuple(sorted(ALL_LIBNAMES_WINDOWS)) == tuple(sorted(supported_libs.SUPPORTED_WINDOWS_DLLS.keys())) def test_all_libnames_libnames_requiring_os_add_dll_directory_consistency(): - assert not (set(supported_libs.LIBNAMES_REQUIRING_OS_ADD_DLL_DIRECTORY) - set(ALL_LIBNAMES)) + assert not (set(supported_libs.LIBNAMES_REQUIRING_OS_ADD_DLL_DIRECTORY) - set(ALL_LIBNAMES_WINDOWS)) def test_all_libnames_expected_lib_symbols_consistency(): assert tuple(sorted(ALL_LIBNAMES)) == tuple(sorted(supported_libs.EXPECTED_LIB_SYMBOLS.keys())) -def _check_nvjitlink_usable(): - from cuda.bindings._internal import nvjitlink as inner_nvjitlink - - return inner_nvjitlink._inspect_function_pointer("__nvJitLinkVersion") != 0 - - def _build_subprocess_failed_for_libname_message(libname, result): return ( f"Subprocess failed for {libname=!r} with exit code {result.returncode}\n" @@ -45,14 +47,15 @@ def _build_subprocess_failed_for_libname_message(libname, result): @pytest.mark.parametrize("api", ("find", "load")) -@pytest.mark.parametrize("libname", TEST_LIBNAMES) +@pytest.mark.parametrize("libname", TEST_FIND_OR_LOAD_LIBNAMES) def test_find_or_load_nvidia_dynamic_library(info_summary_append, api, libname): - if sys.platform == "win32" and not supported_libs.SUPPORTED_WINDOWS_DLLS[libname]: - pytest.skip(f"{libname=!r} not supported on {sys.platform=}") - - if libname == "nvJitLink" and not _check_nvjitlink_usable(): - pytest.skip(f"{libname=!r} not usable") - + # We intentionally run each dynamic library operation in a subprocess + # to ensure isolation of global dynamic linking state (e.g., dlopen handles). + # Without subprocesses, loading/unloading libraries during testing could + # interfere across test cases and lead to nondeterministic or platform-specific failures. 
+ # + # Defining the subprocess code snippets as strings ensures each subprocess + # runs a minimal, independent script tailored to the specific libname and API being tested. if api == "find": code = f"""\ from cuda.bindings._path_finder.find_nvidia_dynamic_library import find_nvidia_dynamic_library @@ -61,14 +64,14 @@ def test_find_or_load_nvidia_dynamic_library(info_summary_append, api, libname): """ else: code = f"""\ -from cuda.bindings.path_finder import load_nvidia_dynamic_library +from cuda.bindings.path_finder import _load_nvidia_dynamic_library from cuda.bindings._path_finder.load_nvidia_dynamic_library import _load_nvidia_dynamic_library_no_cache -loaded_dl_fresh = load_nvidia_dynamic_library({libname!r}) +loaded_dl_fresh = _load_nvidia_dynamic_library({libname!r}) if loaded_dl_fresh.was_already_loaded_from_elsewhere: raise RuntimeError("loaded_dl_fresh.was_already_loaded_from_elsewhere") -loaded_dl_from_cache = load_nvidia_dynamic_library({libname!r}) +loaded_dl_from_cache = _load_nvidia_dynamic_library({libname!r}) if loaded_dl_from_cache is not loaded_dl_fresh: raise RuntimeError("loaded_dl_from_cache is not loaded_dl_fresh") @@ -85,6 +88,7 @@ def test_find_or_load_nvidia_dynamic_library(info_summary_append, api, libname): stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding="utf-8", + timeout=30, # Ensure CI testing does not hang for an excessive amount of time. ) if result.returncode == 0: info_summary_append(f"abs_path={result.stdout.rstrip()}") diff --git a/cuda_bindings/tests/test_path_finder_find_sub_dirs.py b/cuda_bindings/tests/test_path_finder_find_sub_dirs.py new file mode 100644 index 0000000000..6b2644bff2 --- /dev/null +++ b/cuda_bindings/tests/test_path_finder_find_sub_dirs.py @@ -0,0 +1,91 @@ +# Copyright 2025 NVIDIA Corporation. All rights reserved. 
+# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + +import os + +import pytest + +from cuda.bindings._path_finder.find_sub_dirs import ( + find_sub_dirs, + find_sub_dirs_all_sitepackages, + find_sub_dirs_sys_path, +) + +NONEXISTENT = "NonExistentE12DBF1Fbe948337576B5F1E88f60bb2" + + +@pytest.fixture +def test_tree(tmp_path): + # Build: + # tmp_path/ + # sys1/nvidia/foo/lib + # sys1/nvidia/bar/lib + # sys2/nvidia/baz/nvvm/lib64 + base = tmp_path + (base / "sys1" / "nvidia" / "foo" / "lib").mkdir(parents=True) + (base / "sys1" / "nvidia" / "bar" / "lib").mkdir(parents=True) + (base / "sys2" / "nvidia" / "baz" / "nvvm" / "lib64").mkdir(parents=True) + + return { + "parent_paths": ( + str(base / "sys1"), + str(base / "sys2"), + str(base / NONEXISTENT), + ), + "base": base, + } + + +def test_exact_match(test_tree): + parent_paths = test_tree["parent_paths"] + base = test_tree["base"] + result = find_sub_dirs(parent_paths, ("nvidia", "foo", "lib")) + expected = [str(base / "sys1" / "nvidia" / "foo" / "lib")] + assert result == expected + + +def test_single_wildcard(test_tree): + parent_paths = test_tree["parent_paths"] + base = test_tree["base"] + result = find_sub_dirs(parent_paths, ("nvidia", "*", "lib")) + expected = [ + str(base / "sys1" / "nvidia" / "bar" / "lib"), + str(base / "sys1" / "nvidia" / "foo" / "lib"), + ] + assert sorted(result) == sorted(expected) + + +def test_double_wildcard(test_tree): + parent_paths = test_tree["parent_paths"] + base = test_tree["base"] + result = find_sub_dirs(parent_paths, ("nvidia", "*", "nvvm", "lib64")) + expected = [str(base / "sys2" / "nvidia" / "baz" / "nvvm" / "lib64")] + assert result == expected + + +def test_no_match(test_tree): + parent_paths = test_tree["parent_paths"] + result = find_sub_dirs(parent_paths, (NONEXISTENT,)) + assert result == [] + + +def test_empty_parent_paths(): + result = find_sub_dirs((), ("nvidia", "*", "lib")) + assert result == [] + + +def test_empty_sub_dirs(test_tree): + 
parent_paths = test_tree["parent_paths"] + result = find_sub_dirs(parent_paths, ()) + expected = [p for p in parent_paths if os.path.isdir(p)] + assert sorted(result) == sorted(expected) + + +def test_find_sub_dirs_sys_path_no_math(): + result = find_sub_dirs_sys_path((NONEXISTENT,)) + assert result == [] + + +def test_find_sub_dirs_all_sitepackages_no_match(): + result = find_sub_dirs_all_sitepackages((NONEXISTENT,)) + assert result == [] diff --git a/cuda_bindings/tests/test_sys_path_find_sub_dirs.py b/cuda_bindings/tests/test_sys_path_find_sub_dirs.py deleted file mode 100644 index 3297ce39e0..0000000000 --- a/cuda_bindings/tests/test_sys_path_find_sub_dirs.py +++ /dev/null @@ -1,72 +0,0 @@ -import os - -import pytest - -from cuda.bindings._path_finder.sys_path_find_sub_dirs import _impl - - -@pytest.fixture -def test_tree(tmp_path): - # Build: - # tmp_path/ - # sys1/nvidia/foo/lib - # sys1/nvidia/bar/lib - # sys2/nvidia/baz/nvvm/lib64 - base = tmp_path - (base / "sys1" / "nvidia" / "foo" / "lib").mkdir(parents=True) - (base / "sys1" / "nvidia" / "bar" / "lib").mkdir(parents=True) - (base / "sys2" / "nvidia" / "baz" / "nvvm" / "lib64").mkdir(parents=True) - - return { - "sys_path": ( - str(base / "sys1"), - str(base / "sys2"), - str(base / "nonexistent"), # should be ignored - ), - "base": base, - } - - -def test_exact_match(test_tree): - sys_path = test_tree["sys_path"] - base = test_tree["base"] - result = _impl(sys_path, ("nvidia", "foo", "lib")) - expected = [str(base / "sys1" / "nvidia" / "foo" / "lib")] - assert result == expected - - -def test_single_wildcard(test_tree): - sys_path = test_tree["sys_path"] - base = test_tree["base"] - result = _impl(sys_path, ("nvidia", "*", "lib")) - expected = [ - str(base / "sys1" / "nvidia" / "bar" / "lib"), - str(base / "sys1" / "nvidia" / "foo" / "lib"), - ] - assert sorted(result) == sorted(expected) - - -def test_double_wildcard(test_tree): - sys_path = test_tree["sys_path"] - base = test_tree["base"] - result = 
_impl(sys_path, ("nvidia", "*", "nvvm", "lib64")) - expected = [str(base / "sys2" / "nvidia" / "baz" / "nvvm" / "lib64")] - assert result == expected - - -def test_no_match(test_tree): - sys_path = test_tree["sys_path"] - result = _impl(sys_path, ("nvidia", "nonexistent", "lib")) - assert result == [] - - -def test_empty_sys_path(): - result = _impl((), ("nvidia", "*", "lib")) - assert result == [] - - -def test_empty_sub_dirs(test_tree): - sys_path = test_tree["sys_path"] - result = _impl(sys_path, ()) - expected = [p for p in sys_path if os.path.isdir(p)] - assert sorted(result) == sorted(expected) diff --git a/toolshed/build_path_finder_dlls.py b/toolshed/build_path_finder_dlls.py index c82dcd866d..be2db0d1ff 100755 --- a/toolshed/build_path_finder_dlls.py +++ b/toolshed/build_path_finder_dlls.py @@ -1,5 +1,9 @@ #!/usr/bin/env python3 +# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# +# SPDX-License-Identifier: Apache-2.0 + # Input for this script: .txt files generated with: # for exe in *.exe; do 7z l $exe > "${exe%.exe}.txt"; done diff --git a/toolshed/build_path_finder_sonames.py b/toolshed/build_path_finder_sonames.py index 20e8ec6c7d..17b7dd7b3c 100755 --- a/toolshed/build_path_finder_sonames.py +++ b/toolshed/build_path_finder_sonames.py @@ -1,5 +1,9 @@ #!/usr/bin/env python3 +# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# +# SPDX-License-Identifier: Apache-2.0 + # Input for this script: # output of toolshed/find_sonames.sh diff --git a/toolshed/find_sonames.sh b/toolshed/find_sonames.sh index 79c2e89d5c..b742becf6d 100755 --- a/toolshed/find_sonames.sh +++ b/toolshed/find_sonames.sh @@ -1,4 +1,9 @@ #!/bin/bash + +# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. 
+# +# SPDX-License-Identifier: Apache-2.0 + find "$@" -type f -name '*.so*' -print0 | while IFS= read -r -d '' f; do type=$(test -L "$f" && echo SYMLINK || echo FILE) soname=$(readelf -d "$f" 2>/dev/null | awk '/SONAME/ {gsub(/[][]/, "", $5); print $5; exit}') diff --git a/toolshed/run_cuda_bindings_path_finder.py b/toolshed/run_cuda_bindings_path_finder.py index 5f47b39903..19f43c2881 100644 --- a/toolshed/run_cuda_bindings_path_finder.py +++ b/toolshed/run_cuda_bindings_path_finder.py @@ -1,3 +1,7 @@ +# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# +# SPDX-License-Identifier: Apache-2.0 + import sys import traceback @@ -5,7 +9,7 @@ from cuda.bindings._path_finder import cuda_paths, supported_libs ALL_LIBNAMES = ( - path_finder.SUPPORTED_LIBNAMES + supported_libs.PARTIALLY_SUPPORTED_LIBNAMES + path_finder._SUPPORTED_LIBNAMES + supported_libs.PARTIALLY_SUPPORTED_LIBNAMES ) @@ -20,7 +24,7 @@ def run(args): for libname in ALL_LIBNAMES: print(f"{libname=}") try: - loaded_dl = path_finder.load_nvidia_dynamic_library(libname) + loaded_dl = path_finder._load_nvidia_dynamic_library(libname) except Exception: print(f"EXCEPTION for {libname=}:") traceback.print_exc(file=sys.stdout)