diff --git a/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in b/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in index 2b0f3dc236..d2bb0b63b0 100644 --- a/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in +++ b/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in @@ -56,7 +56,7 @@ cdef int cuPythonInit() except -1 nogil: {{if 'Windows' == platform.system()}} with gil: - handle = path_finder.load_nvidia_dynamic_library("nvrtc") + handle = path_finder.load_nvidia_dynamic_library("nvrtc").handle {{if 'nvrtcGetErrorString' in found_functions}} try: global __nvrtcGetErrorString @@ -242,7 +242,7 @@ cdef int cuPythonInit() except -1 nogil: {{else}} with gil: - handle = path_finder.load_nvidia_dynamic_library("nvrtc") + handle = path_finder.load_nvidia_dynamic_library("nvrtc").handle {{if 'nvrtcGetErrorString' in found_functions}} global __nvrtcGetErrorString __nvrtcGetErrorString = dlfcn.dlsym(handle, 'nvrtcGetErrorString') diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx index 9d21a3e105..78b4d802b3 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx @@ -53,7 +53,7 @@ cdef void* __nvJitLinkVersion = NULL cdef void* load_library(int driver_ver) except* with gil: - cdef uintptr_t handle = path_finder.load_nvidia_dynamic_library("nvJitLink") + cdef uintptr_t handle = path_finder.load_nvidia_dynamic_library("nvJitLink").handle return <void*>handle diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx index f86972216d..b306a30017 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx @@ -40,7 +40,7 @@ cdef void* __nvJitLinkVersion = NULL cdef void* load_library(int driver_ver) except* with gil: - cdef intptr_t handle = 
path_finder.load_nvidia_dynamic_library("nvJitLink") + cdef intptr_t handle = path_finder.load_nvidia_dynamic_library("nvJitLink").handle return <void*>handle diff --git a/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx index 33ba8e6105..82335508be 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx @@ -51,7 +51,7 @@ cdef void* __nvvmGetProgramLog = NULL cdef void* load_library(const int driver_ver) except* with gil: - cdef uintptr_t handle = path_finder.load_nvidia_dynamic_library("nvvm") + cdef uintptr_t handle = path_finder.load_nvidia_dynamic_library("nvvm").handle return <void*>handle diff --git a/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx index 6349fa5a1e..21b4d94180 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx @@ -38,7 +38,7 @@ cdef void* __nvvmGetProgramLog = NULL cdef void* load_library(int driver_ver) except* with gil: - cdef intptr_t handle = path_finder.load_nvidia_dynamic_library("nvvm") + cdef intptr_t handle = path_finder.load_nvidia_dynamic_library("nvvm").handle return <void*>handle diff --git a/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py b/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py index 3d6604f082..e60154aa5a 100644 --- a/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py +++ b/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py @@ -7,6 +7,7 @@ import os from .cuda_paths import IS_WIN32, get_cuda_paths +from .supported_libs import is_suppressed_dll_file from .sys_path_find_sub_dirs import sys_path_find_sub_dirs @@ -38,9 +39,13 @@ def _find_so_using_nvidia_lib_dirs(libname, so_basename, error_messages, attachm return None -def _append_to_os_environ_path(dirpath): - curr_path = 
os.environ.get("PATH") - os.environ["PATH"] = dirpath if curr_path is None else os.pathsep.join((curr_path, dirpath)) +def _find_dll_under_dir(dirpath, file_wild): + for path in sorted(glob.glob(os.path.join(dirpath, file_wild))): + if not os.path.isfile(path): + continue + if not is_suppressed_dll_file(os.path.basename(path)): + return path + return None def _find_dll_using_nvidia_bin_dirs(libname, error_messages, attachments): @@ -50,30 +55,8 @@ def _find_dll_using_nvidia_bin_dirs(libname, error_messages, attachments): nvidia_sub_dirs = ("nvidia", "*", "bin") file_wild = libname + "*.dll" for bin_dir in sys_path_find_sub_dirs(nvidia_sub_dirs): - dll_name = None - have_builtins = False - for path in sorted(glob.glob(os.path.join(bin_dir, file_wild))): - # nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-win_amd64.whl: - # nvidia\cuda_nvrtc\bin\ - # nvrtc-builtins64_128.dll - # nvrtc64_120_0.alt.dll - # nvrtc64_120_0.dll - node = os.path.basename(path) - if node.endswith(".alt.dll"): - continue - if "-builtins" in node: - have_builtins = True - continue - if dll_name is not None: - continue - if os.path.isfile(path): - dll_name = path + dll_name = _find_dll_under_dir(bin_dir, file_wild) if dll_name is not None: - if have_builtins: - # Add the DLL directory to the search path - os.add_dll_directory(bin_dir) - # Update PATH as a fallback for dependent DLL resolution - _append_to_os_environ_path(bin_dir) return dll_name _no_such_file_in_sub_dirs(nvidia_sub_dirs, file_wild, error_messages, attachments) return None @@ -122,9 +105,9 @@ def _find_dll_using_cudalib_dir(libname, error_messages, attachments): if cudalib_dir is None: return None file_wild = libname + "*.dll" - for dll_name in sorted(glob.glob(os.path.join(cudalib_dir, file_wild))): - if os.path.isfile(dll_name): - return dll_name + dll_name = _find_dll_under_dir(cudalib_dir, file_wild) + if dll_name is not None: + return dll_name error_messages.append(f"No such file: {file_wild}") attachments.append(f' 
listdir("{cudalib_dir}"):') for node in sorted(os.listdir(cudalib_dir)): @@ -132,31 +115,42 @@ def _find_dll_using_cudalib_dir(libname, error_messages, attachments): return None -@functools.cache -def find_nvidia_dynamic_library(name: str) -> str: - error_messages = [] - attachments = [] - - if IS_WIN32: - dll_name = _find_dll_using_nvidia_bin_dirs(name, error_messages, attachments) - if dll_name is None: - if name == "nvvm": - dll_name = _get_cuda_paths_info("nvvm", error_messages) - else: - dll_name = _find_dll_using_cudalib_dir(name, error_messages, attachments) - if dll_name is None: - attachments = "\n".join(attachments) - raise RuntimeError(f'Failure finding "{name}*.dll": {", ".join(error_messages)}\n{attachments}') - return dll_name - - so_basename = f"lib{name}.so" - so_name = _find_so_using_nvidia_lib_dirs(name, so_basename, error_messages, attachments) - if so_name is None: - if name == "nvvm": - so_name = _get_cuda_paths_info("nvvm", error_messages) +class _find_nvidia_dynamic_library: + def __init__(self, libname: str): + self.libname = libname + self.error_messages = [] + self.attachments = [] + self.abs_path = None + + if IS_WIN32: + self.abs_path = _find_dll_using_nvidia_bin_dirs(libname, self.error_messages, self.attachments) + if self.abs_path is None: + if libname == "nvvm": + self.abs_path = _get_cuda_paths_info("nvvm", self.error_messages) + else: + self.abs_path = _find_dll_using_cudalib_dir(libname, self.error_messages, self.attachments) + self.lib_searched_for = f"{libname}*.dll" else: - so_name = _find_so_using_cudalib_dir(so_basename, error_messages, attachments) - if so_name is None: - attachments = "\n".join(attachments) - raise RuntimeError(f'Failure finding "{so_basename}": {", ".join(error_messages)}\n{attachments}') - return so_name + self.lib_searched_for = f"lib{libname}.so" + self.abs_path = _find_so_using_nvidia_lib_dirs( + libname, self.lib_searched_for, self.error_messages, self.attachments + ) + if self.abs_path is None: + if 
libname == "nvvm": + self.abs_path = _get_cuda_paths_info("nvvm", self.error_messages) + else: + self.abs_path = _find_so_using_cudalib_dir( + self.lib_searched_for, self.error_messages, self.attachments + ) + + def raise_if_abs_path_is_None(self): + if self.abs_path: + return self.abs_path + err = ", ".join(self.error_messages) + att = "\n".join(self.attachments) + raise RuntimeError(f'Failure finding "{self.lib_searched_for}": {err}\n{att}') + + +@functools.cache +def find_nvidia_dynamic_library(libname: str) -> str: + return _find_nvidia_dynamic_library(libname).raise_if_abs_path_is_None() diff --git a/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py b/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py index 1a52bf0dde..c770de67d0 100644 --- a/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py +++ b/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py @@ -1,5 +1,13 @@ +# Copyright 2025 NVIDIA Corporation. All rights reserved. 
+# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + +import ctypes import functools +import os import sys +from dataclasses import dataclass +from typing import Optional, Tuple if sys.platform == "win32": import ctypes.wintypes @@ -12,12 +20,42 @@ _WINBASE_LOAD_LIBRARY_SEARCH_DEFAULT_DIRS = 0x00001000 else: - import ctypes - import os + import ctypes.util _LINUX_CDLL_MODE = os.RTLD_NOW | os.RTLD_GLOBAL -from .find_nvidia_dynamic_library import find_nvidia_dynamic_library + _LIBDL_PATH = ctypes.util.find_library("dl") or "libdl.so.2" + _LIBDL = ctypes.CDLL(_LIBDL_PATH) + _LIBDL.dladdr.argtypes = [ctypes.c_void_p, ctypes.c_void_p] + _LIBDL.dladdr.restype = ctypes.c_int + + class Dl_info(ctypes.Structure): + _fields_ = [ + ("dli_fname", ctypes.c_char_p), # path to .so + ("dli_fbase", ctypes.c_void_p), + ("dli_sname", ctypes.c_char_p), + ("dli_saddr", ctypes.c_void_p), + ] + + +from .find_nvidia_dynamic_library import _find_nvidia_dynamic_library +from .supported_libs import ( + DIRECT_DEPENDENCIES, + EXPECTED_LIB_SYMBOLS, + LIBNAMES_REQUIRING_OS_ADD_DLL_DIRECTORY, + SUPPORTED_LINUX_SONAMES, + SUPPORTED_WINDOWS_DLLS, +) + + +def _add_dll_directory(dll_abs_path): + dirpath = os.path.dirname(dll_abs_path) + assert os.path.isdir(dirpath), dll_abs_path + # Add the DLL directory to the search path + os.add_dll_directory(dirpath) + # Update PATH as a fallback for dependent DLL resolution + curr_path = os.environ.get("PATH") + os.environ["PATH"] = dirpath if curr_path is None else os.pathsep.join((curr_path, dirpath)) @functools.cache @@ -39,56 +77,117 @@ def _windows_cuDriverGetVersion() -> int: return driver_ver.value +def _abs_path_for_dynamic_library_windows(handle: int) -> str: + buf = ctypes.create_unicode_buffer(260) + n_chars = ctypes.windll.kernel32.GetModuleFileNameW(ctypes.wintypes.HMODULE(handle), buf, len(buf)) + if n_chars == 0: + raise OSError("GetModuleFileNameW failed") + return buf.value + + @functools.cache -def 
_windows_load_with_dll_basename(name: str) -> int: +def _windows_load_with_dll_basename(name: str) -> Tuple[Optional[int], Optional[str]]: driver_ver = _windows_cuDriverGetVersion() del driver_ver # Keeping this here because it will probably be needed in the future. - if name == "nvJitLink": - dll_name = "nvJitLink_120_0.dll" - elif name == "nvrtc": - dll_name = "nvrtc64_120_0.dll" - elif name == "nvvm": - dll_name = "nvvm64_40_0.dll" + dll_names = SUPPORTED_WINDOWS_DLLS.get(name) + if dll_names is None: + return None, None - try: - return win32api.LoadLibrary(dll_name) - except pywintypes.error: - pass + for dll_name in dll_names: + handle = ctypes.windll.kernel32.LoadLibraryW(ctypes.c_wchar_p(dll_name)) + if handle: + return handle, _abs_path_for_dynamic_library_windows(handle) - return None + return None, None -@functools.cache -def load_nvidia_dynamic_library(name: str) -> int: - # First try using the platform-specific dynamic loader search mechanisms +def _abs_path_for_dynamic_library_linux(libname: str, handle: ctypes.CDLL) -> Optional[str]: + for symbol_name in EXPECTED_LIB_SYMBOLS[libname]: + symbol = getattr(handle, symbol_name, None) + if symbol is not None: + break + else: + return None + addr = ctypes.cast(symbol, ctypes.c_void_p) + info = Dl_info() + if _LIBDL.dladdr(addr, ctypes.byref(info)) == 0: + raise OSError(f"dladdr failed for {libname=!r}") + return info.dli_fname.decode() + + +def _load_and_report_path_linux(libname: str, soname: str) -> Tuple[ctypes.CDLL, str]: + handle = ctypes.CDLL(soname, _LINUX_CDLL_MODE) + abs_path = _abs_path_for_dynamic_library_linux(libname, handle) + if abs_path is None: + raise RuntimeError(f"No expected symbol for {libname=!r}") + return handle, abs_path + + +@dataclass +class LoadedDL: + # ATTENTION: To convert `handle` back to `void*` in cython: + # Linux: `cdef void* ptr = <void*><uintptr_t>handle` + # Windows: `cdef void* ptr = <void*><intptr_t>handle` + handle: int + abs_path: Optional[str] + was_already_loaded_from_elsewhere: bool + + +def _load_nvidia_dynamic_library_no_cache(libname: 
str) -> LoadedDL: + # Detect if the library was loaded already in some other way (i.e. not via this function). if sys.platform == "win32": - handle = _windows_load_with_dll_basename(name) - if handle: - return handle + for dll_name in SUPPORTED_WINDOWS_DLLS.get(libname, ()): + try: + handle = win32api.GetModuleHandle(dll_name) + except pywintypes.error: + pass + else: + return LoadedDL(handle, _abs_path_for_dynamic_library_windows(handle), True) else: - dl_path = f"lib{name}.so" # Version intentionally no specified. - try: - handle = ctypes.CDLL(dl_path, _LINUX_CDLL_MODE) - except OSError: - pass + for soname in SUPPORTED_LINUX_SONAMES.get(libname, ()): + try: + handle = ctypes.CDLL(soname, mode=os.RTLD_NOLOAD) + except OSError: + pass + else: + return LoadedDL(handle._handle, _abs_path_for_dynamic_library_linux(libname, handle), True) + + for dep in DIRECT_DEPENDENCIES.get(libname, ()): + load_nvidia_dynamic_library(dep) + + found = _find_nvidia_dynamic_library(libname) + if found.abs_path is None: + if sys.platform == "win32": + handle, abs_path = _windows_load_with_dll_basename(libname) + if handle: + return LoadedDL(handle, abs_path, False) else: - # Use `cdef void* ptr = <void*><uintptr_t>` in cython to convert back to void* - return handle._handle # C unsigned int + try: + handle, abs_path = _load_and_report_path_linux(libname, found.lib_searched_for) + except OSError: + pass + else: + return LoadedDL(handle._handle, abs_path, False) + found.raise_if_abs_path_is_None() - dl_path = find_nvidia_dynamic_library(name) if sys.platform == "win32": + if libname in LIBNAMES_REQUIRING_OS_ADD_DLL_DIRECTORY: + _add_dll_directory(found.abs_path) flags = _WINBASE_LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | _WINBASE_LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR try: - handle = win32api.LoadLibraryEx(dl_path, 0, flags) + handle = win32api.LoadLibraryEx(found.abs_path, 0, flags) except pywintypes.error as e: - raise RuntimeError(f"Failed to load DLL at {dl_path}: {e}") from e - # Use `cdef void* ptr = <void*><intptr_t>` in cython to 
convert back to void* - return handle # C signed int, matches win32api.GetProcAddress + raise RuntimeError(f"Failed to load DLL at {found.abs_path}: {e}") from e + return LoadedDL(handle, found.abs_path, False) else: try: - handle = ctypes.CDLL(dl_path, _LINUX_CDLL_MODE) + handle = ctypes.CDLL(found.abs_path, _LINUX_CDLL_MODE) except OSError as e: - raise RuntimeError(f"Failed to dlopen {dl_path}: {e}") from e - # Use `cdef void* ptr = ` in cython to convert back to void* - return handle._handle # C unsigned int + raise RuntimeError(f"Failed to dlopen {found.abs_path}: {e}") from e + return LoadedDL(handle._handle, found.abs_path, False) + + +@functools.cache +def load_nvidia_dynamic_library(libname: str) -> LoadedDL: + return _load_nvidia_dynamic_library_no_cache(libname) diff --git a/cuda_bindings/cuda/bindings/_path_finder/supported_libs.py b/cuda_bindings/cuda/bindings/_path_finder/supported_libs.py new file mode 100644 index 0000000000..ee62b92b8a --- /dev/null +++ b/cuda_bindings/cuda/bindings/_path_finder/supported_libs.py @@ -0,0 +1,364 @@ +# Copyright 2025 NVIDIA Corporation. All rights reserved. 
+# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + +# THIS FILE NEEDS TO BE REVIEWED/UPDATED FOR EACH CTK RELEASE + +SUPPORTED_LIBNAMES = ( + # Core CUDA Runtime and Compiler + "nvJitLink", + "nvrtc", + "nvvm", +) + +PARTIALLY_SUPPORTED_LIBNAMES = ( + # Core CUDA Runtime and Compiler + "cudart", + "nvfatbin", + # Math Libraries + "cublas", + "cublasLt", + "cufft", + "cufftw", + "curand", + "cusolver", + "cusolverMg", + "cusparse", + "nppc", + "nppial", + "nppicc", + "nppidei", + "nppif", + "nppig", + "nppim", + "nppist", + "nppisu", + "nppitc", + "npps", + "nvblas", + # Other + "cufile", + # "cufile_rdma", # Requires libmlx5.so + "nvjpeg", +) + +# Based on ldd output for Linux x86_64 nvidia-*-cu12 wheels (12.8.1) +DIRECT_DEPENDENCIES = { + "cublas": ("cublasLt",), + "cufftw": ("cufft",), + # "cufile_rdma": ("cufile",), + "cusolver": ("nvJitLink", "cusparse", "cublasLt", "cublas"), + "cusolverMg": ("nvJitLink", "cublasLt", "cublas"), + "cusparse": ("nvJitLink",), + "nppial": ("nppc",), + "nppicc": ("nppc",), + "nppidei": ("nppc",), + "nppif": ("nppc",), + "nppig": ("nppc",), + "nppim": ("nppc",), + "nppist": ("nppc",), + "nppisu": ("nppc",), + "nppitc": ("nppc",), + "npps": ("nppc",), + "nvblas": ("cublas", "cublasLt"), +} + +# Based on these released files: +# cuda_11.0.3_450.51.06_linux.run +# cuda_11.1.1_455.32.00_linux.run +# cuda_11.2.2_460.32.03_linux.run +# cuda_11.3.1_465.19.01_linux.run +# cuda_11.4.4_470.82.01_linux.run +# cuda_11.5.1_495.29.05_linux.run +# cuda_11.6.2_510.47.03_linux.run +# cuda_11.7.1_515.65.01_linux.run +# cuda_11.8.0_520.61.05_linux.run +# cuda_12.0.1_525.85.12_linux.run +# cuda_12.1.1_530.30.02_linux.run +# cuda_12.2.2_535.104.05_linux.run +# cuda_12.3.2_545.23.08_linux.run +# cuda_12.4.1_550.54.15_linux.run +# cuda_12.5.1_555.42.06_linux.run +# cuda_12.6.2_560.35.03_linux.run +# cuda_12.8.0_570.86.10_linux.run +# Generated with toolshed/build_path_finder_sonames.py +SUPPORTED_LINUX_SONAMES = { + "cublas": ( + 
"libcublas.so.11", + "libcublas.so.12", + ), + "cublasLt": ( + "libcublasLt.so.11", + "libcublasLt.so.12", + ), + "cudart": ( + "libcudart.so.11.0", + "libcudart.so.12", + ), + "cufft": ( + "libcufft.so.10", + "libcufft.so.11", + ), + "cufftw": ( + "libcufftw.so.10", + "libcufftw.so.11", + ), + "cufile": ("libcufile.so.0",), + # "cufile_rdma": ("libcufile_rdma.so.1",), + "curand": ("libcurand.so.10",), + "cusolver": ( + "libcusolver.so.10", + "libcusolver.so.11", + ), + "cusolverMg": ( + "libcusolverMg.so.10", + "libcusolverMg.so.11", + ), + "cusparse": ( + "libcusparse.so.11", + "libcusparse.so.12", + ), + "nppc": ( + "libnppc.so.11", + "libnppc.so.12", + ), + "nppial": ( + "libnppial.so.11", + "libnppial.so.12", + ), + "nppicc": ( + "libnppicc.so.11", + "libnppicc.so.12", + ), + "nppidei": ( + "libnppidei.so.11", + "libnppidei.so.12", + ), + "nppif": ( + "libnppif.so.11", + "libnppif.so.12", + ), + "nppig": ( + "libnppig.so.11", + "libnppig.so.12", + ), + "nppim": ( + "libnppim.so.11", + "libnppim.so.12", + ), + "nppist": ( + "libnppist.so.11", + "libnppist.so.12", + ), + "nppisu": ( + "libnppisu.so.11", + "libnppisu.so.12", + ), + "nppitc": ( + "libnppitc.so.11", + "libnppitc.so.12", + ), + "npps": ( + "libnpps.so.11", + "libnpps.so.12", + ), + "nvJitLink": ("libnvJitLink.so.12",), + "nvblas": ( + "libnvblas.so.11", + "libnvblas.so.12", + ), + "nvfatbin": ("libnvfatbin.so.12",), + "nvjpeg": ( + "libnvjpeg.so.11", + "libnvjpeg.so.12", + ), + "nvrtc": ( + "libnvrtc.so.11.0", + "libnvrtc.so.11.1", + "libnvrtc.so.11.2", + "libnvrtc.so.12", + ), + "nvvm": ( + "libnvvm.so.3", + "libnvvm.so.4", + ), +} + +# Based on these released files: +# cuda_11.0.3_451.82_win10.exe +# cuda_11.1.1_456.81_win10.exe +# cuda_11.2.2_461.33_win10.exe +# cuda_11.3.1_465.89_win10.exe +# cuda_11.4.4_472.50_windows.exe +# cuda_11.5.1_496.13_windows.exe +# cuda_11.6.2_511.65_windows.exe +# cuda_11.7.1_516.94_windows.exe +# cuda_11.8.0_522.06_windows.exe +# cuda_12.0.1_528.33_windows.exe +# 
cuda_12.1.1_531.14_windows.exe +# cuda_12.2.2_537.13_windows.exe +# cuda_12.3.2_546.12_windows.exe +# cuda_12.4.1_551.78_windows.exe +# cuda_12.5.1_555.85_windows.exe +# cuda_12.6.2_560.94_windows.exe +# cuda_12.8.1_572.61_windows.exe +# Generated with toolshed/build_path_finder_dlls.py (WITH MANUAL EDITS) +SUPPORTED_WINDOWS_DLLS = { + "cublas": ( + "cublas64_11.dll", + "cublas64_12.dll", + ), + "cublasLt": ( + "cublasLt64_11.dll", + "cublasLt64_12.dll", + ), + "cudart": ( + "cudart32_110.dll", + "cudart32_65.dll", + "cudart32_90.dll", + "cudart64_101.dll", + "cudart64_110.dll", + "cudart64_12.dll", + "cudart64_65.dll", + ), + "cufft": ( + "cufft64_10.dll", + "cufft64_11.dll", + "cufftw64_10.dll", + "cufftw64_11.dll", + ), + "cufftw": ( + "cufftw64_10.dll", + "cufftw64_11.dll", + ), + "cufile": (), + # "cufile_rdma": (), + "curand": ("curand64_10.dll",), + "cusolver": ( + "cusolver64_10.dll", + "cusolver64_11.dll", + ), + "cusolverMg": ( + "cusolverMg64_10.dll", + "cusolverMg64_11.dll", + ), + "cusparse": ( + "cusparse64_11.dll", + "cusparse64_12.dll", + ), + "nppc": ( + "nppc64_11.dll", + "nppc64_12.dll", + ), + "nppial": ( + "nppial64_11.dll", + "nppial64_12.dll", + ), + "nppicc": ( + "nppicc64_11.dll", + "nppicc64_12.dll", + ), + "nppidei": ( + "nppidei64_11.dll", + "nppidei64_12.dll", + ), + "nppif": ( + "nppif64_11.dll", + "nppif64_12.dll", + ), + "nppig": ( + "nppig64_11.dll", + "nppig64_12.dll", + ), + "nppim": ( + "nppim64_11.dll", + "nppim64_12.dll", + ), + "nppist": ( + "nppist64_11.dll", + "nppist64_12.dll", + ), + "nppisu": ( + "nppisu64_11.dll", + "nppisu64_12.dll", + ), + "nppitc": ( + "nppitc64_11.dll", + "nppitc64_12.dll", + ), + "npps": ( + "npps64_11.dll", + "npps64_12.dll", + ), + "nvJitLink": ("nvJitLink_120_0.dll",), + "nvblas": ( + "nvblas64_11.dll", + "nvblas64_12.dll", + ), + "nvfatbin": ("nvfatbin_120_0.dll",), + "nvjpeg": ( + "nvjpeg64_11.dll", + "nvjpeg64_12.dll", + ), + "nvrtc": ( + "nvrtc64_110_0.dll", + "nvrtc64_111_0.dll", + 
"nvrtc64_112_0.dll", + "nvrtc64_120_0.dll", + ), + "nvvm": ( + "nvvm32.dll", + "nvvm64.dll", + "nvvm64_33_0.dll", + "nvvm64_40_0.dll", + ), +} + +LIBNAMES_REQUIRING_OS_ADD_DLL_DIRECTORY = ( + "cufft", + "nvrtc", +) + + +def is_suppressed_dll_file(path_basename: str) -> bool: + if path_basename.startswith("nvrtc"): + # nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-win_amd64.whl: + # nvidia\cuda_nvrtc\bin\ + # nvrtc-builtins64_128.dll + # nvrtc64_120_0.alt.dll + # nvrtc64_120_0.dll + return path_basename.endswith(".alt.dll") or "-builtins" in path_basename + return False + + +# Based on nm output for Linux x86_64 /usr/local/cuda (12.8.1) +EXPECTED_LIB_SYMBOLS = { + "nvJitLink": ("nvJitLinkVersion",), + "nvrtc": ("nvrtcVersion",), + "nvvm": ("nvvmVersion",), + "cudart": ("cudaRuntimeGetVersion",), + "nvfatbin": ("nvFatbinVersion",), + "cublas": ("cublasGetVersion",), + "cublasLt": ("cublasLtGetVersion",), + "cufft": ("cufftGetVersion",), + "cufftw": ("fftwf_malloc",), + "curand": ("curandGetVersion",), + "cusolver": ("cusolverGetVersion",), + "cusolverMg": ("cusolverMgCreate",), + "cusparse": ("cusparseGetVersion",), + "nppc": ("nppGetLibVersion",), + "nppial": ("nppiAdd_32f_C1R",), + "nppicc": ("nppiColorToGray_8u_C3C1R",), + "nppidei": ("nppiCopy_8u_C1R",), + "nppif": ("nppiFilterSobelHorizBorder_8u_C1R",), + "nppig": ("nppiResize_8u_C1R",), + "nppim": ("nppiErode_8u_C1R",), + "nppist": ("nppiMean_8u_C1R",), + "nppisu": ("nppiFree",), + "nppitc": ("nppiThreshold_8u_C1R",), + "npps": ("nppsAdd_32f",), + "nvblas": ("dgemm",), + "cufile": ("cuFileGetVersion",), + # "cufile_rdma": ("rdma_buffer_reg",), + "nvjpeg": ("nvjpegCreate",), +} diff --git a/cuda_bindings/cuda/bindings/path_finder.py b/cuda_bindings/cuda/bindings/path_finder.py index 21aeb4b368..9c08bdc258 100644 --- a/cuda_bindings/cuda/bindings/path_finder.py +++ b/cuda_bindings/cuda/bindings/path_finder.py @@ -2,36 +2,10 @@ # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE -from 
cuda.bindings._path_finder.cuda_paths import ( - get_conda_ctk, - get_conda_include_dir, - get_cuda_home, - get_cuda_paths, - get_current_cuda_target_name, - get_debian_pkg_libdevice, - get_libdevice_wheel, - get_nvidia_cudalib_ctk, - get_nvidia_libdevice_ctk, - get_nvidia_nvvm_ctk, - get_nvidia_static_cudalib_ctk, - get_system_ctk, -) -from cuda.bindings._path_finder.find_nvidia_dynamic_library import find_nvidia_dynamic_library from cuda.bindings._path_finder.load_nvidia_dynamic_library import load_nvidia_dynamic_library +from cuda.bindings._path_finder.supported_libs import SUPPORTED_LIBNAMES __all__ = [ - "find_nvidia_dynamic_library", "load_nvidia_dynamic_library", - "get_conda_ctk", - "get_conda_include_dir", - "get_cuda_home", - "get_cuda_paths", - "get_current_cuda_target_name", - "get_debian_pkg_libdevice", - "get_libdevice_wheel", - "get_nvidia_cudalib_ctk", - "get_nvidia_libdevice_ctk", - "get_nvidia_nvvm_ctk", - "get_nvidia_static_cudalib_ctk", - "get_system_ctk", + "SUPPORTED_LIBNAMES", ] diff --git a/cuda_bindings/pyproject.toml b/cuda_bindings/pyproject.toml index e6a9492f5b..8921cc5a21 100644 --- a/cuda_bindings/pyproject.toml +++ b/cuda_bindings/pyproject.toml @@ -25,7 +25,6 @@ classifiers = [ "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", - "Programming Language :: Python :: 3.13", "Environment :: GPU :: NVIDIA CUDA", ] dynamic = [ diff --git a/cuda_bindings/tests/conftest.py b/cuda_bindings/tests/conftest.py new file mode 100644 index 0000000000..bcdc37db43 --- /dev/null +++ b/cuda_bindings/tests/conftest.py @@ -0,0 +1,20 @@ +import pytest + + +def pytest_configure(config): + config.custom_info = [] + + +def pytest_terminal_summary(terminalreporter, exitstatus, config): + if config.custom_info: + terminalreporter.write_sep("=", "INFO summary") + for msg in config.custom_info: + terminalreporter.line(f"INFO {msg}") + + +@pytest.fixture +def info_summary_append(request): 
+ def _append(message): + request.config.custom_info.append(f"{request.node.name}: {message}") + + return _append diff --git a/cuda_bindings/tests/path_finder.py b/cuda_bindings/tests/path_finder.py deleted file mode 100644 index 9b7dd23a3a..0000000000 --- a/cuda_bindings/tests/path_finder.py +++ /dev/null @@ -1,18 +0,0 @@ -from cuda.bindings import path_finder - -paths = path_finder.get_cuda_paths() - -for k, v in paths.items(): - print(f"{k}: {v}", flush=True) -print() - -libnames = ("nvJitLink", "nvrtc", "nvvm") - -for libname in libnames: - print(path_finder.find_nvidia_dynamic_library(libname)) - print() - -for libname in libnames: - print(libname) - print(path_finder.load_nvidia_dynamic_library(libname)) - print() diff --git a/cuda_bindings/tests/test_path_finder.py b/cuda_bindings/tests/test_path_finder.py new file mode 100644 index 0000000000..cb659026fc --- /dev/null +++ b/cuda_bindings/tests/test_path_finder.py @@ -0,0 +1,92 @@ +import os +import subprocess # nosec B404 +import sys + +import pytest + +from cuda.bindings import path_finder +from cuda.bindings._path_finder import supported_libs + +ALL_LIBNAMES = path_finder.SUPPORTED_LIBNAMES + supported_libs.PARTIALLY_SUPPORTED_LIBNAMES +if os.environ.get("CUDA_BINDINGS_PATH_FINDER_TEST_ALL_LIBNAMES", False): + TEST_LIBNAMES = ALL_LIBNAMES +else: + TEST_LIBNAMES = path_finder.SUPPORTED_LIBNAMES + + +def test_all_libnames_linux_sonames_consistency(): + assert tuple(sorted(ALL_LIBNAMES)) == tuple(sorted(supported_libs.SUPPORTED_LINUX_SONAMES.keys())) + + +def test_all_libnames_windows_dlls_consistency(): + assert tuple(sorted(ALL_LIBNAMES)) == tuple(sorted(supported_libs.SUPPORTED_WINDOWS_DLLS.keys())) + + +def test_all_libnames_libnames_requiring_os_add_dll_directory_consistency(): + assert not (set(supported_libs.LIBNAMES_REQUIRING_OS_ADD_DLL_DIRECTORY) - set(ALL_LIBNAMES)) + + +def test_all_libnames_expected_lib_symbols_consistency(): + assert tuple(sorted(ALL_LIBNAMES)) == 
tuple(sorted(supported_libs.EXPECTED_LIB_SYMBOLS.keys())) + + +def _check_nvjitlink_usable(): + from cuda.bindings._internal import nvjitlink as inner_nvjitlink + + return inner_nvjitlink._inspect_function_pointer("__nvJitLinkVersion") != 0 + + +def _build_subprocess_failed_for_libname_message(libname, result): + return ( + f"Subprocess failed for {libname=!r} with exit code {result.returncode}\n" + f"--- stdout-from-subprocess ---\n{result.stdout}\n" + f"--- stderr-from-subprocess ---\n{result.stderr}\n" + ) + + +@pytest.mark.parametrize("api", ("find", "load")) +@pytest.mark.parametrize("libname", TEST_LIBNAMES) +def test_find_or_load_nvidia_dynamic_library(info_summary_append, api, libname): + if sys.platform == "win32" and not supported_libs.SUPPORTED_WINDOWS_DLLS[libname]: + pytest.skip(f"{libname=!r} not supported on {sys.platform=}") + + if libname == "nvJitLink" and not _check_nvjitlink_usable(): + pytest.skip(f"{libname=!r} not usable") + + if api == "find": + code = f"""\ +from cuda.bindings._path_finder.find_nvidia_dynamic_library import find_nvidia_dynamic_library +abs_path = find_nvidia_dynamic_library({libname!r}) +print(f"{{abs_path!r}}") +""" + else: + code = f"""\ +from cuda.bindings.path_finder import load_nvidia_dynamic_library +from cuda.bindings._path_finder.load_nvidia_dynamic_library import _load_nvidia_dynamic_library_no_cache + +loaded_dl_fresh = load_nvidia_dynamic_library({libname!r}) +if loaded_dl_fresh.was_already_loaded_from_elsewhere: + raise RuntimeError("loaded_dl_fresh.was_already_loaded_from_elsewhere") + +loaded_dl_from_cache = load_nvidia_dynamic_library({libname!r}) +if loaded_dl_from_cache is not loaded_dl_fresh: + raise RuntimeError("loaded_dl_from_cache is not loaded_dl_fresh") + +loaded_dl_no_cache = _load_nvidia_dynamic_library_no_cache({libname!r}) +if not loaded_dl_no_cache.was_already_loaded_from_elsewhere: + raise RuntimeError("loaded_dl_no_cache.was_already_loaded_from_elsewhere") +if loaded_dl_no_cache.abs_path != 
loaded_dl_fresh.abs_path: + raise RuntimeError(f"{{loaded_dl_no_cache.abs_path=!r}} != {{loaded_dl_fresh.abs_path=!r}}") + +print(f"{{loaded_dl_fresh.abs_path!r}}") +""" + result = subprocess.run( # nosec B603 + [sys.executable, "-c", code], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + encoding="utf-8", + ) + if result.returncode == 0: + info_summary_append(f"abs_path={result.stdout.rstrip()}") + else: + raise RuntimeError(_build_subprocess_failed_for_libname_message(libname, result)) diff --git a/toolshed/build_path_finder_dlls.py b/toolshed/build_path_finder_dlls.py new file mode 100755 index 0000000000..c82dcd866d --- /dev/null +++ b/toolshed/build_path_finder_dlls.py @@ -0,0 +1,84 @@ +#!/usr/bin/env python3 + +# Input for this script: .txt files generated with: +# for exe in *.exe; do 7z l $exe > "${exe%.exe}.txt"; done + +# The output of this script +# requires obvious manual edits to remove duplicates and unwanted dlls. + +import sys + +LIBNAMES_IN_SCOPE_OF_CUDA_BINDINGS_PATH_FINDER = ( + "nvJitLink", + "nvrtc", + "nvvm", + "cudart", + "nvfatbin", + "cublas", + "cublasLt", + "cufft", + "cufftw", + "curand", + "cusolver", + "cusolverMg", + "cusparse", + "nppc", + "nppial", + "nppicc", + "nppidei", + "nppif", + "nppig", + "nppim", + "nppist", + "nppisu", + "nppitc", + "npps", + "nvblas", + "cufile", + "cufile_rdma", + "nvjpeg", +) + + +def run(args): + dlls_from_files = set() + for filename in args: + lines_iter = iter(open(filename).read().splitlines()) + for line in lines_iter: + if line.startswith("-------------------"): + break + else: + raise RuntimeError("------------------- NOT FOUND") + for line in lines_iter: + if line.startswith("-------------------"): + break + assert line[52] == " ", line + assert line[53] != " ", line + path = line[53:] + if path.endswith(".dll"): + dll = path.rsplit("/", 1)[1] + dlls_from_files.add(dll) + else: + raise RuntimeError("------------------- NOT FOUND") + + print("DLLs in scope of cuda.bindings.path_finder") + 
print("==========================================") + dlls_in_scope = set() + for libname in sorted(LIBNAMES_IN_SCOPE_OF_CUDA_BINDINGS_PATH_FINDER): + print(f'"{libname}": (') + for dll in sorted(dlls_from_files): + if dll.startswith(libname): + dlls_in_scope.add(dll) + print(f' "{dll}",') + print("),") + print() + + print("DLLs out of scope") + print("=================") + for dll in sorted(dlls_from_files - dlls_in_scope): + print(dll) + print() + + +if __name__ == "__main__": + run(args=sys.argv[1:]) diff --git a/toolshed/build_path_finder_sonames.py b/toolshed/build_path_finder_sonames.py new file mode 100755 index 0000000000..20e8ec6c7d --- /dev/null +++ b/toolshed/build_path_finder_sonames.py @@ -0,0 +1,74 @@ +#!/usr/bin/env python3 + +# Input for this script: +# output of toolshed/find_sonames.sh + +# The output of this script +# is expected to be usable as-is. + +import sys + +LIBNAMES_IN_SCOPE_OF_CUDA_BINDINGS_PATH_FINDER = ( + "nvJitLink", + "nvrtc", + "nvvm", + "cudart", + "nvfatbin", + "cublas", + "cublasLt", + "cufft", + "cufftw", + "curand", + "cusolver", + "cusolverMg", + "cusparse", + "nppc", + "nppial", + "nppicc", + "nppidei", + "nppif", + "nppig", + "nppim", + "nppist", + "nppisu", + "nppitc", + "npps", + "nvblas", + "cufile", + "cufile_rdma", + "nvjpeg", +) + + +def run(args): + assert len(args) == 1, "output-of-find_sonames.sh" + + sonames_from_file = set() + for line in open(args[0]).read().splitlines(): + flds = line.split() + assert len(flds) == 3, flds + if flds[-1] != "SONAME_NOT_SET": + sonames_from_file.add(flds[-1]) + + print("SONAMEs in scope of cuda.bindings.path_finder") + print("=============================================") + sonames_in_scope = set() + for libname in sorted(LIBNAMES_IN_SCOPE_OF_CUDA_BINDINGS_PATH_FINDER): + print(f'"{libname}": (') + lib_so = "lib" + libname + ".so" + for soname in sorted(sonames_from_file): + if soname.startswith(lib_so): + sonames_in_scope.add(soname) + print(f' "{soname}",') + print("),") + 
print() + + print("SONAMEs out of scope") + print("====================") + for soname in sorted(sonames_from_file - sonames_in_scope): + print(soname) + print() + + +if __name__ == "__main__": + run(args=sys.argv[1:]) diff --git a/toolshed/find_sonames.sh b/toolshed/find_sonames.sh new file mode 100755 index 0000000000..79c2e89d5c --- /dev/null +++ b/toolshed/find_sonames.sh @@ -0,0 +1,6 @@ +#!/bin/bash +find "$@" -type f -name '*.so*' -print0 | while IFS= read -r -d '' f; do + type=$(test -L "$f" && echo SYMLINK || echo FILE) + soname=$(readelf -d "$f" 2>/dev/null | awk '/SONAME/ {gsub(/[][]/, "", $5); print $5; exit}') + echo "$f $type ${soname:-SONAME_NOT_SET}" +done diff --git a/toolshed/run_cuda_bindings_path_finder.py b/toolshed/run_cuda_bindings_path_finder.py new file mode 100644 index 0000000000..5f47b39903 --- /dev/null +++ b/toolshed/run_cuda_bindings_path_finder.py @@ -0,0 +1,34 @@ +import sys +import traceback + +from cuda.bindings import path_finder +from cuda.bindings._path_finder import cuda_paths, supported_libs + +ALL_LIBNAMES = ( + path_finder.SUPPORTED_LIBNAMES + supported_libs.PARTIALLY_SUPPORTED_LIBNAMES +) + + +def run(args): + assert len(args) == 0 + + paths = cuda_paths.get_cuda_paths() + for k, v in paths.items(): + print(f"{k}: {v}", flush=True) + print() + + for libname in ALL_LIBNAMES: + print(f"{libname=}") + try: + loaded_dl = path_finder.load_nvidia_dynamic_library(libname) + except Exception: + print(f"EXCEPTION for {libname=}:") + traceback.print_exc(file=sys.stdout) + else: + print(f" {loaded_dl.abs_path=!r}") + print(f" {loaded_dl.was_already_loaded_from_elsewhere=!r}") + print() + + +if __name__ == "__main__": + run(args=sys.argv[1:])