diff --git a/cuda_bindings/pyproject.toml b/cuda_bindings/pyproject.toml index f1546e2999..557847f280 100644 --- a/cuda_bindings/pyproject.toml +++ b/cuda_bindings/pyproject.toml @@ -27,7 +27,7 @@ dynamic = [ "readme", ] dependencies = [ - "cuda-pathfinder ~= 1.0", + "cuda-pathfinder ~=1.1", "pywin32; sys_platform == 'win32'", ] diff --git a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/find_nvidia_dynamic_lib.py b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/find_nvidia_dynamic_lib.py index 388c75845d..bb6c32b63a 100644 --- a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/find_nvidia_dynamic_lib.py +++ b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/find_nvidia_dynamic_lib.py @@ -12,7 +12,7 @@ IS_WINDOWS, is_suppressed_dll_file, ) -from cuda.pathfinder._utils.find_sub_dirs import find_sub_dirs_all_sitepackages +from cuda.pathfinder._utils.find_sub_dirs import find_sub_dirs, find_sub_dirs_all_sitepackages def _no_such_file_in_sub_dirs( @@ -28,18 +28,21 @@ def _no_such_file_in_sub_dirs( def _find_so_using_nvidia_lib_dirs( libname: str, so_basename: str, error_messages: list[str], attachments: list[str] ) -> Optional[str]: - nvidia_sub_dirs = ("nvidia", "*", "nvvm", "lib64") if libname == "nvvm" else ("nvidia", "*", "lib") file_wild = so_basename + "*" - for lib_dir in find_sub_dirs_all_sitepackages(nvidia_sub_dirs): - # First look for an exact match - so_name = os.path.join(lib_dir, so_basename) - if os.path.isfile(so_name): - return so_name - # Look for a versioned library - # Using sort here mainly to make the result deterministic. - for so_name in sorted(glob.glob(os.path.join(lib_dir, file_wild))): + nvidia_sub_dirs_list: list[tuple[str, ...]] = [("nvidia", "*", "lib")] # works also for CTK 13 nvvm + if libname == "nvvm": + nvidia_sub_dirs_list.append(("nvidia", "*", "nvvm", "lib64")) # CTK 12 + for nvidia_sub_dirs in nvidia_sub_dirs_list: + for lib_dir in find_sub_dirs_all_sitepackages(nvidia_sub_dirs): + # First look for an exact match + so_name = os.path.join(lib_dir, so_basename) if os.path.isfile(so_name): return so_name + # Look for a versioned library + # Using sort here mainly to make the result deterministic. + for so_name in sorted(glob.glob(os.path.join(lib_dir, file_wild))): + if os.path.isfile(so_name): + return so_name _no_such_file_in_sub_dirs(nvidia_sub_dirs, file_wild, error_messages, attachments) return None @@ -56,11 +59,17 @@ def _find_dll_under_dir(dirpath: str, file_wild: str) -> Optional[str]: def _find_dll_using_nvidia_bin_dirs( libname: str, lib_searched_for: str, error_messages: list[str], attachments: list[str] ) -> Optional[str]: - nvidia_sub_dirs = ("nvidia", "*", "nvvm", "bin") if libname == "nvvm" else ("nvidia", "*", "bin") - for bin_dir in find_sub_dirs_all_sitepackages(nvidia_sub_dirs): - dll_name = _find_dll_under_dir(bin_dir, lib_searched_for) - if dll_name is not None: - return dll_name + nvidia_sub_dirs_list: list[tuple[str, ...]] = [ + ("nvidia", "*", "bin"), # CTK 12 + ("nvidia", "*", "bin", "*"), # CTK 13, e.g. site-packages\nvidia\cu13\bin\x86_64\ + ] + if libname == "nvvm": + nvidia_sub_dirs_list.append(("nvidia", "*", "nvvm", "bin")) # Only for CTK 12 + for nvidia_sub_dirs in nvidia_sub_dirs_list: + for bin_dir in find_sub_dirs_all_sitepackages(nvidia_sub_dirs): + dll_name = _find_dll_under_dir(bin_dir, lib_searched_for) + if dll_name is not None: + return dll_name _no_such_file_in_sub_dirs(nvidia_sub_dirs, lib_searched_for, error_messages, attachments) return None @@ -76,21 +85,29 @@ def _find_lib_dir_using_cuda_home(libname: str) -> Optional[str]: cuda_home = _get_cuda_home() if cuda_home is None: return None - subdirs: tuple[str, ...] + subdirs_list: tuple[tuple[str, ...], ...] if IS_WINDOWS: - subdirs = (os.path.join("nvvm", "bin"),) if libname == "nvvm" else ("bin",) + if libname == "nvvm": # noqa: SIM108 + subdirs_list = ( + ("nvvm", "bin", "*"), # CTK 13 + ("nvvm", "bin"), # CTK 12 + ) + else: + subdirs_list = ( + ("bin", "x64"), # CTK 13 + ("bin",), # CTK 12 + ) else: - subdirs = ( - (os.path.join("nvvm", "lib64"),) - if libname == "nvvm" - else ( - "lib64", # CTK - "lib", # Conda + if libname == "nvvm": # noqa: SIM108 + subdirs_list = (("nvvm", "lib64"),) + else: + subdirs_list = ( + ("lib64",), # CTK + ("lib",), # Conda ) - ) - for subdir in subdirs: - dirname = os.path.join(cuda_home, subdir) - if os.path.isdir(dirname): + for sub_dirs in subdirs_list: + dirname: str # work around bug in mypy + for dirname in find_sub_dirs((cuda_home,), sub_dirs): return dirname return None diff --git a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_linux.py b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_linux.py index 251e0593a2..29192ec4c3 100644 --- a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_linux.py +++ b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_linux.py @@ -1,6 +1,7 @@ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 +import contextlib import ctypes import ctypes.util import os @@ -109,7 +110,26 @@ def load_with_system_search(libname: str) -> Optional[LoadedDL]: return None -def load_with_abs_path(_libname: str, found_path: str) -> LoadedDL: +def _work_around_known_bugs(libname: str, found_path: str) -> None: + if libname == "nvrtc": + # Work around bug/oversight in + # nvidia_cuda_nvrtc-13.0.48-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl + # Issue: libnvrtc.so.13 RUNPATH is not set. + # This workaround is highly specific + # - for simplicity. + # - to not mask bugs in future nvidia-cuda-nvrtc releases. + # - because a more general workaround is complicated. + dirname, basename = os.path.split(found_path) + if basename == "libnvrtc.so.13": + dep_basename = "libnvrtc-builtins.so.13.0" + dep_path = os.path.join(dirname, dep_basename) + if os.path.isfile(dep_path): + # In case of failure, defer to primary load, which is almost certain to fail, too. + with contextlib.suppress(OSError): + ctypes.CDLL(dep_path, CDLL_MODE) + + +def load_with_abs_path(libname: str, found_path: str) -> LoadedDL: """Load a dynamic library from the given path. Args: @@ -122,6 +142,7 @@ def load_with_abs_path(_libname: str, found_path: str) -> LoadedDL: Raises: RuntimeError: If the library cannot be loaded """ + _work_around_known_bugs(libname, found_path) try: handle = ctypes.CDLL(found_path, CDLL_MODE) except OSError as e: diff --git a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/supported_nvidia_libs.py b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/supported_nvidia_libs.py index 19d73b23e0..14901c3e1f 100644 --- a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/supported_nvidia_libs.py +++ b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/supported_nvidia_libs.py @@ -101,29 +101,35 @@ # cuda_12.4.1_550.54.15_linux.run # cuda_12.5.1_555.42.06_linux.run # cuda_12.6.2_560.35.03_linux.run -# cuda_12.8.0_570.86.10_linux.run -# cuda_12.9.0_575.51.03_linux.run +# cuda_12.8.1_570.124.06_linux.run +# cuda_12.9.1_575.57.08_linux.run +# cuda_13.0.0_580.65.06_linux.run # Generated with toolshed/build_pathfinder_sonames.py SUPPORTED_LINUX_SONAMES = { "cublas": ( "libcublas.so.11", "libcublas.so.12", + "libcublas.so.13", ), "cublasLt": ( "libcublasLt.so.11", "libcublasLt.so.12", + "libcublasLt.so.13", ), "cudart": ( "libcudart.so.11.0", "libcudart.so.12", + "libcudart.so.13", ), "cufft": ( "libcufft.so.10", "libcufft.so.11", + "libcufft.so.12", ), "cufftw": ( "libcufftw.so.10", "libcufftw.so.11", + "libcufftw.so.12", ), "cufile": ("libcufile.so.0",), # "cufile_rdma": ("libcufile_rdma.so.1",), @@ -131,10 +137,12 @@ "cusolver": ( "libcusolver.so.10", "libcusolver.so.11", + "libcusolver.so.12", ), "cusolverMg": ( "libcusolverMg.so.10", "libcusolverMg.so.11", + "libcusolverMg.so.12", ), "cusparse": ( "libcusparse.so.11", @@ -143,62 +151,82 @@ "nppc": ( "libnppc.so.11", "libnppc.so.12", + "libnppc.so.13", ), "nppial": ( "libnppial.so.11", "libnppial.so.12", + "libnppial.so.13", ), "nppicc": ( "libnppicc.so.11", "libnppicc.so.12", + "libnppicc.so.13", ), "nppidei": ( "libnppidei.so.11", "libnppidei.so.12", + "libnppidei.so.13", ), "nppif": ( "libnppif.so.11", "libnppif.so.12", + "libnppif.so.13", ), "nppig": ( "libnppig.so.11", "libnppig.so.12", + "libnppig.so.13", ), "nppim": ( "libnppim.so.11", "libnppim.so.12", + "libnppim.so.13", ), "nppist": ( "libnppist.so.11", "libnppist.so.12", + "libnppist.so.13", ), "nppisu": ( "libnppisu.so.11", "libnppisu.so.12", + "libnppisu.so.13", ), "nppitc": ( "libnppitc.so.11", "libnppitc.so.12", + "libnppitc.so.13", ), "npps": ( "libnpps.so.11", "libnpps.so.12", + "libnpps.so.13", + ), + "nvJitLink": ( + "libnvJitLink.so.12", + "libnvJitLink.so.13", ), - "nvJitLink": ("libnvJitLink.so.12",), "nvblas": ( "libnvblas.so.11", "libnvblas.so.12", + "libnvblas.so.13", + ), + "nvfatbin": ( + "libnvfatbin.so.12", + "libnvfatbin.so.13", ), - "nvfatbin": ("libnvfatbin.so.12",), "nvjpeg": ( "libnvjpeg.so.11", "libnvjpeg.so.12", + "libnvjpeg.so.13", ), "nvrtc": ( "libnvrtc.so.11.0", "libnvrtc.so.11.1", "libnvrtc.so.11.2", "libnvrtc.so.12", + "libnvrtc.so.13", ), "nvvm": ( "libnvvm.so.3", @@ -224,39 +252,47 @@ # cuda_12.5.1_555.85_windows.exe # cuda_12.6.2_560.94_windows.exe # cuda_12.8.1_572.61_windows.exe -# cuda_12.9.0_576.02_windows.txt -# Generated with toolshed/build_pathfinder_dlls.py (WITH MANUAL EDITS) +# cuda_12.9.1_576.57_windows.exe +# cuda_13.0.0_windows.exe +# Generated with toolshed/build_pathfinder_dlls.py SUPPORTED_WINDOWS_DLLS = { "cublas": ( "cublas64_11.dll", "cublas64_12.dll", + "cublas64_13.dll", ), "cublasLt": ( "cublasLt64_11.dll", "cublasLt64_12.dll", + "cublasLt64_13.dll", ), "cudart": ( "cudart64_101.dll", "cudart64_110.dll", "cudart64_12.dll", + "cudart64_13.dll", "cudart64_65.dll", ), "cufft": ( "cufft64_10.dll", "cufft64_11.dll", + "cufft64_12.dll", ), "cufftw": ( "cufftw64_10.dll", "cufftw64_11.dll", + "cufftw64_12.dll", ), "curand": ("curand64_10.dll",), "cusolver": ( "cusolver64_10.dll", "cusolver64_11.dll", + "cusolver64_12.dll", ), "cusolverMg": ( "cusolverMg64_10.dll", "cusolverMg64_11.dll", + "cusolverMg64_12.dll", ), "cusparse": ( "cusparse64_11.dll", @@ -265,62 +301,82 @@ "nppc": ( "nppc64_11.dll", "nppc64_12.dll", + "nppc64_13.dll", ), "nppial": ( "nppial64_11.dll", "nppial64_12.dll", + "nppial64_13.dll", ), "nppicc": ( "nppicc64_11.dll", "nppicc64_12.dll", + "nppicc64_13.dll", ), "nppidei": ( "nppidei64_11.dll", "nppidei64_12.dll", + "nppidei64_13.dll", ), "nppif": ( "nppif64_11.dll", "nppif64_12.dll", + "nppif64_13.dll", ), "nppig": ( "nppig64_11.dll", "nppig64_12.dll", + "nppig64_13.dll", ), "nppim": ( "nppim64_11.dll", "nppim64_12.dll", + "nppim64_13.dll", ), "nppist": ( "nppist64_11.dll", "nppist64_12.dll", + "nppist64_13.dll", ), "nppisu": ( "nppisu64_11.dll", "nppisu64_12.dll", + "nppisu64_13.dll", ), "nppitc": ( "nppitc64_11.dll", "nppitc64_12.dll", + "nppitc64_13.dll", ), "npps": ( "npps64_11.dll", "npps64_12.dll", + "npps64_13.dll", + ), + "nvJitLink": ( + "nvJitLink_120_0.dll", + "nvJitLink_130_0.dll", ), - "nvJitLink": ("nvJitLink_120_0.dll",), "nvblas": ( "nvblas64_11.dll", "nvblas64_12.dll", + "nvblas64_13.dll", + ), + "nvfatbin": ( + "nvfatbin_120_0.dll", + "nvfatbin_130_0.dll", ), - "nvfatbin": ("nvfatbin_120_0.dll",), "nvjpeg": ( "nvjpeg64_11.dll", "nvjpeg64_12.dll", + "nvjpeg64_13.dll", ), "nvrtc": ( "nvrtc64_110_0.dll", "nvrtc64_111_0.dll", "nvrtc64_112_0.dll", "nvrtc64_120_0.dll", + "nvrtc64_130_0.dll", ), "nvvm": ( "nvvm64.dll", @@ -347,10 +403,10 @@ def is_suppressed_dll_file(path_basename: str) -> bool: return path_basename.startswith(("cudart32_", "nvvm32")) -# Based on nm output for Linux x86_64 /usr/local/cuda (12.8.1) +# Based on `nm -D --defined-only` output for Linux x86_64 distributions. EXPECTED_LIB_SYMBOLS = { "nvJitLink": ( - "__nvJitLinkCreate_12_0", # 12.0 through 12.8 (at least) + "__nvJitLinkCreate_12_0", # 12.0 through 12.9 "nvJitLinkVersion", # 12.3 and up ), "nvrtc": ("nvrtcVersion",), @@ -366,16 +422,16 @@ def is_suppressed_dll_file(path_basename: str) -> bool: "cusolverMg": ("cusolverMgCreate",), "cusparse": ("cusparseGetVersion",), "nppc": ("nppGetLibVersion",), - "nppial": ("nppiAdd_32f_C1R",), - "nppicc": ("nppiColorToGray_8u_C3C1R",), - "nppidei": ("nppiCopy_8u_C1R",), - "nppif": ("nppiFilterSobelHorizBorder_8u_C1R",), - "nppig": ("nppiResize_8u_C1R",), - "nppim": ("nppiErode_8u_C1R",), - "nppist": ("nppiMean_8u_C1R",), + "nppial": ("nppiAdd_32f_C1R_Ctx",), + "nppicc": ("nppiColorToGray_8u_C3C1R_Ctx",), + "nppidei": ("nppiCopy_8u_C1R_Ctx",), + "nppif": ("nppiFilterSobelHorizBorder_8u_C1R_Ctx",), + "nppig": ("nppiResize_8u_C1R_Ctx",), + "nppim": ("nppiErode_8u_C1R_Ctx",), + "nppist": ("nppiMean_8u_C1R_Ctx",), "nppisu": ("nppiFree",), - "nppitc": ("nppiThreshold_8u_C1R",), - "npps": ("nppsAdd_32f",), + "nppitc": ("nppiThreshold_8u_C1R_Ctx",), + "npps": ("nppsAdd_32f_Ctx",), "nvblas": ("dgemm",), "cufile": ("cuFileGetVersion",), # "cufile_rdma": ("rdma_buffer_reg",), diff --git a/cuda_pathfinder/cuda/pathfinder/_version.py b/cuda_pathfinder/cuda/pathfinder/_version.py index 23e58fddbb..a41dd93edd 100644 --- a/cuda_pathfinder/cuda/pathfinder/_version.py +++ b/cuda_pathfinder/cuda/pathfinder/_version.py @@ -1,4 +1,4 @@ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 -__version__ = "1.0.0" +__version__ = "1.1.0" diff --git a/cuda_pathfinder/pyproject.toml b/cuda_pathfinder/pyproject.toml index 161dd66971..ac6724277e 100644 --- a/cuda_pathfinder/pyproject.toml +++ b/cuda_pathfinder/pyproject.toml @@ -29,6 +29,22 @@ nvidia_wheels_cu12 = [ "nvidia-nvjitlink-cu12", "nvidia-nvjpeg-cu12", ] +nvidia_wheels_cu13 = [ + "nvidia-cublas", + "nvidia-cuda-nvcc", + "nvidia-cuda-nvrtc", + "nvidia-cuda-runtime", + "nvidia-cufft", + "nvidia-cufile; sys_platform != 'win32'", + "nvidia-curand", + "nvidia-cusolver", + "nvidia-cusparse", + "nvidia-npp", + "nvidia-nvfatbin", + "nvidia-nvjitlink", + "nvidia-nvjpeg", + "nvidia-nvvm", +] [project.urls] Repository = "https://github.com/NVIDIA/cuda-python"