From 67005ba4c1f0c1441a24aef01d1624e44475f2a5 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Fri, 7 Feb 2025 19:11:56 -0800 Subject: [PATCH 1/9] Add nvvm bindings (#421) * Add nvvm to setup.py * Add test_nvvm.py * test_nvvm.py version(), ir_version() * Snapshot of generated files. * Add in `nvvm.create_program()` * Add in `nvvm.destroy_program()` * Add in `nvvm.compile_program()` * Add in add_module_to_program() * Add in verify_program() * Add in lazy_add_module_to_program() * Add in get_compiled_result_size(), get_program_log_size() * Add in get_compiled_result(), get_program_log() * Change Copyright dates to 2025 * Use cybind results "automatically generated across versions from 12.0.1 to 12.8.0." * update to use NVKS runners * Add tests/run_simple.py * update fetch_ctk to find nvvm shared lib * fix wheel rel path * add nvcc wheel to [all] * Fix cybind bindings for add_module_to_program(), lazy_add_module_to_program() * Add test_with_minimal_nnvm_ir() * Remove tests/run_simple.py * Update cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx Co-authored-by: Leo Fang * Update cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx Co-authored-by: Leo Fang * Remove stray `f` (it is now a plain string, not an f-string anymore) * Add bootstrap_local_dev.sh script. * Fix nvvm.compile_program() failure for CUDA version 12.0 The original datalayout lacked explicit alignment and size definitions for i1, i8, i16, f32, f64, v64, and v128. The missing types are crucial for LLVM-based compilation in CUDA 12.0. Later CUDA versions are more forgiving, but 12.0 enforces a stricter layout. The stricter layout should resolve the issue for CUDA 12.0 without breaking compatibility with later versions. * Add test_verify_program_with_minimal_nnvm_ir() and rename some tests for clarity. * Complete test coverage. * Introduce noregex() to reduce backslash clutter. * Use a contextmanager to replace repeated try-finally. 
* Rename noregex to match_exact * Introduce get_program_log() helper. * Improve nvvm_program() Context Manager * Remove redundant "utf-8" * Also test with NVVM Bitcode (using a new pytest fixture). * Introduce compile_or_verify fixture. * Remove bootstrap_local_dev.sh, to be moved to a separate PR. * Update from codegen after config fix. * Update from codegen after config fix. * Update from codegen after adding CTK 11.x nvvm.h headers. Functional NO-OP. * Fix get_nvvm_dso_version_suffix() to match actual version numbers: ./11.0.3_450.51.06/cuda_nvcc/nvvm/lib64/libnvvm.so.3.3.0 ./11.1.1_455.32.00/cuda_nvcc/nvvm/lib64/libnvvm.so.3.3.0 ./11.2.2_460.32.03/cuda_nvcc/nvvm/lib64/libnvvm.so.4.0.0 ./11.3.1_465.19.01/cuda_nvcc/nvvm/lib64/libnvvm.so.4.0.0 ./11.4.4_470.82.01/cuda_nvcc/nvvm/lib64/libnvvm.so.4.0.0 ./11.5.1_495.29.05/cuda_nvcc/nvvm/lib64/libnvvm.so.4.0.0 ./11.6.2_510.47.03/cuda_nvcc/nvvm/lib64/libnvvm.so.4.0.0 ./11.7.1_515.65.01/cuda_nvcc/nvvm/lib64/libnvvm.so.4.0.0 ./11.8.0_520.61.05/cuda_nvcc/nvvm/lib64/libnvvm.so.4.0.0 ./12.0.1_525.85.12/cuda_nvcc/nvvm/lib64/libnvvm.so.4.0.0 ./12.1.1_530.30.02/cuda_nvcc/nvvm/lib64/libnvvm.so.4.0.0 ./12.2.2_535.104.05/cuda_nvcc/nvvm/lib64/libnvvm.so.4.0.0 ./12.3.2_545.23.08/cuda_nvcc/nvvm/lib64/libnvvm.so.4.0.0 ./12.4.1_550.54.15/cuda_nvcc/nvvm/lib64/libnvvm.so.4.0.0 ./12.5.1_555.42.06/cuda_nvcc/nvvm/lib64/libnvvm.so.4.0.0 ./12.6.2_560.35.03/cuda_nvcc/nvvm/lib64/libnvvm.so.4.0.0 ./12.8.0_570.86.10/cuda_nvcc/nvvm/lib64/libnvvm.so.4.0.0 For completeness, since the nvjitlink code is touched in this commit, these are the libnvJitLink version numbers: ./12.0.1_525.85.12/libnvjitlink/targets/x86_64-linux/lib/libnvJitLink.so.12.0.140 ./12.1.1_530.30.02/libnvjitlink/targets/x86_64-linux/lib/libnvJitLink.so.12.1.105 ./12.2.2_535.104.05/libnvjitlink/targets/x86_64-linux/lib/libnvJitLink.so.12.2.140 ./12.3.2_545.23.08/libnvjitlink/targets/x86_64-linux/lib/libnvJitLink.so.12.3.101 
./12.4.1_550.54.15/libnvjitlink/targets/x86_64-linux/lib/libnvJitLink.so.12.4.127 ./12.5.1_555.42.06/libnvjitlink/targets/x86_64-linux/lib/libnvJitLink.so.12.5.82 ./12.6.2_560.35.03/libnvjitlink/targets/x86_64-linux/lib/libnvJitLink.so.12.6.77 ./12.8.0_570.86.10/libnvjitlink/targets/x86_64-linux/lib/libnvJitLink.so.12.8.61 * find_libnvvm_so_via_proc_self_maps() Proof Of Concept * Revert "find_libnvvm_so_via_proc_self_maps() Proof Of Concept" This reverts commit b45bac231df6b2b482acfd28bf05c1c64111c8e3. * Add another rpath for finding libnvvm.so --------- (cherry picked from commit 2981bfd875a0576283fb54130d7b52f29071531c) --- .github/actions/fetch_ctk/action.yml | 2 +- .../cuda/bindings/_internal/nvvm.pxd | 25 ++ .../cuda/bindings/_internal/nvvm_linux.pyx | 360 +++++++++++++++++ .../cuda/bindings/_internal/nvvm_windows.pyx | 373 ++++++++++++++++++ .../cuda/bindings/_internal/utils.pxd | 169 ++++++++ .../cuda/bindings/_internal/utils.pyx | 137 +++++++ cuda_bindings/cuda/bindings/cynvvm.pxd | 46 +++ cuda_bindings/cuda/bindings/cynvvm.pyx | 59 +++ cuda_bindings/cuda/bindings/nvvm.pxd | 40 ++ cuda_bindings/cuda/bindings/nvvm.pyx | 284 +++++++++++++ cuda_bindings/pyproject.toml | 3 +- cuda_bindings/setup.py | 13 +- cuda_bindings/tests/test_nvvm.py | 193 +++++++++ 13 files changed, 1701 insertions(+), 3 deletions(-) create mode 100644 cuda_bindings/cuda/bindings/_internal/nvvm.pxd create mode 100644 cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx create mode 100644 cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx create mode 100644 cuda_bindings/cuda/bindings/_internal/utils.pxd create mode 100644 cuda_bindings/cuda/bindings/_internal/utils.pyx create mode 100644 cuda_bindings/cuda/bindings/cynvvm.pxd create mode 100644 cuda_bindings/cuda/bindings/cynvvm.pyx create mode 100644 cuda_bindings/cuda/bindings/nvvm.pxd create mode 100644 cuda_bindings/cuda/bindings/nvvm.pyx create mode 100644 cuda_bindings/tests/test_nvvm.py diff --git 
a/.github/actions/fetch_ctk/action.yml b/.github/actions/fetch_ctk/action.yml index 18750c2d8b..798e443d47 100644 --- a/.github/actions/fetch_ctk/action.yml +++ b/.github/actions/fetch_ctk/action.yml @@ -148,4 +148,4 @@ runs: echo "CUDA_PATH=${CUDA_PATH}" >> $GITHUB_ENV echo "CUDA_HOME=${CUDA_PATH}" >> $GITHUB_ENV echo "${CUDA_PATH}/bin" >> $GITHUB_PATH - echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH:-}:${CUDA_PATH}/lib" >> $GITHUB_ENV + echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH:-}:${CUDA_PATH}/lib:${CUDA_PATH}/nvvm/lib64" >> $GITHUB_ENV diff --git a/cuda_bindings/cuda/bindings/_internal/nvvm.pxd b/cuda_bindings/cuda/bindings/_internal/nvvm.pxd new file mode 100644 index 0000000000..0feebf2514 --- /dev/null +++ b/cuda_bindings/cuda/bindings/_internal/nvvm.pxd @@ -0,0 +1,25 @@ +# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +# +# This code was automatically generated across versions from 11.0.3 to 12.8.0. Do not modify it directly. 
+ +from ..cynvvm cimport * + + +############################################################################### +# Wrapper functions +############################################################################### + +cdef nvvmResult _nvvmVersion(int* major, int* minor) except* nogil +cdef nvvmResult _nvvmIRVersion(int* majorIR, int* minorIR, int* majorDbg, int* minorDbg) except* nogil +cdef nvvmResult _nvvmCreateProgram(nvvmProgram* prog) except* nogil +cdef nvvmResult _nvvmDestroyProgram(nvvmProgram* prog) except* nogil +cdef nvvmResult _nvvmAddModuleToProgram(nvvmProgram prog, const char* buffer, size_t size, const char* name) except* nogil +cdef nvvmResult _nvvmLazyAddModuleToProgram(nvvmProgram prog, const char* buffer, size_t size, const char* name) except* nogil +cdef nvvmResult _nvvmCompileProgram(nvvmProgram prog, int numOptions, const char** options) except* nogil +cdef nvvmResult _nvvmVerifyProgram(nvvmProgram prog, int numOptions, const char** options) except* nogil +cdef nvvmResult _nvvmGetCompiledResultSize(nvvmProgram prog, size_t* bufferSizeRet) except* nogil +cdef nvvmResult _nvvmGetCompiledResult(nvvmProgram prog, char* buffer) except* nogil +cdef nvvmResult _nvvmGetProgramLogSize(nvvmProgram prog, size_t* bufferSizeRet) except* nogil +cdef nvvmResult _nvvmGetProgramLog(nvvmProgram prog, char* buffer) except* nogil diff --git a/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx new file mode 100644 index 0000000000..e21218772d --- /dev/null +++ b/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx @@ -0,0 +1,360 @@ +# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +# +# This code was automatically generated across versions from 11.0.3 to 12.8.0. Do not modify it directly. 
+ +from libc.stdint cimport intptr_t + +from .utils cimport get_nvvm_dso_version_suffix + +from .utils import FunctionNotFoundError, NotSupportedError + +############################################################################### +# Extern +############################################################################### + +cdef extern from "" nogil: + void* dlopen(const char*, int) + char* dlerror() + void* dlsym(void*, const char*) + int dlclose(void*) + + enum: + RTLD_LAZY + RTLD_NOW + RTLD_GLOBAL + RTLD_LOCAL + + const void* RTLD_DEFAULT 'RTLD_DEFAULT' + + +############################################################################### +# Wrapper init +############################################################################### + +cdef bint __py_nvvm_init = False +cdef void* __cuDriverGetVersion = NULL + +cdef void* __nvvmVersion = NULL +cdef void* __nvvmIRVersion = NULL +cdef void* __nvvmCreateProgram = NULL +cdef void* __nvvmDestroyProgram = NULL +cdef void* __nvvmAddModuleToProgram = NULL +cdef void* __nvvmLazyAddModuleToProgram = NULL +cdef void* __nvvmCompileProgram = NULL +cdef void* __nvvmVerifyProgram = NULL +cdef void* __nvvmGetCompiledResultSize = NULL +cdef void* __nvvmGetCompiledResult = NULL +cdef void* __nvvmGetProgramLogSize = NULL +cdef void* __nvvmGetProgramLog = NULL + + +cdef void* load_library(const int driver_ver) except* with gil: + cdef void* handle + for suffix in get_nvvm_dso_version_suffix(driver_ver): + so_name = "libnvvm.so" + (f".{suffix}" if suffix else suffix) + handle = dlopen(so_name.encode(), RTLD_NOW | RTLD_GLOBAL) + if handle != NULL: + break + else: + err_msg = dlerror() + raise RuntimeError(f'Failed to dlopen libnvvm ({err_msg.decode()})') + return handle + + +cdef int _check_or_init_nvvm() except -1 nogil: + global __py_nvvm_init + if __py_nvvm_init: + return 0 + + # Load driver to check version + cdef void* handle = NULL + handle = dlopen('libcuda.so.1', RTLD_NOW | RTLD_GLOBAL) + if handle == NULL: + with gil: + 
err_msg = dlerror() + raise NotSupportedError(f'CUDA driver is not found ({err_msg.decode()})') + global __cuDriverGetVersion + if __cuDriverGetVersion == NULL: + __cuDriverGetVersion = dlsym(handle, "cuDriverGetVersion") + if __cuDriverGetVersion == NULL: + with gil: + raise RuntimeError('something went wrong') + cdef int err, driver_ver + err = (__cuDriverGetVersion)(&driver_ver) + if err != 0: + with gil: + raise RuntimeError('something went wrong') + #dlclose(handle) + handle = NULL + + # Load function + global __nvvmVersion + __nvvmVersion = dlsym(RTLD_DEFAULT, 'nvvmVersion') + if __nvvmVersion == NULL: + if handle == NULL: + handle = load_library(driver_ver) + __nvvmVersion = dlsym(handle, 'nvvmVersion') + + global __nvvmIRVersion + __nvvmIRVersion = dlsym(RTLD_DEFAULT, 'nvvmIRVersion') + if __nvvmIRVersion == NULL: + if handle == NULL: + handle = load_library(driver_ver) + __nvvmIRVersion = dlsym(handle, 'nvvmIRVersion') + + global __nvvmCreateProgram + __nvvmCreateProgram = dlsym(RTLD_DEFAULT, 'nvvmCreateProgram') + if __nvvmCreateProgram == NULL: + if handle == NULL: + handle = load_library(driver_ver) + __nvvmCreateProgram = dlsym(handle, 'nvvmCreateProgram') + + global __nvvmDestroyProgram + __nvvmDestroyProgram = dlsym(RTLD_DEFAULT, 'nvvmDestroyProgram') + if __nvvmDestroyProgram == NULL: + if handle == NULL: + handle = load_library(driver_ver) + __nvvmDestroyProgram = dlsym(handle, 'nvvmDestroyProgram') + + global __nvvmAddModuleToProgram + __nvvmAddModuleToProgram = dlsym(RTLD_DEFAULT, 'nvvmAddModuleToProgram') + if __nvvmAddModuleToProgram == NULL: + if handle == NULL: + handle = load_library(driver_ver) + __nvvmAddModuleToProgram = dlsym(handle, 'nvvmAddModuleToProgram') + + global __nvvmLazyAddModuleToProgram + __nvvmLazyAddModuleToProgram = dlsym(RTLD_DEFAULT, 'nvvmLazyAddModuleToProgram') + if __nvvmLazyAddModuleToProgram == NULL: + if handle == NULL: + handle = load_library(driver_ver) + __nvvmLazyAddModuleToProgram = dlsym(handle, 
'nvvmLazyAddModuleToProgram') + + global __nvvmCompileProgram + __nvvmCompileProgram = dlsym(RTLD_DEFAULT, 'nvvmCompileProgram') + if __nvvmCompileProgram == NULL: + if handle == NULL: + handle = load_library(driver_ver) + __nvvmCompileProgram = dlsym(handle, 'nvvmCompileProgram') + + global __nvvmVerifyProgram + __nvvmVerifyProgram = dlsym(RTLD_DEFAULT, 'nvvmVerifyProgram') + if __nvvmVerifyProgram == NULL: + if handle == NULL: + handle = load_library(driver_ver) + __nvvmVerifyProgram = dlsym(handle, 'nvvmVerifyProgram') + + global __nvvmGetCompiledResultSize + __nvvmGetCompiledResultSize = dlsym(RTLD_DEFAULT, 'nvvmGetCompiledResultSize') + if __nvvmGetCompiledResultSize == NULL: + if handle == NULL: + handle = load_library(driver_ver) + __nvvmGetCompiledResultSize = dlsym(handle, 'nvvmGetCompiledResultSize') + + global __nvvmGetCompiledResult + __nvvmGetCompiledResult = dlsym(RTLD_DEFAULT, 'nvvmGetCompiledResult') + if __nvvmGetCompiledResult == NULL: + if handle == NULL: + handle = load_library(driver_ver) + __nvvmGetCompiledResult = dlsym(handle, 'nvvmGetCompiledResult') + + global __nvvmGetProgramLogSize + __nvvmGetProgramLogSize = dlsym(RTLD_DEFAULT, 'nvvmGetProgramLogSize') + if __nvvmGetProgramLogSize == NULL: + if handle == NULL: + handle = load_library(driver_ver) + __nvvmGetProgramLogSize = dlsym(handle, 'nvvmGetProgramLogSize') + + global __nvvmGetProgramLog + __nvvmGetProgramLog = dlsym(RTLD_DEFAULT, 'nvvmGetProgramLog') + if __nvvmGetProgramLog == NULL: + if handle == NULL: + handle = load_library(driver_ver) + __nvvmGetProgramLog = dlsym(handle, 'nvvmGetProgramLog') + + __py_nvvm_init = True + return 0 + + +cdef dict func_ptrs = None + + +cpdef dict _inspect_function_pointers(): + global func_ptrs + if func_ptrs is not None: + return func_ptrs + + _check_or_init_nvvm() + cdef dict data = {} + + global __nvvmVersion + data["__nvvmVersion"] = __nvvmVersion + + global __nvvmIRVersion + data["__nvvmIRVersion"] = __nvvmIRVersion + + global 
__nvvmCreateProgram + data["__nvvmCreateProgram"] = __nvvmCreateProgram + + global __nvvmDestroyProgram + data["__nvvmDestroyProgram"] = __nvvmDestroyProgram + + global __nvvmAddModuleToProgram + data["__nvvmAddModuleToProgram"] = __nvvmAddModuleToProgram + + global __nvvmLazyAddModuleToProgram + data["__nvvmLazyAddModuleToProgram"] = __nvvmLazyAddModuleToProgram + + global __nvvmCompileProgram + data["__nvvmCompileProgram"] = __nvvmCompileProgram + + global __nvvmVerifyProgram + data["__nvvmVerifyProgram"] = __nvvmVerifyProgram + + global __nvvmGetCompiledResultSize + data["__nvvmGetCompiledResultSize"] = __nvvmGetCompiledResultSize + + global __nvvmGetCompiledResult + data["__nvvmGetCompiledResult"] = __nvvmGetCompiledResult + + global __nvvmGetProgramLogSize + data["__nvvmGetProgramLogSize"] = __nvvmGetProgramLogSize + + global __nvvmGetProgramLog + data["__nvvmGetProgramLog"] = __nvvmGetProgramLog + + func_ptrs = data + return data + + +cpdef _inspect_function_pointer(str name): + global func_ptrs + if func_ptrs is None: + func_ptrs = _inspect_function_pointers() + return func_ptrs[name] + + +############################################################################### +# Wrapper functions +############################################################################### + +cdef nvvmResult _nvvmVersion(int* major, int* minor) except* nogil: + global __nvvmVersion + _check_or_init_nvvm() + if __nvvmVersion == NULL: + with gil: + raise FunctionNotFoundError("function nvvmVersion is not found") + return (__nvvmVersion)( + major, minor) + + +cdef nvvmResult _nvvmIRVersion(int* majorIR, int* minorIR, int* majorDbg, int* minorDbg) except* nogil: + global __nvvmIRVersion + _check_or_init_nvvm() + if __nvvmIRVersion == NULL: + with gil: + raise FunctionNotFoundError("function nvvmIRVersion is not found") + return (__nvvmIRVersion)( + majorIR, minorIR, majorDbg, minorDbg) + + +cdef nvvmResult _nvvmCreateProgram(nvvmProgram* prog) except* nogil: + global 
__nvvmCreateProgram + _check_or_init_nvvm() + if __nvvmCreateProgram == NULL: + with gil: + raise FunctionNotFoundError("function nvvmCreateProgram is not found") + return (__nvvmCreateProgram)( + prog) + + +cdef nvvmResult _nvvmDestroyProgram(nvvmProgram* prog) except* nogil: + global __nvvmDestroyProgram + _check_or_init_nvvm() + if __nvvmDestroyProgram == NULL: + with gil: + raise FunctionNotFoundError("function nvvmDestroyProgram is not found") + return (__nvvmDestroyProgram)( + prog) + + +cdef nvvmResult _nvvmAddModuleToProgram(nvvmProgram prog, const char* buffer, size_t size, const char* name) except* nogil: + global __nvvmAddModuleToProgram + _check_or_init_nvvm() + if __nvvmAddModuleToProgram == NULL: + with gil: + raise FunctionNotFoundError("function nvvmAddModuleToProgram is not found") + return (__nvvmAddModuleToProgram)( + prog, buffer, size, name) + + +cdef nvvmResult _nvvmLazyAddModuleToProgram(nvvmProgram prog, const char* buffer, size_t size, const char* name) except* nogil: + global __nvvmLazyAddModuleToProgram + _check_or_init_nvvm() + if __nvvmLazyAddModuleToProgram == NULL: + with gil: + raise FunctionNotFoundError("function nvvmLazyAddModuleToProgram is not found") + return (__nvvmLazyAddModuleToProgram)( + prog, buffer, size, name) + + +cdef nvvmResult _nvvmCompileProgram(nvvmProgram prog, int numOptions, const char** options) except* nogil: + global __nvvmCompileProgram + _check_or_init_nvvm() + if __nvvmCompileProgram == NULL: + with gil: + raise FunctionNotFoundError("function nvvmCompileProgram is not found") + return (__nvvmCompileProgram)( + prog, numOptions, options) + + +cdef nvvmResult _nvvmVerifyProgram(nvvmProgram prog, int numOptions, const char** options) except* nogil: + global __nvvmVerifyProgram + _check_or_init_nvvm() + if __nvvmVerifyProgram == NULL: + with gil: + raise FunctionNotFoundError("function nvvmVerifyProgram is not found") + return (__nvvmVerifyProgram)( + prog, numOptions, options) + + +cdef nvvmResult 
_nvvmGetCompiledResultSize(nvvmProgram prog, size_t* bufferSizeRet) except* nogil: + global __nvvmGetCompiledResultSize + _check_or_init_nvvm() + if __nvvmGetCompiledResultSize == NULL: + with gil: + raise FunctionNotFoundError("function nvvmGetCompiledResultSize is not found") + return (__nvvmGetCompiledResultSize)( + prog, bufferSizeRet) + + +cdef nvvmResult _nvvmGetCompiledResult(nvvmProgram prog, char* buffer) except* nogil: + global __nvvmGetCompiledResult + _check_or_init_nvvm() + if __nvvmGetCompiledResult == NULL: + with gil: + raise FunctionNotFoundError("function nvvmGetCompiledResult is not found") + return (__nvvmGetCompiledResult)( + prog, buffer) + + +cdef nvvmResult _nvvmGetProgramLogSize(nvvmProgram prog, size_t* bufferSizeRet) except* nogil: + global __nvvmGetProgramLogSize + _check_or_init_nvvm() + if __nvvmGetProgramLogSize == NULL: + with gil: + raise FunctionNotFoundError("function nvvmGetProgramLogSize is not found") + return (__nvvmGetProgramLogSize)( + prog, bufferSizeRet) + + +cdef nvvmResult _nvvmGetProgramLog(nvvmProgram prog, char* buffer) except* nogil: + global __nvvmGetProgramLog + _check_or_init_nvvm() + if __nvvmGetProgramLog == NULL: + with gil: + raise FunctionNotFoundError("function nvvmGetProgramLog is not found") + return (__nvvmGetProgramLog)( + prog, buffer) diff --git a/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx new file mode 100644 index 0000000000..b8e6795478 --- /dev/null +++ b/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx @@ -0,0 +1,373 @@ +# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +# +# This code was automatically generated across versions from 11.0.3 to 12.8.0. Do not modify it directly. 
+ +from libc.stdint cimport intptr_t + +from .utils cimport get_nvvm_dso_version_suffix + +from .utils import FunctionNotFoundError, NotSupportedError + +import os +import site + +import win32api + + +############################################################################### +# Wrapper init +############################################################################### + +LOAD_LIBRARY_SEARCH_SYSTEM32 = 0x00000800 +LOAD_LIBRARY_SEARCH_DEFAULT_DIRS = 0x00001000 +LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR = 0x00000100 +cdef bint __py_nvvm_init = False +cdef void* __cuDriverGetVersion = NULL + +cdef void* __nvvmVersion = NULL +cdef void* __nvvmIRVersion = NULL +cdef void* __nvvmCreateProgram = NULL +cdef void* __nvvmDestroyProgram = NULL +cdef void* __nvvmAddModuleToProgram = NULL +cdef void* __nvvmLazyAddModuleToProgram = NULL +cdef void* __nvvmCompileProgram = NULL +cdef void* __nvvmVerifyProgram = NULL +cdef void* __nvvmGetCompiledResultSize = NULL +cdef void* __nvvmGetCompiledResult = NULL +cdef void* __nvvmGetProgramLogSize = NULL +cdef void* __nvvmGetProgramLog = NULL + + +cdef inline list get_site_packages(): + return [site.getusersitepackages()] + site.getsitepackages() + + +cdef load_library(const int driver_ver): + handle = 0 + + for suffix in get_nvvm_dso_version_suffix(driver_ver): + if len(suffix) == 0: + continue + dll_name = "nvvm64_40_0" + + # First check if the DLL has been loaded by 3rd parties + try: + handle = win32api.GetModuleHandle(dll_name) + except: + pass + else: + break + + # Next, check if DLLs are installed via pip + for sp in get_site_packages(): + mod_path = os.path.join(sp, "nvidia", "cuda_nvcc", "nvvm", "bin") + if not os.path.isdir(mod_path): + continue + os.add_dll_directory(mod_path) + try: + handle = win32api.LoadLibraryEx( + # Note: LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR needs an abs path... 
+ os.path.join(mod_path, dll_name), + 0, LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR) + except: + pass + else: + break + + # Finally, try default search + try: + handle = win32api.LoadLibrary(dll_name) + except: + pass + else: + break + else: + raise RuntimeError('Failed to load nvvm') + + assert handle != 0 + return handle + + +cdef int _check_or_init_nvvm() except -1 nogil: + global __py_nvvm_init + if __py_nvvm_init: + return 0 + + cdef int err, driver_ver + with gil: + # Load driver to check version + try: + handle = win32api.LoadLibraryEx("nvcuda.dll", 0, LOAD_LIBRARY_SEARCH_SYSTEM32) + except Exception as e: + raise NotSupportedError(f'CUDA driver is not found ({e})') + global __cuDriverGetVersion + if __cuDriverGetVersion == NULL: + __cuDriverGetVersion = win32api.GetProcAddress(handle, 'cuDriverGetVersion') + if __cuDriverGetVersion == NULL: + raise RuntimeError('something went wrong') + err = (__cuDriverGetVersion)(&driver_ver) + if err != 0: + raise RuntimeError('something went wrong') + + # Load library + handle = load_library(driver_ver) + + # Load function + global __nvvmVersion + try: + __nvvmVersion = win32api.GetProcAddress(handle, 'nvvmVersion') + except: + pass + + global __nvvmIRVersion + try: + __nvvmIRVersion = win32api.GetProcAddress(handle, 'nvvmIRVersion') + except: + pass + + global __nvvmCreateProgram + try: + __nvvmCreateProgram = win32api.GetProcAddress(handle, 'nvvmCreateProgram') + except: + pass + + global __nvvmDestroyProgram + try: + __nvvmDestroyProgram = win32api.GetProcAddress(handle, 'nvvmDestroyProgram') + except: + pass + + global __nvvmAddModuleToProgram + try: + __nvvmAddModuleToProgram = win32api.GetProcAddress(handle, 'nvvmAddModuleToProgram') + except: + pass + + global __nvvmLazyAddModuleToProgram + try: + __nvvmLazyAddModuleToProgram = win32api.GetProcAddress(handle, 'nvvmLazyAddModuleToProgram') + except: + pass + + global __nvvmCompileProgram + try: + __nvvmCompileProgram = 
win32api.GetProcAddress(handle, 'nvvmCompileProgram') + except: + pass + + global __nvvmVerifyProgram + try: + __nvvmVerifyProgram = win32api.GetProcAddress(handle, 'nvvmVerifyProgram') + except: + pass + + global __nvvmGetCompiledResultSize + try: + __nvvmGetCompiledResultSize = win32api.GetProcAddress(handle, 'nvvmGetCompiledResultSize') + except: + pass + + global __nvvmGetCompiledResult + try: + __nvvmGetCompiledResult = win32api.GetProcAddress(handle, 'nvvmGetCompiledResult') + except: + pass + + global __nvvmGetProgramLogSize + try: + __nvvmGetProgramLogSize = win32api.GetProcAddress(handle, 'nvvmGetProgramLogSize') + except: + pass + + global __nvvmGetProgramLog + try: + __nvvmGetProgramLog = win32api.GetProcAddress(handle, 'nvvmGetProgramLog') + except: + pass + + __py_nvvm_init = True + return 0 + + +cdef dict func_ptrs = None + + +cpdef dict _inspect_function_pointers(): + global func_ptrs + if func_ptrs is not None: + return func_ptrs + + _check_or_init_nvvm() + cdef dict data = {} + + global __nvvmVersion + data["__nvvmVersion"] = __nvvmVersion + + global __nvvmIRVersion + data["__nvvmIRVersion"] = __nvvmIRVersion + + global __nvvmCreateProgram + data["__nvvmCreateProgram"] = __nvvmCreateProgram + + global __nvvmDestroyProgram + data["__nvvmDestroyProgram"] = __nvvmDestroyProgram + + global __nvvmAddModuleToProgram + data["__nvvmAddModuleToProgram"] = __nvvmAddModuleToProgram + + global __nvvmLazyAddModuleToProgram + data["__nvvmLazyAddModuleToProgram"] = __nvvmLazyAddModuleToProgram + + global __nvvmCompileProgram + data["__nvvmCompileProgram"] = __nvvmCompileProgram + + global __nvvmVerifyProgram + data["__nvvmVerifyProgram"] = __nvvmVerifyProgram + + global __nvvmGetCompiledResultSize + data["__nvvmGetCompiledResultSize"] = __nvvmGetCompiledResultSize + + global __nvvmGetCompiledResult + data["__nvvmGetCompiledResult"] = __nvvmGetCompiledResult + + global __nvvmGetProgramLogSize + data["__nvvmGetProgramLogSize"] = __nvvmGetProgramLogSize + + global 
__nvvmGetProgramLog + data["__nvvmGetProgramLog"] = __nvvmGetProgramLog + + func_ptrs = data + return data + + +cpdef _inspect_function_pointer(str name): + global func_ptrs + if func_ptrs is None: + func_ptrs = _inspect_function_pointers() + return func_ptrs[name] + + +############################################################################### +# Wrapper functions +############################################################################### + +cdef nvvmResult _nvvmVersion(int* major, int* minor) except* nogil: + global __nvvmVersion + _check_or_init_nvvm() + if __nvvmVersion == NULL: + with gil: + raise FunctionNotFoundError("function nvvmVersion is not found") + return (__nvvmVersion)( + major, minor) + + +cdef nvvmResult _nvvmIRVersion(int* majorIR, int* minorIR, int* majorDbg, int* minorDbg) except* nogil: + global __nvvmIRVersion + _check_or_init_nvvm() + if __nvvmIRVersion == NULL: + with gil: + raise FunctionNotFoundError("function nvvmIRVersion is not found") + return (__nvvmIRVersion)( + majorIR, minorIR, majorDbg, minorDbg) + + +cdef nvvmResult _nvvmCreateProgram(nvvmProgram* prog) except* nogil: + global __nvvmCreateProgram + _check_or_init_nvvm() + if __nvvmCreateProgram == NULL: + with gil: + raise FunctionNotFoundError("function nvvmCreateProgram is not found") + return (__nvvmCreateProgram)( + prog) + + +cdef nvvmResult _nvvmDestroyProgram(nvvmProgram* prog) except* nogil: + global __nvvmDestroyProgram + _check_or_init_nvvm() + if __nvvmDestroyProgram == NULL: + with gil: + raise FunctionNotFoundError("function nvvmDestroyProgram is not found") + return (__nvvmDestroyProgram)( + prog) + + +cdef nvvmResult _nvvmAddModuleToProgram(nvvmProgram prog, const char* buffer, size_t size, const char* name) except* nogil: + global __nvvmAddModuleToProgram + _check_or_init_nvvm() + if __nvvmAddModuleToProgram == NULL: + with gil: + raise FunctionNotFoundError("function nvvmAddModuleToProgram is not found") + return (__nvvmAddModuleToProgram)( + prog, 
buffer, size, name) + + +cdef nvvmResult _nvvmLazyAddModuleToProgram(nvvmProgram prog, const char* buffer, size_t size, const char* name) except* nogil: + global __nvvmLazyAddModuleToProgram + _check_or_init_nvvm() + if __nvvmLazyAddModuleToProgram == NULL: + with gil: + raise FunctionNotFoundError("function nvvmLazyAddModuleToProgram is not found") + return (__nvvmLazyAddModuleToProgram)( + prog, buffer, size, name) + + +cdef nvvmResult _nvvmCompileProgram(nvvmProgram prog, int numOptions, const char** options) except* nogil: + global __nvvmCompileProgram + _check_or_init_nvvm() + if __nvvmCompileProgram == NULL: + with gil: + raise FunctionNotFoundError("function nvvmCompileProgram is not found") + return (__nvvmCompileProgram)( + prog, numOptions, options) + + +cdef nvvmResult _nvvmVerifyProgram(nvvmProgram prog, int numOptions, const char** options) except* nogil: + global __nvvmVerifyProgram + _check_or_init_nvvm() + if __nvvmVerifyProgram == NULL: + with gil: + raise FunctionNotFoundError("function nvvmVerifyProgram is not found") + return (__nvvmVerifyProgram)( + prog, numOptions, options) + + +cdef nvvmResult _nvvmGetCompiledResultSize(nvvmProgram prog, size_t* bufferSizeRet) except* nogil: + global __nvvmGetCompiledResultSize + _check_or_init_nvvm() + if __nvvmGetCompiledResultSize == NULL: + with gil: + raise FunctionNotFoundError("function nvvmGetCompiledResultSize is not found") + return (__nvvmGetCompiledResultSize)( + prog, bufferSizeRet) + + +cdef nvvmResult _nvvmGetCompiledResult(nvvmProgram prog, char* buffer) except* nogil: + global __nvvmGetCompiledResult + _check_or_init_nvvm() + if __nvvmGetCompiledResult == NULL: + with gil: + raise FunctionNotFoundError("function nvvmGetCompiledResult is not found") + return (__nvvmGetCompiledResult)( + prog, buffer) + + +cdef nvvmResult _nvvmGetProgramLogSize(nvvmProgram prog, size_t* bufferSizeRet) except* nogil: + global __nvvmGetProgramLogSize + _check_or_init_nvvm() + if __nvvmGetProgramLogSize == NULL: 
+ with gil: + raise FunctionNotFoundError("function nvvmGetProgramLogSize is not found") + return (__nvvmGetProgramLogSize)( + prog, bufferSizeRet) + + +cdef nvvmResult _nvvmGetProgramLog(nvvmProgram prog, char* buffer) except* nogil: + global __nvvmGetProgramLog + _check_or_init_nvvm() + if __nvvmGetProgramLog == NULL: + with gil: + raise FunctionNotFoundError("function nvvmGetProgramLog is not found") + return (__nvvmGetProgramLog)( + prog, buffer) diff --git a/cuda_bindings/cuda/bindings/_internal/utils.pxd b/cuda_bindings/cuda/bindings/_internal/utils.pxd new file mode 100644 index 0000000000..67f88e9f20 --- /dev/null +++ b/cuda_bindings/cuda/bindings/_internal/utils.pxd @@ -0,0 +1,169 @@ +# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + +from libc.stdint cimport int32_t, int64_t, intptr_t +from libcpp.vector cimport vector +from libcpp cimport bool as cppbool +from libcpp cimport nullptr_t, nullptr +from libcpp.memory cimport unique_ptr + + +cdef extern from * nogil: + """ + template + class nullable_unique_ptr { + public: + nullable_unique_ptr() noexcept = default; + + nullable_unique_ptr(std::nullptr_t) noexcept = delete; + + explicit nullable_unique_ptr(T* data, bool own_data): + own_data_(own_data) + { + if (own_data) + manager_.reset(data); + else + raw_data_ = data; + } + + nullable_unique_ptr(const nullable_unique_ptr&) = delete; + + nullable_unique_ptr& operator=(const nullable_unique_ptr&) = delete; + + nullable_unique_ptr(nullable_unique_ptr&& other) noexcept + { + own_data_ = other.own_data_; + other.own_data_ = false; // ownership is transferred + if (own_data_) + { + manager_ = std::move(other.manager_); + raw_data_ = nullptr; // just in case + } + else + { + manager_.reset(nullptr); // just in case + raw_data_ = other.raw_data_; + } + } + + nullable_unique_ptr& operator=(nullable_unique_ptr&& other) noexcept + { + own_data_ = other.own_data_; + 
other.own_data_ = false; // ownership is transferred + if (own_data_) + { + manager_ = std::move(other.manager_); + raw_data_ = nullptr; // just in case + } + else + { + manager_.reset(nullptr); // just in case + raw_data_ = other.raw_data_; + } + return *this; + } + + ~nullable_unique_ptr() = default; + + void reset(T* data, bool own_data) + { + own_data_ = own_data; + if (own_data_) + { + manager_.reset(data); + raw_data_ = nullptr; + } + else + { + manager_.reset(nullptr); + raw_data_ = data; + } + } + + void swap(nullable_unique_ptr& other) noexcept + { + std::swap(manager_, other.manager_); + std::swap(raw_data_, other.raw_data_); + std::swap(own_data_, other.own_data_); + } + + /* + * Get the pointer to the underlying object (this is different from data()!). + */ + T* get() const noexcept + { + if (own_data_) + return manager_.get(); + else + return raw_data_; + } + + /* + * Get the pointer to the underlying buffer (this is different from get()!). + */ + void* data() noexcept + { + if (own_data_) + return manager_.get()->data(); + else + return raw_data_; + } + + T& operator*() + { + if (own_data_) + return *manager_; + else + return *raw_data_; + } + + private: + std::unique_ptr manager_{}; + T* raw_data_{nullptr}; + bool own_data_{false}; + }; + """ + # xref: cython/Cython/Includes/libcpp/memory.pxd + cdef cppclass nullable_unique_ptr[T]: + nullable_unique_ptr() + nullable_unique_ptr(T*, cppbool) + nullable_unique_ptr(nullable_unique_ptr[T]&) + + # Modifiers + void reset(T*, cppbool) + void swap(nullable_unique_ptr&) + + # Observers + T* get() + T& operator*() + void* data() + + +ctypedef fused ResT: + int + int32_t + int64_t + char + float + double + + +ctypedef fused PtrT: + void + + +cdef cppclass nested_resource[T]: + nullable_unique_ptr[ vector[intptr_t] ] ptrs + nullable_unique_ptr[ vector[vector[T]] ] nested_resource_ptr + + +# accepts the output pointer as input to use the return value for exception propagation +cdef int 
get_resource_ptr(nullable_unique_ptr[vector[ResT]] &in_out_ptr, object obj, ResT* __unused) except 1 +cdef int get_resource_ptrs(nullable_unique_ptr[ vector[PtrT*] ] &in_out_ptr, object obj, PtrT* __unused) except 1 +cdef int get_nested_resource_ptr(nested_resource[ResT] &in_out_ptr, object obj, ResT* __unused) except 1 + +cdef bint is_nested_sequence(data) +cdef void* get_buffer_pointer(buf, Py_ssize_t size, readonly=*) except* + +cdef tuple get_nvvm_dso_version_suffix(int driver_ver) diff --git a/cuda_bindings/cuda/bindings/_internal/utils.pyx b/cuda_bindings/cuda/bindings/_internal/utils.pyx new file mode 100644 index 0000000000..d4fd1d813c --- /dev/null +++ b/cuda_bindings/cuda/bindings/_internal/utils.pyx @@ -0,0 +1,137 @@ +# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + +cimport cpython +from libc.stdint cimport intptr_t +from libcpp.utility cimport move +from cython.operator cimport dereference as deref + + +cdef bint is_nested_sequence(data): + if not cpython.PySequence_Check(data): + return False + else: + for i in data: + if not cpython.PySequence_Check(i): + return False + else: + return True + + +cdef void* get_buffer_pointer(buf, Py_ssize_t size, readonly=True) except*: + """The caller must ensure ``buf`` is alive when the returned pointer is in use.""" + cdef void* bufPtr + cdef int flags = cpython.PyBUF_ANY_CONTIGUOUS + if not readonly: + flags |= cpython.PyBUF_WRITABLE + cdef int status = -1 + cdef cpython.Py_buffer view + + if isinstance(buf, int): + bufPtr = buf + else: # try buffer protocol + try: + status = cpython.PyObject_GetBuffer(buf, &view, flags) + # when the caller does not provide a size, it is set to -1 at generate-time by cybind + if size != -1: + assert view.len == size + assert view.ndim == 1 + except Exception as e: + adj = "writable " if not readonly else "" + raise ValueError( + "buf must be either a Python int representing the pointer 
" + f"address to a valid buffer, or a 1D contiguous {adj}" + "buffer, of size bytes") from e + else: + bufPtr = view.buf + finally: + if status == 0: + cpython.PyBuffer_Release(&view) + + return bufPtr + + +# Cython can't infer the ResT overload when it is wrapped in nullable_unique_ptr, +# so we need a dummy (__unused) input argument to help it +cdef int get_resource_ptr(nullable_unique_ptr[vector[ResT]] &in_out_ptr, object obj, ResT* __unused) except 1: + if cpython.PySequence_Check(obj): + vec = new vector[ResT](len(obj)) + # set the ownership immediately to avoid leaking the `vec` memory in + # case of exception in the following loop + in_out_ptr.reset(vec, True) + for i in range(len(obj)): + deref(vec)[i] = obj[i] + else: + in_out_ptr.reset(obj, False) + return 0 + + +cdef int get_resource_ptrs(nullable_unique_ptr[ vector[PtrT*] ] &in_out_ptr, object obj, PtrT* __unused) except 1: + if cpython.PySequence_Check(obj): + vec = new vector[PtrT*](len(obj)) + # set the ownership immediately to avoid leaking the `vec` memory in + # case of exception in the following loop + in_out_ptr.reset(vec, True) + for i in range(len(obj)): + deref(vec)[i] = (obj[i]) + else: + in_out_ptr.reset(obj, False) + return 0 + + +cdef int get_nested_resource_ptr(nested_resource[ResT] &in_out_ptr, object obj, ResT* __unused) except 1: + cdef nullable_unique_ptr[ vector[intptr_t] ] nested_ptr + cdef nullable_unique_ptr[ vector[vector[ResT]] ] nested_res_ptr + cdef vector[intptr_t]* nested_vec = NULL + cdef vector[vector[ResT]]* nested_res_vec = NULL + cdef size_t i = 0, length = 0 + cdef intptr_t addr + + if is_nested_sequence(obj): + length = len(obj) + nested_res_vec = new vector[vector[ResT]](length) + nested_vec = new vector[intptr_t](length) + # set the ownership immediately to avoid leaking memory in case of + # exception in the following loop + nested_res_ptr.reset(nested_res_vec, True) + nested_ptr.reset(nested_vec, True) + for i, obj_i in enumerate(obj): + if ResT is char: + 
obj_i_bytes = ((obj_i)).encode() + str_len = (len(obj_i_bytes)) + 1 # including null termination + deref(nested_res_vec)[i].resize(str_len) + obj_i_ptr = (obj_i_bytes) + # cast to size_t explicitly to work around a potentially Cython bug + deref(nested_res_vec)[i].assign(obj_i_ptr, obj_i_ptr + str_len) + else: + deref(nested_res_vec)[i] = obj_i + deref(nested_vec)[i] = (deref(nested_res_vec)[i].data()) + elif cpython.PySequence_Check(obj): + length = len(obj) + nested_vec = new vector[intptr_t](length) + nested_ptr.reset(nested_vec, True) + for i, addr in enumerate(obj): + deref(nested_vec)[i] = addr + nested_res_ptr.reset(NULL, False) + else: + # obj is an int (ResT**) + nested_res_ptr.reset(NULL, False) + nested_ptr.reset(obj, False) + + in_out_ptr.ptrs = move(nested_ptr) + in_out_ptr.nested_resource_ptr = move(nested_res_ptr) + return 0 + + +class FunctionNotFoundError(RuntimeError): pass + +class NotSupportedError(RuntimeError): pass + + +cdef tuple get_nvvm_dso_version_suffix(int driver_ver): + if 11000 <= driver_ver < 11020: + return ('3', '') + if 11020 <= driver_ver < 13000: + return ('4', '') + raise NotSupportedError(f'CUDA driver version {driver_ver} is not supported') diff --git a/cuda_bindings/cuda/bindings/cynvvm.pxd b/cuda_bindings/cuda/bindings/cynvvm.pxd new file mode 100644 index 0000000000..fa27d99bbf --- /dev/null +++ b/cuda_bindings/cuda/bindings/cynvvm.pxd @@ -0,0 +1,46 @@ +# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +# +# This code was automatically generated across versions from 11.0.3 to 12.8.0. Do not modify it directly. + + +############################################################################### +# Types (structs, enums, ...) 
+############################################################################### + +# enums +ctypedef enum nvvmResult "nvvmResult": + NVVM_SUCCESS "NVVM_SUCCESS" = 0 + NVVM_ERROR_OUT_OF_MEMORY "NVVM_ERROR_OUT_OF_MEMORY" = 1 + NVVM_ERROR_PROGRAM_CREATION_FAILURE "NVVM_ERROR_PROGRAM_CREATION_FAILURE" = 2 + NVVM_ERROR_IR_VERSION_MISMATCH "NVVM_ERROR_IR_VERSION_MISMATCH" = 3 + NVVM_ERROR_INVALID_INPUT "NVVM_ERROR_INVALID_INPUT" = 4 + NVVM_ERROR_INVALID_PROGRAM "NVVM_ERROR_INVALID_PROGRAM" = 5 + NVVM_ERROR_INVALID_IR "NVVM_ERROR_INVALID_IR" = 6 + NVVM_ERROR_INVALID_OPTION "NVVM_ERROR_INVALID_OPTION" = 7 + NVVM_ERROR_NO_MODULE_IN_PROGRAM "NVVM_ERROR_NO_MODULE_IN_PROGRAM" = 8 + NVVM_ERROR_COMPILATION "NVVM_ERROR_COMPILATION" = 9 + NVVM_ERROR_CANCELLED "NVVM_ERROR_CANCELLED" = 10 + + +# types +ctypedef void* nvvmProgram 'nvvmProgram' + + +############################################################################### +# Functions +############################################################################### + +cdef nvvmResult nvvmVersion(int* major, int* minor) except* nogil +cdef nvvmResult nvvmIRVersion(int* majorIR, int* minorIR, int* majorDbg, int* minorDbg) except* nogil +cdef nvvmResult nvvmCreateProgram(nvvmProgram* prog) except* nogil +cdef nvvmResult nvvmDestroyProgram(nvvmProgram* prog) except* nogil +cdef nvvmResult nvvmAddModuleToProgram(nvvmProgram prog, const char* buffer, size_t size, const char* name) except* nogil +cdef nvvmResult nvvmLazyAddModuleToProgram(nvvmProgram prog, const char* buffer, size_t size, const char* name) except* nogil +cdef nvvmResult nvvmCompileProgram(nvvmProgram prog, int numOptions, const char** options) except* nogil +cdef nvvmResult nvvmVerifyProgram(nvvmProgram prog, int numOptions, const char** options) except* nogil +cdef nvvmResult nvvmGetCompiledResultSize(nvvmProgram prog, size_t* bufferSizeRet) except* nogil +cdef nvvmResult nvvmGetCompiledResult(nvvmProgram prog, char* buffer) except* nogil +cdef nvvmResult 
nvvmGetProgramLogSize(nvvmProgram prog, size_t* bufferSizeRet) except* nogil +cdef nvvmResult nvvmGetProgramLog(nvvmProgram prog, char* buffer) except* nogil diff --git a/cuda_bindings/cuda/bindings/cynvvm.pyx b/cuda_bindings/cuda/bindings/cynvvm.pyx new file mode 100644 index 0000000000..1812998e18 --- /dev/null +++ b/cuda_bindings/cuda/bindings/cynvvm.pyx @@ -0,0 +1,59 @@ +# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +# +# This code was automatically generated across versions from 11.0.3 to 12.8.0. Do not modify it directly. + +from ._internal cimport nvvm as _nvvm + + +############################################################################### +# Wrapper functions +############################################################################### + +cdef nvvmResult nvvmVersion(int* major, int* minor) except* nogil: + return _nvvm._nvvmVersion(major, minor) + + +cdef nvvmResult nvvmIRVersion(int* majorIR, int* minorIR, int* majorDbg, int* minorDbg) except* nogil: + return _nvvm._nvvmIRVersion(majorIR, minorIR, majorDbg, minorDbg) + + +cdef nvvmResult nvvmCreateProgram(nvvmProgram* prog) except* nogil: + return _nvvm._nvvmCreateProgram(prog) + + +cdef nvvmResult nvvmDestroyProgram(nvvmProgram* prog) except* nogil: + return _nvvm._nvvmDestroyProgram(prog) + + +cdef nvvmResult nvvmAddModuleToProgram(nvvmProgram prog, const char* buffer, size_t size, const char* name) except* nogil: + return _nvvm._nvvmAddModuleToProgram(prog, buffer, size, name) + + +cdef nvvmResult nvvmLazyAddModuleToProgram(nvvmProgram prog, const char* buffer, size_t size, const char* name) except* nogil: + return _nvvm._nvvmLazyAddModuleToProgram(prog, buffer, size, name) + + +cdef nvvmResult nvvmCompileProgram(nvvmProgram prog, int numOptions, const char** options) except* nogil: + return _nvvm._nvvmCompileProgram(prog, numOptions, options) + + +cdef nvvmResult nvvmVerifyProgram(nvvmProgram prog, 
int numOptions, const char** options) except* nogil: + return _nvvm._nvvmVerifyProgram(prog, numOptions, options) + + +cdef nvvmResult nvvmGetCompiledResultSize(nvvmProgram prog, size_t* bufferSizeRet) except* nogil: + return _nvvm._nvvmGetCompiledResultSize(prog, bufferSizeRet) + + +cdef nvvmResult nvvmGetCompiledResult(nvvmProgram prog, char* buffer) except* nogil: + return _nvvm._nvvmGetCompiledResult(prog, buffer) + + +cdef nvvmResult nvvmGetProgramLogSize(nvvmProgram prog, size_t* bufferSizeRet) except* nogil: + return _nvvm._nvvmGetProgramLogSize(prog, bufferSizeRet) + + +cdef nvvmResult nvvmGetProgramLog(nvvmProgram prog, char* buffer) except* nogil: + return _nvvm._nvvmGetProgramLog(prog, buffer) diff --git a/cuda_bindings/cuda/bindings/nvvm.pxd b/cuda_bindings/cuda/bindings/nvvm.pxd new file mode 100644 index 0000000000..dc8b2eea1b --- /dev/null +++ b/cuda_bindings/cuda/bindings/nvvm.pxd @@ -0,0 +1,40 @@ +# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +# +# This code was automatically generated across versions from 11.0.3 to 12.8.0. Do not modify it directly. + +from libc.stdint cimport intptr_t + +from .cynvvm cimport * + + +############################################################################### +# Types +############################################################################### + +ctypedef nvvmProgram Program + + +############################################################################### +# Enum +############################################################################### + +ctypedef nvvmResult _Result + + +############################################################################### +# Functions +############################################################################### + +cpdef tuple version() +cpdef tuple ir_version() +cpdef intptr_t create_program() except? 
0 +cpdef add_module_to_program(intptr_t prog, buffer, size_t size, name) +cpdef lazy_add_module_to_program(intptr_t prog, buffer, size_t size, name) +cpdef compile_program(intptr_t prog, int num_options, options) +cpdef verify_program(intptr_t prog, int num_options, options) +cpdef size_t get_compiled_result_size(intptr_t prog) except? 0 +cpdef get_compiled_result(intptr_t prog, buffer) +cpdef size_t get_program_log_size(intptr_t prog) except? 0 +cpdef get_program_log(intptr_t prog, buffer) diff --git a/cuda_bindings/cuda/bindings/nvvm.pyx b/cuda_bindings/cuda/bindings/nvvm.pyx new file mode 100644 index 0000000000..2a334994c2 --- /dev/null +++ b/cuda_bindings/cuda/bindings/nvvm.pyx @@ -0,0 +1,284 @@ +# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +# +# This code was automatically generated across versions from 11.0.3 to 12.8.0. Do not modify it directly. + +cimport cython # NOQA + +from ._internal.utils cimport (get_buffer_pointer, get_nested_resource_ptr, + nested_resource) + +from enum import IntEnum as _IntEnum + + +############################################################################### +# Enum +############################################################################### + +class Result(_IntEnum): + """See `nvvmResult`.""" + SUCCESS = NVVM_SUCCESS + ERROR_OUT_OF_MEMORY = NVVM_ERROR_OUT_OF_MEMORY + ERROR_PROGRAM_CREATION_FAILURE = NVVM_ERROR_PROGRAM_CREATION_FAILURE + ERROR_IR_VERSION_MISMATCH = NVVM_ERROR_IR_VERSION_MISMATCH + ERROR_INVALID_INPUT = NVVM_ERROR_INVALID_INPUT + ERROR_INVALID_PROGRAM = NVVM_ERROR_INVALID_PROGRAM + ERROR_INVALID_IR = NVVM_ERROR_INVALID_IR + ERROR_INVALID_OPTION = NVVM_ERROR_INVALID_OPTION + ERROR_NO_MODULE_IN_PROGRAM = NVVM_ERROR_NO_MODULE_IN_PROGRAM + ERROR_COMPILATION = NVVM_ERROR_COMPILATION + ERROR_CANCELLED = NVVM_ERROR_CANCELLED + + +############################################################################### +# Error 
handling +############################################################################### + +class nvvmError(Exception): + + def __init__(self, status): + self.status = status + s = Result(status) + cdef str err = f"{s.name} ({s.value})" + super(nvvmError, self).__init__(err) + + def __reduce__(self): + return (type(self), (self.status,)) + + +@cython.profile(False) +cdef int check_status(int status) except 1 nogil: + if status != 0: + with gil: + raise nvvmError(status) + return status + + +############################################################################### +# Wrapper functions +############################################################################### + +cpdef destroy_program(intptr_t prog): + """Destroy a program. + + Args: + prog (intptr_t): nvvm prog. + + .. seealso:: `nvvmDestroyProgram` + """ + cdef Program p = prog + with nogil: + status = nvvmDestroyProgram(&p) + check_status(status) + + +cpdef tuple version(): + """Get the NVVM version. + + Returns: + A 2-tuple containing: + + - int: NVVM major version number. + - int: NVVM minor version number. + + .. seealso:: `nvvmVersion` + """ + cdef int major + cdef int minor + with nogil: + status = nvvmVersion(&major, &minor) + check_status(status) + return (major, minor) + + +cpdef tuple ir_version(): + """Get the NVVM IR version. + + Returns: + A 4-tuple containing: + + - int: NVVM IR major version number. + - int: NVVM IR minor version number. + - int: NVVM IR debug metadata major version number. + - int: NVVM IR debug metadata minor version number. + + .. seealso:: `nvvmIRVersion` + """ + cdef int major_ir + cdef int minor_ir + cdef int major_dbg + cdef int minor_dbg + with nogil: + status = nvvmIRVersion(&major_ir, &minor_ir, &major_dbg, &minor_dbg) + check_status(status) + return (major_ir, minor_ir, major_dbg, minor_dbg) + + +cpdef intptr_t create_program() except? 0: + """Create a program, and set the value of its handle to ``*prog``. + + Returns: + intptr_t: NVVM program. + + .. 
seealso:: `nvvmCreateProgram` + """ + cdef Program prog + with nogil: + status = nvvmCreateProgram(&prog) + check_status(status) + return prog + + +cpdef add_module_to_program(intptr_t prog, buffer, size_t size, name): + """Add a module level NVVM IR to a program. + + Args: + prog (intptr_t): NVVM program. + buffer (bytes): NVVM IR module in the bitcode or text representation. + size (size_t): Size of the NVVM IR module. + name (str): Name of the NVVM IR module. If NULL, "" is used as the name. + + .. seealso:: `nvvmAddModuleToProgram` + """ + cdef void* _buffer_ = get_buffer_pointer(buffer, size, readonly=True) + if not isinstance(name, str): + raise TypeError("name must be a Python str") + cdef bytes _temp_name_ = (name).encode() + cdef char* _name_ = _temp_name_ + with nogil: + status = nvvmAddModuleToProgram(prog, _buffer_, size, _name_) + check_status(status) + + +cpdef lazy_add_module_to_program(intptr_t prog, buffer, size_t size, name): + """Add a module level NVVM IR to a program. + + Args: + prog (intptr_t): NVVM program. + buffer (bytes): NVVM IR module in the bitcode representation. + size (size_t): Size of the NVVM IR module. + name (str): Name of the NVVM IR module. If NULL, "" is used as the name. + + .. seealso:: `nvvmLazyAddModuleToProgram` + """ + cdef void* _buffer_ = get_buffer_pointer(buffer, size, readonly=True) + if not isinstance(name, str): + raise TypeError("name must be a Python str") + cdef bytes _temp_name_ = (name).encode() + cdef char* _name_ = _temp_name_ + with nogil: + status = nvvmLazyAddModuleToProgram(prog, _buffer_, size, _name_) + check_status(status) + + +cpdef compile_program(intptr_t prog, int num_options, options): + """Compile the NVVM program. + + Args: + prog (intptr_t): NVVM program. + num_options (int): Number of compiler ``options`` passed. + options (object): Compiler options in the form of C string array. 
It can be: + + - an :class:`int` as the pointer address to the nested sequence, or + - a Python sequence of :class:`int`\s, each of which is a pointer address + to a valid sequence of 'char', or + - a nested Python sequence of ``str``. + + + .. seealso:: `nvvmCompileProgram` + """ + cdef nested_resource[ char ] _options_ + get_nested_resource_ptr[char](_options_, options, NULL) + with nogil: + status = nvvmCompileProgram(prog, num_options, (_options_.ptrs.data())) + check_status(status) + + +cpdef verify_program(intptr_t prog, int num_options, options): + """Verify the NVVM program. + + Args: + prog (intptr_t): NVVM program. + num_options (int): Number of compiler ``options`` passed. + options (object): Compiler options in the form of C string array. It can be: + + - an :class:`int` as the pointer address to the nested sequence, or + - a Python sequence of :class:`int`\s, each of which is a pointer address + to a valid sequence of 'char', or + - a nested Python sequence of ``str``. + + + .. seealso:: `nvvmVerifyProgram` + """ + cdef nested_resource[ char ] _options_ + get_nested_resource_ptr[char](_options_, options, NULL) + with nogil: + status = nvvmVerifyProgram(prog, num_options, (_options_.ptrs.data())) + check_status(status) + + +cpdef size_t get_compiled_result_size(intptr_t prog) except? 0: + """Get the size of the compiled result. + + Args: + prog (intptr_t): NVVM program. + + Returns: + size_t: Size of the compiled result (including the trailing NULL). + + .. seealso:: `nvvmGetCompiledResultSize` + """ + cdef size_t buffer_size_ret + with nogil: + status = nvvmGetCompiledResultSize(prog, &buffer_size_ret) + check_status(status) + return buffer_size_ret + + +cpdef get_compiled_result(intptr_t prog, buffer): + """Get the compiled result. + + Args: + prog (intptr_t): NVVM program. + buffer (bytes): Compiled result. + + .. 
seealso:: `nvvmGetCompiledResult` + """ + cdef void* _buffer_ = get_buffer_pointer(buffer, -1, readonly=False) + with nogil: + status = nvvmGetCompiledResult(prog, _buffer_) + check_status(status) + + +cpdef size_t get_program_log_size(intptr_t prog) except? 0: + """Get the Size of Compiler/Verifier Message. + + Args: + prog (intptr_t): NVVM program. + + Returns: + size_t: Size of the compilation/verification log (including the trailing NULL). + + .. seealso:: `nvvmGetProgramLogSize` + """ + cdef size_t buffer_size_ret + with nogil: + status = nvvmGetProgramLogSize(prog, &buffer_size_ret) + check_status(status) + return buffer_size_ret + + +cpdef get_program_log(intptr_t prog, buffer): + """Get the Compiler/Verifier Message. + + Args: + prog (intptr_t): NVVM program. + buffer (bytes): Compilation/Verification log. + + .. seealso:: `nvvmGetProgramLog` + """ + cdef void* _buffer_ = get_buffer_pointer(buffer, -1, readonly=False) + with nogil: + status = nvvmGetProgramLog(prog, _buffer_) + check_status(status) diff --git a/cuda_bindings/pyproject.toml b/cuda_bindings/pyproject.toml index 0abf0672f6..028c20f993 100644 --- a/cuda_bindings/pyproject.toml +++ b/cuda_bindings/pyproject.toml @@ -34,7 +34,8 @@ dependencies = [ [project.optional-dependencies] all = [ - "nvidia-cuda-nvrtc-cu11" + "nvidia-cuda-nvcc-cu11", + "nvidia-cuda-nvrtc-cu11", ] [project.urls] diff --git a/cuda_bindings/setup.py b/cuda_bindings/setup.py index 7b4884f6ff..0d2b938d40 100644 --- a/cuda_bindings/setup.py +++ b/cuda_bindings/setup.py @@ -228,6 +228,9 @@ def do_cythonize(extensions): ["cuda/bindings/*.pyx"], # public (deprecated, to be removed) ["cuda/*.pyx"], + # internal files used by generated bindings + ["cuda/bindings/_internal/nvvm.pyx"], + ["cuda/bindings/_internal/utils.pyx"], ] for sources in sources_list: @@ -260,7 +263,15 @@ def build_extension(self, ext): # Allow extensions to discover libraries at runtime # relative their wheels installation. 
if ext.name == "cuda.bindings._bindings.cynvrtc": - ldflag = f"-Wl,--disable-new-dtags,-rpath,$ORIGIN/../../../nvidia/cuda_nvrtc/lib" + ldflag = "-Wl,--disable-new-dtags,-rpath,$ORIGIN/../../../nvidia/cuda_nvrtc/lib" + elif ext.name == "cuda.bindings._internal.nvvm": + # from /site-packages/cuda/bindings/_internal/ + # to /site-packages/nvidia/cuda_nvcc/nvvm/lib64/ + rel1 = "$ORIGIN/../../../nvidia/cuda_nvcc/nvvm/lib64" + # from /lib/python3.*/site-packages/cuda/bindings/_internal/ + # to /lib/nvvm/lib64/ + rel2 = "$ORIGIN/../../../../../../nvvm/lib64" + ldflag = f"-Wl,--disable-new-dtags,-rpath,{rel1},-rpath,{rel2}" else: ldflag = None diff --git a/cuda_bindings/tests/test_nvvm.py b/cuda_bindings/tests/test_nvvm.py new file mode 100644 index 0000000000..4bf0a3cebb --- /dev/null +++ b/cuda_bindings/tests/test_nvvm.py @@ -0,0 +1,193 @@ +# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + +import base64 +import re +from contextlib import contextmanager + +import pytest + +from cuda.bindings import nvvm + +MINIMAL_NVVMIR_TXT = b"""\ +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" + +target triple = "nvptx64-nvidia-cuda" + +define void @kernel() { +entry: + ret void +} + +!nvvm.annotations = !{!0} +!0 = !{void ()* @kernel, !"kernel", i32 1} + +!nvvmir.version = !{!1} +!1 = !{i32 2, i32 0, i32 3, i32 1} +""" # noqa: E501 + +# Equivalent to MINIMAL_NVVMIR_TXT +MINIMAL_NVVMIR_BITCODE = base64.b64decode(""" +QkPA3jUUAAAFAAAAYgwwJElZvmbu034tRAEyBQAAAAAhDAAAJAEAAAsCIQACAAAAEwAAAAeBI5FB +yARJBhAyOZIBhAwlBQgZHgSLYoAMRQJCkgtCZBAyFDgIGEsKMjKISJAUIENGiKUAGTJC5EgOkJEh +xFBBUYGM4YPligQZRgaJIAAACwAAADIiyAggZIUEkyGkhASTIeOEoZAUEkyGjAuEZEwQFCMAJQBl +IGCOAAwAAAAAEyZ3sAd4oAd8sAM6aAN3sId0IId0CIc2GId6IIdw2OAS5dAG8KAHdkAHemAHdKAH 
+dkAHbZAOcaAHeKAHeNAG6YAHeoAHeoAHbZAOcWAHehAHdqAHcWAHbZAOcyAHejAHcqAHcyAHbZAO +dkAHemAHdKAHdkAHbZAOcSAHeKAHcSAHeKAHcSAHeNAG5jAHcqAHcyAHejAHctAG5mAHdKAHdkAH +emAHdNAG9hAHdqAHcWAHehAHdtAG9jAHcqAHcyAHejAHctAG9mAHdKAHdkAHemAHdNAG9hAHcoAH +ehAHcoAHehAHcoAHbeAOcWAHejAHcqAHdkAHGiEMGTFIgzDA8jdVxSCRvyxDIsAIAAAAAAAAAAAA +AEBig0BRlAAAgCwQBgAAADIemAwZEUyQjAkmR8YEQ2IJFMEIQBkAALEYAABtAAAAMwiAHMThHGYU +AT2IQziEw4xCgAd5eAdzmHEM5gAP7RAO9IAOMwxCHsLBHc6hHGYwBT2IQziEgxvMAz3IQz2MAz3M +eIx0cAd7CAd5SIdwcAd6cAN2eIdwIIcZzBEO7JAO4TAPbjAP4/AO8FAOMxDEHd4hHNghHcJhHmYw +iTu8gzvQQzm0Azy8gzyEAzvM8BR2YAd7aAc3aIdyaAc3gIdwkIdwYAd2KAd2+AV2eId3gIdfCIdx +GIdymId5mIEs7vAO7uAO9cAO7DADYsihHOShHMyhHOShHNxhHMohHMSBHcphBtaQQznIQzmYQznI +Qzm4wziUQziIAzuUwy+8gzz8gjvUAzuwwwzHaYdwWIdycIN0aAd4YId0GId0oIcZzlMP7gAP8lAO +5JAO40AP4SAO7FAOMyAoHdzBHsJBHtIhHNyBHtzgHOThHeoBHmYYUTiwQzqcgzvMUCR2YAd7aAc3 +YId3eAd4mFFM9JAP8FAOMx5qHsphHOghHd7BHX4BHuShHMwhHfBhBlSFgzjMwzuwQz3QQzn8wjzk +QzuIwzuww4zFCod5mId3GId0CAd6KAdyAAAAAHkgAAAeAAAAYh5IIEOIDBk5GSSQkUDGyMhoIlAI +FDKeGBkhR8iQUQwIBQAABgAAAGtlcm5lbAAAIwgCMIJABCMIhDCCQAwjCAQxwyAEwwwEURiDjAQm +KCE3O7s2lzA3tze6MLq0N7e5UQIjHTc7u7Y0ORe7Mrm5tDe3UYIDAAAAqRgAAAsAAAALCnIoh3eA +B3pYcJhDPbjDOLBDOdDDguYcxqEN6EEewsEd5iEd6CEd3sEdANEQAAAGAAAAB8w8pIM7nAM7lAM9 +oIM8lEM4kMMBAAAAYSAAAAYAAAATBAGGAwEAAAIAAAAHUBDNFGEAAAAAAABxIAAAAwAAADIOECKE +AKACAAAAAAAAAABlDAAAHQAAABIDlOgAAAAAAAAAAAYAAAAFAAAARAAAAAEAAABQAAAAAAAAAFAA +AAABAAAAaAAAAAAAAAALAAAAEwAAAB4AAAARAAAALwAAAAAAAAAAAAAAAQAAAAAAAAAAAAAABgAA +AAAAAAAGAAAA/////wAkAAAAAAAAXQwAAA8AAAASA5RvAAAAAGtlcm5lbDUuMC4xbnZwdHg2NC1u +dmlkaWEtY3VkYW1pbmltYWxfbnZ2bWlyLmxsAAAAAAA= +""") +# To regenerate, pull and start a docker container: +# docker pull centos/llvm-toolset-7-centos7 +# docker run -it centos/llvm-toolset-7-centos7 /bin/bash +# In the docker container, copy MINIMAL_NVVMIR_TXT to a file with name minimal_nvvmir.ll +# Then run: +# llvm-as minimal_nvvmir.ll -o minimal_nvvmir.bc +# Save this to encode.py: +# import base64, sys, textwrap +# bitcode = 
open(sys.argv[1], "rb").read() +# encoded_bitcode = base64.b64encode(bitcode).decode("ascii") +# wrapped_base64 = "\n".join(textwrap.wrap(encoded_bitcode, width=76)) +# print(wrapped_base64) +# Then run: +# python encode.py minimal_nvvmir.bc + + +@pytest.fixture(params=["txt", "bitcode"]) +def minimal_nvvmir(request): + return MINIMAL_NVVMIR_TXT if request.param == "txt" else MINIMAL_NVVMIR_BITCODE + + +@pytest.fixture(params=[nvvm.compile_program, nvvm.verify_program]) +def compile_or_verify(request): + return request.param + + +def match_exact(s): + return "^" + re.escape(s) + "$" + + +@contextmanager +def nvvm_program() -> int: + prog: int = nvvm.create_program() + try: + yield prog + finally: + nvvm.destroy_program(prog) + + +def get_program_log(prog): + buffer = bytearray(nvvm.get_program_log_size(prog)) + nvvm.get_program_log(prog, buffer) + return buffer.decode(errors="backslashreplace") + + +def test_nvvm_version(): + ver = nvvm.version() + assert len(ver) == 2 + assert ver >= (2, 0) + + +def test_nvvm_ir_version(): + ver = nvvm.ir_version() + assert len(ver) == 4 + assert ver >= (2, 0, 3, 1) + + +def test_create_and_destroy(): + with nvvm_program() as prog: + assert isinstance(prog, int) + assert prog != 0 + + +@pytest.mark.parametrize("add_fn", [nvvm.add_module_to_program, nvvm.lazy_add_module_to_program]) +def test_add_module_to_program_fail(add_fn): + with nvvm_program() as prog, pytest.raises(ValueError): + # Passing a C NULL pointer generates "ERROR_INVALID_INPUT (4)", + # but that is not possible through our Python bindings. + # The ValueError originates from the cython bindings code. 
+ add_fn(prog, None, 0, "FileNameHere.ll") + + +def test_c_or_v_program_fail_no_module(compile_or_verify): + with nvvm_program() as prog, pytest.raises(nvvm.nvvmError, match=match_exact("ERROR_NO_MODULE_IN_PROGRAM (8)")): + compile_or_verify(prog, 0, []) + + +def test_c_or_v_program_fail_invalid_ir(compile_or_verify): + expected_error = "ERROR_COMPILATION (9)" if compile_or_verify is nvvm.compile_program else "ERROR_INVALID_IR (6)" + nvvm_ll = b"This is not NVVM IR" + with nvvm_program() as prog: + nvvm.add_module_to_program(prog, nvvm_ll, len(nvvm_ll), "FileNameHere.ll") + with pytest.raises(nvvm.nvvmError, match=match_exact(expected_error)): + compile_or_verify(prog, 0, []) + assert get_program_log(prog) == "FileNameHere.ll (1, 0): parse expected top-level entity\x00" + + +def test_c_or_v_program_fail_bad_option(minimal_nvvmir, compile_or_verify): + with nvvm_program() as prog: + nvvm.add_module_to_program(prog, minimal_nvvmir, len(minimal_nvvmir), "FileNameHere.ll") + with pytest.raises(nvvm.nvvmError, match=match_exact("ERROR_INVALID_OPTION (7)")): + compile_or_verify(prog, 1, ["BadOption"]) + assert get_program_log(prog) == "libnvvm : error: BadOption is an unsupported option\x00" + + +@pytest.mark.parametrize( + ("get_size", "get_buffer"), + [ + (nvvm.get_compiled_result_size, nvvm.get_compiled_result), + (nvvm.get_program_log_size, nvvm.get_program_log), + ], +) +def test_get_buffer_empty(get_size, get_buffer): + with nvvm_program() as prog: + buffer_size = get_size(prog) + assert buffer_size == 1 + buffer = bytearray(buffer_size) + get_buffer(prog, buffer) + assert buffer == b"\x00" + + +@pytest.mark.parametrize("options", [[], ["-opt=0"], ["-opt=3", "-g"]]) +def test_compile_program_with_minimal_nnvm_ir(minimal_nvvmir, options): + with nvvm_program() as prog: + nvvm.add_module_to_program(prog, minimal_nvvmir, len(minimal_nvvmir), "FileNameHere.ll") + try: + nvvm.compile_program(prog, len(options), options) + except nvvm.nvvmError as e: + raise 
RuntimeError(get_program_log(prog)) from e + else: + log_size = nvvm.get_program_log_size(prog) + assert log_size == 1 + buffer = bytearray(log_size) + nvvm.get_program_log(prog, buffer) + assert buffer == b"\x00" + result_size = nvvm.get_compiled_result_size(prog) + buffer = bytearray(result_size) + nvvm.get_compiled_result(prog, buffer) + assert ".visible .entry kernel()" in buffer.decode() + + +@pytest.mark.parametrize("options", [[], ["-opt=0"], ["-opt=3", "-g"]]) +def test_verify_program_with_minimal_nnvm_ir(minimal_nvvmir, options): + with nvvm_program() as prog: + nvvm.add_module_to_program(prog, minimal_nvvmir, len(minimal_nvvmir), "FileNameHere.ll") + nvvm.verify_program(prog, len(options), options) From 639319e0595bc2cb9bd9cf26ce82aa0fd53fa25a Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sat, 8 Feb 2025 04:18:12 +0000 Subject: [PATCH 2/9] backport build system additions --- .../cuda/bindings/_internal/__init__.py | 0 cuda_bindings/setup.py | 40 ++++++++++++++++++- 2 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 cuda_bindings/cuda/bindings/_internal/__init__.py diff --git a/cuda_bindings/cuda/bindings/_internal/__init__.py b/cuda_bindings/cuda/bindings/_internal/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/cuda_bindings/setup.py b/cuda_bindings/setup.py index 0d2b938d40..4968d54eb2 100644 --- a/cuda_bindings/setup.py +++ b/cuda_bindings/setup.py @@ -6,11 +6,15 @@ # this software and related documentation outside the terms of the EULA # is strictly prohibited. 
+import atexit +import contextlib import glob import os import platform +import shutil import sys import sysconfig +import tempfile from Cython import Tempita from Cython.Build import cythonize @@ -145,7 +149,9 @@ def generate_output(infile, local): os.path.join('cuda', 'bindings'), os.path.join('cuda', 'bindings', '_bindings'), os.path.join('cuda', 'bindings', '_lib'), - os.path.join('cuda', 'bindings', '_lib', 'cyruntime')] + os.path.join('cuda', 'bindings', '_lib', 'cyruntime'), + os.path.join('cuda', 'bindings', '_internal'), + ] input_files = [] for path in path_list: input_files += fetch_input_files(path) @@ -206,6 +212,38 @@ def prep_extensions(sources): return exts +# new path for the bindings from cybind +def rename_architecture_specific_files(): + if sys.platform == "linux": + src_files = glob.glob(os.path.join(path, "*_linux.pyx")) + elif sys.platform == "win32": + src_files = glob.glob(os.path.join(path, "*_windows.pyx")) + else: + raise RuntimeError(f"platform is unrecognized: {sys.platform}") + dst_files = [] + for src in src_files: + # Set up a temporary file; it must be under the current directory so + # that atomic moves within the same filesystem can be guaranteed + with tempfile.NamedTemporaryFile(delete=False, dir=".") as f: + shutil.copy2(src, f.name) + f_name = f.name + dst = src.replace("_linux", "").replace("_windows", "") + # atomic move with the destination guaranteed to be overwritten + os.replace(f_name, f"./{dst}") + dst_files.append(dst) + return dst_files + + +dst_files = rename_architecture_specific_files() + + +@atexit.register +def cleanup_dst_files(): + for dst in dst_files: + with contextlib.suppress(FileNotFoundError): + os.remove(dst) + + def do_cythonize(extensions): return cythonize( extensions, From 66aa0380f937c96a69eda567ea8f2a439e3c9f2d Mon Sep 17 00:00:00 2001 From: "Ralf W.
Grosse-Kunstleve" Date: Fri, 7 Feb 2025 22:19:03 -0800 Subject: [PATCH 3/9] CTK 11.8 IR version compatibility --- cuda_bindings/tests/test_nvvm.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/cuda_bindings/tests/test_nvvm.py b/cuda_bindings/tests/test_nvvm.py index 4bf0a3cebb..86a74d4fdd 100644 --- a/cuda_bindings/tests/test_nvvm.py +++ b/cuda_bindings/tests/test_nvvm.py @@ -24,11 +24,11 @@ !0 = !{void ()* @kernel, !"kernel", i32 1} !nvvmir.version = !{!1} -!1 = !{i32 2, i32 0, i32 3, i32 1} +!1 = !{i32 %d, i32 %d, i32 %d, i32 %d} """ # noqa: E501 -# Equivalent to MINIMAL_NVVMIR_TXT -MINIMAL_NVVMIR_BITCODE = base64.b64decode(""" +# Equivalent to MINIMAL_NVVMIR_TXT % (2, 0, 3, 1) +MINIMAL_NVVMIR_BITCODE_2_0_3_1 = base64.b64decode(""" QkPA3jUUAAAFAAAAYgwwJElZvmbu034tRAEyBQAAAAAhDAAAJAEAAAsCIQACAAAAEwAAAAeBI5FB yARJBhAyOZIBhAwlBQgZHgSLYoAMRQJCkgtCZBAyFDgIGEsKMjKISJAUIENGiKUAGTJC5EgOkJEh xFBBUYGM4YPligQZRgaJIAAACwAAADIiyAggZIUEkyGkhASTIeOEoZAUEkyGjAuEZEwQFCMAJQBl @@ -73,7 +73,12 @@ @pytest.fixture(params=["txt", "bitcode"]) def minimal_nvvmir(request): - return MINIMAL_NVVMIR_TXT if request.param == "txt" else MINIMAL_NVVMIR_BITCODE + ir_vers = nvvm.ir_version() + if request.param == "txt": + return MINIMAL_NVVMIR_TXT % ir_vers + if ir_vers[:2] != (3, 0): + pytest.skip(f"MINIMAL_NVVMIR_BITCODE_2_0_3_1 vs {ir_vers} IR version incompatibility") + return MINIMAL_NVVMIR_BITCODE_2_0_3_1 @pytest.fixture(params=[nvvm.compile_program, nvvm.verify_program]) @@ -103,13 +108,13 @@ def get_program_log(prog): def test_nvvm_version(): ver = nvvm.version() assert len(ver) == 2 - assert ver >= (2, 0) + assert ver >= (1, 0) def test_nvvm_ir_version(): ver = nvvm.ir_version() assert len(ver) == 4 - assert ver >= (2, 0, 3, 1) + assert ver >= (1, 0, 0, 0) def test_create_and_destroy(): From 4064fc3dcaf1c11bdc502e7ed9430ef4e85cc845 Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Fri, 7 Feb 2025 22:56:33 -0800 Subject: [PATCH 4/9] Use llvmlite to convert IR txt to bitcode. --- cuda_bindings/requirements.txt | 1 + cuda_bindings/tests/test_nvvm.py | 64 +++++++------------------------- 2 files changed, 14 insertions(+), 51 deletions(-) diff --git a/cuda_bindings/requirements.txt b/cuda_bindings/requirements.txt index 2fdaa17e71..2d78753841 100644 --- a/cuda_bindings/requirements.txt +++ b/cuda_bindings/requirements.txt @@ -7,3 +7,4 @@ setuptools tomli; python_version < "3.11" wheel pywin32; sys_platform == 'win32' +llvmlite diff --git a/cuda_bindings/tests/test_nvvm.py b/cuda_bindings/tests/test_nvvm.py index 86a74d4fdd..8f0e8dfdfd 100644 --- a/cuda_bindings/tests/test_nvvm.py +++ b/cuda_bindings/tests/test_nvvm.py @@ -2,10 +2,10 @@ # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE -import base64 import re from contextlib import contextmanager +import llvmlite.binding import pytest from cuda.bindings import nvvm @@ -27,58 +27,20 @@ !1 = !{i32 %d, i32 %d, i32 %d, i32 %d} """ # noqa: E501 -# Equivalent to MINIMAL_NVVMIR_TXT % (2, 0, 3, 1) -MINIMAL_NVVMIR_BITCODE_2_0_3_1 = base64.b64decode(""" -QkPA3jUUAAAFAAAAYgwwJElZvmbu034tRAEyBQAAAAAhDAAAJAEAAAsCIQACAAAAEwAAAAeBI5FB -yARJBhAyOZIBhAwlBQgZHgSLYoAMRQJCkgtCZBAyFDgIGEsKMjKISJAUIENGiKUAGTJC5EgOkJEh -xFBBUYGM4YPligQZRgaJIAAACwAAADIiyAggZIUEkyGkhASTIeOEoZAUEkyGjAuEZEwQFCMAJQBl -IGCOAAwAAAAAEyZ3sAd4oAd8sAM6aAN3sId0IId0CIc2GId6IIdw2OAS5dAG8KAHdkAHemAHdKAH -dkAHbZAOcaAHeKAHeNAG6YAHeoAHeoAHbZAOcWAHehAHdqAHcWAHbZAOcyAHejAHcqAHcyAHbZAO -dkAHemAHdKAHdkAHbZAOcSAHeKAHcSAHeKAHcSAHeNAG5jAHcqAHcyAHejAHctAG5mAHdKAHdkAH -emAHdNAG9hAHdqAHcWAHehAHdtAG9jAHcqAHcyAHejAHctAG9mAHdKAHdkAHemAHdNAG9hAHcoAH -ehAHcoAHehAHcoAHbeAOcWAHejAHcqAHdkAHGiEMGTFIgzDA8jdVxSCRvyxDIsAIAAAAAAAAAAAA -AEBig0BRlAAAgCwQBgAAADIemAwZEUyQjAkmR8YEQ2IJFMEIQBkAALEYAABtAAAAMwiAHMThHGYU -AT2IQziEw4xCgAd5eAdzmHEM5gAP7RAO9IAOMwxCHsLBHc6hHGYwBT2IQziEgxvMAz3IQz2MAz3M 
-eIx0cAd7CAd5SIdwcAd6cAN2eIdwIIcZzBEO7JAO4TAPbjAP4/AO8FAOMxDEHd4hHNghHcJhHmYw -iTu8gzvQQzm0Azy8gzyEAzvM8BR2YAd7aAc3aIdyaAc3gIdwkIdwYAd2KAd2+AV2eId3gIdfCIdx -GIdymId5mIEs7vAO7uAO9cAO7DADYsihHOShHMyhHOShHNxhHMohHMSBHcphBtaQQznIQzmYQznI -Qzm4wziUQziIAzuUwy+8gzz8gjvUAzuwwwzHaYdwWIdycIN0aAd4YId0GId0oIcZzlMP7gAP8lAO -5JAO40AP4SAO7FAOMyAoHdzBHsJBHtIhHNyBHtzgHOThHeoBHmYYUTiwQzqcgzvMUCR2YAd7aAc3 -YId3eAd4mFFM9JAP8FAOMx5qHsphHOghHd7BHX4BHuShHMwhHfBhBlSFgzjMwzuwQz3QQzn8wjzk -QzuIwzuww4zFCod5mId3GId0CAd6KAdyAAAAAHkgAAAeAAAAYh5IIEOIDBk5GSSQkUDGyMhoIlAI -FDKeGBkhR8iQUQwIBQAABgAAAGtlcm5lbAAAIwgCMIJABCMIhDCCQAwjCAQxwyAEwwwEURiDjAQm -KCE3O7s2lzA3tze6MLq0N7e5UQIjHTc7u7Y0ORe7Mrm5tDe3UYIDAAAAqRgAAAsAAAALCnIoh3eA -B3pYcJhDPbjDOLBDOdDDguYcxqEN6EEewsEd5iEd6CEd3sEdANEQAAAGAAAAB8w8pIM7nAM7lAM9 -oIM8lEM4kMMBAAAAYSAAAAYAAAATBAGGAwEAAAIAAAAHUBDNFGEAAAAAAABxIAAAAwAAADIOECKE -AKACAAAAAAAAAABlDAAAHQAAABIDlOgAAAAAAAAAAAYAAAAFAAAARAAAAAEAAABQAAAAAAAAAFAA -AAABAAAAaAAAAAAAAAALAAAAEwAAAB4AAAARAAAALwAAAAAAAAAAAAAAAQAAAAAAAAAAAAAABgAA -AAAAAAAGAAAA/////wAkAAAAAAAAXQwAAA8AAAASA5RvAAAAAGtlcm5lbDUuMC4xbnZwdHg2NC1u -dmlkaWEtY3VkYW1pbmltYWxfbnZ2bWlyLmxsAAAAAAA= -""") -# To regenerate, pull and start a docker container: -# docker pull centos/llvm-toolset-7-centos7 -# docker run -it centos/llvm-toolset-7-centos7 /bin/bash -# In the docker container, copy MINIMAL_NVVMIR_TXT to a file with name minimal_nvvmir.ll -# Then run: -# llvm-as minimal_nvvmir.ll -o minimal_nvvmir.bc -# Save this to encode.py: -# import base64, sys, textwrap -# bitcode = open(sys.argv[1], "rb").read() -# encoded_bitcode = base64.b64encode(bitcode).decode("ascii") -# wrapped_base64 = "\n".join(textwrap.wrap(encoded_bitcode, width=76)) -# print(wrapped_base64) -# Then run: -# python encode.py minimal_nvvmir.bc +MINIMAL_NVVMIR_CACHE = {} @pytest.fixture(params=["txt", "bitcode"]) def minimal_nvvmir(request): - ir_vers = nvvm.ir_version() - if request.param == "txt": - return MINIMAL_NVVMIR_TXT % ir_vers - if ir_vers[:2] != (3, 0): - 
pytest.skip(f"MINIMAL_NVVMIR_BITCODE_2_0_3_1 vs {ir_vers} IR version incompatibility") - return MINIMAL_NVVMIR_BITCODE_2_0_3_1 + for _ in range(2): + nvvmir = MINIMAL_NVVMIR_CACHE.get(request.param) + if nvvmir is not None: + return nvvmir + txt = MINIMAL_NVVMIR_TXT % nvvm.ir_version() + bitcode = llvmlite.binding.parse_assembly(txt.decode()).as_bitcode() + MINIMAL_NVVMIR_CACHE["txt"] = txt + MINIMAL_NVVMIR_CACHE["bitcode"] = bitcode + raise AssertionError("This code path is meant to be unreachable.") @pytest.fixture(params=[nvvm.compile_program, nvvm.verify_program]) @@ -172,7 +134,7 @@ def test_get_buffer_empty(get_size, get_buffer): @pytest.mark.parametrize("options", [[], ["-opt=0"], ["-opt=3", "-g"]]) -def test_compile_program_with_minimal_nnvm_ir(minimal_nvvmir, options): +def test_compile_program_with_minimal_nvvm_ir(minimal_nvvmir, options): with nvvm_program() as prog: nvvm.add_module_to_program(prog, minimal_nvvmir, len(minimal_nvvmir), "FileNameHere.ll") try: @@ -192,7 +154,7 @@ def test_compile_program_with_minimal_nnvm_ir(minimal_nvvmir, options): @pytest.mark.parametrize("options", [[], ["-opt=0"], ["-opt=3", "-g"]]) -def test_verify_program_with_minimal_nnvm_ir(minimal_nvvmir, options): +def test_verify_program_with_minimal_nvvm_ir(minimal_nvvmir, options): with nvvm_program() as prog: nvvm.add_module_to_program(prog, minimal_nvvmir, len(minimal_nvvmir), "FileNameHere.ll") nvvm.verify_program(prog, len(options), options) From 37a5e0fd3ab7d6562a277e02b6d50a7114e97177 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Mon, 10 Feb 2025 22:57:16 -0800 Subject: [PATCH 5/9] Make llvmlite an optional test dependency by introducing a MINIMAL_NVVMIR_BITCODE_STATIC dict. 
--- cuda_bindings/tests/test_nvvm.py | 95 +++++++++++++++++++++++++++++--- 1 file changed, 87 insertions(+), 8 deletions(-) diff --git a/cuda_bindings/tests/test_nvvm.py b/cuda_bindings/tests/test_nvvm.py index 8f0e8dfdfd..9fdfdaa231 100644 --- a/cuda_bindings/tests/test_nvvm.py +++ b/cuda_bindings/tests/test_nvvm.py @@ -2,14 +2,23 @@ # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +import binascii import re +import textwrap from contextlib import contextmanager -import llvmlite.binding import pytest from cuda.bindings import nvvm +MINIMAL_NVVMIR_FIXTURE_PARAMS = ["txt", "bitcode_static"] +try: + import llvmlite.binding as llvmlite_binding # Optional test dependency. +except ImportError: + llvmlite_binding = None +else: + MINIMAL_NVVMIR_FIXTURE_PARAMS.append("bitcode_dynamic") + MINIMAL_NVVMIR_TXT = b"""\ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" @@ -24,22 +33,92 @@ !0 = !{void ()* @kernel, !"kernel", i32 1} !nvvmir.version = !{!1} -!1 = !{i32 %d, i32 %d, i32 %d, i32 %d} +!1 = !{i32 %d, i32 0, i32 %d, i32 0} """ # noqa: E501 +MINIMAL_NVVMIR_BITCODE_STATIC = { + (2, 3): # (major, debug_major) + "4243c0de3514000005000000620c30244a59be669dfbb4bf0b51804c01000000210c000080010000" + "0b02210002000000160000000781239141c80449061032399201840c250508191e048b62800c4502" + "42920b42641032143808184b0a3232884870c421234412878c1041920264c808b1142043468820c9" + "01323284182a282a90317cb05c9120c3c8000000892000000b0000003222c80820624600212b2498" + "0c212524980c19270c85a4906032645c20246382a01801300128030173046000132677b00778a007" + "7cb0033a680377b0877420877408873618877a208770d8e012e5d006f0a0077640077a600774a007" + "7640076d900e71a00778a00778d006e980077a80077a80076d900e7160077a100776a0077160076d" + "900e7320077a300772a0077320076d900e7640077a600774a0077640076d900e71200778a0077120" + 
"0778a00771200778d006e6300772a0077320077a300772d006e6600774a0077640077a600774d006" + "f6100776a0077160077a100776d006f6300772a0077320077a300772d006f6600774a0077640077a" + "600774d006f610077280077a10077280077a10077280076de00e7160077a300772a0077640071a21" + "4c0e11de9c2e4fbbcfbe211560040000000000000000000000000620b141a0286100004016080000" + "06000000321e980c19114c908c092647c60443620914c10840190000b1180000ac0000003308801c" + "c4e11c6614013d88433884c38c4280077978077398710ce6000fed100ef4800e330c421ec2c11dce" + "a11c6630053d88433884831bcc033dc8433d8c033dcc788c7470077b08077948877070077a700376" + "788770208719cc110eec900ee1300f6e300fe3f00ef0500e3310c41dde211cd8211dc2611e663089" + "3bbc833bd04339b4033cbc833c84033bccf0147660077b6807376887726807378087709087706007" + "76280776f8057678877780875f08877118877298877998812ceef00eeee00ef5c00eec300362c8a1" + "1ce4a11ccca11ce4a11cdc611cca211cc4811dca6106d6904339c84339984339c84339b8c3389443" + "3888033b94c32fbc833cfc823bd4033bb0c30cc7698770588772708374680778608774188774a087" + "19ce530fee000ff2500ee4900ee3400fe1200eec500e3320281ddcc11ec2411ed2211cdc811edce0" + "1ce4e11dea011e66185138b0433a9c833bcc50247660077b68073760877778077898514cf4900ff0" + "500e331e6a1eca611ce8211ddec11d7e011ee4a11ccc211df0610654858338ccc33bb0433dd04339" + "fcc23ce4433b88c33bb0c38cc50a877998877718877408077a28077298815ce3100eecc00ee5500e" + "f33023c1d2411ee4e117d8e11dde011e6648193bb0833db4831b84c3388c4339ccc33cb8c139c8c3" + "3bd4033ccc48b471080776600771088771588719dbc60eec600fede006f0200fe5300fe5200ff650" + "0e6e100ee3300ee5300ff3e006e9e00ee4500ef83023e2ec611cc2811dd8e117ec211de6211dc421" + "1dd8211de8211f66209d3bbc433db80339948339cc58bc7070077778077a08077a488777708719cb" + "e70eef300fe1e00ee9400fe9a00fe530c3010373a8077718875f988770708774a08774d087729881" + "844139e0c338b0433d904339cc40c4a01dcaa11de0411edec11c662463300ee1c00eec300fe9400f" + "e5000000792000001e000000721e482043880c19097232482023818c9191d144a01028643c313242" + 
"8e9021a318100a00060000006b65726e656c0000230802308240042308843082400c23080431c320" + "04c30c045118858c04262821373bbb36973037b737ba30bab437b7b95102231d373bbbb6343917bb" + "32b9b9b437b7518203000000a9180000250000000b0a7228877780077a587098433db8c338b04339" + "d0c382e61cc6a10de8411ec2c11de6211de8211ddec11d1634e3600ee7500fe1200fe4400fe1200f" + "e7500ef4b08081077928877060077678877108077a28077258709cc338b4013ba4833d94c3026b1c" + "d8211cdce11cdc201ce4611cdc201ce8811ec2611cd0a11cc8611cc2811dd861c1010ff4200fe150" + "0ff4800e00000000d11000000600000007cc3ca4833b9c033b94033da0833c94433890c301000000" + "6120000006000000130481860301000002000000075010cd14610000000000007120000003000000" + "320e10228400fc020000000000000000650c00001f000000120394f0000000000300000006000000" + "060000004c000000010000005800000000000000580000000100000070000000000000000c000000" + "130000001f0000000800000006000000000000007000000000000000000000000100000000000000" + "00000000060000000000000006000000ffffffff00240000000000005d0c00000d00000012039467" + "000000006b65726e656c31352e302e376e7670747836342d6e76696469612d637564613c73747269" + "6e673e0000000000", +} + MINIMAL_NVVMIR_CACHE = {} -@pytest.fixture(params=["txt", "bitcode"]) +@pytest.fixture(params=MINIMAL_NVVMIR_FIXTURE_PARAMS) def minimal_nvvmir(request): for _ in range(2): - nvvmir = MINIMAL_NVVMIR_CACHE.get(request.param) - if nvvmir is not None: + nvvmir = MINIMAL_NVVMIR_CACHE.get(request.param, -1) + if nvvmir != -1: + if nvvmir is None: + pytest.skip(f"UNAVAILABLE: {request.param}") return nvvmir - txt = MINIMAL_NVVMIR_TXT % nvvm.ir_version() - bitcode = llvmlite.binding.parse_assembly(txt.decode()).as_bitcode() + major, minor, debug_major, debug_minor = nvvm.ir_version() + txt = MINIMAL_NVVMIR_TXT % (major, debug_major) + if llvmlite_binding is None: + bitcode_dynamic = None + else: + bitcode_dynamic = llvmlite_binding.parse_assembly(txt.decode()).as_bitcode() + bitcode_static = MINIMAL_NVVMIR_BITCODE_STATIC.get((major, debug_major)) + if 
bitcode_static is not None: + bitcode_static = binascii.unhexlify(bitcode_static) MINIMAL_NVVMIR_CACHE["txt"] = txt - MINIMAL_NVVMIR_CACHE["bitcode"] = bitcode + MINIMAL_NVVMIR_CACHE["bitcode_dynamic"] = bitcode_dynamic + MINIMAL_NVVMIR_CACHE["bitcode_static"] = bitcode_static + if bitcode_static is None: + if bitcode_dynamic is None: + raise RuntimeError("Please `pip install llvmlite` to generate `bitcode_static`") + bitcode_hex = binascii.hexlify(bitcode_dynamic).decode("ascii") + print("\n\nMINIMAL_NVVMIR_BITCODE_STATIC = { # PLEASE ADD TO test_nvvm.py") + print(f" ({major}, {debug_major}): # (major, debug_major)") + lines = textwrap.wrap(bitcode_hex, width=80) + for line in lines[:-1]: + print(f' "{line}"') + print(f' "{lines[-1]}",') + print("}\n", flush=True) raise AssertionError("This code path is meant to be unreachable.") From 03cb2ee2a591f06a6e79344c938d885a22099d1a Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Mon, 10 Feb 2025 23:26:36 -0800 Subject: [PATCH 6/9] Add MINIMAL_NVVMIR_BITCODE_STATIC entry for CTK 11.8 --- cuda_bindings/tests/test_nvvm.py | 46 ++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/cuda_bindings/tests/test_nvvm.py b/cuda_bindings/tests/test_nvvm.py index 9fdfdaa231..e356a077d1 100644 --- a/cuda_bindings/tests/test_nvvm.py +++ b/cuda_bindings/tests/test_nvvm.py @@ -37,6 +37,52 @@ """ # noqa: E501 MINIMAL_NVVMIR_BITCODE_STATIC = { + (1, 3): # (major, debug_major) + "4243c0de3514000005000000620c30244a59be669dfbb4bf0b51804c01000000210c00007f010000" + "0b02210002000000160000000781239141c80449061032399201840c250508191e048b62800c4502" + "42920b42641032143808184b0a3232884870c421234412878c1041920264c808b1142043468820c9" + "01323284182a282a90317cb05c9120c3c8000000892000000b0000003222c80820624600212b2498" + "0c212524980c19270c85a4906032645c20246382a01801300128030173046000132677b00778a007" + "7cb0033a680377b0877420877408873618877a208770d8e012e5d006f0a0077640077a600774a007" + 
"7640076d900e71a00778a00778d006e980077a80077a80076d900e7160077a100776a0077160076d" + "900e7320077a300772a0077320076d900e7640077a600774a0077640076d900e71200778a0077120" + "0778a00771200778d006e6300772a0077320077a300772d006e6600774a0077640077a600774d006" + "f6100776a0077160077a100776d006f6300772a0077320077a300772d006f6600774a0077640077a" + "600774d006f610077280077a10077280077a10077280076de00e7160077a300772a0077640071a21" + "4c0e11de9c2e4fbbcfbe211560040000000000000000000000000620b141a0e86000004016080000" + "06000000321e980c19114c908c092647c6044362098c009401000000b1180000ac0000003308801c" + "c4e11c6614013d88433884c38c4280077978077398710ce6000fed100ef4800e330c421ec2c11dce" + "a11c6630053d88433884831bcc033dc8433d8c033dcc788c7470077b08077948877070077a700376" + "788770208719cc110eec900ee1300f6e300fe3f00ef0500e3310c41dde211cd8211dc2611e663089" + "3bbc833bd04339b4033cbc833c84033bccf0147660077b6807376887726807378087709087706007" + "76280776f8057678877780875f08877118877298877998812ceef00eeee00ef5c00eec300362c8a1" + "1ce4a11ccca11ce4a11cdc611cca211cc4811dca6106d6904339c84339984339c84339b8c3389443" + "3888033b94c32fbc833cfc823bd4033bb0c30cc7698770588772708374680778608774188774a087" + "19ce530fee000ff2500ee4900ee3400fe1200eec500e3320281ddcc11ec2411ed2211cdc811edce0" + "1ce4e11dea011e66185138b0433a9c833bcc50247660077b68073760877778077898514cf4900ff0" + "500e331e6a1eca611ce8211ddec11d7e011ee4a11ccc211df0610654858338ccc33bb0433dd04339" + "fcc23ce4433b88c33bb0c38cc50a877998877718877408077a28077298815ce3100eecc00ee5500e" + "f33023c1d2411ee4e117d8e11dde011e6648193bb0833db4831b84c3388c4339ccc33cb8c139c8c3" + "3bd4033ccc48b471080776600771088771588719dbc60eec600fede006f0200fe5300fe5200ff650" + "0e6e100ee3300ee5300ff3e006e9e00ee4500ef83023e2ec611cc2811dd8e117ec211de6211dc421" + "1dd8211de8211f66209d3bbc433db80339948339cc58bc7070077778077a08077a488777708719cb" + "e70eef300fe1e00ee9400fe9a00fe530c3010373a8077718875f988770708774a08774d087729881" + 
"844139e0c338b0433d904339cc40c4a01dcaa11de0411edec11c662463300ee1c00eec300fe9400f" + "e5000000792000001d000000721e482043880c19097232482023818c9191d144a01028643c313242" + "8e9021a318100a00060000006b65726e656c0000230802308240042308843082400c330c4230cc40" + "0c4441c84860821272b3b36b730973737ba30ba34b7b739b1b2528d271b3b36b4b9373b12b939b4b" + "7b731b2530000000a9180000250000000b0a7228877780077a587098433db8c338b04339d0c382e6" + "1cc6a10de8411ec2c11de6211de8211ddec11d1634e3600ee7500fe1200fe4400fe1200fe7500ef4" + "b08081077928877060077678877108077a28077258709cc338b4013ba4833d94c3026b1cd8211cdc" + "e11cdc201ce4611cdc201ce8811ec2611cd0a11cc8611cc2811dd861c1010ff4200fe1500ff4800e" + "00000000d11000000600000007cc3ca4833b9c033b94033da0833c94433890c30100000061200000" + "06000000130481860301000002000000075010cd14610000000000007120000003000000320e1022" + "8400fb020000000000000000650c00001f000000120394f000000000030000000600000006000000" + "4c000000010000005800000000000000580000000100000070000000000000000c00000013000000" + "1f000000080000000600000000000000700000000000000000000000010000000000000000000000" + "060000000000000006000000ffffffff00240000000000005d0c00000d0000001203946700000000" + "6b65726e656c31352e302e376e7670747836342d6e76696469612d637564613c737472696e673e00" + "00000000", (2, 3): # (major, debug_major) "4243c0de3514000005000000620c30244a59be669dfbb4bf0b51804c01000000210c000080010000" "0b02210002000000160000000781239141c80449061032399201840c250508191e048b62800c4502" From d388b624ff243941b1dc1f6f6b8650d3c30f1f98 Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Mon, 10 Feb 2025 23:31:52 -0800 Subject: [PATCH 7/9] Comment out llvmlite in requirements.txt --- cuda_bindings/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda_bindings/requirements.txt b/cuda_bindings/requirements.txt index 2d78753841..eeffb93cb9 100644 --- a/cuda_bindings/requirements.txt +++ b/cuda_bindings/requirements.txt @@ -7,4 +7,4 @@ setuptools tomli; python_version < "3.11" wheel pywin32; sys_platform == 'win32' -llvmlite +# llvmlite # Uncomment to generate MINIMAL_NVVMIR_BITCODE_STATIC for test_nvvm.py From e1950d5efeaa4c0bb3f739ae566cc26c10f50696 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Tue, 11 Feb 2025 10:19:47 -0800 Subject: [PATCH 8/9] Add "(see PR #443)" --- cuda_bindings/requirements.txt | 2 +- cuda_bindings/tests/test_nvvm.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cuda_bindings/requirements.txt b/cuda_bindings/requirements.txt index eeffb93cb9..1ad127ac28 100644 --- a/cuda_bindings/requirements.txt +++ b/cuda_bindings/requirements.txt @@ -7,4 +7,4 @@ setuptools tomli; python_version < "3.11" wheel pywin32; sys_platform == 'win32' -# llvmlite # Uncomment to generate MINIMAL_NVVMIR_BITCODE_STATIC for test_nvvm.py +# llvmlite # Uncomment to generate MINIMAL_NVVMIR_BITCODE_STATIC for test_nvvm.py (see PR #443). 
diff --git a/cuda_bindings/tests/test_nvvm.py b/cuda_bindings/tests/test_nvvm.py index e356a077d1..753fdd9702 100644 --- a/cuda_bindings/tests/test_nvvm.py +++ b/cuda_bindings/tests/test_nvvm.py @@ -156,7 +156,7 @@ def minimal_nvvmir(request): MINIMAL_NVVMIR_CACHE["bitcode_static"] = bitcode_static if bitcode_static is None: if bitcode_dynamic is None: - raise RuntimeError("Please `pip install llvmlite` to generate `bitcode_static`") + raise RuntimeError("Please `pip install llvmlite` to generate `bitcode_static` (see PR #443)") bitcode_hex = binascii.hexlify(bitcode_dynamic).decode("ascii") print("\n\nMINIMAL_NVVMIR_BITCODE_STATIC = { # PLEASE ADD TO test_nvvm.py") print(f" ({major}, {debug_major}): # (major, debug_major)") From 236fe897440a80933739e1c11d6329128f5bb493 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Tue, 11 Feb 2025 12:21:40 -0800 Subject: [PATCH 9/9] Make minimal_nvvmir fixture implementation slightly more readable. --- cuda_bindings/tests/test_nvvm.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cuda_bindings/tests/test_nvvm.py b/cuda_bindings/tests/test_nvvm.py index 753fdd9702..db8481a925 100644 --- a/cuda_bindings/tests/test_nvvm.py +++ b/cuda_bindings/tests/test_nvvm.py @@ -136,12 +136,15 @@ @pytest.fixture(params=MINIMAL_NVVMIR_FIXTURE_PARAMS) def minimal_nvvmir(request): - for _ in range(2): + for pass_counter in range(2): nvvmir = MINIMAL_NVVMIR_CACHE.get(request.param, -1) if nvvmir != -1: if nvvmir is None: pytest.skip(f"UNAVAILABLE: {request.param}") return nvvmir + if pass_counter: + raise AssertionError("This code path is meant to be unreachable.") + # Build cache entries, then try again (above). 
major, minor, debug_major, debug_minor = nvvm.ir_version() txt = MINIMAL_NVVMIR_TXT % (major, debug_major) if llvmlite_binding is None: @@ -165,7 +168,6 @@ def minimal_nvvmir(request): print(f' "{line}"') print(f' "{lines[-1]}",') print("}\n", flush=True) - raise AssertionError("This code path is meant to be unreachable.") @pytest.fixture(params=[nvvm.compile_program, nvvm.verify_program])