From dc3222ddb41f442d599eafdab53d923987f176d7 Mon Sep 17 00:00:00 2001
From: Abhilash Majumder
Date: Tue, 26 Aug 2025 19:09:04 +0530
Subject: [PATCH 01/42] nvvm ir integration

---
 cuda_core/cuda/core/experimental/_module.py  | 18 +++++
 cuda_core/cuda/core/experimental/_program.py | 65 +++++++++++++++++--
 .../core/experimental/_utils/cuda_utils.pyx  | 27 +++++++-
 3 files changed, 103 insertions(+), 7 deletions(-)

diff --git a/cuda_core/cuda/core/experimental/_module.py b/cuda_core/cuda/core/experimental/_module.py
index 63bb6ff260..8b3146fee3 100644
--- a/cuda_core/cuda/core/experimental/_module.py
+++ b/cuda_core/cuda/core/experimental/_module.py
@@ -544,6 +544,24 @@ def from_ltoir(module: Union[bytes, str], *, name: str = "", symbol_mapping: Opt
         them (default to no mappings).
         """
         return ObjectCode._init(module, "ltoir", name=name, symbol_mapping=symbol_mapping)
+
+    @staticmethod
+    def from_nvvm(module: Union[bytes, str], *, name: str = "", symbol_mapping: Optional[dict] = None) -> "ObjectCode":
+        """Create an :class:`ObjectCode` instance from an existing NVVM IR.
+
+        Parameters
+        ----------
+        module : Union[bytes, str]
+            Either a bytes object containing the in-memory NVVM IR code to load, or
+            a file path string pointing to the on-disk NVVM IR file to load.
+        name : Optional[str]
+            A human-readable identifier representing this code object.
+        symbol_mapping : Optional[dict]
+            A dictionary specifying how the unmangled symbol names (as keys)
+            should be mapped to the mangled names before trying to retrieve
+            them (default to no mappings).
+        """
+        return ObjectCode._init(module, "nvvm", name=name, symbol_mapping=symbol_mapping)
 
     @staticmethod
     def from_fatbin(
diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py
index 3df8894d50..7782cfe5ed 100644
--- a/cuda_core/cuda/core/experimental/_program.py
+++ b/cuda_core/cuda/core/experimental/_program.py
@@ -24,6 +24,7 @@
     is_nested_sequence,
     is_sequence,
     nvrtc,
+    nvvm,
 )
 
 
@@ -370,22 +371,26 @@ class Program:
     code : Any
         String of the CUDA Runtime Compilation program.
     code_type : Any
-        String of the code type. Currently ``"ptx"`` and ``"c++"`` are supported.
+        String of the code type. Currently ``"ptx"``, ``"c++"``, and ``"nvvm"`` are supported.
     options : ProgramOptions, optional
         A ProgramOptions object to customize the compilation process.
         See :obj:`ProgramOptions` for more information.
     """

     class _MembersNeededForFinalize:
-        __slots__ = "handle"
+        __slots__ = "handle", "backend"

-        def __init__(self, program_obj, handle):
+        def __init__(self, program_obj, handle, backend="NVRTC"):
             self.handle = handle
+            self.backend = backend
             weakref.finalize(program_obj, self.close)

         def close(self):
             if self.handle is not None:
-                handle_return(nvrtc.nvrtcDestroyProgram(self.handle))
+                if self.backend == "NVRTC":
+                    handle_return(nvrtc.nvrtcDestroyProgram(self.handle))
+                elif self.backend == "NVVM":
+                    handle_return(nvvm.destroy_program(self.handle))
                 self.handle = None

     __slots__ = ("__weakref__", "_mnff", "_backend", "_linker", "_options")
@@ -402,6 +407,7 @@ def __init__(self, code, code_type, options: ProgramOptions = None):
 
             # TODO: allow tuples once NVIDIA/cuda-python#72 is resolved
             self._mnff.handle = handle_return(nvrtc.nvrtcCreateProgram(code.encode(), options._name, 0, [], []))
+            self._mnff.backend = "NVRTC"
             self._backend = "NVRTC"
             self._linker = None
 
@@ -411,8 +417,21 @@ def __init__(self, code, code_type, options: ProgramOptions = None):
                 ObjectCode._init(code.encode(), code_type), options=self._translate_program_options(options)
             )
             self._backend = self._linker.backend
+
+        elif code_type == "nvvm":
+            if isinstance(code, str):
+                code = code.encode('utf-8')
+            elif not isinstance(code, (bytes, bytearray)):
+                raise TypeError("NVVM IR code must be provided as str, bytes, or bytearray")
+
+            self._mnff.handle = nvvm.create_program()
+            self._mnff.backend = "NVVM"
+            nvvm.add_module_to_program(self._mnff.handle, code, len(code), options._name.decode())
+            self._backend = "NVVM"
+            self._linker = None
+
         else:
-            supported_code_types = ("c++", "ptx")
+            supported_code_types = ("c++", "ptx", "nvvm")
             assert code_type not in supported_code_types, f"{code_type=}"
             raise RuntimeError(f"Unsupported {code_type=} ({supported_code_types=})")
 
@@ -513,6 +532,42 @@ def compile(self, target_type, name_expressions=(), logs=None):
 
             return ObjectCode._init(data, target_type, symbol_mapping=symbol_mapping, name=self._options.name)
 
+        elif self._backend == "NVVM":
+            if target_type != "ptx":
+                raise ValueError(f'NVVM backend only supports target_type="ptx", got "{target_type}"')
+
+            nvvm_options = []
+            if self._options.arch is not None:
+                arch = self._options.arch
+                if arch.startswith("sm_"):
+                    arch = f"compute_{arch[3:]}"
+                nvvm_options.append(f"-arch={arch}")
+            else:
+                major, minor = Device().compute_capability
+                nvvm_options.append(f"-arch=compute_{major}{minor}")
+
+            if self._options.debug:
+                nvvm_options.append("-g")
+            if self._options.device_code_optimize is False:
+                nvvm_options.append("-opt=0")
+            elif self._options.device_code_optimize is True:
+                nvvm_options.append("-opt=3")
+
+            nvvm.compile_program(self._mnff.handle, len(nvvm_options), nvvm_options)
+
+            size = nvvm.get_compiled_result_size(self._mnff.handle)
+            data = bytearray(size)
+            nvvm.get_compiled_result(self._mnff.handle, data)
+
+            if logs is not None:
+                logsize = nvvm.get_program_log_size(self._mnff.handle)
+                if logsize > 1:
+                    log = bytearray(logsize)
+                    nvvm.get_program_log(self._mnff.handle, log)
+                    logs.write(log.decode("utf-8", errors="backslashreplace"))
+
+            return ObjectCode._init(data, target_type, name=self._options.name)
+
         supported_backends = ("nvJitLink", "driver")
         if self._backend not in supported_backends:
             raise ValueError(f'Unsupported backend="{self._backend}" ({supported_backends=})')
diff --git a/cuda_core/cuda/core/experimental/_utils/cuda_utils.pyx b/cuda_core/cuda/core/experimental/_utils/cuda_utils.pyx
index 86588f733c..a51b0c766c 100644
--- a/cuda_core/cuda/core/experimental/_utils/cuda_utils.pyx +++ b/cuda_core/cuda/core/experimental/_utils/cuda_utils.pyx @@ -9,11 +9,11 @@ from collections.abc import Sequence from typing import Callable try: - from cuda.bindings import driver, nvrtc, runtime + from cuda.bindings import driver, nvrtc, nvvm, runtime except ImportError: from cuda import cuda as driver from cuda import cudart as runtime - from cuda import nvrtc + from cuda import nvrtc, nvvm from cuda.core.experimental._utils.driver_cu_result_explanations import DRIVER_CU_RESULT_EXPLANATIONS from cuda.core.experimental._utils.runtime_cuda_error_explanations import RUNTIME_CUDA_ERROR_EXPLANATIONS @@ -27,6 +27,10 @@ class NVRTCError(CUDAError): pass +class NVVMError(CUDAError): + pass + + ComputeCapability = namedtuple("ComputeCapability", ("major", "minor")) @@ -55,6 +59,7 @@ def _reduce_3_tuple(t: tuple): cdef object _DRIVER_SUCCESS = driver.CUresult.CUDA_SUCCESS cdef object _RUNTIME_SUCCESS = runtime.cudaError_t.cudaSuccess cdef object _NVRTC_SUCCESS = nvrtc.nvrtcResult.NVRTC_SUCCESS +cdef object _NVVM_SUCCESS = nvvm.Result.SUCCESS cpdef inline int _check_driver_error(error) except?-1: @@ -103,6 +108,22 @@ cpdef inline int _check_nvrtc_error(error, handle=None) except?-1: raise NVRTCError(err) +cpdef inline int _check_nvvm_error(error, handle=None) except?-1: + if error == _NVVM_SUCCESS: + return 0 + err = f"{error}: {nvvm.get_error_string(error)}" + if handle is not None: + try: + logsize = nvvm.get_program_log_size(handle) + if logsize > 1: + log = bytearray(logsize) + nvvm.get_program_log(handle, log) + err += f", compilation log:\n\n{log.decode('utf-8', errors='backslashreplace')}" + except: + pass # Log extraction failed, but we still have the error + raise NVVMError(err) + + cdef inline int _check_error(error, handle=None) except?-1: if isinstance(error, driver.CUresult): return _check_driver_error(error) @@ -110,6 +131,8 @@ cdef inline int _check_error(error, handle=None) except?-1: return _check_runtime_error(error) elif isinstance(error, nvrtc.nvrtcResult): return _check_nvrtc_error(error, handle=handle) + elif isinstance(error, nvvm.Result): + return _check_nvvm_error(error, handle=handle) else: raise RuntimeError(f"Unknown error type: {error}") From 028a2948e96d5b5b2c88066a1e4f0bfd8ef3c328 Mon Sep 17 00:00:00 2001 From: Abhilash Majumder Date: Wed, 27 Aug 2025 20:10:18 +0530 Subject: [PATCH 02/42] add test --- .../core/experimental/_utils/cuda_utils.pyx | 4 +- cuda_core/tests/test_program.py | 72 ++++++++++++++++++- 2 files changed, 73 insertions(+), 3 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_utils/cuda_utils.pyx b/cuda_core/cuda/core/experimental/_utils/cuda_utils.pyx index a51b0c766c..01d39e4f1f 100644 --- a/cuda_core/cuda/core/experimental/_utils/cuda_utils.pyx +++ b/cuda_core/cuda/core/experimental/_utils/cuda_utils.pyx @@ -119,8 +119,8 @@ cpdef inline int _check_nvvm_error(error, handle=None) except?-1: log = bytearray(logsize) nvvm.get_program_log(handle, log) err += f", compilation log:\n\n{log.decode('utf-8', errors='backslashreplace')}" - except: - pass # Log extraction failed, but we still have the error + except Exception as e: + raise NVVMError(err) from e raise NVVMError(err) diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py index e5c873f1fb..8c8b29ecfe 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -12,6 +12,37 @@ is_culink_backend = _linker._decide_nvjitlink_or_driver() +nvvm_ir = """ +target triple = 
"nvptx64-unknown-cuda" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" + +define i32 @ave(i32 %a, i32 %b) { +entry: + %add = add nsw i32 %a, %b + %div = sdiv i32 %add, 2 + ret i32 %div +} + +define void @simple(i32* %data) { +entry: + %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() + %1 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() + %mul = mul i32 %0, %1 + %2 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() + %add = add i32 %mul, %2 + %call = call i32 @ave(i32 %add, i32 %add) + %idxprom = sext i32 %add to i64 + store i32 %call, i32* %data, align 4 + ret void +} + +declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() nounwind readnone + +declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x() nounwind readnone + +declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() nounwind readnone + +""" @pytest.fixture(scope="module") def ptx_code_object(): @@ -92,7 +123,7 @@ def test_program_init_valid_code_type(): def test_program_init_invalid_code_type(): code = "goto 100" with pytest.raises( - RuntimeError, match=r"^Unsupported code_type='fortran' \(supported_code_types=\('c\+\+', 'ptx'\)\)$" + RuntimeError, match=r"^Unsupported code_type='fortran' \(supported_code_types=\('c\+\+', 'ptx', 'nvvm'\)\)$" ): Program(code, "FORTRAN") @@ -150,3 +181,42 @@ def test_program_close(): program = Program(code, "c++") program.close() assert program.handle is None + +nvvm_options = [ + ProgramOptions(name="nvvm_test"), + ProgramOptions(device_code_optimize=True), + ProgramOptions(arch="sm_90"), + ProgramOptions(debug=True), +] + +@pytest.mark.parametrize("options", nvvm_options) +def test_nvvm_program_with_various_options(init_cuda, options): + program = Program(nvvm_ir, "nvvm", options) + assert program.backend == "NVVM" + program.compile("ptx") + program.close() + assert program.handle is None + + +def test_nvvm_program_creation(): + program = Program(nvvm_ir, "nvvm") + assert program.backend == "NVVM" + assert program.handle is not None + + +def test_nvvm_compile_invalid_target(): + program = Program(nvvm_ir, "nvvm") + with pytest.raises(ValueError): + program.compile("cubin") + + +def test_nvvm_compile_valid_target_type(init_cuda): + program = Program(nvvm_ir, "nvvm", options={"name": "nvvm_test"}) + ptx_object_code = program.compile("ptx") + assert isinstance(ptx_object_code, ObjectCode) + assert ptx_object_code.name == "nvvm_test" + + ptx_kernel = ptx_object_code.get_kernel("nvvm_kernel") + assert isinstance(ptx_kernel, Kernel) + + program.close() From a418f4bab98b4278b354c18841afabaca6674abc Mon Sep 17 00:00:00 2001 From: Abhilash Majumder Date: Mon, 1 Sep 2025 11:49:34 +0530 Subject: [PATCH 03/42] remove nvvm error handling from utils --- .../core/experimental/_utils/cuda_utils.pyx | 26 ------------------- 1 file changed, 26 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_utils/cuda_utils.pyx b/cuda_core/cuda/core/experimental/_utils/cuda_utils.pyx index 01d39e4f1f..af7d212642 100644 --- a/cuda_core/cuda/core/experimental/_utils/cuda_utils.pyx +++ b/cuda_core/cuda/core/experimental/_utils/cuda_utils.pyx @@ -26,11 +26,6 @@ class CUDAError(Exception): class NVRTCError(CUDAError): pass - -class NVVMError(CUDAError): - pass - - ComputeCapability = namedtuple("ComputeCapability", ("major", "minor")) @@ -59,8 +54,6 @@ def _reduce_3_tuple(t: tuple): cdef object _DRIVER_SUCCESS = driver.CUresult.CUDA_SUCCESS cdef object _RUNTIME_SUCCESS = runtime.cudaError_t.cudaSuccess cdef object _NVRTC_SUCCESS = 
nvrtc.nvrtcResult.NVRTC_SUCCESS -cdef object _NVVM_SUCCESS = nvvm.Result.SUCCESS - cpdef inline int _check_driver_error(error) except?-1: if error == _DRIVER_SUCCESS: @@ -107,23 +100,6 @@ cpdef inline int _check_nvrtc_error(error, handle=None) except?-1: err += f", compilation log:\n\n{log.decode('utf-8', errors='backslashreplace')}" raise NVRTCError(err) - -cpdef inline int _check_nvvm_error(error, handle=None) except?-1: - if error == _NVVM_SUCCESS: - return 0 - err = f"{error}: {nvvm.get_error_string(error)}" - if handle is not None: - try: - logsize = nvvm.get_program_log_size(handle) - if logsize > 1: - log = bytearray(logsize) - nvvm.get_program_log(handle, log) - err += f", compilation log:\n\n{log.decode('utf-8', errors='backslashreplace')}" - except Exception as e: - raise NVVMError(err) from e - raise NVVMError(err) - - cdef inline int _check_error(error, handle=None) except?-1: if isinstance(error, driver.CUresult): return _check_driver_error(error) @@ -131,8 +107,6 @@ cdef inline int _check_error(error, handle=None) except?-1: return _check_runtime_error(error) elif isinstance(error, nvrtc.nvrtcResult): return _check_nvrtc_error(error, handle=handle) - elif isinstance(error, nvvm.Result): - return _check_nvvm_error(error, handle=handle) else: raise RuntimeError(f"Unknown error type: {error}") From 92af3dd5c53146e2734262528ae9ecde95c06322 Mon Sep 17 00:00:00 2001 From: Abhilash Majumder Date: Mon, 1 Sep 2025 15:20:41 +0530 Subject: [PATCH 04/42] use version dependent nvvm inclusion --- cuda_core/cuda/core/experimental/_program.py | 65 +++++++++++++++++-- .../core/experimental/_utils/cuda_utils.pyx | 4 +- 2 files changed, 63 insertions(+), 6 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py index 7782cfe5ed..63c903736d 100644 --- a/cuda_core/cuda/core/experimental/_program.py +++ b/cuda_core/cuda/core/experimental/_program.py @@ -21,12 +21,63 @@ check_or_create_options, driver, handle_return, + get_binding_version, is_nested_sequence, is_sequence, - nvrtc, - nvvm, + nvrtc ) +_nvvm_module = None +_nvvm_import_attempted = False + + +def _get_nvvm_module(): + """ + Handles the import of NVVM module with version and availability checks. + NVVM bindings were added in CUDA 12.9.0, so we need to handle cases where: + 1. cuda.bindings is not new enough (< 12.9.0) + 2. libnvvm is not found in the Python environment + + Returns: + The nvvm module if available and working + + Raises: + ImportError: If NVVM is not available due to version or library issues + """ + global _nvvm_module, _nvvm_import_attempted + + if _nvvm_import_attempted: + if _nvvm_module is None: + raise ImportError("NVVM module is not available (previous import attempt failed)") + return _nvvm_module + + _nvvm_import_attempted = True + + try: + version = get_binding_version() + if version < (12, 9): + raise ImportError( + f"NVVM bindings require cuda-bindings >= 12.9.0, but found {version[0]}.{version[1]}.x. " + "Please update cuda-bindings to use NVVM features." + ) + + from cuda.bindings import nvvm + try: + from cuda.bindings._internal.nvvm import _inspect_function_pointers + _inspect_function_pointers() + except Exception as e: + raise ImportError( + "NVVM library (libnvvm) is not available in this Python environment. 
" + f"Original error: {e}" + ) + + _nvvm_module = nvvm + return _nvvm_module + + except ImportError as e: + _nvvm_module = None + raise e + def _process_define_macro_inner(formatted_options, macro): if isinstance(macro, str): @@ -390,7 +441,11 @@ def close(self): if self.backend == "NVRTC": handle_return(nvrtc.nvrtcDestroyProgram(self.handle)) elif self.backend == "NVVM": - handle_return(nvvm.destroy_program(self.handle)) + try: + nvvm = _get_nvvm_module() + handle_return(nvvm.destroy_program(self.handle)) + except ImportError as e: + pass self.handle = None __slots__ = ("__weakref__", "_mnff", "_backend", "_linker", "_options") @@ -424,6 +479,7 @@ def __init__(self, code, code_type, options: ProgramOptions = None): elif not isinstance(code, (bytes, bytearray)): raise TypeError("NVVM IR code must be provided as str, bytes, or bytearray") + nvvm = _get_nvvm_module() self._mnff.handle = nvvm.create_program() self._mnff.backend = "NVVM" nvvm.add_module_to_program(self._mnff.handle, code, len(code), options._name.decode()) @@ -552,7 +608,8 @@ def compile(self, target_type, name_expressions=(), logs=None): nvvm_options.append("-opt=0") elif self._options.device_code_optimize is True: nvvm_options.append("-opt=3") - + + nvvm = _get_nvvm_module() nvvm.compile_program(self._mnff.handle, len(nvvm_options), nvvm_options) size = nvvm.get_compiled_result_size(self._mnff.handle) diff --git a/cuda_core/cuda/core/experimental/_utils/cuda_utils.pyx b/cuda_core/cuda/core/experimental/_utils/cuda_utils.pyx index af7d212642..4a9d61aee9 100644 --- a/cuda_core/cuda/core/experimental/_utils/cuda_utils.pyx +++ b/cuda_core/cuda/core/experimental/_utils/cuda_utils.pyx @@ -9,11 +9,11 @@ from collections.abc import Sequence from typing import Callable try: - from cuda.bindings import driver, nvrtc, nvvm, runtime + from cuda.bindings import driver, nvrtc, runtime except ImportError: from cuda import cuda as driver from cuda import cudart as runtime - from cuda import nvrtc, nvvm + from cuda import nvrtc from cuda.core.experimental._utils.driver_cu_result_explanations import DRIVER_CU_RESULT_EXPLANATIONS from cuda.core.experimental._utils.runtime_cuda_error_explanations import RUNTIME_CUDA_ERROR_EXPLANATIONS From bdd167111e88b6a45ab4d36df66713a2724094e9 Mon Sep 17 00:00:00 2001 From: Abhilash Majumder Date: Mon, 1 Sep 2025 22:54:58 +0530 Subject: [PATCH 05/42] fix nvvm compilation flow and test --- cuda_core/cuda/core/experimental/_program.py | 5 +- cuda_core/tests/test_program.py | 179 +++++++++++++++---- 2 files changed, 147 insertions(+), 37 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py index 63c903736d..7034e68a1c 100644 --- a/cuda_core/cuda/core/experimental/_program.py +++ b/cuda_core/cuda/core/experimental/_program.py @@ -443,7 +443,7 @@ def close(self): elif self.backend == "NVVM": try: nvvm = _get_nvvm_module() - handle_return(nvvm.destroy_program(self.handle)) + nvvm.destroy_program(self.handle) except ImportError as e: pass self.handle = None @@ -623,7 +623,8 @@ def compile(self, target_type, name_expressions=(), logs=None): nvvm.get_program_log(self._mnff.handle, log) logs.write(log.decode("utf-8", errors="backslashreplace")) - return ObjectCode._init(data, target_type, name=self._options.name) + data_bytes = bytes(data) + return ObjectCode._init(data_bytes, target_type, name=self._options.name) supported_backends = ("nvJitLink", "driver") if self._backend not in supported_backends: diff --git a/cuda_core/tests/test_program.py 
b/cuda_core/tests/test_program.py index 8c8b29ecfe..bb1ba02d9b 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -12,8 +12,52 @@ is_culink_backend = _linker._decide_nvjitlink_or_driver() -nvvm_ir = """ -target triple = "nvptx64-unknown-cuda" +def get_nvvm_ir(): + """Generate working NVVM IR with proper version metadata""" + try: + from cuda.core.experimental._program import _get_nvvm_module + nvvm = _get_nvvm_module() + major, minor, debug_major, debug_minor = nvvm.ir_version() + + + nvvm_ir_template = '''target triple = "nvptx64-unknown-cuda" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" + +define i32 @ave(i32 %a, i32 %b) {{ +entry: + %add = add nsw i32 %a, %b + %div = sdiv i32 %add, 2 + ret i32 %div +}} + +define void @simple(i32* %data) {{ +entry: + %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() + %1 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() + %mul = mul i32 %0, %1 + %2 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() + %add = add i32 %mul, %2 + %call = call i32 @ave(i32 %add, i32 %add) + %idxprom = sext i32 %add to i64 + store i32 %call, i32* %data, align 4 + ret void +}} + +declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() nounwind readnone +declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x() nounwind readnone +declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() nounwind readnone + +!nvvm.annotations = !{{!0}} +!0 = !{{void (i32*)* @simple, !"kernel", i32 1}} + +!nvvmir.version = !{{!1}} +!1 = !{{i32 {major}, i32 0, i32 {debug_major}, i32 0}} +''' + + return nvvm_ir_template.format(major=major, debug_major=debug_major) + except Exception: + + return """target triple = "nvptx64-unknown-cuda" target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" define i32 @ave(i32 %a, i32 %b) { @@ -37,13 +81,15 @@ } declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() nounwind readnone - declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x() nounwind readnone - declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() nounwind readnone +!nvvm.annotations = !{!0} +!0 = !{void (i32*)* @simple, !"kernel", i32 1} """ +nvvm_ir = get_nvvm_ir() + @pytest.fixture(scope="module") def ptx_code_object(): code = 'extern "C" __global__ void my_kernel() {}' @@ -182,41 +228,104 @@ def test_program_close(): program.close() assert program.handle is None -nvvm_options = [ - ProgramOptions(name="nvvm_test"), - ProgramOptions(device_code_optimize=True), - ProgramOptions(arch="sm_90"), - ProgramOptions(debug=True), -] -@pytest.mark.parametrize("options", nvvm_options) -def test_nvvm_program_with_various_options(init_cuda, options): - program = Program(nvvm_ir, "nvvm", options) - assert program.backend == "NVVM" - program.compile("ptx") - program.close() - assert program.handle is None -def test_nvvm_program_creation(): - program = Program(nvvm_ir, "nvvm") - assert program.backend == "NVVM" - assert program.handle is not None +def test_nvvm_deferred_import(): + """Test that our deferred NVVM import works correctly""" + try: + from cuda.core.experimental._program import _get_nvvm_module + nvvm = _get_nvvm_module() + assert nvvm is not None + except ImportError as e: + pytest.skip(f"NVVM not available: {e}") -def test_nvvm_compile_invalid_target(): - program = Program(nvvm_ir, "nvvm") - with pytest.raises(ValueError): - program.compile("cubin") +def test_nvvm_program_creation(): + """Test basic NVVM program 
creation""" + try: + program = Program(nvvm_ir, "nvvm") + assert program.backend == "NVVM" + assert program.handle is not None + program.close() + except ImportError as e: + pytest.skip(f"NVVM not available: {e}") -def test_nvvm_compile_valid_target_type(init_cuda): - program = Program(nvvm_ir, "nvvm", options={"name": "nvvm_test"}) - ptx_object_code = program.compile("ptx") - assert isinstance(ptx_object_code, ObjectCode) - assert ptx_object_code.name == "nvvm_test" - - ptx_kernel = ptx_object_code.get_kernel("nvvm_kernel") - assert isinstance(ptx_kernel, Kernel) - - program.close() +def test_nvvm_compile_invalid_target(): + """Test that NVVM programs reject invalid compilation targets""" + try: + program = Program(nvvm_ir, "nvvm") + with pytest.raises(ValueError, match="NVVM backend only supports target_type=\"ptx\""): + program.compile("cubin") + program.close() + except ImportError as e: + pytest.skip(f"NVVM not available: {e}") + + +def test_nvvm_compile_to_ptx(init_cuda): + """Test NVVM IR compilation to PTX""" + try: + options = ProgramOptions(name="nvvm_test", arch="sm_90", device_code_optimize=False) + program = Program(nvvm_ir, "nvvm", options=options) + try: + ptx_object_code = program.compile("ptx") + assert isinstance(ptx_object_code, ObjectCode) + assert ptx_object_code.name == "nvvm_test" + assert ptx_object_code._code_type == "ptx" + + ptx_code = ptx_object_code.code + if isinstance(ptx_code, bytes): + ptx_text = ptx_code.decode() + else: + ptx_text = str(ptx_code) + assert ".visible .entry simple(" in ptx_text + + ptx_kernel = ptx_object_code.get_kernel("simple") + assert isinstance(ptx_kernel, Kernel) + + except Exception as e: + if any(error in str(e) for error in ["ERROR_IR_VERSION_MISMATCH", "ERROR_INVALID_OPTION", "ERROR_COMPILATION"]): + pytest.skip(f"NVVM IR not compatible with this CUDA version: {e}") + else: + raise + finally: + program.close() + + except ImportError as e: + pytest.skip(f"NVVM not available: {e}") + + +@pytest.mark.parametrize("options", [ + ProgramOptions(name="test1", arch="sm_90", device_code_optimize=False), + ProgramOptions(name="test2", arch="sm_90", device_code_optimize=False), + ProgramOptions(name="test3", arch="sm_90", device_code_optimize=True), +]) +def test_nvvm_program_options(init_cuda, options): + """Test NVVM programs with different options""" + try: + program = Program(nvvm_ir, "nvvm", options) + assert program.backend == "NVVM" + + try: + ptx_code = program.compile("ptx") + assert isinstance(ptx_code, ObjectCode) + assert ptx_code.name == options.name + + code_content = ptx_code.code + if isinstance(code_content, bytes): + ptx_text = code_content.decode() + else: + ptx_text = str(code_content) + assert ".visible .entry simple(" in ptx_text + + except Exception as e: + if any(error in str(e) for error in ["ERROR_IR_VERSION_MISMATCH", "ERROR_INVALID_OPTION", "ERROR_COMPILATION"]): + pytest.skip(f"NVVM compilation not supported: {e}") + else: + raise + finally: + program.close() + + except ImportError as e: + pytest.skip(f"NVVM not available: {e}") \ No newline at end of file From dc9a4e3dd3ba8e169cf46371c7687ae754136a45 Mon Sep 17 00:00:00 2001 From: Abhilash Majumder Date: Wed, 3 Sep 2025 17:38:31 +0530 Subject: [PATCH 06/42] refactor --- cuda_core/cuda/core/experimental/_module.py | 18 ------ cuda_core/cuda/core/experimental/_program.py | 63 +++++++++++--------- 2 files changed, 35 insertions(+), 46 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_module.py b/cuda_core/cuda/core/experimental/_module.py index 
8b3146fee3..63bb6ff260 100644 --- a/cuda_core/cuda/core/experimental/_module.py +++ b/cuda_core/cuda/core/experimental/_module.py @@ -544,24 +544,6 @@ def from_ltoir(module: Union[bytes, str], *, name: str = "", symbol_mapping: Opt them (default to no mappings). """ return ObjectCode._init(module, "ltoir", name=name, symbol_mapping=symbol_mapping) - - @staticmethod - def from_nvvm(module: Union[bytes, str], *, name: str = "", symbol_mapping: Optional[dict] = None) -> "ObjectCode": - """Create an :class:`ObjectCode` instance from an existing NVVM IR. - - Parameters - ---------- - module : Union[bytes, str] - Either a bytes object containing the in-memory NVVM IR code to load, or - a file path string pointing to the on-disk NVVM IR file to load. - name : Optional[str] - A human-readable identifier representing this code object. - symbol_mapping : Optional[dict] - A dictionary specifying how the unmangled symbol names (as keys) - should be mapped to the mangled names before trying to retrieve - them (default to no mappings). - """ - return ObjectCode._init(module, "nvvm", name=name, symbol_mapping=symbol_mapping) @staticmethod def from_fatbin( diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py index e2c8bf6463..497b68f669 100644 --- a/cuda_core/cuda/core/experimental/_program.py +++ b/cuda_core/cuda/core/experimental/_program.py @@ -62,13 +62,12 @@ def _get_nvvm_module(): ) from cuda.bindings import nvvm - try: - from cuda.bindings._internal.nvvm import _inspect_function_pointers - _inspect_function_pointers() - except Exception as e: + from cuda.bindings._internal.nvvm import _inspect_function_pointer + if _inspect_function_pointer("__nvvmCreateProgram") == 0: raise ImportError( "NVVM library (libnvvm) is not available in this Python environment. 
" f"Original error: {e}" + ) _nvvm_module = nvvm @@ -431,7 +430,7 @@ class Program: class _MembersNeededForFinalize: __slots__ = "handle", "backend" - def __init__(self, program_obj, handle, backend="NVRTC"): + def __init__(self, program_obj, handle, backend): self.handle = handle self.backend = backend weakref.finalize(program_obj, self.close) @@ -441,17 +440,14 @@ def close(self): if self.backend == "NVRTC": handle_return(nvrtc.nvrtcDestroyProgram(self.handle)) elif self.backend == "NVVM": - try: - nvvm = _get_nvvm_module() - nvvm.destroy_program(self.handle) - except ImportError as e: - pass + nvvm = _get_nvvm_module() + nvvm.destroy_program(self.handle) self.handle = None __slots__ = ("__weakref__", "_mnff", "_backend", "_linker", "_options") def __init__(self, code, code_type, options: ProgramOptions = None): - self._mnff = Program._MembersNeededForFinalize(self, None) + self._mnff = Program._MembersNeededForFinalize(self, None, None) self._options = options = check_or_create_options(ProgramOptions, options, "Program options") code_type = code_type.lower() @@ -507,6 +503,27 @@ def _translate_program_options(self, options: ProgramOptions) -> LinkerOptions: split_compile=options.split_compile, ptxas_options=options.ptxas_options, ) + + def _translate_program_options_to_nvvm(self, options: ProgramOptions) -> List[str]: + """Translate ProgramOptions to NVVM-specific compilation options.""" + nvvm_options = [] + + if options.arch is not None: + arch = options.arch + if arch.startswith("sm_"): + arch = f"compute_{arch[3:]}" + nvvm_options.append(f"-arch={arch}") + else: + major, minor = Device().compute_capability + nvvm_options.append(f"-arch=compute_{major}{minor}") + if options.debug: + nvvm_options.append("-g") + if options.device_code_optimize is False: + nvvm_options.append("-opt=0") + elif options.device_code_optimize is True: + nvvm_options.append("-opt=3") + + return nvvm_options def close(self): """Destroy this program.""" @@ -592,23 +609,7 @@ def compile(self, target_type, name_expressions=(), logs=None): if target_type != "ptx": raise ValueError(f'NVVM backend only supports target_type="ptx", got "{target_type}"') - nvvm_options = [] - if self._options.arch is not None: - arch = self._options.arch - if arch.startswith("sm_"): - arch = f"compute_{arch[3:]}" - nvvm_options.append(f"-arch={arch}") - else: - major, minor = Device().compute_capability - nvvm_options.append(f"-arch=compute_{major}{minor}") - - if self._options.debug: - nvvm_options.append("-g") - if self._options.device_code_optimize is False: - nvvm_options.append("-opt=0") - elif self._options.device_code_optimize is True: - nvvm_options.append("-opt=3") - + nvvm_options = self._translate_program_options_to_nvvm(self._options) nvvm = _get_nvvm_module() nvvm.compile_program(self._mnff.handle, len(nvvm_options), nvvm_options) @@ -650,3 +651,9 @@ def handle(self) -> ProgramHandleT: handle, call ``int(Program.handle)``. """ return self._mnff.handle +aution:: + + This handle is a Python object. To get the memory address of the underlying C + handle, call ``int(Program.handle)``. 
+ """ + return self._mnff.handle From 22d18b9bba81c10e31ee261d9c86a7bd107671aa Mon Sep 17 00:00:00 2001 From: Abhilash Majumder Date: Wed, 3 Sep 2025 17:41:25 +0530 Subject: [PATCH 07/42] fix unwanted rebase --- cuda_core/cuda/core/experimental/_program.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py index 497b68f669..3b566df5b6 100644 --- a/cuda_core/cuda/core/experimental/_program.py +++ b/cuda_core/cuda/core/experimental/_program.py @@ -651,9 +651,4 @@ def handle(self) -> ProgramHandleT: handle, call ``int(Program.handle)``. """ return self._mnff.handle -aution:: - - This handle is a Python object. To get the memory address of the underlying C - handle, call ``int(Program.handle)``. - """ - return self._mnff.handle + \ No newline at end of file From 0f7fda4979a10868b5a74ecdc23668edbe7e8233 Mon Sep 17 00:00:00 2001 From: Abhilash Majumder Date: Wed, 3 Sep 2025 18:40:25 +0530 Subject: [PATCH 08/42] fix core linter errors --- cuda_core/cuda/core/experimental/_program.py | 39 +++++++++----------- cuda_core/tests/test_program.py | 26 ++++++------- 2 files changed, 31 insertions(+), 34 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py index 3b566df5b6..8927534d2a 100644 --- a/cuda_core/cuda/core/experimental/_program.py +++ b/cuda_core/cuda/core/experimental/_program.py @@ -20,11 +20,11 @@ _handle_boolean_option, check_or_create_options, driver, - handle_return, get_binding_version, + handle_return, is_nested_sequence, is_sequence, - nvrtc + nvrtc, ) _nvvm_module = None @@ -37,22 +37,22 @@ def _get_nvvm_module(): NVVM bindings were added in CUDA 12.9.0, so we need to handle cases where: 1. cuda.bindings is not new enough (< 12.9.0) 2. libnvvm is not found in the Python environment - + Returns: The nvvm module if available and working - + Raises: ImportError: If NVVM is not available due to version or library issues """ global _nvvm_module, _nvvm_import_attempted - + if _nvvm_import_attempted: if _nvvm_module is None: raise ImportError("NVVM module is not available (previous import attempt failed)") return _nvvm_module - + _nvvm_import_attempted = True - + try: version = get_binding_version() if version < (12, 9): @@ -60,19 +60,17 @@ def _get_nvvm_module(): f"NVVM bindings require cuda-bindings >= 12.9.0, but found {version[0]}.{version[1]}.x. " "Please update cuda-bindings to use NVVM features." ) - + from cuda.bindings import nvvm from cuda.bindings._internal.nvvm import _inspect_function_pointer if _inspect_function_pointer("__nvvmCreateProgram") == 0: raise ImportError( "NVVM library (libnvvm) is not available in this Python environment. 
" - f"Original error: {e}" - ) - + _nvvm_module = nvvm return _nvvm_module - + except ImportError as e: _nvvm_module = None raise e @@ -474,7 +472,7 @@ def __init__(self, code, code_type, options: ProgramOptions = None): code = code.encode('utf-8') elif not isinstance(code, (bytes, bytearray)): raise TypeError("NVVM IR code must be provided as str, bytes, or bytearray") - + nvvm = _get_nvvm_module() self._mnff.handle = nvvm.create_program() self._mnff.backend = "NVVM" @@ -503,11 +501,11 @@ def _translate_program_options(self, options: ProgramOptions) -> LinkerOptions: split_compile=options.split_compile, ptxas_options=options.ptxas_options, ) - + def _translate_program_options_to_nvvm(self, options: ProgramOptions) -> List[str]: """Translate ProgramOptions to NVVM-specific compilation options.""" nvvm_options = [] - + if options.arch is not None: arch = options.arch if arch.startswith("sm_"): @@ -522,7 +520,7 @@ def _translate_program_options_to_nvvm(self, options: ProgramOptions) -> List[st nvvm_options.append("-opt=0") elif options.device_code_optimize is True: nvvm_options.append("-opt=3") - + return nvvm_options def close(self): @@ -608,22 +606,22 @@ def compile(self, target_type, name_expressions=(), logs=None): elif self._backend == "NVVM": if target_type != "ptx": raise ValueError(f'NVVM backend only supports target_type="ptx", got "{target_type}"') - + nvvm_options = self._translate_program_options_to_nvvm(self._options) nvvm = _get_nvvm_module() nvvm.compile_program(self._mnff.handle, len(nvvm_options), nvvm_options) - + size = nvvm.get_compiled_result_size(self._mnff.handle) data = bytearray(size) nvvm.get_compiled_result(self._mnff.handle, data) - + if logs is not None: logsize = nvvm.get_program_log_size(self._mnff.handle) if logsize > 1: log = bytearray(logsize) nvvm.get_program_log(self._mnff.handle, log) logs.write(log.decode("utf-8", errors="backslashreplace")) - + data_bytes = bytes(data) return ObjectCode._init(data_bytes, target_type, name=self._options.name) @@ -651,4 +649,3 @@ def handle(self) -> ProgramHandleT: handle, call ``int(Program.handle)``. 
""" return self._mnff.handle - \ No newline at end of file diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py index bb1ba02d9b..c258e5b4d6 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -18,8 +18,8 @@ def get_nvvm_ir(): from cuda.core.experimental._program import _get_nvvm_module nvvm = _get_nvvm_module() major, minor, debug_major, debug_minor = nvvm.ir_version() - - + + nvvm_ir_template = '''target triple = "nvptx64-unknown-cuda" target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" @@ -53,10 +53,10 @@ def get_nvvm_ir(): !nvvmir.version = !{{!1}} !1 = !{{i32 {major}, i32 0, i32 {debug_major}, i32 0}} ''' - + return nvvm_ir_template.format(major=major, debug_major=debug_major) except Exception: - + return """target triple = "nvptx64-unknown-cuda" target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" @@ -273,17 +273,17 @@ def test_nvvm_compile_to_ptx(init_cuda): assert isinstance(ptx_object_code, ObjectCode) assert ptx_object_code.name == "nvvm_test" assert ptx_object_code._code_type == "ptx" - + ptx_code = ptx_object_code.code if isinstance(ptx_code, bytes): ptx_text = ptx_code.decode() else: ptx_text = str(ptx_code) assert ".visible .entry simple(" in ptx_text - + ptx_kernel = ptx_object_code.get_kernel("simple") assert isinstance(ptx_kernel, Kernel) - + except Exception as e: if any(error in str(e) for error in ["ERROR_IR_VERSION_MISMATCH", "ERROR_INVALID_OPTION", "ERROR_COMPILATION"]): pytest.skip(f"NVVM IR not compatible with this CUDA version: {e}") @@ -291,7 +291,7 @@ def test_nvvm_compile_to_ptx(init_cuda): raise finally: program.close() - + except ImportError as e: pytest.skip(f"NVVM not available: {e}") @@ -306,19 +306,19 @@ def test_nvvm_program_options(init_cuda, options): try: program = Program(nvvm_ir, "nvvm", options) assert program.backend == "NVVM" - + try: ptx_code = program.compile("ptx") assert isinstance(ptx_code, ObjectCode) assert ptx_code.name == options.name - + code_content = ptx_code.code if isinstance(code_content, bytes): ptx_text = code_content.decode() else: ptx_text = str(code_content) assert ".visible .entry simple(" in ptx_text - + except Exception as e: if any(error in str(e) for error in ["ERROR_IR_VERSION_MISMATCH", "ERROR_INVALID_OPTION", "ERROR_COMPILATION"]): pytest.skip(f"NVVM compilation not supported: {e}") @@ -326,6 +326,6 @@ def test_nvvm_program_options(init_cuda, options): raise finally: program.close() - + except ImportError as e: - pytest.skip(f"NVVM not available: {e}") \ No newline at end of file + pytest.skip(f"NVVM not available: {e}") From 9ed80511963941c873754cdc6ed2cc05e748e56c Mon Sep 17 00:00:00 2001 From: Abhilash Majumder Date: Wed, 3 Sep 2025 20:58:45 +0530 Subject: [PATCH 09/42] refactor tests --- cuda_core/tests/test_program.py | 144 ++++++++++++-------------------- 1 file changed, 52 insertions(+), 92 deletions(-) diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py index c258e5b4d6..c511b22567 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -12,14 +12,29 @@ is_culink_backend = _linker._decide_nvjitlink_or_driver() -def get_nvvm_ir(): +def _is_nvvm_available(): + """Check if NVVM is available.""" + try: + from cuda.core.experimental._program import _get_nvvm_module + 
_get_nvvm_module() + return True + except ImportError: + return False + +nvvm_available = pytest.mark.skipif( + not _is_nvvm_available(), + reason="NVVM not available (libNVVM not found or cuda-bindings < 12.9.0)" +) + +@pytest.fixture(scope="session") +def nvvm_ir(): """Generate working NVVM IR with proper version metadata""" try: from cuda.core.experimental._program import _get_nvvm_module nvvm = _get_nvvm_module() major, minor, debug_major, debug_minor = nvvm.ir_version() - - + + nvvm_ir_template = '''target triple = "nvptx64-unknown-cuda" target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" @@ -53,10 +68,10 @@ def get_nvvm_ir(): !nvvmir.version = !{{!1}} !1 = !{{i32 {major}, i32 0, i32 {debug_major}, i32 0}} ''' - + return nvvm_ir_template.format(major=major, debug_major=debug_major) except Exception: - + return """target triple = "nvptx64-unknown-cuda" target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" @@ -88,8 +103,6 @@ def get_nvvm_ir(): !0 = !{void (i32*)* @simple, !"kernel", i32 1} """ -nvvm_ir = get_nvvm_ir() - @pytest.fixture(scope="module") def ptx_code_object(): code = 'extern "C" __global__ void my_kernel() {}' @@ -231,101 +244,48 @@ def test_program_close(): +@nvvm_available def test_nvvm_deferred_import(): """Test that our deferred NVVM import works correctly""" - try: - from cuda.core.experimental._program import _get_nvvm_module - nvvm = _get_nvvm_module() - assert nvvm is not None - except ImportError as e: - pytest.skip(f"NVVM not available: {e}") + from cuda.core.experimental._program import _get_nvvm_module + nvvm = _get_nvvm_module() + assert nvvm is not None -def test_nvvm_program_creation(): +@nvvm_available +def test_nvvm_program_creation(nvvm_ir): """Test basic NVVM program creation""" - try: - program = Program(nvvm_ir, "nvvm") - assert program.backend == "NVVM" - assert program.handle is not None - program.close() - except ImportError as e: - pytest.skip(f"NVVM not available: {e}") + program = Program(nvvm_ir, "nvvm") + assert program.backend == "NVVM" + assert program.handle is not None + program.close() -def test_nvvm_compile_invalid_target(): +@nvvm_available +def test_nvvm_compile_invalid_target(nvvm_ir): """Test that NVVM programs reject invalid compilation targets""" - try: - program = Program(nvvm_ir, "nvvm") - with pytest.raises(ValueError, match="NVVM backend only supports target_type=\"ptx\""): - program.compile("cubin") - program.close() - except ImportError as e: - pytest.skip(f"NVVM not available: {e}") - - -def test_nvvm_compile_to_ptx(init_cuda): - """Test NVVM IR compilation to PTX""" - try: - options = ProgramOptions(name="nvvm_test", arch="sm_90", device_code_optimize=False) - program = Program(nvvm_ir, "nvvm", options=options) - try: - ptx_object_code = program.compile("ptx") - assert isinstance(ptx_object_code, ObjectCode) - assert ptx_object_code.name == "nvvm_test" - assert ptx_object_code._code_type == "ptx" - - ptx_code = ptx_object_code.code - if isinstance(ptx_code, bytes): - ptx_text = ptx_code.decode() - else: - ptx_text = str(ptx_code) - assert ".visible .entry simple(" in ptx_text - - ptx_kernel = ptx_object_code.get_kernel("simple") - assert isinstance(ptx_kernel, Kernel) - - except Exception as e: - if any(error in str(e) for error in ["ERROR_IR_VERSION_MISMATCH", "ERROR_INVALID_OPTION", 
"ERROR_COMPILATION"]): - pytest.skip(f"NVVM IR not compatible with this CUDA version: {e}") - else: - raise - finally: - program.close() - - except ImportError as e: - pytest.skip(f"NVVM not available: {e}") - + program = Program(nvvm_ir, "nvvm") + with pytest.raises(ValueError, match="NVVM backend only supports target_type=\"ptx\""): + program.compile("cubin") + program.close() +@nvvm_available @pytest.mark.parametrize("options", [ ProgramOptions(name="test1", arch="sm_90", device_code_optimize=False), - ProgramOptions(name="test2", arch="sm_90", device_code_optimize=False), - ProgramOptions(name="test3", arch="sm_90", device_code_optimize=True), + ProgramOptions(name="test2", arch="sm_100", device_code_optimize=False), + ProgramOptions(name="test3", arch="sm_110", device_code_optimize=True), ]) -def test_nvvm_program_options(init_cuda, options): +def test_nvvm_program_options(init_cuda, nvvm_ir, options): """Test NVVM programs with different options""" - try: - program = Program(nvvm_ir, "nvvm", options) - assert program.backend == "NVVM" - - try: - ptx_code = program.compile("ptx") - assert isinstance(ptx_code, ObjectCode) - assert ptx_code.name == options.name - - code_content = ptx_code.code - if isinstance(code_content, bytes): - ptx_text = code_content.decode() - else: - ptx_text = str(code_content) - assert ".visible .entry simple(" in ptx_text - - except Exception as e: - if any(error in str(e) for error in ["ERROR_IR_VERSION_MISMATCH", "ERROR_INVALID_OPTION", "ERROR_COMPILATION"]): - pytest.skip(f"NVVM compilation not supported: {e}") - else: - raise - finally: - program.close() - - except ImportError as e: - pytest.skip(f"NVVM not available: {e}") + program = Program(nvvm_ir, "nvvm", options) + assert program.backend == "NVVM" + + ptx_code = program.compile("ptx") + assert isinstance(ptx_code, ObjectCode) + assert ptx_code.name == options.name + + code_content = ptx_code.code + ptx_text = code_content.decode() if isinstance(code_content, bytes) else str(code_content) + assert ".visible .entry simple(" in ptx_text + + program.close() \ No newline at end of file From bccda4709650b9c9042f16cf5de951844b961db4 Mon Sep 17 00:00:00 2001 From: Abhilash Majumder Date: Wed, 3 Sep 2025 20:59:49 +0530 Subject: [PATCH 10/42] refactor --- cuda_core/tests/test_program.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py index c511b22567..64ecfb9c75 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -28,7 +28,12 @@ def _is_nvvm_available(): @pytest.fixture(scope="session") def nvvm_ir(): - """Generate working NVVM IR with proper version metadata""" + """Generate working NVVM IR with proper version metadata + The try clause here is used for older nvvm modules which + might not have an ir_version() method. 
In which case the + fallback assumes no version metadata will be present in + the input nvvm ir + """ try: from cuda.core.experimental._program import _get_nvvm_module nvvm = _get_nvvm_module() From 64436c174f63a0e5b0210da9903fbc83023188c3 Mon Sep 17 00:00:00 2001 From: Abhilash Majumder Date: Wed, 3 Sep 2025 21:01:35 +0530 Subject: [PATCH 11/42] refactor --- cuda_core/tests/test_program.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py index 64ecfb9c75..126deb2d03 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -28,18 +28,18 @@ def _is_nvvm_available(): @pytest.fixture(scope="session") def nvvm_ir(): - """Generate working NVVM IR with proper version metadata + """Generate working NVVM IR with proper version metadata. The try clause here is used for older nvvm modules which - might not have an ir_version() method. In which case the - fallback assumes no version metadata will be present in + might not have an ir_version() method. In which case the + fallback assumes no version metadata will be present in the input nvvm ir """ try: from cuda.core.experimental._program import _get_nvvm_module nvvm = _get_nvvm_module() major, minor, debug_major, debug_minor = nvvm.ir_version() - - + + nvvm_ir_template = '''target triple = "nvptx64-unknown-cuda" target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" @@ -73,10 +73,10 @@ def nvvm_ir(): !nvvmir.version = !{{!1}} !1 = !{{i32 {major}, i32 0, i32 {debug_major}, i32 0}} ''' - + return nvvm_ir_template.format(major=major, debug_major=debug_major) except Exception: - + return """target triple = "nvptx64-unknown-cuda" target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" @@ -284,13 +284,13 @@ def test_nvvm_program_options(init_cuda, nvvm_ir, options): """Test NVVM programs with different options""" program = Program(nvvm_ir, "nvvm", options) assert program.backend == "NVVM" - + ptx_code = program.compile("ptx") assert isinstance(ptx_code, ObjectCode) assert ptx_code.name == options.name - + code_content = ptx_code.code ptx_text = code_content.decode() if isinstance(code_content, bytes) else str(code_content) assert ".visible .entry simple(" in ptx_text - - program.close() \ No newline at end of file + + program.close() From 58317d0601a56a7418d1e17dc3594c3273da660b Mon Sep 17 00:00:00 2001 From: Abhilash Majumder Date: Thu, 4 Sep 2025 08:59:16 +0530 Subject: [PATCH 12/42] ruff format --- cuda_core/cuda/core/experimental/_program.py | 7 ++- cuda_core/tests/test_program.py | 48 ++++++++++++-------- 2 files changed, 31 insertions(+), 24 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py index 8927534d2a..20159b03e0 100644 --- a/cuda_core/cuda/core/experimental/_program.py +++ b/cuda_core/cuda/core/experimental/_program.py @@ -63,10 +63,9 @@ def _get_nvvm_module(): from cuda.bindings import nvvm from cuda.bindings._internal.nvvm import _inspect_function_pointer + if _inspect_function_pointer("__nvvmCreateProgram") == 0: - raise ImportError( - "NVVM library (libnvvm) is not available in this Python environment. " - ) + raise ImportError("NVVM library (libnvvm) is not available in this Python environment. 
") _nvvm_module = nvvm return _nvvm_module @@ -469,7 +468,7 @@ def __init__(self, code, code_type, options: ProgramOptions = None): elif code_type == "nvvm": if isinstance(code, str): - code = code.encode('utf-8') + code = code.encode("utf-8") elif not isinstance(code, (bytes, bytearray)): raise TypeError("NVVM IR code must be provided as str, bytes, or bytearray") diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py index 126deb2d03..49665564b5 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -12,36 +12,40 @@ is_culink_backend = _linker._decide_nvjitlink_or_driver() + def _is_nvvm_available(): """Check if NVVM is available.""" try: from cuda.core.experimental._program import _get_nvvm_module + _get_nvvm_module() return True except ImportError: return False + nvvm_available = pytest.mark.skipif( - not _is_nvvm_available(), - reason="NVVM not available (libNVVM not found or cuda-bindings < 12.9.0)" + not _is_nvvm_available(), reason="NVVM not available (libNVVM not found or cuda-bindings < 12.9.0)" ) + @pytest.fixture(scope="session") def nvvm_ir(): """Generate working NVVM IR with proper version metadata. - The try clause here is used for older nvvm modules which - might not have an ir_version() method. In which case the - fallback assumes no version metadata will be present in - the input nvvm ir + The try clause here is used for older nvvm modules which + might not have an ir_version() method. In which case the + fallback assumes no version metadata will be present in + the input nvvm ir """ try: from cuda.core.experimental._program import _get_nvvm_module + nvvm = _get_nvvm_module() major, minor, debug_major, debug_minor = nvvm.ir_version() - - nvvm_ir_template = '''target triple = "nvptx64-unknown-cuda" -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" + nvvm_ir_template = """target triple = "nvptx64-unknown-cuda" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-" \ +"f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" define i32 @ave(i32 %a, i32 %b) {{ entry: @@ -72,13 +76,13 @@ def nvvm_ir(): !nvvmir.version = !{{!1}} !1 = !{{i32 {major}, i32 0, i32 {debug_major}, i32 0}} -''' +""" return nvvm_ir_template.format(major=major, debug_major=debug_major) except Exception: - return """target triple = "nvptx64-unknown-cuda" -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-" \ +"f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" define i32 @ave(i32 %a, i32 %b) { entry: @@ -108,6 +112,7 @@ def nvvm_ir(): !0 = !{void (i32*)* @simple, !"kernel", i32 1} """ + @pytest.fixture(scope="module") def ptx_code_object(): code = 'extern "C" __global__ void my_kernel() {}' @@ -247,12 +252,11 @@ def test_program_close(): assert program.handle is None - - @nvvm_available def test_nvvm_deferred_import(): """Test that our deferred NVVM import works correctly""" from cuda.core.experimental._program import _get_nvvm_module + nvvm = _get_nvvm_module() assert nvvm is not None @@ -270,16 +274,20 @@ def test_nvvm_program_creation(nvvm_ir): def test_nvvm_compile_invalid_target(nvvm_ir): """Test that NVVM programs reject invalid compilation 
targets""" program = Program(nvvm_ir, "nvvm") - with pytest.raises(ValueError, match="NVVM backend only supports target_type=\"ptx\""): + with pytest.raises(ValueError, match='NVVM backend only supports target_type="ptx"'): program.compile("cubin") program.close() + @nvvm_available -@pytest.mark.parametrize("options", [ - ProgramOptions(name="test1", arch="sm_90", device_code_optimize=False), - ProgramOptions(name="test2", arch="sm_100", device_code_optimize=False), - ProgramOptions(name="test3", arch="sm_110", device_code_optimize=True), -]) +@pytest.mark.parametrize( + "options", + [ + ProgramOptions(name="test1", arch="sm_90", device_code_optimize=False), + ProgramOptions(name="test2", arch="sm_100", device_code_optimize=False), + ProgramOptions(name="test3", arch="sm_110", device_code_optimize=True), + ], +) def test_nvvm_program_options(init_cuda, nvvm_ir, options): """Test NVVM programs with different options""" program = Program(nvvm_ir, "nvvm", options) From caf4f22f1fe84ff4a016a94fecd8ff82f09d785e Mon Sep 17 00:00:00 2001 From: Abhilash Majumder Date: Thu, 4 Sep 2025 09:03:15 +0530 Subject: [PATCH 13/42] ruff format --- cuda_core/cuda/core/experimental/_program.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py index 20159b03e0..c3bca7ac7e 100644 --- a/cuda_core/cuda/core/experimental/_program.py +++ b/cuda_core/cuda/core/experimental/_program.py @@ -9,6 +9,8 @@ from typing import TYPE_CHECKING, List, Tuple, Union from warnings import warn +import list + if TYPE_CHECKING: import cuda.bindings @@ -501,7 +503,7 @@ def _translate_program_options(self, options: ProgramOptions) -> LinkerOptions: ptxas_options=options.ptxas_options, ) - def _translate_program_options_to_nvvm(self, options: ProgramOptions) -> List[str]: + def _translate_program_options_to_nvvm(self, options: ProgramOptions) -> list[str]: """Translate ProgramOptions to NVVM-specific compilation options.""" nvvm_options = [] From 88237bcdb18d9025f7e566d73f1e63c07704e5da Mon Sep 17 00:00:00 2001 From: Abhilash Majumder Date: Thu, 4 Sep 2025 09:10:18 +0530 Subject: [PATCH 14/42] revert changes to cuda_utils --- cuda_core/cuda/core/experimental/_utils/cuda_utils.pyx | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cuda_core/cuda/core/experimental/_utils/cuda_utils.pyx b/cuda_core/cuda/core/experimental/_utils/cuda_utils.pyx index 4a9d61aee9..60bcaeba6c 100644 --- a/cuda_core/cuda/core/experimental/_utils/cuda_utils.pyx +++ b/cuda_core/cuda/core/experimental/_utils/cuda_utils.pyx @@ -26,6 +26,7 @@ class CUDAError(Exception): class NVRTCError(CUDAError): pass + ComputeCapability = namedtuple("ComputeCapability", ("major", "minor")) @@ -55,6 +56,7 @@ cdef object _DRIVER_SUCCESS = driver.CUresult.CUDA_SUCCESS cdef object _RUNTIME_SUCCESS = runtime.cudaError_t.cudaSuccess cdef object _NVRTC_SUCCESS = nvrtc.nvrtcResult.NVRTC_SUCCESS + cpdef inline int _check_driver_error(error) except?-1: if error == _DRIVER_SUCCESS: return 0 @@ -100,6 +102,7 @@ cpdef inline int _check_nvrtc_error(error, handle=None) except?-1: err += f", compilation log:\n\n{log.decode('utf-8', errors='backslashreplace')}" raise NVRTCError(err) + cdef inline int _check_error(error, handle=None) except?-1: if isinstance(error, driver.CUresult): return _check_driver_error(error) @@ -218,4 +221,4 @@ def get_binding_version(): major_minor = importlib.metadata.version("cuda-bindings").split(".")[:2] except 
importlib.metadata.PackageNotFoundError: major_minor = importlib.metadata.version("cuda-python").split(".")[:2] - return tuple(int(v) for v in major_minor) + return tuple(int(v) for v in major_minor) \ No newline at end of file From 5e2e137659df87a15a0b950e042a9d2c30883618 Mon Sep 17 00:00:00 2001 From: abhilash1910 Date: Thu, 4 Sep 2025 09:13:10 +0530 Subject: [PATCH 15/42] new line --- cuda_core/cuda/core/experimental/_utils/cuda_utils.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda_core/cuda/core/experimental/_utils/cuda_utils.pyx b/cuda_core/cuda/core/experimental/_utils/cuda_utils.pyx index 60bcaeba6c..86588f733c 100644 --- a/cuda_core/cuda/core/experimental/_utils/cuda_utils.pyx +++ b/cuda_core/cuda/core/experimental/_utils/cuda_utils.pyx @@ -221,4 +221,4 @@ def get_binding_version(): major_minor = importlib.metadata.version("cuda-bindings").split(".")[:2] except importlib.metadata.PackageNotFoundError: major_minor = importlib.metadata.version("cuda-python").split(".")[:2] - return tuple(int(v) for v in major_minor) \ No newline at end of file + return tuple(int(v) for v in major_minor) From 0301a4d670e8533ef66367e23c28f1960dda2b9f Mon Sep 17 00:00:00 2001 From: abhilash1910 Date: Fri, 5 Sep 2025 08:44:19 +0530 Subject: [PATCH 16/42] fix CI rm list import --- cuda_core/cuda/core/experimental/_program.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py index c3bca7ac7e..0c59298e60 100644 --- a/cuda_core/cuda/core/experimental/_program.py +++ b/cuda_core/cuda/core/experimental/_program.py @@ -9,8 +9,6 @@ from typing import TYPE_CHECKING, List, Tuple, Union from warnings import warn -import list - if TYPE_CHECKING: import cuda.bindings From 28e2d4b9e8872de93f66cc59ff81d4f13c8e1ab6 Mon Sep 17 00:00:00 2001 From: abhilash1910 Date: Sun, 7 Sep 2025 14:12:36 +0530 Subject: [PATCH 17/42] use noqa --- cuda_core/tests/test_program.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py index 49665564b5..d8dc3af31d 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -44,8 +44,7 @@ def nvvm_ir(): major, minor, debug_major, debug_minor = nvvm.ir_version() nvvm_ir_template = """target triple = "nvptx64-unknown-cuda" -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-" \ -"f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" define i32 @ave(i32 %a, i32 %b) {{ entry: @@ -76,13 +75,12 @@ def nvvm_ir(): !nvvmir.version = !{{!1}} !1 = !{{i32 {major}, i32 0, i32 {debug_major}, i32 0}} -""" +""" # noqa: E501 return nvvm_ir_template.format(major=major, debug_major=debug_major) except Exception: return """target triple = "nvptx64-unknown-cuda" -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-" \ -"f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" define i32 @ave(i32 %a, i32 %b) { entry: @@ -110,7 +108,7 @@ def nvvm_ir(): !nvvm.annotations = !{!0} !0 = !{void (i32*)* @simple, !"kernel", i32 1} -""" +""" # noqa: 
E501 @pytest.fixture(scope="module") From af1008ff076accd6b40270c7e9be21e38ab897c9 Mon Sep 17 00:00:00 2001 From: abhilash1910 Date: Sun, 7 Sep 2025 14:23:08 +0530 Subject: [PATCH 18/42] format --- cuda_core/tests/test_program.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py index d8dc3af31d..656cdc428a 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -75,7 +75,7 @@ def nvvm_ir(): !nvvmir.version = !{{!1}} !1 = !{{i32 {major}, i32 0, i32 {debug_major}, i32 0}} -""" # noqa: E501 +""" # noqa: E501 return nvvm_ir_template.format(major=major, debug_major=debug_major) except Exception: @@ -108,7 +108,7 @@ def nvvm_ir(): !nvvm.annotations = !{!0} !0 = !{void (i32*)* @simple, !"kernel", i32 1} -""" # noqa: E501 +""" # noqa: E501 @pytest.fixture(scope="module") From a85a44f7293dfe4578b4d30a50fed6b6a5f38eaf Mon Sep 17 00:00:00 2001 From: abhilash1910 Date: Mon, 8 Sep 2025 20:32:58 +0530 Subject: [PATCH 19/42] verify and skip 110 --- cuda_core/cuda/core/experimental/_program.py | 1 + cuda_core/tests/test_program.py | 10 +++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py index 0c59298e60..da1ef315a2 100644 --- a/cuda_core/cuda/core/experimental/_program.py +++ b/cuda_core/cuda/core/experimental/_program.py @@ -608,6 +608,7 @@ def compile(self, target_type, name_expressions=(), logs=None): nvvm_options = self._translate_program_options_to_nvvm(self._options) nvvm = _get_nvvm_module() + nvvm.verify_program(self._mnff.handle, len(nvvm_options), nvvm_options) nvvm.compile_program(self._mnff.handle, len(nvvm_options), nvvm_options) size = nvvm.get_compiled_result_size(self._mnff.handle) diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py index 656cdc428a..a471df754f 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -9,7 +9,9 @@ from cuda.core.experimental import _linker from cuda.core.experimental._module import Kernel, ObjectCode from cuda.core.experimental._program import Program, ProgramOptions +from cuda.core.experimental._utils.cuda_utils import driver, handle_return +cuda_driver_version = handle_return(driver.cuDriverGetVersion()) is_culink_backend = _linker._decide_nvjitlink_or_driver() @@ -283,7 +285,13 @@ def test_nvvm_compile_invalid_target(nvvm_ir): [ ProgramOptions(name="test1", arch="sm_90", device_code_optimize=False), ProgramOptions(name="test2", arch="sm_100", device_code_optimize=False), - ProgramOptions(name="test3", arch="sm_110", device_code_optimize=True), + pytest.param( + ProgramOptions(name="test3", arch="sm_110", device_code_optimize=True), + marks=pytest.mark.skipif( + 12000 <= cuda_driver_version < 13000, + reason="Compute capability 110 not supported with CUDA 12.x", + ), + ), ], ) def test_nvvm_program_options(init_cuda, nvvm_ir, options): From 0be06aa2bae81eb230fec3cf0e39dc7e755eabc4 Mon Sep 17 00:00:00 2001 From: abhilash1910 Date: Mon, 8 Sep 2025 20:51:28 +0530 Subject: [PATCH 20/42] add flags and lto --- cuda_core/cuda/core/experimental/_program.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py index da1ef315a2..1535a726cf 100644 --- a/cuda_core/cuda/core/experimental/_program.py +++ b/cuda_core/cuda/core/experimental/_program.py @@ -520,6 +520,15 
@@ def _translate_program_options_to_nvvm(self, options: ProgramOptions) -> list[st elif options.device_code_optimize is True: nvvm_options.append("-opt=3") + if options.ftz is not None: + nvvm_options.append(f"-ftz={'true' if options.ftz else 'false'}") + if options.prec_sqrt is not None: + nvvm_options.append(f"-prec-sqrt={'true' if options.prec_sqrt else 'false'}") + if options.prec_div is not None: + nvvm_options.append(f"-prec-div={'true' if options.prec_div else 'false'}") + if options.fma is not None: + nvvm_options.append(f"-fma={'true' if options.fma else 'false'}") + return nvvm_options def close(self): @@ -603,10 +612,12 @@ def compile(self, target_type, name_expressions=(), logs=None): return ObjectCode._init(data, target_type, symbol_mapping=symbol_mapping, name=self._options.name) elif self._backend == "NVVM": - if target_type != "ptx": - raise ValueError(f'NVVM backend only supports target_type="ptx", got "{target_type}"') + if target_type not in ("ptx", "ltoir"): + raise ValueError(f'NVVM backend only supports target_type="ptx", "ltoir", got "{target_type}"') nvvm_options = self._translate_program_options_to_nvvm(self._options) + if target_type == "ltoir" and "-gen-lto" not in nvvm_options: + nvvm_options.append("-gen-lto") nvvm = _get_nvvm_module() nvvm.verify_program(self._mnff.handle, len(nvvm_options), nvvm_options) nvvm.compile_program(self._mnff.handle, len(nvvm_options), nvvm_options) From 1c63a11da6cc29314d425cf3021a97cf0c98011d Mon Sep 17 00:00:00 2001 From: Abhilash Majumder Date: Mon, 15 Sep 2025 12:51:58 +0530 Subject: [PATCH 21/42] rename gpu-arch to arch --- cuda_core/cuda/core/experimental/_program.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py index 1535a726cf..afccb5c191 100644 --- a/cuda_core/cuda/core/experimental/_program.py +++ b/cuda_core/cuda/core/experimental/_program.py @@ -277,10 +277,10 @@ def __post_init__(self): self._formatted_options = [] if self.arch is not None: - self._formatted_options.append(f"--gpu-architecture={self.arch}") + self._formatted_options.append(f"-arch={self.arch}") else: self._formatted_options.append( - "--gpu-architecture=sm_" + "".join(f"{i}" for i in Device().compute_capability) + "-arch=sm_" + "".join(f"{i}" for i in Device().compute_capability) ) if self.relocatable_device_code is not None: self._formatted_options.append( From cab6db0c69fac0fd3497a008c1b22b7d9d838f83 Mon Sep 17 00:00:00 2001 From: abhilash1910 Date: Mon, 15 Sep 2025 14:02:57 +0530 Subject: [PATCH 22/42] change libnvvm version check --- cuda_core/cuda/core/experimental/_program.py | 4 +- cuda_core/tests/test_program.py | 105 ++++++++++++++++++- 2 files changed, 103 insertions(+), 6 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py index afccb5c191..6a285a9819 100644 --- a/cuda_core/cuda/core/experimental/_program.py +++ b/cuda_core/cuda/core/experimental/_program.py @@ -279,9 +279,7 @@ def __post_init__(self): if self.arch is not None: self._formatted_options.append(f"-arch={self.arch}") else: - self._formatted_options.append( - "-arch=sm_" + "".join(f"{i}" for i in Device().compute_capability) - ) + self._formatted_options.append("-arch=sm_" + "".join(f"{i}" for i in Device().compute_capability)) if self.relocatable_device_code is not None: self._formatted_options.append( f"--relocatable-device-code={_handle_boolean_option(self.relocatable_device_code)}" diff --git 
a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py index a471df754f..209dfa1b0f 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -30,6 +30,83 @@ def _is_nvvm_available(): not _is_nvvm_available(), reason="NVVM not available (libNVVM not found or cuda-bindings < 12.9.0)" ) +try: + from cuda.core.experimental._utils.cuda_utils import driver, handle_return + + _cuda_driver_version = handle_return(driver.cuDriverGetVersion()) +except Exception: + _cuda_driver_version = 0 + +_libnvvm_version = None +_libnvvm_version_attempted = False + +precheck_nvvm_ir = """ +target triple = "nvptx64-nvidia-cuda" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" + +define void @dummy_kernel() { +entry: + ret void +} + +!nvvm.annotations = !{!0} +!0 = !{void ()* @dummy_kernel, !"kernel", i32 1} +""" # noqa: E501 + + +def _get_libnvvm_version_for_tests(): + """ + Detect libNVVM version by compiling dummy IR and analyzing the PTX output. + + Workaround for the lack of direct libNVVM version API (nvbugs 5312315). + The approach: + - Compile a small dummy NVVM IR to PTX + - Use PTX version analysis APIs if available to infer libNVVM version + - Cache the result for future use + """ + global _libnvvm_version, _libnvvm_version_attempted + + if _libnvvm_version_attempted: + return _libnvvm_version + + _libnvvm_version_attempted = True + + try: + from cuda.core.experimental._program import _get_nvvm_module + + nvvm = _get_nvvm_module() + + try: + from cuda.bindings.utils import get_ptx_ver, get_minimal_required_cuda_ver_from_ptx_ver + except ImportError: + _libnvvm_version = None + return _libnvvm_version + + program = nvvm.create_program() + try: + precheck_ir_bytes = precheck_nvvm_ir.encode("utf-8") + nvvm.add_module_to_program(program, precheck_ir_bytes, len(precheck_ir_bytes), "precheck.ll") + + options = ["-arch=compute_70"] + nvvm.verify_program(program, len(options), options) + nvvm.compile_program(program, len(options), options) + + ptx_size = nvvm.get_compiled_result_size(program) + ptx_data = bytearray(ptx_size) + nvvm.get_compiled_result(program, ptx_data) + ptx_str = ptx_data.decode("utf-8") + ptx_version = get_ptx_ver(ptx_str) + cuda_version = get_minimal_required_cuda_ver_from_ptx_ver(ptx_version) + _libnvvm_version = cuda_version + return _libnvvm_version + + finally: + nvvm.destroy_program(program) + + except Exception: + _libnvvm_version = None + return _libnvvm_version + @pytest.fixture(scope="session") def nvvm_ir(): @@ -286,10 +363,32 @@ def test_nvvm_compile_invalid_target(nvvm_ir): ProgramOptions(name="test1", arch="sm_90", device_code_optimize=False), ProgramOptions(name="test2", arch="sm_100", device_code_optimize=False), pytest.param( - ProgramOptions(name="test3", arch="sm_110", device_code_optimize=True), + ProgramOptions(name="test_sm110_1", arch="sm_110", device_code_optimize=False), + marks=pytest.mark.skipif( + (_get_libnvvm_version_for_tests() or 0) < 13000, + reason="Compute capability 110 requires libNVVM >= 13.0", + ), + ), + pytest.param( + ProgramOptions( + name="test_sm110_2", + arch="sm_110", + ftz=True, + prec_sqrt=False, + prec_div=False, + fma=True, + device_code_optimize=True, + ), + marks=pytest.mark.skipif( + (_get_libnvvm_version_for_tests() or 0) < 13000, + reason="Compute capability 110 requires libNVVM >= 13.0", + ), + ), + pytest.param( + ProgramOptions(name="test_sm110_3", arch="sm_110", 
link_time_optimization=True), marks=pytest.mark.skipif( - 12000 <= cuda_driver_version < 13000, - reason="Compute capability 110 not supported with CUDA 12.x", + (_get_libnvvm_version_for_tests() or 0) < 13000, + reason="Compute capability 110 requires libNVVM >= 13.0", ), ), ], From 3abcd389713af1e24498c74e585307e3ab0918ec Mon Sep 17 00:00:00 2001 From: abhilash1910 Date: Mon, 15 Sep 2025 14:05:13 +0530 Subject: [PATCH 23/42] format --- cuda_core/tests/test_program.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py index 209dfa1b0f..e4653d1164 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -77,7 +77,7 @@ def _get_libnvvm_version_for_tests(): nvvm = _get_nvvm_module() try: - from cuda.bindings.utils import get_ptx_ver, get_minimal_required_cuda_ver_from_ptx_ver + from cuda.bindings.utils import get_minimal_required_cuda_ver_from_ptx_ver, get_ptx_ver except ImportError: _libnvvm_version = None return _libnvvm_version From caf634cb95399b98f8e93ca4c472e943db452b1b Mon Sep 17 00:00:00 2001 From: abhilash1910 Date: Mon, 15 Sep 2025 14:10:20 +0530 Subject: [PATCH 24/42] compute 90 --- cuda_core/tests/test_program.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py index e4653d1164..effa1f0cfd 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -87,7 +87,7 @@ def _get_libnvvm_version_for_tests(): precheck_ir_bytes = precheck_nvvm_ir.encode("utf-8") nvvm.add_module_to_program(program, precheck_ir_bytes, len(precheck_ir_bytes), "precheck.ll") - options = ["-arch=compute_70"] + options = ["-arch=compute_90"] nvvm.verify_program(program, len(options), options) nvvm.compile_program(program, len(options), options) From 64d19abb88e752e7324dbc578b45f239472faf93 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Mon, 15 Sep 2025 19:24:48 -0400 Subject: [PATCH 25/42] Apply suggestions from code review --- cuda_core/cuda/core/experimental/_program.py | 28 +++++++++----------- cuda_core/tests/test_program.py | 2 +- 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py index 6a285a9819..211ae50fbe 100644 --- a/cuda_core/cuda/core/experimental/_program.py +++ b/cuda_core/cuda/core/experimental/_program.py @@ -34,7 +34,7 @@ def _get_nvvm_module(): """ Handles the import of NVVM module with version and availability checks. - NVVM bindings were added in CUDA 12.9.0, so we need to handle cases where: + NVVM bindings were added in cuda-bindings 12.9.0, so we need to handle cases where: 1. cuda.bindings is not new enough (< 12.9.0) 2. 
libnvvm is not found in the Python environment @@ -42,13 +42,13 @@ def _get_nvvm_module(): The nvvm module if available and working Raises: - ImportError: If NVVM is not available due to version or library issues + RuntimeError: If NVVM is not available due to version or library issues """ global _nvvm_module, _nvvm_import_attempted if _nvvm_import_attempted: if _nvvm_module is None: - raise ImportError("NVVM module is not available (previous import attempt failed)") + raise RuntimeError("NVVM module is not available (previous import attempt failed)") return _nvvm_module _nvvm_import_attempted = True @@ -56,7 +56,7 @@ def _get_nvvm_module(): try: version = get_binding_version() if version < (12, 9): - raise ImportError( + raise RuntimeError( f"NVVM bindings require cuda-bindings >= 12.9.0, but found {version[0]}.{version[1]}.x. " "Please update cuda-bindings to use NVVM features." ) @@ -65,12 +65,12 @@ def _get_nvvm_module(): from cuda.bindings._internal.nvvm import _inspect_function_pointer if _inspect_function_pointer("__nvvmCreateProgram") == 0: - raise ImportError("NVVM library (libnvvm) is not available in this Python environment. ") + raise RuntimeError("NVVM library (libnvvm) is not available in this Python environment. ") _nvvm_module = nvvm return _nvvm_module - except ImportError as e: + except RuntimeError as e: _nvvm_module = None raise e @@ -279,7 +279,8 @@ def __post_init__(self): if self.arch is not None: self._formatted_options.append(f"-arch={self.arch}") else: - self._formatted_options.append("-arch=sm_" + "".join(f"{i}" for i in Device().compute_capability)) + self.arch = f"sm_{Device().arch}" + self._formatted_options.append(f"-arch={self.arch}") if self.relocatable_device_code is not None: self._formatted_options.append( f"--relocatable-device-code={_handle_boolean_option(self.relocatable_device_code)}" @@ -503,14 +504,11 @@ def _translate_program_options_to_nvvm(self, options: ProgramOptions) -> list[st """Translate ProgramOptions to NVVM-specific compilation options.""" nvvm_options = [] - if options.arch is not None: - arch = options.arch - if arch.startswith("sm_"): - arch = f"compute_{arch[3:]}" - nvvm_options.append(f"-arch={arch}") - else: - major, minor = Device().compute_capability - nvvm_options.append(f"-arch=compute_{major}{minor}") + assert options.arch is not None + arch = options.arch + if arch.startswith("sm_"): + arch = f"compute_{arch[3:]}" + nvvm_options.append(f"-arch={arch}") if options.debug: nvvm_options.append("-g") if options.device_code_optimize is False: diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py index effa1f0cfd..a2035ca391 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -22,7 +22,7 @@ def _is_nvvm_available(): _get_nvvm_module() return True - except ImportError: + except RuntimeError: return False From c5993dcc12d82267f0938251d86810a957b41b9d Mon Sep 17 00:00:00 2001 From: abhilash1910 Date: Tue, 16 Sep 2025 06:30:49 +0000 Subject: [PATCH 26/42] update test --- cuda_core/tests/test_program.py | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py index a2035ca391..a31650572a 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -40,17 +40,38 @@ def _is_nvvm_available(): _libnvvm_version = None _libnvvm_version_attempted = False -precheck_nvvm_ir = """ -target triple = "nvptx64-nvidia-cuda" +precheck_nvvm_ir = """target triple 
= "nvptx64-unknown-cuda" target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" -define void @dummy_kernel() { +define i32 @ave(i32 %a, i32 %b) {{ +entry: + %add = add nsw i32 %a, %b + %div = sdiv i32 %add, 2 + ret i32 %div +}} + +define void @simple(i32* %data) {{ entry: + %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() + %1 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() + %mul = mul i32 %0, %1 + %2 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() + %add = add i32 %mul, %2 + %call = call i32 @ave(i32 %add, i32 %add) + %idxprom = sext i32 %add to i64 + store i32 %call, i32* %data, align 4 ret void -} +}} -!nvvm.annotations = !{!0} -!0 = !{void ()* @dummy_kernel, !"kernel", i32 1} +declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() nounwind readnone +declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x() nounwind readnone +declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() nounwind readnone + +!nvvm.annotations = !{{!0}} +!0 = !{{void (i32*)* @simple, !"kernel", i32 1}} + +!nvvmir.version = !{{!1}} +!1 = !{{i32 {major}, i32 0, i32 {debug_major}, i32 0}} """ # noqa: E501 From 5d5b1d3a78483907696cd046ef8be2a7c0555ffd Mon Sep 17 00:00:00 2001 From: abhilash1910 Date: Tue, 16 Sep 2025 07:02:46 +0000 Subject: [PATCH 27/42] use exception manager --- cuda_core/tests/test_program.py | 34 +++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py index a31650572a..8bd7d778b2 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -10,11 +10,40 @@ from cuda.core.experimental._module import Kernel, ObjectCode from cuda.core.experimental._program import Program, ProgramOptions from cuda.core.experimental._utils.cuda_utils import driver, handle_return +from contextlib import contextmanager cuda_driver_version = handle_return(driver.cuDriverGetVersion()) is_culink_backend = _linker._decide_nvjitlink_or_driver() +@contextmanager +def _nvvm_exception_manager(nvvm, program_handle): + """ + Taken from _linker.py + """ + try: + yield + except Exception as e: + error_log = "" + try: + # Try to get the NVVM program log + logsize = nvvm.get_program_log_size(program_handle) + if logsize > 1: + log = bytearray(logsize) + nvvm.get_program_log(program_handle, log) + error_log = log.decode("utf-8", errors="backslashreplace") + except Exception: + # If we can't get the log, continue without it + pass + + # Starting Python 3.11 we could also use Exception.add_note() for the same purpose, but + # unfortunately we are still supporting Python 3.9/3.10... 
+ # Append the NVVM program log to the original exception message + if error_log: + e.args = (e.args[0] + f"\nNVVM program log: {error_log}", *e.args[1:]) + raise e + + def _is_nvvm_available(): """Check if NVVM is available.""" try: @@ -109,8 +138,9 @@ def _get_libnvvm_version_for_tests(): nvvm.add_module_to_program(program, precheck_ir_bytes, len(precheck_ir_bytes), "precheck.ll") options = ["-arch=compute_90"] - nvvm.verify_program(program, len(options), options) - nvvm.compile_program(program, len(options), options) + with _nvvm_exception_manager(nvvm, program): + nvvm.verify_program(program, len(options), options) + nvvm.compile_program(program, len(options), options) ptx_size = nvvm.get_compiled_result_size(program) ptx_data = bytearray(ptx_size) From f6b55281f61e511d1b7976219b8c008970ff45aa Mon Sep 17 00:00:00 2001 From: abhilash1910 Date: Tue, 16 Sep 2025 07:09:25 +0000 Subject: [PATCH 28/42] format --- cuda_core/tests/test_program.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py index 8bd7d778b2..fb6c0b7d56 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -3,14 +3,14 @@ # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE import warnings +from contextlib import contextmanager import pytest +from cuda.core.experimental._utils.cuda_utils import driver, handle_return from cuda.core.experimental import _linker from cuda.core.experimental._module import Kernel, ObjectCode from cuda.core.experimental._program import Program, ProgramOptions -from cuda.core.experimental._utils.cuda_utils import driver, handle_return -from contextlib import contextmanager cuda_driver_version = handle_return(driver.cuDriverGetVersion()) is_culink_backend = _linker._decide_nvjitlink_or_driver() From c7fad0a8fe6a8029e2b23caac47c8288b668dc19 Mon Sep 17 00:00:00 2001 From: abhilash1910 Date: Tue, 16 Sep 2025 09:18:30 +0000 Subject: [PATCH 29/42] format ruff --- cuda_core/tests/test_program.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py index fb6c0b7d56..08b2ad3486 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -33,8 +33,7 @@ def _nvvm_exception_manager(nvvm, program_handle): nvvm.get_program_log(program_handle, log) error_log = log.decode("utf-8", errors="backslashreplace") except Exception: - # If we can't get the log, continue without it - pass + error_log = "" # Starting Python 3.11 we could also use Exception.add_note() for the same purpose, but # unfortunately we are still supporting Python 3.9/3.10... 
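
For reference, the log-capture idiom built up over the last few patches distills into a small standalone helper. This is a minimal sketch only, assuming cuda-bindings >= 12.9 with libNVVM discoverable (the same preconditions _get_nvvm_module() checks); the helper name is illustrative.

    from contextlib import contextmanager

    from cuda.bindings import nvvm


    @contextmanager
    def nvvm_log_on_error(handle):
        """Re-raise NVVM verify/compile failures with the program log attached (sketch)."""
        try:
            yield
        except Exception as e:
            text = ""
            try:
                size = nvvm.get_program_log_size(handle)
                if size > 1:
                    log = bytearray(size)
                    nvvm.get_program_log(handle, log)
                    text = log.decode("utf-8", errors="backslashreplace")
            except Exception:
                text = ""
            if text and e.args:
                e.args = (f"{e.args[0]}\nNVVM program log: {text}", *e.args[1:])
            raise

Wrapping the verify_program and compile_program calls in this context manager turns an otherwise opaque NVVM error into one that carries the compiler diagnostics.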
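Taken together, the option handling added in the earlier patches amounts to a small translation from ProgramOptions-style fields onto libNVVM command-line flags. A condensed sketch of that mapping follows; the function name and defaults are illustrative, and the real translation lives in _translate_program_options_to_nvvm. Note that libNVVM spells several flags differently from NVRTC: the architecture is compute_XY rather than sm_XY, and the floating-point controls take numeric 0/1 values.

    def nvvm_flags(arch="sm_90", debug=False, optimize=True,
                   ftz=None, prec_sqrt=None, prec_div=None, fma=None, lto=False):
        # Map ProgramOptions-style fields onto libNVVM flags.
        flags = [f"-arch=compute_{arch[3:]}" if arch.startswith("sm_") else f"-arch={arch}"]
        if debug:
            flags.append("-g")
        flags.append("-opt=3" if optimize else "-opt=0")
        for name, value in (("ftz", ftz), ("prec-sqrt", prec_sqrt),
                            ("prec-div", prec_div), ("fma", fma)):
            if value is not None:
                flags.append(f"-{name}={1 if value else 0}")
        if lto:
            flags.append("-gen-lto")  # emit LTO IR instead of PTX
        return flags

For example, nvvm_flags(arch="sm_100", ftz=True, fma=True) yields ['-arch=compute_100', '-opt=3', '-ftz=1', '-fma=1'].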
From 2e6e02be8e7e0827f2253ca7bc48cc6db809ee73 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 16 Sep 2025 09:30:24 +0000 Subject: [PATCH 30/42] [pre-commit.ci] auto code formatting --- cuda_core/tests/test_program.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py index 08b2ad3486..7c5586f48f 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -6,11 +6,11 @@ from contextlib import contextmanager import pytest -from cuda.core.experimental._utils.cuda_utils import driver, handle_return from cuda.core.experimental import _linker from cuda.core.experimental._module import Kernel, ObjectCode from cuda.core.experimental._program import Program, ProgramOptions +from cuda.core.experimental._utils.cuda_utils import driver, handle_return cuda_driver_version = handle_return(driver.cuDriverGetVersion()) is_culink_backend = _linker._decide_nvjitlink_or_driver() From 680d790af7fb2e332c261b36d67a00919768906f Mon Sep 17 00:00:00 2001 From: abhilash1910 Date: Tue, 16 Sep 2025 09:52:19 +0000 Subject: [PATCH 31/42] add release notes --- cuda_core/docs/source/release/0.X.Y-notes.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cuda_core/docs/source/release/0.X.Y-notes.rst b/cuda_core/docs/source/release/0.X.Y-notes.rst index 8024a14f62..c549769807 100644 --- a/cuda_core/docs/source/release/0.X.Y-notes.rst +++ b/cuda_core/docs/source/release/0.X.Y-notes.rst @@ -28,6 +28,8 @@ New features - Added :attr:`Device.arch` property that returns the compute capability as a string (e.g., '75' for CC 7.5), providing a convenient alternative to manually concatenating the compute capability tuple. - CUDA 13.x testing support through new ``test-cu13`` dependency group. +- Added `NVVM IR` support for cuda_core frontend, which would allow cuda_python to take `NVVM IR` as an input format. New tests are added inside + ```test_program.py```. New examples From c55fa59ead57ebd184197e97f50bb9ba5b306aea Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 16 Sep 2025 09:53:24 +0000 Subject: [PATCH 32/42] [pre-commit.ci] auto code formatting --- cuda_core/docs/source/release/0.X.Y-notes.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda_core/docs/source/release/0.X.Y-notes.rst b/cuda_core/docs/source/release/0.X.Y-notes.rst index c549769807..5cbe0bb7a0 100644 --- a/cuda_core/docs/source/release/0.X.Y-notes.rst +++ b/cuda_core/docs/source/release/0.X.Y-notes.rst @@ -28,7 +28,7 @@ New features - Added :attr:`Device.arch` property that returns the compute capability as a string (e.g., '75' for CC 7.5), providing a convenient alternative to manually concatenating the compute capability tuple. - CUDA 13.x testing support through new ``test-cu13`` dependency group. -- Added `NVVM IR` support for cuda_core frontend, which would allow cuda_python to take `NVVM IR` as an input format. New tests are added inside +- Added `NVVM IR` support for cuda_core frontend, which would allow cuda_python to take `NVVM IR` as an input format. New tests are added inside ```test_program.py```. 
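
In end-user terms, the feature described in that release note looks roughly like the following. This is a sketch assuming a CUDA-capable device and libNVVM available; the IR module and kernel name mirror the trivial ones used by the tests in this series.

    from cuda.bindings import nvvm
    from cuda.core.experimental import Device, Program

    Device().set_current()

    # Ask libNVVM which IR version it expects and stamp it into the module
    # metadata, as the nvvm_ir test fixture does.
    major, minor, debug_major, debug_minor = nvvm.ir_version()

    nvvm_ir = f"""target triple = "nvptx64-unknown-cuda"
    target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"

    define void @dummy_kernel() {{
    entry:
      ret void
    }}

    !nvvm.annotations = !{{!0}}
    !0 = !{{void ()* @dummy_kernel, !"kernel", i32 1}}

    !nvvmir.version = !{{!1}}
    !1 = !{{i32 {major}, i32 {minor}, i32 {debug_major}, i32 {debug_minor}}}
    """

    program = Program(nvvm_ir, "nvvm")       # arch defaults to the current device
    ptx = program.compile("ptx")             # ObjectCode wrapping the generated PTX
    kernel = ptx.get_kernel("dummy_kernel")  # retrieve the kernel from the loaded module
    program.close()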
From 2cbee7f5242e204406cd243f4a813a0812d6bab4 Mon Sep 17 00:00:00 2001 From: abhilash1910 Date: Tue, 16 Sep 2025 09:58:13 +0000 Subject: [PATCH 33/42] rectify quotes --- cuda_core/docs/source/release/0.X.Y-notes.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda_core/docs/source/release/0.X.Y-notes.rst b/cuda_core/docs/source/release/0.X.Y-notes.rst index 5cbe0bb7a0..b8b02bae70 100644 --- a/cuda_core/docs/source/release/0.X.Y-notes.rst +++ b/cuda_core/docs/source/release/0.X.Y-notes.rst @@ -29,7 +29,7 @@ New features - Added :attr:`Device.arch` property that returns the compute capability as a string (e.g., '75' for CC 7.5), providing a convenient alternative to manually concatenating the compute capability tuple. - CUDA 13.x testing support through new ``test-cu13`` dependency group. - Added `NVVM IR` support for cuda_core frontend, which would allow cuda_python to take `NVVM IR` as an input format. New tests are added inside - ```test_program.py```. + ``test_program.py``. New examples From 63e8d57016ed5fee1c2f651c2bd62e4cef1d7a4b Mon Sep 17 00:00:00 2001 From: abhilash1910 Date: Tue, 16 Sep 2025 11:19:50 +0000 Subject: [PATCH 34/42] refix format --- cuda_core/docs/source/release/0.X.Y-notes.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda_core/docs/source/release/0.X.Y-notes.rst b/cuda_core/docs/source/release/0.X.Y-notes.rst index b8b02bae70..c981ca01fe 100644 --- a/cuda_core/docs/source/release/0.X.Y-notes.rst +++ b/cuda_core/docs/source/release/0.X.Y-notes.rst @@ -28,7 +28,7 @@ New features - Added :attr:`Device.arch` property that returns the compute capability as a string (e.g., '75' for CC 7.5), providing a convenient alternative to manually concatenating the compute capability tuple. - CUDA 13.x testing support through new ``test-cu13`` dependency group. -- Added `NVVM IR` support for cuda_core frontend, which would allow cuda_python to take `NVVM IR` as an input format. New tests are added inside +- Added NVVM IR support for cuda_core frontend, which would allow cuda_python to take NVVM IR as an input format. New tests are added inside ``test_program.py``. 
From 94c2e5646c9787f6fc3e048e2e6d63222c9efc85 Mon Sep 17 00:00:00 2001 From: abhilash1910 Date: Wed, 17 Sep 2025 08:35:30 +0000 Subject: [PATCH 35/42] refresh --- cuda_core/cuda/core/experimental/_program.py | 30 ++++++++- cuda_core/docs/source/release/0.X.Y-notes.rst | 4 +- cuda_core/tests/test_program.py | 61 ++----------------- 3 files changed, 35 insertions(+), 60 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py index 7c28db21cc..cabcfffe40 100644 --- a/cuda_core/cuda/core/experimental/_program.py +++ b/cuda_core/cuda/core/experimental/_program.py @@ -6,6 +6,7 @@ import weakref from dataclasses import dataclass +from contextlib import contextmanager from typing import TYPE_CHECKING, Union from warnings import warn @@ -27,6 +28,30 @@ nvrtc, ) +@contextmanager +def _nvvm_exception_manager(self): + """ + Taken from _linker.py + """ + try: + yield + except Exception as e: + error_log = "" + if hasattr(self, "_mnff"): + try: + nvvm = _get_nvvm_module() + logsize = nvvm.get_program_log_size(self._mnff.handle) + if logsize > 1: + log = bytearray(logsize) + nvvm.get_program_log(self._mnff.handle, log) + error_log = log.decode("utf-8", errors="backslashreplace") + except Exception: + error_log = "" + # Starting Python 3.11 we could also use Exception.add_note() for the same purpose, but + # unfortunately we are still supporting Python 3.9/3.10... + e.args = (e.args[0] + (f"\nNVVM program log: {error_log}" if error_log else ""), *e.args[1:]) + raise e + _nvvm_module = None _nvvm_import_attempted = False @@ -615,8 +640,9 @@ def compile(self, target_type, name_expressions=(), logs=None): if target_type == "ltoir" and "-gen-lto" not in nvvm_options: nvvm_options.append("-gen-lto") nvvm = _get_nvvm_module() - nvvm.verify_program(self._mnff.handle, len(nvvm_options), nvvm_options) - nvvm.compile_program(self._mnff.handle, len(nvvm_options), nvvm_options) + with _nvvm_exception_manager(self): + nvvm.verify_program(self._mnff.handle, len(nvvm_options), nvvm_options) + nvvm.compile_program(self._mnff.handle, len(nvvm_options), nvvm_options) size = nvvm.get_compiled_result_size(self._mnff.handle) data = bytearray(size) diff --git a/cuda_core/docs/source/release/0.X.Y-notes.rst b/cuda_core/docs/source/release/0.X.Y-notes.rst index c981ca01fe..a1295e8eaa 100644 --- a/cuda_core/docs/source/release/0.X.Y-notes.rst +++ b/cuda_core/docs/source/release/0.X.Y-notes.rst @@ -28,9 +28,7 @@ New features - Added :attr:`Device.arch` property that returns the compute capability as a string (e.g., '75' for CC 7.5), providing a convenient alternative to manually concatenating the compute capability tuple. - CUDA 13.x testing support through new ``test-cu13`` dependency group. -- Added NVVM IR support for cuda_core frontend, which would allow cuda_python to take NVVM IR as an input format. New tests are added inside - ``test_program.py``. - +- Added NVVM IR support to :class:`Program`. NVVM IR is now understood with ``code_type="nvvm"``. 
New examples ------------ diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py index 7c5586f48f..c9caa7e463 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -3,7 +3,6 @@ # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE import warnings -from contextlib import contextmanager import pytest @@ -15,34 +14,6 @@ cuda_driver_version = handle_return(driver.cuDriverGetVersion()) is_culink_backend = _linker._decide_nvjitlink_or_driver() - -@contextmanager -def _nvvm_exception_manager(nvvm, program_handle): - """ - Taken from _linker.py - """ - try: - yield - except Exception as e: - error_log = "" - try: - # Try to get the NVVM program log - logsize = nvvm.get_program_log_size(program_handle) - if logsize > 1: - log = bytearray(logsize) - nvvm.get_program_log(program_handle, log) - error_log = log.decode("utf-8", errors="backslashreplace") - except Exception: - error_log = "" - - # Starting Python 3.11 we could also use Exception.add_note() for the same purpose, but - # unfortunately we are still supporting Python 3.9/3.10... - # Append the NVVM program log to the original exception message - if error_log: - e.args = (e.args[0] + f"\nNVVM program log: {error_log}", *e.args[1:]) - raise e - - def _is_nvvm_available(): """Check if NVVM is available.""" try: @@ -71,29 +42,10 @@ def _is_nvvm_available(): precheck_nvvm_ir = """target triple = "nvptx64-unknown-cuda" target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" -define i32 @ave(i32 %a, i32 %b) {{ -entry: - %add = add nsw i32 %a, %b - %div = sdiv i32 %add, 2 - ret i32 %div -}} - -define void @simple(i32* %data) {{ -entry: - %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() - %1 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() - %mul = mul i32 %0, %1 - %2 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() - %add = add i32 %mul, %2 - %call = call i32 @ave(i32 %add, i32 %add) - %idxprom = sext i32 %add to i64 - store i32 %call, i32* %data, align 4 - ret void -}} - -declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() nounwind readnone -declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x() nounwind readnone -declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() nounwind readnone +define void @dummy_kernel() { + entry: + ret void +} !nvvm.annotations = !{{!0}} !0 = !{{void (i32*)* @simple, !"kernel", i32 1}} @@ -137,9 +89,8 @@ def _get_libnvvm_version_for_tests(): nvvm.add_module_to_program(program, precheck_ir_bytes, len(precheck_ir_bytes), "precheck.ll") options = ["-arch=compute_90"] - with _nvvm_exception_manager(nvvm, program): - nvvm.verify_program(program, len(options), options) - nvvm.compile_program(program, len(options), options) + nvvm.verify_program(program, len(options), options) + nvvm.compile_program(program, len(options), options) ptx_size = nvvm.get_compiled_result_size(program) ptx_data = bytearray(ptx_size) From 34bf2cc4f3d52bf0db3d38f4a3ccbe82f07bf902 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 17 Sep 2025 08:39:00 +0000 Subject: [PATCH 36/42] [pre-commit.ci] auto code formatting --- cuda_core/cuda/core/experimental/_program.py | 4 +++- cuda_core/tests/test_program.py | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py index cabcfffe40..20050faec0 100644 --- a/cuda_core/cuda/core/experimental/_program.py +++ 
b/cuda_core/cuda/core/experimental/_program.py @@ -5,8 +5,8 @@ from __future__ import annotations import weakref -from dataclasses import dataclass from contextlib import contextmanager +from dataclasses import dataclass from typing import TYPE_CHECKING, Union from warnings import warn @@ -28,6 +28,7 @@ nvrtc, ) + @contextmanager def _nvvm_exception_manager(self): """ @@ -52,6 +53,7 @@ def _nvvm_exception_manager(self): e.args = (e.args[0] + (f"\nNVVM program log: {error_log}" if error_log else ""), *e.args[1:]) raise e + _nvvm_module = None _nvvm_import_attempted = False diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py index c9caa7e463..9cb54e6dd9 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -14,6 +14,7 @@ cuda_driver_version = handle_return(driver.cuDriverGetVersion()) is_culink_backend = _linker._decide_nvjitlink_or_driver() + def _is_nvvm_available(): """Check if NVVM is available.""" try: From 6b130bb73151b21a9b2fcbb7c6359bb3d0657320 Mon Sep 17 00:00:00 2001 From: abhilash1910 Date: Wed, 17 Sep 2025 09:57:26 +0000 Subject: [PATCH 37/42] user major minor --- cuda_core/tests/test_program.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py index 9cb54e6dd9..5d77b4ae6f 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -155,10 +155,10 @@ def nvvm_ir(): !0 = !{{void (i32*)* @simple, !"kernel", i32 1}} !nvvmir.version = !{{!1}} -!1 = !{{i32 {major}, i32 0, i32 {debug_major}, i32 0}} +!1 = !{{i32 {major}, i32 {minor}, i32 {debug_major}, i32 {debug_minor}}} """ # noqa: E501 - return nvvm_ir_template.format(major=major, debug_major=debug_major) + return nvvm_ir_template.format(major=major, minor=minor, debug_major=debug_major, debug_minor=debug_minor) except Exception: return """target triple = "nvptx64-unknown-cuda" target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" From d96c848e8f187f2c334d107716b51dc312f9173e Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Wed, 17 Sep 2025 15:58:58 -0400 Subject: [PATCH 38/42] fix test --- cuda_core/tests/test_program.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py index 5d77b4ae6f..c0257b5b19 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -52,7 +52,7 @@ def _is_nvvm_available(): !0 = !{{void (i32*)* @simple, !"kernel", i32 1}} !nvvmir.version = !{{!1}} -!1 = !{{i32 {major}, i32 0, i32 {debug_major}, i32 0}} +!1 = !{{i32 {major}, i32 {minor}, i32 {debug_major}, i32 {debug_minor}}} """ # noqa: E501 @@ -86,6 +86,8 @@ def _get_libnvvm_version_for_tests(): program = nvvm.create_program() try: + major, minor, debug_major, debug_minor = nvvm.ir_version() + precheck_nvvm_ir = precheck_nvvm_ir.format(major=major, minor=minor, debug_major=debug_major, debug_minor=debug_minor) precheck_ir_bytes = precheck_nvvm_ir.encode("utf-8") nvvm.add_module_to_program(program, precheck_ir_bytes, len(precheck_ir_bytes), "precheck.ll") From 8331ecf0bf2ebf72aad9573a4338b3d44edfe74b Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Wed, 17 Sep 2025 20:15:07 +0000 Subject: [PATCH 39/42] fix IR - again --- cuda_core/tests/test_program.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cuda_core/tests/test_program.py 
b/cuda_core/tests/test_program.py index c0257b5b19..a9f560ae95 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -43,13 +43,13 @@ def _is_nvvm_available(): precheck_nvvm_ir = """target triple = "nvptx64-unknown-cuda" target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" -define void @dummy_kernel() { +define void @dummy_kernel() {{ entry: ret void -} +}} !nvvm.annotations = !{{!0}} -!0 = !{{void (i32*)* @simple, !"kernel", i32 1}} +!0 = !{{void ()* @dummy_kernel, !"kernel", i32 1}} !nvvmir.version = !{{!1}} !1 = !{{i32 {major}, i32 {minor}, i32 {debug_major}, i32 {debug_minor}}} @@ -87,6 +87,7 @@ def _get_libnvvm_version_for_tests(): program = nvvm.create_program() try: major, minor, debug_major, debug_minor = nvvm.ir_version() + global precheck_nvvm_ir precheck_nvvm_ir = precheck_nvvm_ir.format(major=major, minor=minor, debug_major=debug_major, debug_minor=debug_minor) precheck_ir_bytes = precheck_nvvm_ir.encode("utf-8") nvvm.add_module_to_program(program, precheck_ir_bytes, len(precheck_ir_bytes), "precheck.ll") @@ -103,7 +104,6 @@ def _get_libnvvm_version_for_tests(): cuda_version = get_minimal_required_cuda_ver_from_ptx_ver(ptx_version) _libnvvm_version = cuda_version return _libnvvm_version - finally: nvvm.destroy_program(program) From 2fa944ef821c1f855587e2ac7349e513c3fa4381 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Wed, 17 Sep 2025 20:18:34 +0000 Subject: [PATCH 40/42] fix nvvm option handling --- cuda_core/cuda/core/experimental/_program.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py index 20050faec0..9b28115d4e 100644 --- a/cuda_core/cuda/core/experimental/_program.py +++ b/cuda_core/cuda/core/experimental/_program.py @@ -542,15 +542,15 @@ def _translate_program_options_to_nvvm(self, options: ProgramOptions) -> list[st nvvm_options.append("-opt=0") elif options.device_code_optimize is True: nvvm_options.append("-opt=3") - + # NVVM is not consistent with NVRTC, it uses 0/1 instead... 
if options.ftz is not None: - nvvm_options.append(f"-ftz={'true' if options.ftz else 'false'}") + nvvm_options.append(f"-ftz={'1' if options.ftz else '0'}") if options.prec_sqrt is not None: - nvvm_options.append(f"-prec-sqrt={'true' if options.prec_sqrt else 'false'}") + nvvm_options.append(f"-prec-sqrt={'1' if options.prec_sqrt else '0'}") if options.prec_div is not None: - nvvm_options.append(f"-prec-div={'true' if options.prec_div else 'false'}") + nvvm_options.append(f"-prec-div={'1' if options.prec_div else '0'}") if options.fma is not None: - nvvm_options.append(f"-fma={'true' if options.fma else 'false'}") + nvvm_options.append(f"-fma={'1' if options.fma else '0'}") return nvvm_options From 4d32276c54e9015f2a8e7748bf498d3244d8db89 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Wed, 17 Sep 2025 20:22:17 +0000 Subject: [PATCH 41/42] remove redundant IR & fix linter --- cuda_core/tests/test_program.py | 47 ++++++--------------------------- 1 file changed, 8 insertions(+), 39 deletions(-) diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py index a9f560ae95..a842b293eb 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -88,7 +88,9 @@ def _get_libnvvm_version_for_tests(): try: major, minor, debug_major, debug_minor = nvvm.ir_version() global precheck_nvvm_ir - precheck_nvvm_ir = precheck_nvvm_ir.format(major=major, minor=minor, debug_major=debug_major, debug_minor=debug_minor) + precheck_nvvm_ir = precheck_nvvm_ir.format( + major=major, minor=minor, debug_major=debug_major, debug_minor=debug_minor + ) precheck_ir_bytes = precheck_nvvm_ir.encode("utf-8") nvvm.add_module_to_program(program, precheck_ir_bytes, len(precheck_ir_bytes), "precheck.ll") @@ -120,13 +122,12 @@ def nvvm_ir(): fallback assumes no version metadata will be present in the input nvvm ir """ - try: - from cuda.core.experimental._program import _get_nvvm_module + from cuda.core.experimental._program import _get_nvvm_module - nvvm = _get_nvvm_module() - major, minor, debug_major, debug_minor = nvvm.ir_version() + nvvm = _get_nvvm_module() + major, minor, debug_major, debug_minor = nvvm.ir_version() - nvvm_ir_template = """target triple = "nvptx64-unknown-cuda" + nvvm_ir_template = """target triple = "nvptx64-unknown-cuda" target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" define i32 @ave(i32 %a, i32 %b) {{ @@ -159,39 +160,7 @@ def nvvm_ir(): !nvvmir.version = !{{!1}} !1 = !{{i32 {major}, i32 {minor}, i32 {debug_major}, i32 {debug_minor}}} """ # noqa: E501 - - return nvvm_ir_template.format(major=major, minor=minor, debug_major=debug_major, debug_minor=debug_minor) - except Exception: - return """target triple = "nvptx64-unknown-cuda" -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" - -define i32 @ave(i32 %a, i32 %b) { -entry: - %add = add nsw i32 %a, %b - %div = sdiv i32 %add, 2 - ret i32 %div -} - -define void @simple(i32* %data) { -entry: - %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() - %1 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() - %mul = mul i32 %0, %1 - %2 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() - %add = add i32 %mul, %2 - %call = call i32 @ave(i32 %add, i32 %add) - %idxprom = sext i32 %add to i64 - store i32 %call, i32* %data, align 4 - ret void -} - -declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() nounwind readnone 
-declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x() nounwind readnone -declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() nounwind readnone - -!nvvm.annotations = !{!0} -!0 = !{void (i32*)* @simple, !"kernel", i32 1} -""" # noqa: E501 + return nvvm_ir_template.format(major=major, minor=minor, debug_major=debug_major, debug_minor=debug_minor) @pytest.fixture(scope="module") From e5b5ea4c7ef2d35ca13d60deb698f3a110a266dd Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Wed, 17 Sep 2025 20:31:59 +0000 Subject: [PATCH 42/42] avoid extra copy + ensure compiled objcode loadable --- cuda_core/cuda/core/experimental/_module.py | 6 +++--- cuda_core/cuda/core/experimental/_program.py | 3 +-- .../cuda/core/experimental/_utils/clear_error_support.py | 6 +++--- cuda_core/tests/test_program.py | 4 +++- 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_module.py b/cuda_core/cuda/core/experimental/_module.py index c659a8d782..71293be4d1 100644 --- a/cuda_core/cuda/core/experimental/_module.py +++ b/cuda_core/cuda/core/experimental/_module.py @@ -11,7 +11,7 @@ from cuda.core.experimental._stream import Stream from cuda.core.experimental._utils.clear_error_support import ( assert_type, - assert_type_str_or_bytes, + assert_type_str_or_bytes_like, raise_code_path_meant_to_be_unreachable, ) from cuda.core.experimental._utils.cuda_utils import driver, get_binding_version, handle_return, precondition @@ -615,14 +615,14 @@ def _lazy_load_module(self, *args, **kwargs): if self._handle is not None: return module = self._module - assert_type_str_or_bytes(module) + assert_type_str_or_bytes_like(module) if isinstance(module, str): if self._backend_version == "new": self._handle = handle_return(self._loader["file"](module.encode(), [], [], 0, [], [], 0)) else: # "old" backend self._handle = handle_return(self._loader["file"](module.encode())) return - if isinstance(module, bytes): + if isinstance(module, (bytes, bytearray)): if self._backend_version == "new": self._handle = handle_return(self._loader["data"](module, [], [], 0, [], [], 0)) else: # "old" backend diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py index 9b28115d4e..dee6f001e7 100644 --- a/cuda_core/cuda/core/experimental/_program.py +++ b/cuda_core/cuda/core/experimental/_program.py @@ -657,8 +657,7 @@ def compile(self, target_type, name_expressions=(), logs=None): nvvm.get_program_log(self._mnff.handle, log) logs.write(log.decode("utf-8", errors="backslashreplace")) - data_bytes = bytes(data) - return ObjectCode._init(data_bytes, target_type, name=self._options.name) + return ObjectCode._init(data, target_type, name=self._options.name) supported_backends = ("nvJitLink", "driver") if self._backend not in supported_backends: diff --git a/cuda_core/cuda/core/experimental/_utils/clear_error_support.py b/cuda_core/cuda/core/experimental/_utils/clear_error_support.py index b13a3d6b02..0410e7aa2f 100644 --- a/cuda_core/cuda/core/experimental/_utils/clear_error_support.py +++ b/cuda_core/cuda/core/experimental/_utils/clear_error_support.py @@ -9,10 +9,10 @@ def assert_type(obj, expected_type): raise TypeError(f"Expected type {expected_type.__name__}, but got {type(obj).__name__}") -def assert_type_str_or_bytes(obj): +def assert_type_str_or_bytes_like(obj): """Ensure obj is of type str or bytes, else raise AssertionError with a clear message.""" - if not isinstance(obj, (str, bytes)): - raise TypeError(f"Expected type str or bytes, but got {type(obj).__name__}") + if not 
isinstance(obj, (str, bytes, bytearray)): + raise TypeError(f"Expected type str or bytes or bytearray, but got {type(obj).__name__}") def raise_code_path_meant_to_be_unreachable(): diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py index a842b293eb..d30b845c20 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -312,11 +312,13 @@ def test_nvvm_deferred_import(): @nvvm_available -def test_nvvm_program_creation(nvvm_ir): +def test_nvvm_program_creation_compilation(nvvm_ir): """Test basic NVVM program creation""" program = Program(nvvm_ir, "nvvm") assert program.backend == "NVVM" assert program.handle is not None + obj = program.compile("ptx") + ker = obj.get_kernel("simple") # noqa: F841 program.close()
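
Because libNVVM exposes no direct version query (nvbugs 5312315, noted in patch 22), the tests settle on probing it: compile a trivial module, read the PTX ISA version of the output, and map that back onto a CUDA version. A condensed sketch of that probe, assuming cuda-bindings >= 12.9 with get_ptx_ver and get_minimal_required_cuda_ver_from_ptx_ver available in cuda.bindings.utils, and any valid NVVM module (such as the dummy kernel used throughout the series) as input:

    from cuda.bindings import nvvm
    from cuda.bindings.utils import get_minimal_required_cuda_ver_from_ptx_ver, get_ptx_ver


    def guess_libnvvm_cuda_version(ir: str) -> int:
        # Compile a trivial NVVM module and map the PTX ISA version of the
        # output back onto the minimal CUDA version that understands it.
        prog = nvvm.create_program()
        try:
            buf = ir.encode("utf-8")
            nvvm.add_module_to_program(prog, buf, len(buf), "probe.ll")
            opts = ["-arch=compute_90"]
            nvvm.verify_program(prog, len(opts), opts)
            nvvm.compile_program(prog, len(opts), opts)
            out = bytearray(nvvm.get_compiled_result_size(prog))
            nvvm.get_compiled_result(prog, out)
            ptx_ver = get_ptx_ver(out.decode("utf-8"))
            return get_minimal_required_cuda_ver_from_ptx_ver(ptx_ver)
        finally:
            nvvm.destroy_program(prog)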