From fb004b6bb4822e388d04a28856109a31c67c11ef Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 9 Dec 2025 23:11:08 +0000 Subject: [PATCH 01/15] Initial plan From c8ac3ea1f77a3c2629d8c611117b2a75c5a94c4d Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 9 Dec 2025 23:19:56 +0000 Subject: [PATCH 02/15] Add as_bytes() public API with backend-specific option preparation Co-authored-by: leofang <5534781+leofang@users.noreply.github.com> --- cuda_core/cuda/core/experimental/_program.py | 350 +++++++++++++++++-- cuda_core/tests/test_program.py | 73 ++++ 2 files changed, 398 insertions(+), 25 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py index cdef7c3be6..696d016a28 100644 --- a/cuda_core/cuda/core/experimental/_program.py +++ b/cuda_core/cuda/core/experimental/_program.py @@ -18,6 +18,7 @@ from cuda.core.experimental._module import ObjectCode from cuda.core.experimental._utils.clear_error_support import assert_type from cuda.core.experimental._utils.cuda_utils import ( + CUDAError, _handle_boolean_option, check_or_create_options, driver, @@ -422,7 +423,326 @@ def __post_init__(self): if self.numba_debug: self._formatted_options.append("--numba-debug") + def _prepare_nvrtc_options(self) -> list[bytes]: + """Prepare options for NVRTC backend. + + This method transforms the formatted options into bytes suitable for NVRTC compilation. + It validates that only NVRTC-compatible options are set and raises CUDAError for + unsupported options. + + Returns + ------- + list[bytes] + List of option strings encoded as bytes for NVRTC. + + Raises + ------ + CUDAError + If an option incompatible with NVRTC is set. 
+ """ + # NVRTC uses all the formatted options that were set in __post_init__ + # All options in _formatted_options are already NVRTC-compatible + return list(o.encode() for o in self._formatted_options) + + def _prepare_nvjitlink_options(self) -> list[bytes]: + """Prepare options for nvJitLink backend. + + This method transforms the ProgramOptions into options suitable for nvJitLink linking. + It validates that only nvJitLink-compatible options are set and raises CUDAError for + unsupported options. + + Returns + ------- + list[bytes] + List of option strings encoded as bytes for nvJitLink. + + Raises + ------ + CUDAError + If an option incompatible with nvJitLink is set. + """ + options = [] + + # Options supported by nvJitLink (subset of ProgramOptions) + # Based on LinkerOptions._init_nvjitlink() in _linker.py + + # arch is always set + assert self.arch is not None + options.append(f"-arch={self.arch}") + + if self.max_register_count is not None: + options.append(f"-maxrregcount={self.max_register_count}") + + if self.time is not None: + options.append("-time") + + if self.debug is not None and self.debug: + options.append("-g") + + if self.lineinfo is not None and self.lineinfo: + options.append("-lineinfo") + + if self.ftz is not None: + options.append(f"-ftz={'true' if self.ftz else 'false'}") + + if self.prec_div is not None: + options.append(f"-prec-div={'true' if self.prec_div else 'false'}") + + if self.prec_sqrt is not None: + options.append(f"-prec-sqrt={'true' if self.prec_sqrt else 'false'}") + + if self.fma is not None: + options.append(f"-fma={'true' if self.fma else 'false'}") + + if self.link_time_optimization is not None and self.link_time_optimization: + options.append("-lto") + + if self.ptxas_options is not None: + if isinstance(self.ptxas_options, str): + options.append(f"-Xptxas={self.ptxas_options}") + elif is_sequence(self.ptxas_options): + for opt in self.ptxas_options: + options.append(f"-Xptxas={opt}") + + if self.split_compile is not 
None: + options.append(f"-split-compile={self.split_compile}") + + # Check for unsupported options and raise error if they are set + unsupported = [] + if self.relocatable_device_code is not None: + unsupported.append("relocatable_device_code") + if self.extensible_whole_program is not None and self.extensible_whole_program: + unsupported.append("extensible_whole_program") + if self.device_code_optimize is not None: + unsupported.append("device_code_optimize") + if self.use_fast_math is not None and self.use_fast_math: + unsupported.append("use_fast_math") + if self.extra_device_vectorization is not None and self.extra_device_vectorization: + unsupported.append("extra_device_vectorization") + if self.gen_opt_lto is not None and self.gen_opt_lto: + unsupported.append("gen_opt_lto") + if self.define_macro is not None: + unsupported.append("define_macro") + if self.undefine_macro is not None: + unsupported.append("undefine_macro") + if self.include_path is not None: + unsupported.append("include_path") + if self.pre_include is not None: + unsupported.append("pre_include") + if self.no_source_include is not None and self.no_source_include: + unsupported.append("no_source_include") + if self.std is not None: + unsupported.append("std") + if self.builtin_move_forward is not None: + unsupported.append("builtin_move_forward") + if self.builtin_initializer_list is not None: + unsupported.append("builtin_initializer_list") + if self.disable_warnings is not None and self.disable_warnings: + unsupported.append("disable_warnings") + if self.restrict is not None and self.restrict: + unsupported.append("restrict") + if self.device_as_default_execution_space is not None and self.device_as_default_execution_space: + unsupported.append("device_as_default_execution_space") + if self.device_int128 is not None and self.device_int128: + unsupported.append("device_int128") + if self.optimization_info is not None: + unsupported.append("optimization_info") + if self.no_display_error_number 
is not None and self.no_display_error_number: + unsupported.append("no_display_error_number") + if self.diag_error is not None: + unsupported.append("diag_error") + if self.diag_suppress is not None: + unsupported.append("diag_suppress") + if self.diag_warn is not None: + unsupported.append("diag_warn") + if self.brief_diagnostics is not None: + unsupported.append("brief_diagnostics") + if self.fdevice_syntax_only is not None and self.fdevice_syntax_only: + unsupported.append("fdevice_syntax_only") + if self.minimal is not None and self.minimal: + unsupported.append("minimal") + if self.numba_debug is not None and self.numba_debug: + unsupported.append("numba_debug") + + if unsupported: + raise CUDAError( + f"The following options are not supported by nvJitLink backend: {', '.join(unsupported)}" + ) + + return list(o.encode() for o in options) + + def _prepare_nvvm_options(self) -> list[str]: + """Prepare options for NVVM backend. + + This method transforms the ProgramOptions into options suitable for NVVM compilation. + It validates that only NVVM-compatible options are set and raises CUDAError for + unsupported options. + + Returns + ------- + list[str] + List of option strings for NVVM (not encoded as bytes). + + Raises + ------ + CUDAError + If an option incompatible with NVVM is set. 
+ """ + options = [] + + # Options supported by NVVM + # Based on _translate_program_options_to_nvvm() method + + assert self.arch is not None + arch = self.arch + if arch.startswith("sm_"): + arch = f"compute_{arch[3:]}" + options.append(f"-arch={arch}") + + if self.debug is not None and self.debug: + options.append("-g") + + if self.device_code_optimize is False: + options.append("-opt=0") + elif self.device_code_optimize is True: + options.append("-opt=3") + + # NVVM uses 0/1 instead of true/false for boolean options + if self.ftz is not None: + options.append(f"-ftz={'1' if self.ftz else '0'}") + + if self.prec_sqrt is not None: + options.append(f"-prec-sqrt={'1' if self.prec_sqrt else '0'}") + + if self.prec_div is not None: + options.append(f"-prec-div={'1' if self.prec_div else '0'}") + + if self.fma is not None: + options.append(f"-fma={'1' if self.fma else '0'}") + + # Check for unsupported options and raise error if they are set + unsupported = [] + if self.relocatable_device_code is not None: + unsupported.append("relocatable_device_code") + if self.extensible_whole_program is not None and self.extensible_whole_program: + unsupported.append("extensible_whole_program") + if self.lineinfo is not None and self.lineinfo: + unsupported.append("lineinfo") + if self.ptxas_options is not None: + unsupported.append("ptxas_options") + if self.max_register_count is not None: + unsupported.append("max_register_count") + if self.use_fast_math is not None and self.use_fast_math: + unsupported.append("use_fast_math") + if self.extra_device_vectorization is not None and self.extra_device_vectorization: + unsupported.append("extra_device_vectorization") + if self.link_time_optimization is not None and self.link_time_optimization: + unsupported.append("link_time_optimization") + if self.gen_opt_lto is not None and self.gen_opt_lto: + unsupported.append("gen_opt_lto") + if self.define_macro is not None: + unsupported.append("define_macro") + if self.undefine_macro is not 
None: + unsupported.append("undefine_macro") + if self.include_path is not None: + unsupported.append("include_path") + if self.pre_include is not None: + unsupported.append("pre_include") + if self.no_source_include is not None and self.no_source_include: + unsupported.append("no_source_include") + if self.std is not None: + unsupported.append("std") + if self.builtin_move_forward is not None: + unsupported.append("builtin_move_forward") + if self.builtin_initializer_list is not None: + unsupported.append("builtin_initializer_list") + if self.disable_warnings is not None and self.disable_warnings: + unsupported.append("disable_warnings") + if self.restrict is not None and self.restrict: + unsupported.append("restrict") + if self.device_as_default_execution_space is not None and self.device_as_default_execution_space: + unsupported.append("device_as_default_execution_space") + if self.device_int128 is not None and self.device_int128: + unsupported.append("device_int128") + if self.optimization_info is not None: + unsupported.append("optimization_info") + if self.no_display_error_number is not None and self.no_display_error_number: + unsupported.append("no_display_error_number") + if self.diag_error is not None: + unsupported.append("diag_error") + if self.diag_suppress is not None: + unsupported.append("diag_suppress") + if self.diag_warn is not None: + unsupported.append("diag_warn") + if self.brief_diagnostics is not None: + unsupported.append("brief_diagnostics") + if self.time is not None: + unsupported.append("time") + if self.split_compile is not None: + unsupported.append("split_compile") + if self.fdevice_syntax_only is not None and self.fdevice_syntax_only: + unsupported.append("fdevice_syntax_only") + if self.minimal is not None and self.minimal: + unsupported.append("minimal") + if self.numba_debug is not None and self.numba_debug: + unsupported.append("numba_debug") + + if unsupported: + raise CUDAError( + f"The following options are not supported by 
NVVM backend: {', '.join(unsupported)}" + ) + + return options + + def as_bytes(self, backend: str) -> Union[list[bytes], list[str]]: + """Convert program options to bytes format for the specified backend. + + This method transforms the program options into a format suitable for the + specified compiler backend. Different backends may use different option names + and formats even for the same conceptual options. + + Parameters + ---------- + backend : str + The compiler backend to prepare options for. Must be one of: + - "nvrtc": NVIDIA Runtime Compilation (NVRTC) + - "nvjitlink": NVIDIA JIT Linker + - "nvvm": NVIDIA LLVM-based compiler + + Returns + ------- + Union[list[bytes], list[str]] + For "nvrtc" and "nvjitlink": list of option strings encoded as bytes. + For "nvvm": list of option strings (not encoded). + + Raises + ------ + ValueError + If an unknown backend is specified. + CUDAError + If an option incompatible with the specified backend is set. + + Examples + -------- + >>> options = ProgramOptions(arch="sm_80", debug=True) + >>> nvrtc_options = options.as_bytes("nvrtc") + >>> nvjitlink_options = options.as_bytes("nvjitlink") + >>> nvvm_options = options.as_bytes("nvvm") + """ + backend = backend.lower() + if backend == "nvrtc": + return self._prepare_nvrtc_options() + elif backend == "nvjitlink": + return self._prepare_nvjitlink_options() + elif backend == "nvvm": + return self._prepare_nvvm_options() + else: + raise ValueError( + f"Unknown backend '{backend}'. Must be one of: 'nvrtc', 'nvjitlink', 'nvvm'" + ) + def _as_bytes(self): + """Private method for backward compatibility. 
Use as_bytes('nvrtc') instead.""" # TODO: allow tuples once NVIDIA/cuda-python#72 is resolved return list(o.encode() for o in self._formatted_options) @@ -531,31 +851,11 @@ def _translate_program_options(self, options: ProgramOptions) -> LinkerOptions: ) def _translate_program_options_to_nvvm(self, options: ProgramOptions) -> list[str]: - """Translate ProgramOptions to NVVM-specific compilation options.""" - nvvm_options = [] - - assert options.arch is not None - arch = options.arch - if arch.startswith("sm_"): - arch = f"compute_{arch[3:]}" - nvvm_options.append(f"-arch={arch}") - if options.debug: - nvvm_options.append("-g") - if options.device_code_optimize is False: - nvvm_options.append("-opt=0") - elif options.device_code_optimize is True: - nvvm_options.append("-opt=3") - # NVVM is not consistent with NVRTC, it uses 0/1 instead... - if options.ftz is not None: - nvvm_options.append(f"-ftz={'1' if options.ftz else '0'}") - if options.prec_sqrt is not None: - nvvm_options.append(f"-prec-sqrt={'1' if options.prec_sqrt else '0'}") - if options.prec_div is not None: - nvvm_options.append(f"-prec-div={'1' if options.prec_div else '0'}") - if options.fma is not None: - nvvm_options.append(f"-fma={'1' if options.fma else '0'}") - - return nvvm_options + """Translate ProgramOptions to NVVM-specific compilation options. + + This method uses the new _prepare_nvvm_options private method. 
+ """ + return options._prepare_nvvm_options() def close(self): """Destroy this program.""" diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py index 8a6526fcc2..5b5092b8f6 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -411,3 +411,76 @@ def test_nvvm_program_options(init_cuda, nvvm_ir, options): assert ".visible .entry simple(" in ptx_text program.close() + + +def test_program_options_as_bytes_nvrtc(): + """Test ProgramOptions.as_bytes() for NVRTC backend""" + options = ProgramOptions(arch="sm_80", debug=True, lineinfo=True, ftz=True) + nvrtc_options = options.as_bytes("nvrtc") + + # Should return list of bytes + assert isinstance(nvrtc_options, list) + assert all(isinstance(opt, bytes) for opt in nvrtc_options) + + # Decode to check content + options_str = [opt.decode() for opt in nvrtc_options] + assert "-arch=sm_80" in options_str + assert "--device-debug" in options_str + assert "--generate-line-info" in options_str + assert "--ftz=true" in options_str + + +def test_program_options_as_bytes_nvjitlink(): + """Test ProgramOptions.as_bytes() for nvJitLink backend""" + options = ProgramOptions(arch="sm_80", debug=True, ftz=True, max_register_count=32) + nvjitlink_options = options.as_bytes("nvjitlink") + + # Should return list of bytes + assert isinstance(nvjitlink_options, list) + assert all(isinstance(opt, bytes) for opt in nvjitlink_options) + + # Decode to check content + options_str = [opt.decode() for opt in nvjitlink_options] + assert "-arch=sm_80" in options_str + assert "-g" in options_str + assert "-ftz=true" in options_str + assert "-maxrregcount=32" in options_str + + +@nvvm_available +def test_program_options_as_bytes_nvvm(): + """Test ProgramOptions.as_bytes() for NVVM backend""" + options = ProgramOptions(arch="sm_80", debug=True, ftz=True, device_code_optimize=True) + nvvm_options = options.as_bytes("nvvm") + + # Should return list of strings (not bytes for NVVM) + assert 
isinstance(nvvm_options, list) + assert all(isinstance(opt, str) for opt in nvvm_options) + + # Check content + assert "-arch=compute_80" in nvvm_options + assert "-g" in nvvm_options + assert "-ftz=1" in nvvm_options + assert "-opt=3" in nvvm_options + + +def test_program_options_as_bytes_invalid_backend(): + """Test ProgramOptions.as_bytes() with invalid backend""" + options = ProgramOptions(arch="sm_80") + with pytest.raises(ValueError, match="Unknown backend 'invalid'"): + options.as_bytes("invalid") + + +def test_program_options_as_bytes_nvjitlink_unsupported_option(): + """Test that unsupported options raise CUDAError for nvJitLink backend""" + options = ProgramOptions(arch="sm_80", std="c++17") + with pytest.raises(CUDAError, match="not supported by nvJitLink backend"): + options.as_bytes("nvjitlink") + + +@nvvm_available +def test_program_options_as_bytes_nvvm_unsupported_option(): + """Test that unsupported options raise CUDAError for NVVM backend""" + options = ProgramOptions(arch="sm_80", lineinfo=True) + with pytest.raises(CUDAError, match="not supported by NVVM backend"): + options.as_bytes("nvvm") From 7a2a36a1be56c7ae3f48617afbf16a04b251d17b Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 10 Dec 2025 02:32:00 +0000 Subject: [PATCH 03/15] Address PR feedback: remove docstrings from private methods, make as_bytes always return list[bytes], remove _as_bytes Co-authored-by: leofang <5534781+leofang@users.noreply.github.com> --- cuda_core/cuda/core/experimental/_program.py | 77 +++----------------- cuda_core/tests/test_program.py | 15 ++-- 2 files changed, 19 insertions(+), 73 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py index 696d016a28..45d27fac84 100644 --- a/cuda_core/cuda/core/experimental/_program.py +++ b/cuda_core/cuda/core/experimental/_program.py @@ -424,43 +424,11 @@ def __post_init__(self): 
self._formatted_options.append("--numba-debug") def _prepare_nvrtc_options(self) -> list[bytes]: - """Prepare options for NVRTC backend. - - This method transforms the formatted options into bytes suitable for NVRTC compilation. - It validates that only NVRTC-compatible options are set and raises CUDAError for - unsupported options. - - Returns - ------- - list[bytes] - List of option strings encoded as bytes for NVRTC. - - Raises - ------ - CUDAError - If an option incompatible with NVRTC is set. - """ # NVRTC uses all the formatted options that were set in __post_init__ # All options in _formatted_options are already NVRTC-compatible return list(o.encode() for o in self._formatted_options) def _prepare_nvjitlink_options(self) -> list[bytes]: - """Prepare options for nvJitLink backend. - - This method transforms the ProgramOptions into options suitable for nvJitLink linking. - It validates that only nvJitLink-compatible options are set and raises CUDAError for - unsupported options. - - Returns - ------- - list[bytes] - List of option strings encoded as bytes for nvJitLink. - - Raises - ------ - CUDAError - If an option incompatible with nvJitLink is set. - """ options = [] # Options supported by nvJitLink (subset of ProgramOptions) @@ -571,23 +539,7 @@ def _prepare_nvjitlink_options(self) -> list[bytes]: return list(o.encode() for o in options) - def _prepare_nvvm_options(self) -> list[str]: - """Prepare options for NVVM backend. - - This method transforms the ProgramOptions into options suitable for NVVM compilation. - It validates that only NVVM-compatible options are set and raises CUDAError for - unsupported options. - - Returns - ------- - list[str] - List of option strings for NVVM (not encoded as bytes). - - Raises - ------ - CUDAError - If an option incompatible with NVVM is set. 
- """ + def _prepare_nvvm_options(self, as_bytes: bool = True) -> Union[list[bytes], list[str]]: options = [] # Options supported by NVVM @@ -692,9 +644,12 @@ def _prepare_nvvm_options(self) -> list[str]: f"The following options are not supported by NVVM backend: {', '.join(unsupported)}" ) - return options + if as_bytes: + return list(o.encode() for o in options) + else: + return options - def as_bytes(self, backend: str) -> Union[list[bytes], list[str]]: + def as_bytes(self, backend: str) -> list[bytes]: """Convert program options to bytes format for the specified backend. This method transforms the program options into a format suitable for the @@ -711,9 +666,8 @@ def as_bytes(self, backend: str) -> Union[list[bytes], list[str]]: Returns ------- - Union[list[bytes], list[str]] - For "nvrtc" and "nvjitlink": list of option strings encoded as bytes. - For "nvvm": list of option strings (not encoded). + list[bytes] + List of option strings encoded as bytes. Raises ------ @@ -735,17 +689,12 @@ def as_bytes(self, backend: str) -> Union[list[bytes], list[str]]: elif backend == "nvjitlink": return self._prepare_nvjitlink_options() elif backend == "nvvm": - return self._prepare_nvvm_options() + return self._prepare_nvvm_options(as_bytes=True) else: raise ValueError( f"Unknown backend '{backend}'. Must be one of: 'nvrtc', 'nvjitlink', 'nvvm'" ) - def _as_bytes(self): - """Private method for backward compatibility. Use as_bytes('nvrtc') instead.""" - # TODO: allow tuples once NVIDIA/cuda-python#72 is resolved - return list(o.encode() for o in self._formatted_options) - def __repr__(self): # __TODO__ improve this return str(self._formatted_options) @@ -851,11 +800,7 @@ def _translate_program_options(self, options: ProgramOptions) -> LinkerOptions: ) def _translate_program_options_to_nvvm(self, options: ProgramOptions) -> list[str]: - """Translate ProgramOptions to NVVM-specific compilation options. - - This method uses the new _prepare_nvvm_options private method. 
- """ - return options._prepare_nvvm_options() + return options._prepare_nvvm_options(as_bytes=False) def close(self): """Destroy this program.""" @@ -909,7 +854,7 @@ def compile(self, target_type, name_expressions=(), logs=None): nvrtc.nvrtcAddNameExpression(self._mnff.handle, n.encode()), handle=self._mnff.handle, ) - options = self._options._as_bytes() + options = self._options.as_bytes("nvrtc") handle_return( nvrtc.nvrtcCompileProgram(self._mnff.handle, len(options), options), handle=self._mnff.handle, diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py index 5b5092b8f6..31897ab786 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -453,15 +453,16 @@ def test_program_options_as_bytes_nvvm(): options = ProgramOptions(arch="sm_80", debug=True, ftz=True, device_code_optimize=True) nvvm_options = options.as_bytes("nvvm") - # Should return list of strings (not bytes for NVVM) + # Should return list of bytes (same as other backends) assert isinstance(nvvm_options, list) - assert all(isinstance(opt, str) for opt in nvvm_options) + assert all(isinstance(opt, bytes) for opt in nvvm_options) - # Check content - assert "-arch=compute_80" in nvvm_options - assert "-g" in nvvm_options - assert "-ftz=1" in nvvm_options - assert "-opt=3" in nvvm_options + # Decode to check content + options_str = [opt.decode() for opt in nvvm_options] + assert "-arch=compute_80" in options_str + assert "-g" in options_str + assert "-ftz=1" in options_str + assert "-opt=3" in options_str def test_program_options_as_bytes_invalid_backend(): From 2c4a0b620c6a8024c1c43fc75923a8a87acd7cd2 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 10 Dec 2025 02:51:59 +0000 Subject: [PATCH 04/15] Remove wrapper methods and use prepare methods directly Co-authored-by: leofang <5534781+leofang@users.noreply.github.com> --- cuda_core/cuda/core/experimental/_program.py | 44 
++++++++------------ 1 file changed, 18 insertions(+), 26 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py index 45d27fac84..8c2d43e3e7 100644 --- a/cuda_core/cuda/core/experimental/_program.py +++ b/cuda_core/cuda/core/experimental/_program.py @@ -431,9 +431,6 @@ def _prepare_nvrtc_options(self) -> list[bytes]: def _prepare_nvjitlink_options(self) -> list[bytes]: options = [] - # Options supported by nvJitLink (subset of ProgramOptions) - # Based on LinkerOptions._init_nvjitlink() in _linker.py - # arch is always set assert self.arch is not None options.append(f"-arch={self.arch}") @@ -543,7 +540,6 @@ def _prepare_nvvm_options(self, as_bytes: bool = True) -> Union[list[bytes], lis options = [] # Options supported by NVVM - # Based on _translate_program_options_to_nvvm() method assert self.arch is not None arch = self.arch @@ -760,7 +756,22 @@ def __init__(self, code, code_type, options: ProgramOptions = None): elif code_type == "ptx": assert_type(code, str) self._linker = Linker( - ObjectCode._init(code.encode(), code_type), options=self._translate_program_options(options) + ObjectCode._init(code.encode(), code_type), + options=LinkerOptions( + name=options.name, + arch=options.arch, + max_register_count=options.max_register_count, + time=options.time, + debug=options.debug, + lineinfo=options.lineinfo, + ftz=options.ftz, + prec_div=options.prec_div, + prec_sqrt=options.prec_sqrt, + fma=options.fma, + link_time_optimization=options.link_time_optimization, + split_compile=options.split_compile, + ptxas_options=options.ptxas_options, + ), ) self._backend = self._linker.backend @@ -782,26 +793,6 @@ def __init__(self, code, code_type, options: ProgramOptions = None): assert code_type not in supported_code_types, f"{code_type=}" raise RuntimeError(f"Unsupported {code_type=} ({supported_code_types=})") - def _translate_program_options(self, options: ProgramOptions) -> LinkerOptions: - return 
LinkerOptions( - name=options.name, - arch=options.arch, - max_register_count=options.max_register_count, - time=options.time, - debug=options.debug, - lineinfo=options.lineinfo, - ftz=options.ftz, - prec_div=options.prec_div, - prec_sqrt=options.prec_sqrt, - fma=options.fma, - link_time_optimization=options.link_time_optimization, - split_compile=options.split_compile, - ptxas_options=options.ptxas_options, - ) - - def _translate_program_options_to_nvvm(self, options: ProgramOptions) -> list[str]: - return options._prepare_nvvm_options(as_bytes=False) - def close(self): """Destroy this program.""" if self._linker: @@ -886,7 +877,8 @@ def compile(self, target_type, name_expressions=(), logs=None): if target_type not in ("ptx", "ltoir"): raise ValueError(f'NVVM backend only supports target_type="ptx", "ltoir", got "{target_type}"') - nvvm_options = self._translate_program_options_to_nvvm(self._options) + # TODO: flip to True when NVIDIA/cuda-python#1354 is resolved and CUDA 12 is dropped + nvvm_options = self._options._prepare_nvvm_options(as_bytes=False) if target_type == "ltoir" and "-gen-lto" not in nvvm_options: nvvm_options.append("-gen-lto") nvvm = _get_nvvm_module() From d8f853c3d09717b4819d28720158411559b400c0 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 10 Dec 2025 03:29:39 +0000 Subject: [PATCH 05/15] Restore _translate_program_options method for PTX code path Co-authored-by: leofang <5534781+leofang@users.noreply.github.com> --- cuda_core/cuda/core/experimental/_program.py | 34 +++++++++++--------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py index 8c2d43e3e7..51b5312754 100644 --- a/cuda_core/cuda/core/experimental/_program.py +++ b/cuda_core/cuda/core/experimental/_program.py @@ -756,22 +756,7 @@ def __init__(self, code, code_type, options: ProgramOptions = None): elif code_type 
== "ptx": assert_type(code, str) self._linker = Linker( - ObjectCode._init(code.encode(), code_type), - options=LinkerOptions( - name=options.name, - arch=options.arch, - max_register_count=options.max_register_count, - time=options.time, - debug=options.debug, - lineinfo=options.lineinfo, - ftz=options.ftz, - prec_div=options.prec_div, - prec_sqrt=options.prec_sqrt, - fma=options.fma, - link_time_optimization=options.link_time_optimization, - split_compile=options.split_compile, - ptxas_options=options.ptxas_options, - ), + ObjectCode._init(code.encode(), code_type), options=self._translate_program_options(options) ) self._backend = self._linker.backend @@ -793,6 +778,23 @@ def __init__(self, code, code_type, options: ProgramOptions = None): assert code_type not in supported_code_types, f"{code_type=}" raise RuntimeError(f"Unsupported {code_type=} ({supported_code_types=})") + def _translate_program_options(self, options: ProgramOptions) -> LinkerOptions: + return LinkerOptions( + name=options.name, + arch=options.arch, + max_register_count=options.max_register_count, + time=options.time, + debug=options.debug, + lineinfo=options.lineinfo, + ftz=options.ftz, + prec_div=options.prec_div, + prec_sqrt=options.prec_sqrt, + fma=options.fma, + link_time_optimization=options.link_time_optimization, + split_compile=options.split_compile, + ptxas_options=options.ptxas_options, + ) + def close(self): """Destroy this program.""" if self._linker: From 473e2b9d74987847b437bdc0c6b8155f4a71ad4b Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Tue, 9 Dec 2025 22:32:02 -0500 Subject: [PATCH 06/15] remove redundant docstrings --- cuda_core/cuda/core/experimental/_program.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py index 51b5312754..b64dbdff3a 100644 --- a/cuda_core/cuda/core/experimental/_program.py +++ b/cuda_core/cuda/core/experimental/_program.py @@ -676,8 +676,6 @@ def 
as_bytes(self, backend: str) -> list[bytes]: -------- >>> options = ProgramOptions(arch="sm_80", debug=True) >>> nvrtc_options = options.as_bytes("nvrtc") - >>> nvjitlink_options = options.as_bytes("nvjitlink") - >>> nvvm_options = options.as_bytes("nvvm") """ backend = backend.lower() if backend == "nvrtc": From 79d25088c249b88d04243c943852f9695ee5b4d0 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 10 Dec 2025 03:37:19 +0000 Subject: [PATCH 07/15] Remove blank lines in private methods to make code blocks more compact Co-authored-by: leofang <5534781+leofang@users.noreply.github.com> --- cuda_core/cuda/core/experimental/_program.py | 25 -------------------- 1 file changed, 25 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py index b64dbdff3a..2c8dfde4ad 100644 --- a/cuda_core/cuda/core/experimental/_program.py +++ b/cuda_core/cuda/core/experimental/_program.py @@ -430,48 +430,35 @@ def _prepare_nvrtc_options(self) -> list[bytes]: def _prepare_nvjitlink_options(self) -> list[bytes]: options = [] - # arch is always set assert self.arch is not None options.append(f"-arch={self.arch}") - if self.max_register_count is not None: options.append(f"-maxrregcount={self.max_register_count}") - if self.time is not None: options.append("-time") - if self.debug is not None and self.debug: options.append("-g") - if self.lineinfo is not None and self.lineinfo: options.append("-lineinfo") - if self.ftz is not None: options.append(f"-ftz={'true' if self.ftz else 'false'}") - if self.prec_div is not None: options.append(f"-prec-div={'true' if self.prec_div else 'false'}") - if self.prec_sqrt is not None: options.append(f"-prec-sqrt={'true' if self.prec_sqrt else 'false'}") - if self.fma is not None: options.append(f"-fma={'true' if self.fma else 'false'}") - if self.link_time_optimization is not None and self.link_time_optimization: 
options.append("-lto") - if self.ptxas_options is not None: if isinstance(self.ptxas_options, str): options.append(f"-Xptxas={self.ptxas_options}") elif is_sequence(self.ptxas_options): for opt in self.ptxas_options: options.append(f"-Xptxas={opt}") - if self.split_compile is not None: options.append(f"-split-compile={self.split_compile}") - # Check for unsupported options and raise error if they are set unsupported = [] if self.relocatable_device_code is not None: @@ -528,7 +515,6 @@ def _prepare_nvjitlink_options(self) -> list[bytes]: unsupported.append("minimal") if self.numba_debug is not None and self.numba_debug: unsupported.append("numba_debug") - if unsupported: raise CUDAError( f"The following options are not supported by nvJitLink backend: {', '.join(unsupported)}" @@ -538,36 +524,27 @@ def _prepare_nvjitlink_options(self) -> list[bytes]: def _prepare_nvvm_options(self, as_bytes: bool = True) -> Union[list[bytes], list[str]]: options = [] - # Options supported by NVVM - assert self.arch is not None arch = self.arch if arch.startswith("sm_"): arch = f"compute_{arch[3:]}" options.append(f"-arch={arch}") - if self.debug is not None and self.debug: options.append("-g") - if self.device_code_optimize is False: options.append("-opt=0") elif self.device_code_optimize is True: options.append("-opt=3") - # NVVM uses 0/1 instead of true/false for boolean options if self.ftz is not None: options.append(f"-ftz={'1' if self.ftz else '0'}") - if self.prec_sqrt is not None: options.append(f"-prec-sqrt={'1' if self.prec_sqrt else '0'}") - if self.prec_div is not None: options.append(f"-prec-div={'1' if self.prec_div else '0'}") - if self.fma is not None: options.append(f"-fma={'1' if self.fma else '0'}") - # Check for unsupported options and raise error if they are set unsupported = [] if self.relocatable_device_code is not None: @@ -634,12 +611,10 @@ def _prepare_nvvm_options(self, as_bytes: bool = True) -> Union[list[bytes], lis unsupported.append("minimal") if 
self.numba_debug is not None and self.numba_debug: unsupported.append("numba_debug") - if unsupported: raise CUDAError( f"The following options are not supported by NVVM backend: {', '.join(unsupported)}" ) - if as_bytes: return list(o.encode() for o in options) else: From 3f658b5b498f7bbb76be4e7e055a68e53debdbbd Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Tue, 9 Dec 2025 22:49:37 -0500 Subject: [PATCH 08/15] nits --- cuda_core/cuda/core/experimental/_program.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py index 2c8dfde4ad..51ae07d57c 100644 --- a/cuda_core/cuda/core/experimental/_program.py +++ b/cuda_core/cuda/core/experimental/_program.py @@ -430,6 +430,7 @@ def _prepare_nvrtc_options(self) -> list[bytes]: def _prepare_nvjitlink_options(self) -> list[bytes]: options = [] + # arch is always set assert self.arch is not None options.append(f"-arch={self.arch}") @@ -459,6 +460,7 @@ def _prepare_nvjitlink_options(self) -> list[bytes]: options.append(f"-Xptxas={opt}") if self.split_compile is not None: options.append(f"-split-compile={self.split_compile}") + # Check for unsupported options and raise error if they are set unsupported = [] if self.relocatable_device_code is not None: @@ -524,6 +526,7 @@ def _prepare_nvjitlink_options(self) -> list[bytes]: def _prepare_nvvm_options(self, as_bytes: bool = True) -> Union[list[bytes], list[str]]: options = [] + # Options supported by NVVM assert self.arch is not None arch = self.arch @@ -545,6 +548,7 @@ def _prepare_nvvm_options(self, as_bytes: bool = True) -> Union[list[bytes], lis options.append(f"-prec-div={'1' if self.prec_div else '0'}") if self.fma is not None: options.append(f"-fma={'1' if self.fma else '0'}") + # Check for unsupported options and raise error if they are set unsupported = [] if self.relocatable_device_code is not None: @@ -615,6 +619,7 @@ def _prepare_nvvm_options(self, 
as_bytes: bool = True) -> Union[list[bytes], lis raise CUDAError( f"The following options are not supported by NVVM backend: {', '.join(unsupported)}" ) + if as_bytes: return list(o.encode() for o in options) else: @@ -630,10 +635,8 @@ def as_bytes(self, backend: str) -> list[bytes]: Parameters ---------- backend : str - The compiler backend to prepare options for. Must be one of: - - "nvrtc": NVIDIA Runtime Compilation (NVRTC) - - "nvjitlink": NVIDIA JIT Linker - - "nvvm": NVIDIA LLVM-based compiler + The compiler backend to prepare options for. Must be either "nvrtc", "nvjitlink", + or "nvvm". Returns ------- From 7b1ecbb621e9b511e2d95438ef4a66542d8f08ec Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 10 Dec 2025 03:55:43 +0000 Subject: [PATCH 09/15] [pre-commit.ci] auto code formatting --- cuda_core/cuda/core/experimental/_program.py | 24 ++++++++------------ cuda_core/tests/test_program.py | 12 +++++----- 2 files changed, 15 insertions(+), 21 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py index 51ae07d57c..599ce362c4 100644 --- a/cuda_core/cuda/core/experimental/_program.py +++ b/cuda_core/cuda/core/experimental/_program.py @@ -518,10 +518,8 @@ def _prepare_nvjitlink_options(self) -> list[bytes]: if self.numba_debug is not None and self.numba_debug: unsupported.append("numba_debug") if unsupported: - raise CUDAError( - f"The following options are not supported by nvJitLink backend: {', '.join(unsupported)}" - ) - + raise CUDAError(f"The following options are not supported by nvJitLink backend: {', '.join(unsupported)}") + return list(o.encode() for o in options) def _prepare_nvvm_options(self, as_bytes: bool = True) -> Union[list[bytes], list[str]]: @@ -616,9 +614,7 @@ def _prepare_nvvm_options(self, as_bytes: bool = True) -> Union[list[bytes], lis if self.numba_debug is not None and self.numba_debug: 
unsupported.append("numba_debug") if unsupported: - raise CUDAError( - f"The following options are not supported by NVVM backend: {', '.join(unsupported)}" - ) + raise CUDAError(f"The following options are not supported by NVVM backend: {', '.join(unsupported)}") if as_bytes: return list(o.encode() for o in options) @@ -627,29 +623,29 @@ def _prepare_nvvm_options(self, as_bytes: bool = True) -> Union[list[bytes], lis def as_bytes(self, backend: str) -> list[bytes]: """Convert program options to bytes format for the specified backend. - + This method transforms the program options into a format suitable for the specified compiler backend. Different backends may use different option names and formats even for the same conceptual options. - + Parameters ---------- backend : str The compiler backend to prepare options for. Must be either "nvrtc", "nvjitlink", or "nvvm". - + Returns ------- list[bytes] List of option strings encoded as bytes. - + Raises ------ ValueError If an unknown backend is specified. CUDAError If an option incompatible with the specified backend is set. - + Examples -------- >>> options = ProgramOptions(arch="sm_80", debug=True) @@ -663,9 +659,7 @@ def as_bytes(self, backend: str) -> list[bytes]: elif backend == "nvvm": return self._prepare_nvvm_options(as_bytes=True) else: - raise ValueError( - f"Unknown backend '{backend}'. Must be one of: 'nvrtc', 'nvjitlink', 'nvvm'" - ) + raise ValueError(f"Unknown backend '{backend}'. 
Must be one of: 'nvrtc', 'nvjitlink', 'nvvm'") def __repr__(self): # __TODO__ improve this diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py index 31897ab786..540fd94b0b 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -417,11 +417,11 @@ def test_program_options_as_bytes_nvrtc(): """Test ProgramOptions.as_bytes() for NVRTC backend""" options = ProgramOptions(arch="sm_80", debug=True, lineinfo=True, ftz=True) nvrtc_options = options.as_bytes("nvrtc") - + # Should return list of bytes assert isinstance(nvrtc_options, list) assert all(isinstance(opt, bytes) for opt in nvrtc_options) - + # Decode to check content options_str = [opt.decode() for opt in nvrtc_options] assert "-arch=sm_80" in options_str @@ -434,11 +434,11 @@ def test_program_options_as_bytes_nvjitlink(): """Test ProgramOptions.as_bytes() for nvJitLink backend""" options = ProgramOptions(arch="sm_80", debug=True, ftz=True, max_register_count=32) nvjitlink_options = options.as_bytes("nvjitlink") - + # Should return list of bytes assert isinstance(nvjitlink_options, list) assert all(isinstance(opt, bytes) for opt in nvjitlink_options) - + # Decode to check content options_str = [opt.decode() for opt in nvjitlink_options] assert "-arch=sm_80" in options_str @@ -452,11 +452,11 @@ def test_program_options_as_bytes_nvvm(): """Test ProgramOptions.as_bytes() for NVVM backend""" options = ProgramOptions(arch="sm_80", debug=True, ftz=True, device_code_optimize=True) nvvm_options = options.as_bytes("nvvm") - + # Should return list of bytes (same as other backends) assert isinstance(nvvm_options, list) assert all(isinstance(opt, bytes) for opt in nvvm_options) - + # Decode to check content options_str = [opt.decode() for opt in nvvm_options] assert "-arch=compute_80" in options_str From f2bb74ef3b61c1494540e24299d9ebe7b819df1d Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 10 Dec 2025 
15:24:22 +0000 Subject: [PATCH 10/15] Refactor: move NVRTC option building to lazy evaluation in _prepare_nvrtc_options Co-authored-by: leofang <5534781+leofang@users.noreply.github.com> --- cuda_core/cuda/core/experimental/_program.py | 119 +++++++++---------- 1 file changed, 54 insertions(+), 65 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py index 599ce362c4..28e4e12eb2 100644 --- a/cuda_core/cuda/core/experimental/_program.py +++ b/cuda_core/cuda/core/experimental/_program.py @@ -303,130 +303,120 @@ class ProgramOptions: def __post_init__(self): self._name = self.name.encode() - - self._formatted_options = [] - if self.arch is not None: - self._formatted_options.append(f"-arch={self.arch}") - else: + # Set arch to default if not provided + if self.arch is None: self.arch = f"sm_{Device().arch}" - self._formatted_options.append(f"-arch={self.arch}") + + def _prepare_nvrtc_options(self) -> list[bytes]: + # Build NVRTC-specific options + options = [] + options.append(f"-arch={self.arch}") if self.relocatable_device_code is not None: - self._formatted_options.append( - f"--relocatable-device-code={_handle_boolean_option(self.relocatable_device_code)}" - ) + options.append(f"--relocatable-device-code={_handle_boolean_option(self.relocatable_device_code)}") if self.extensible_whole_program is not None and self.extensible_whole_program: - self._formatted_options.append("--extensible-whole-program") + options.append("--extensible-whole-program") if self.debug is not None and self.debug: - self._formatted_options.append("--device-debug") + options.append("--device-debug") if self.lineinfo is not None and self.lineinfo: - self._formatted_options.append("--generate-line-info") + options.append("--generate-line-info") if self.device_code_optimize is not None and self.device_code_optimize: - self._formatted_options.append("--dopt=on") + options.append("--dopt=on") if self.ptxas_options is not None: 
opt_name = "--ptxas-options" if isinstance(self.ptxas_options, str): - self._formatted_options.append(f"{opt_name}={self.ptxas_options}") + options.append(f"{opt_name}={self.ptxas_options}") elif is_sequence(self.ptxas_options): for opt_value in self.ptxas_options: - self._formatted_options.append(f"{opt_name}={opt_value}") + options.append(f"{opt_name}={opt_value}") if self.max_register_count is not None: - self._formatted_options.append(f"--maxrregcount={self.max_register_count}") + options.append(f"--maxrregcount={self.max_register_count}") if self.ftz is not None: - self._formatted_options.append(f"--ftz={_handle_boolean_option(self.ftz)}") + options.append(f"--ftz={_handle_boolean_option(self.ftz)}") if self.prec_sqrt is not None: - self._formatted_options.append(f"--prec-sqrt={_handle_boolean_option(self.prec_sqrt)}") + options.append(f"--prec-sqrt={_handle_boolean_option(self.prec_sqrt)}") if self.prec_div is not None: - self._formatted_options.append(f"--prec-div={_handle_boolean_option(self.prec_div)}") + options.append(f"--prec-div={_handle_boolean_option(self.prec_div)}") if self.fma is not None: - self._formatted_options.append(f"--fmad={_handle_boolean_option(self.fma)}") + options.append(f"--fmad={_handle_boolean_option(self.fma)}") if self.use_fast_math is not None and self.use_fast_math: - self._formatted_options.append("--use_fast_math") + options.append("--use_fast_math") if self.extra_device_vectorization is not None and self.extra_device_vectorization: - self._formatted_options.append("--extra-device-vectorization") + options.append("--extra-device-vectorization") if self.link_time_optimization is not None and self.link_time_optimization: - self._formatted_options.append("--dlink-time-opt") + options.append("--dlink-time-opt") if self.gen_opt_lto is not None and self.gen_opt_lto: - self._formatted_options.append("--gen-opt-lto") + options.append("--gen-opt-lto") if self.define_macro is not None: - _process_define_macro(self._formatted_options, 
self.define_macro) + _process_define_macro(options, self.define_macro) if self.undefine_macro is not None: if isinstance(self.undefine_macro, str): - self._formatted_options.append(f"--undefine-macro={self.undefine_macro}") + options.append(f"--undefine-macro={self.undefine_macro}") elif is_sequence(self.undefine_macro): for macro in self.undefine_macro: - self._formatted_options.append(f"--undefine-macro={macro}") + options.append(f"--undefine-macro={macro}") if self.include_path is not None: if isinstance(self.include_path, str): - self._formatted_options.append(f"--include-path={self.include_path}") + options.append(f"--include-path={self.include_path}") elif is_sequence(self.include_path): for path in self.include_path: - self._formatted_options.append(f"--include-path={path}") + options.append(f"--include-path={path}") if self.pre_include is not None: if isinstance(self.pre_include, str): - self._formatted_options.append(f"--pre-include={self.pre_include}") + options.append(f"--pre-include={self.pre_include}") elif is_sequence(self.pre_include): for header in self.pre_include: - self._formatted_options.append(f"--pre-include={header}") - + options.append(f"--pre-include={header}") if self.no_source_include is not None and self.no_source_include: - self._formatted_options.append("--no-source-include") + options.append("--no-source-include") if self.std is not None: - self._formatted_options.append(f"--std={self.std}") + options.append(f"--std={self.std}") if self.builtin_move_forward is not None: - self._formatted_options.append( - f"--builtin-move-forward={_handle_boolean_option(self.builtin_move_forward)}" - ) + options.append(f"--builtin-move-forward={_handle_boolean_option(self.builtin_move_forward)}") if self.builtin_initializer_list is not None: - self._formatted_options.append( - f"--builtin-initializer-list={_handle_boolean_option(self.builtin_initializer_list)}" - ) + 
options.append(f"--builtin-initializer-list={_handle_boolean_option(self.builtin_initializer_list)}") if self.disable_warnings is not None and self.disable_warnings: - self._formatted_options.append("--disable-warnings") + options.append("--disable-warnings") if self.restrict is not None and self.restrict: - self._formatted_options.append("--restrict") + options.append("--restrict") if self.device_as_default_execution_space is not None and self.device_as_default_execution_space: - self._formatted_options.append("--device-as-default-execution-space") + options.append("--device-as-default-execution-space") if self.device_int128 is not None and self.device_int128: - self._formatted_options.append("--device-int128") + options.append("--device-int128") if self.optimization_info is not None: - self._formatted_options.append(f"--optimization-info={self.optimization_info}") + options.append(f"--optimization-info={self.optimization_info}") if self.no_display_error_number is not None and self.no_display_error_number: - self._formatted_options.append("--no-display-error-number") + options.append("--no-display-error-number") if self.diag_error is not None: if isinstance(self.diag_error, int): - self._formatted_options.append(f"--diag-error={self.diag_error}") + options.append(f"--diag-error={self.diag_error}") elif is_sequence(self.diag_error): for error in self.diag_error: - self._formatted_options.append(f"--diag-error={error}") + options.append(f"--diag-error={error}") if self.diag_suppress is not None: if isinstance(self.diag_suppress, int): - self._formatted_options.append(f"--diag-suppress={self.diag_suppress}") + options.append(f"--diag-suppress={self.diag_suppress}") elif is_sequence(self.diag_suppress): for suppress in self.diag_suppress: - self._formatted_options.append(f"--diag-suppress={suppress}") + options.append(f"--diag-suppress={suppress}") if self.diag_warn is not None: if isinstance(self.diag_warn, int): - 
self._formatted_options.append(f"--diag-warn={self.diag_warn}") + options.append(f"--diag-warn={self.diag_warn}") elif is_sequence(self.diag_warn): for warn in self.diag_warn: - self._formatted_options.append(f"--diag-warn={warn}") + options.append(f"--diag-warn={warn}") if self.brief_diagnostics is not None: - self._formatted_options.append(f"--brief-diagnostics={_handle_boolean_option(self.brief_diagnostics)}") + options.append(f"--brief-diagnostics={_handle_boolean_option(self.brief_diagnostics)}") if self.time is not None: - self._formatted_options.append(f"--time={self.time}") + options.append(f"--time={self.time}") if self.split_compile is not None: - self._formatted_options.append(f"--split-compile={self.split_compile}") + options.append(f"--split-compile={self.split_compile}") if self.fdevice_syntax_only is not None and self.fdevice_syntax_only: - self._formatted_options.append("--fdevice-syntax-only") + options.append("--fdevice-syntax-only") if self.minimal is not None and self.minimal: - self._formatted_options.append("--minimal") + options.append("--minimal") if self.numba_debug: - self._formatted_options.append("--numba-debug") - - def _prepare_nvrtc_options(self) -> list[bytes]: - # NVRTC uses all the formatted options that were set in __post_init__ - # All options in _formatted_options are already NVRTC-compatible - return list(o.encode() for o in self._formatted_options) + options.append("--numba-debug") + return list(o.encode() for o in options) def _prepare_nvjitlink_options(self) -> list[bytes]: options = [] @@ -662,8 +652,7 @@ def as_bytes(self, backend: str) -> list[bytes]: raise ValueError(f"Unknown backend '{backend}'. 
Must be one of: 'nvrtc', 'nvjitlink', 'nvvm'") def __repr__(self): - # __TODO__ improve this - return str(self._formatted_options) + return f"ProgramOptions(name={self.name!r}, arch={self.arch!r})" ProgramHandleT = Union["cuda.bindings.nvrtc.nvrtcProgram", LinkerHandleT] From 32fa9303990ab28df8dccda44541b48c30e279cf Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sat, 13 Dec 2025 02:10:53 +0000 Subject: [PATCH 11/15] fix & expand tests --- cuda_core/cuda/core/experimental/_program.py | 2 -- cuda_core/tests/test_program.py | 9 +++++++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py index 28e4e12eb2..d19756d5dd 100644 --- a/cuda_core/cuda/core/experimental/_program.py +++ b/cuda_core/cuda/core/experimental/_program.py @@ -553,8 +553,6 @@ def _prepare_nvvm_options(self, as_bytes: bool = True) -> Union[list[bytes], lis unsupported.append("use_fast_math") if self.extra_device_vectorization is not None and self.extra_device_vectorization: unsupported.append("extra_device_vectorization") - if self.link_time_optimization is not None and self.link_time_optimization: - unsupported.append("link_time_optimization") if self.gen_opt_lto is not None and self.gen_opt_lto: unsupported.append("gen_opt_lto") if self.define_macro is not None: diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py index 540fd94b0b..ae3d5ab559 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -220,6 +220,15 @@ def ptx_code_object(): ProgramOptions(diag_warn=1000), ProgramOptions(std="c++11", ptxas_options=["-v"]), ProgramOptions(std="c++11", ptxas_options=["-v", "-O2"]), + ProgramOptions(brief_diagnostics=True), + ProgramOptions(builtin_move_forward=False), + ProgramOptions(extensible_whole_program=True), + ProgramOptions(fdevice_syntax_only=True), + ProgramOptions(gen_opt_lto=True), + ProgramOptions(minimal=True), + 
ProgramOptions(no_source_include=True), + # TODO: Add test for pre_include once we have a suitable header in the test environment + # ProgramOptions(pre_include="cuda_runtime.h"), ], ) def test_cpp_program_with_various_options(init_cuda, options): From e388a9ea3f686a99372e47565ee033f4ff076e91 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sat, 13 Dec 2025 02:50:16 +0000 Subject: [PATCH 12/15] cover new NVRTC options --- cuda_core/cuda/core/experimental/_program.py | 75 ++++++++++++++++++++ cuda_core/tests/test_program.py | 62 ++++++++++++++++ 2 files changed, 137 insertions(+) diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py index d19756d5dd..adce5dbac5 100644 --- a/cuda_core/cuda/core/experimental/_program.py +++ b/cuda_core/cuda/core/experimental/_program.py @@ -256,6 +256,42 @@ class ProgramOptions: minimal : bool, optional Omit certain language features to reduce compile time for small programs. Default: False + no_cache : bool, optional + Disable compiler caching. + Default: False + fdevice_time_trace : str, optional + Generate time trace JSON for profiling compilation (NVRTC only). + Default: None + device_float128 : bool, optional + Allow __float128 type in device code (NVRTC only). + Default: False + frandom_seed : str, optional + Set random seed for randomized optimizations (NVRTC only). + Default: None + ofast_compile : str, optional + Fast compilation mode: "0", "min", "mid", or "max" (NVRTC only). + Default: None + pch : bool, optional + Use default precompiled header (NVRTC only, CUDA 12.8+). + Default: False + create_pch : str, optional + Create precompiled header file (NVRTC only, CUDA 12.8+). + Default: None + use_pch : str, optional + Use specific precompiled header file (NVRTC only, CUDA 12.8+). + Default: None + pch_dir : str, optional + PCH directory location (NVRTC only, CUDA 12.8+). + Default: None + pch_verbose : bool, optional + Verbose PCH output (NVRTC only, CUDA 12.8+). 
+ Default: False + pch_messages : bool, optional + Control PCH diagnostic messages (NVRTC only, CUDA 12.8+). + Default: False + instantiate_templates_in_pch : bool, optional + Control template instantiation in PCH (NVRTC only, CUDA 12.8+). + Default: False """ name: str | None = "" @@ -299,6 +335,18 @@ class ProgramOptions: split_compile: int | None = None fdevice_syntax_only: bool | None = None minimal: bool | None = None + no_cache: bool | None = None + fdevice_time_trace: str | None = None + device_float128: bool | None = None + frandom_seed: str | None = None + ofast_compile: str | None = None + pch: bool | None = None + create_pch: str | None = None + use_pch: str | None = None + pch_dir: str | None = None + pch_verbose: bool | None = None + pch_messages: bool | None = None + instantiate_templates_in_pch: bool | None = None numba_debug: bool | None = None # Custom option for Numba debugging def __post_init__(self): @@ -382,6 +430,8 @@ def _prepare_nvrtc_options(self) -> list[bytes]: options.append("--device-as-default-execution-space") if self.device_int128 is not None and self.device_int128: options.append("--device-int128") + if self.device_float128 is not None and self.device_float128: + options.append("--device-float128") if self.optimization_info is not None: options.append(f"--optimization-info={self.optimization_info}") if self.no_display_error_number is not None and self.no_display_error_number: @@ -414,6 +464,31 @@ def _prepare_nvrtc_options(self) -> list[bytes]: options.append("--fdevice-syntax-only") if self.minimal is not None and self.minimal: options.append("--minimal") + if self.no_cache is not None and self.no_cache: + options.append("--no-cache") + if self.fdevice_time_trace is not None: + options.append(f"--fdevice-time-trace={self.fdevice_time_trace}") + if self.frandom_seed is not None: + options.append(f"--frandom-seed={self.frandom_seed}") + if self.ofast_compile is not None: + options.append(f"--Ofast-compile={self.ofast_compile}") + # 
PCH options (CUDA 12.8+) + if self.pch is not None and self.pch: + options.append("--pch") + if self.create_pch is not None: + options.append(f"--create-pch={self.create_pch}") + if self.use_pch is not None: + options.append(f"--use-pch={self.use_pch}") + if self.pch_dir is not None: + options.append(f"--pch-dir={self.pch_dir}") + if self.pch_verbose is not None: + options.append(f"--pch-verbose={_handle_boolean_option(self.pch_verbose)}") + if self.pch_messages is not None: + options.append(f"--pch-messages={_handle_boolean_option(self.pch_messages)}") + if self.instantiate_templates_in_pch is not None: + options.append( + f"--instantiate-templates-in-pch={_handle_boolean_option(self.instantiate_templates_in_pch)}" + ) if self.numba_debug: options.append("--numba-debug") return list(o.encode() for o in options) diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py index ae3d5ab559..1da2372317 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -7,6 +7,7 @@ import pytest from cuda.core.experimental import _linker +from cuda.core.experimental._device import Device from cuda.core.experimental._module import Kernel, ObjectCode from cuda.core.experimental._program import Program, ProgramOptions from cuda.core.experimental._utils.cuda_utils import CUDAError, driver, handle_return @@ -229,6 +230,67 @@ def ptx_code_object(): ProgramOptions(no_source_include=True), # TODO: Add test for pre_include once we have a suitable header in the test environment # ProgramOptions(pre_include="cuda_runtime.h"), + ProgramOptions(no_cache=True), + ProgramOptions(fdevice_time_trace="trace.json"), + pytest.param( + ProgramOptions(arch="sm_100", device_float128=True), + marks=pytest.mark.skipif( + Device().compute_capability < (100, 0), + reason="device_float128 requires sm_100 or later", + ), + ), + ProgramOptions(frandom_seed="12345"), + ProgramOptions(ofast_compile="min"), + pytest.param( + ProgramOptions(pch=True), + 
marks=pytest.mark.skipif( + (_get_nvrtc_version_for_tests() or 0) < 12800, + reason="PCH requires NVRTC >= 12.8", + ), + ), + pytest.param( + ProgramOptions(create_pch="test.pch"), + marks=pytest.mark.skipif( + (_get_nvrtc_version_for_tests() or 0) < 12800, + reason="PCH requires NVRTC >= 12.8", + ), + ), + pytest.param( + ProgramOptions(use_pch="test.pch"), + marks=pytest.mark.skipif( + (_get_nvrtc_version_for_tests() or 0) < 12800, + reason="PCH requires NVRTC >= 12.8", + ), + ), + # TODO: pch_dir requires actual PCH directory to exist - needs integration test + # pytest.param( + # ProgramOptions(pch_dir="/tmp/pch"), + # marks=pytest.mark.skipif( + # (_get_nvrtc_version_for_tests() or 0) < 12800, + # reason="PCH requires NVRTC >= 12.8", + # ), + # ), + pytest.param( + ProgramOptions(pch_verbose=True), + marks=pytest.mark.skipif( + (_get_nvrtc_version_for_tests() or 0) < 12800, + reason="PCH requires NVRTC >= 12.8", + ), + ), + pytest.param( + ProgramOptions(pch_messages=False), + marks=pytest.mark.skipif( + (_get_nvrtc_version_for_tests() or 0) < 12800, + reason="PCH requires NVRTC >= 12.8", + ), + ), + pytest.param( + ProgramOptions(instantiate_templates_in_pch=True), + marks=pytest.mark.skipif( + (_get_nvrtc_version_for_tests() or 0) < 12800, + reason="PCH requires NVRTC >= 12.8", + ), + ), ], ) def test_cpp_program_with_various_options(init_cuda, options): From 15f4b6a52712c0d7af5cb51fa9dd175bf7e22705 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sat, 13 Dec 2025 04:59:53 +0000 Subject: [PATCH 13/15] fix linker options handling --- cuda_core/cuda/core/experimental/_linker.py | 150 ++++++++++++------- cuda_core/cuda/core/experimental/_program.py | 104 +------------ cuda_core/tests/test_linker.py | 33 ++++ cuda_core/tests/test_program.py | 24 --- 4 files changed, 130 insertions(+), 181 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_linker.py b/cuda_core/cuda/core/experimental/_linker.py index 5c54a88c8c..2c94fb9b02 100644 --- 
a/cuda_core/cuda/core/experimental/_linker.py +++ b/cuda_core/cuda/core/experimental/_linker.py @@ -202,74 +202,78 @@ class LinkerOptions: def __post_init__(self): _lazy_init() self._name = self.name.encode() - self.formatted_options = [] - if _nvjitlink: - self._init_nvjitlink() - else: - self._init_driver() - def _init_nvjitlink(self): + def _prepare_nvjitlink_options(self, as_bytes: bool = False) -> Union[list[bytes], list[str]]: + options = [] + if self.arch is not None: - self.formatted_options.append(f"-arch={self.arch}") + options.append(f"-arch={self.arch}") else: - self.formatted_options.append("-arch=sm_" + "".join(f"{i}" for i in Device().compute_capability)) + options.append("-arch=sm_" + "".join(f"{i}" for i in Device().compute_capability)) if self.max_register_count is not None: - self.formatted_options.append(f"-maxrregcount={self.max_register_count}") + options.append(f"-maxrregcount={self.max_register_count}") if self.time is not None: - self.formatted_options.append("-time") + options.append("-time") if self.verbose: - self.formatted_options.append("-verbose") + options.append("-verbose") if self.link_time_optimization: - self.formatted_options.append("-lto") + options.append("-lto") if self.ptx: - self.formatted_options.append("-ptx") + options.append("-ptx") if self.optimization_level is not None: - self.formatted_options.append(f"-O{self.optimization_level}") + options.append(f"-O{self.optimization_level}") if self.debug: - self.formatted_options.append("-g") + options.append("-g") if self.lineinfo: - self.formatted_options.append("-lineinfo") + options.append("-lineinfo") if self.ftz is not None: - self.formatted_options.append(f"-ftz={'true' if self.ftz else 'false'}") + options.append(f"-ftz={'true' if self.ftz else 'false'}") if self.prec_div is not None: - self.formatted_options.append(f"-prec-div={'true' if self.prec_div else 'false'}") + options.append(f"-prec-div={'true' if self.prec_div else 'false'}") if self.prec_sqrt is not None: - 
self.formatted_options.append(f"-prec-sqrt={'true' if self.prec_sqrt else 'false'}") + options.append(f"-prec-sqrt={'true' if self.prec_sqrt else 'false'}") if self.fma is not None: - self.formatted_options.append(f"-fma={'true' if self.fma else 'false'}") + options.append(f"-fma={'true' if self.fma else 'false'}") if self.kernels_used is not None: if isinstance(self.kernels_used, str): - self.formatted_options.append(f"-kernels-used={self.kernels_used}") + options.append(f"-kernels-used={self.kernels_used}") elif isinstance(self.kernels_used, list): for kernel in self.kernels_used: - self.formatted_options.append(f"-kernels-used={kernel}") + options.append(f"-kernels-used={kernel}") if self.variables_used is not None: if isinstance(self.variables_used, str): - self.formatted_options.append(f"-variables-used={self.variables_used}") + options.append(f"-variables-used={self.variables_used}") elif isinstance(self.variables_used, list): for variable in self.variables_used: - self.formatted_options.append(f"-variables-used={variable}") + options.append(f"-variables-used={variable}") if self.optimize_unused_variables is not None: - self.formatted_options.append("-optimize-unused-variables") + options.append("-optimize-unused-variables") if self.ptxas_options is not None: if isinstance(self.ptxas_options, str): - self.formatted_options.append(f"-Xptxas={self.ptxas_options}") + options.append(f"-Xptxas={self.ptxas_options}") elif is_sequence(self.ptxas_options): for opt in self.ptxas_options: - self.formatted_options.append(f"-Xptxas={opt}") + options.append(f"-Xptxas={opt}") if self.split_compile is not None: - self.formatted_options.append(f"-split-compile={self.split_compile}") + options.append(f"-split-compile={self.split_compile}") if self.split_compile_extended is not None: - self.formatted_options.append(f"-split-compile-extended={self.split_compile_extended}") + options.append(f"-split-compile-extended={self.split_compile_extended}") if self.no_cache is True: - 
self.formatted_options.append("-no-cache") + options.append("-no-cache") + + if as_bytes: + return [o.encode() for o in options] + else: + return options + + def _prepare_driver_options(self) -> tuple[list, list]: + formatted_options = [] + option_keys = [] - def _init_driver(self): - self.option_keys = [] # allocate 4 KiB each for info/error logs size = 4194304 - self.formatted_options.extend((bytearray(size), size, bytearray(size), size)) - self.option_keys.extend( + formatted_options.extend((bytearray(size), size, bytearray(size), size)) + option_keys.extend( ( _driver.CUjit_option.CU_JIT_INFO_LOG_BUFFER, _driver.CUjit_option.CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES, @@ -280,30 +284,30 @@ def _init_driver(self): if self.arch is not None: arch = self.arch.split("_")[-1].upper() - self.formatted_options.append(getattr(_driver.CUjit_target, f"CU_TARGET_COMPUTE_{arch}")) - self.option_keys.append(_driver.CUjit_option.CU_JIT_TARGET) + formatted_options.append(getattr(_driver.CUjit_target, f"CU_TARGET_COMPUTE_{arch}")) + option_keys.append(_driver.CUjit_option.CU_JIT_TARGET) if self.max_register_count is not None: - self.formatted_options.append(self.max_register_count) - self.option_keys.append(_driver.CUjit_option.CU_JIT_MAX_REGISTERS) + formatted_options.append(self.max_register_count) + option_keys.append(_driver.CUjit_option.CU_JIT_MAX_REGISTERS) if self.time is not None: raise ValueError("time option is not supported by the driver API") if self.verbose: - self.formatted_options.append(1) - self.option_keys.append(_driver.CUjit_option.CU_JIT_LOG_VERBOSE) + formatted_options.append(1) + option_keys.append(_driver.CUjit_option.CU_JIT_LOG_VERBOSE) if self.link_time_optimization: - self.formatted_options.append(1) - self.option_keys.append(_driver.CUjit_option.CU_JIT_LTO) + formatted_options.append(1) + option_keys.append(_driver.CUjit_option.CU_JIT_LTO) if self.ptx: raise ValueError("ptx option is not supported by the driver API") if self.optimization_level is not None: - 
self.formatted_options.append(self.optimization_level) - self.option_keys.append(_driver.CUjit_option.CU_JIT_OPTIMIZATION_LEVEL) + formatted_options.append(self.optimization_level) + option_keys.append(_driver.CUjit_option.CU_JIT_OPTIMIZATION_LEVEL) if self.debug: - self.formatted_options.append(1) - self.option_keys.append(_driver.CUjit_option.CU_JIT_GENERATE_DEBUG_INFO) + formatted_options.append(1) + option_keys.append(_driver.CUjit_option.CU_JIT_GENERATE_DEBUG_INFO) if self.lineinfo: - self.formatted_options.append(1) - self.option_keys.append(_driver.CUjit_option.CU_JIT_GENERATE_LINE_INFO) + formatted_options.append(1) + option_keys.append(_driver.CUjit_option.CU_JIT_GENERATE_LINE_INFO) if self.ftz is not None: warn("ftz option is deprecated in the driver API", DeprecationWarning, stacklevel=3) if self.prec_div is not None: @@ -325,8 +329,37 @@ def _init_driver(self): if self.split_compile_extended is not None: raise ValueError("split_compile_extended option is not supported by the driver API") if self.no_cache is True: - self.formatted_options.append(_driver.CUjit_cacheMode.CU_JIT_CACHE_OPTION_NONE) - self.option_keys.append(_driver.CUjit_option.CU_JIT_CACHE_MODE) + formatted_options.append(_driver.CUjit_cacheMode.CU_JIT_CACHE_OPTION_NONE) + option_keys.append(_driver.CUjit_option.CU_JIT_CACHE_MODE) + + return formatted_options, option_keys + + def as_bytes(self, backend: str = "nvjitlink") -> list[bytes]: + """Convert linker options to bytes format for the nvjitlink backend. + + Parameters + ---------- + backend : str, optional + The linker backend. Only "nvjitlink" is supported. Default is "nvjitlink". + + Returns + ------- + list[bytes] + List of option strings encoded as bytes. + + Raises + ------ + ValueError + If an unsupported backend is specified. + RuntimeError + If nvJitLink backend is not available. 
+ """ + backend = backend.lower() + if backend != "nvjitlink": + raise ValueError(f"as_bytes() only supports 'nvjitlink' backend, got '{backend}'") + if not _nvjitlink: + raise RuntimeError("nvJitLink backend is not available") + return self._prepare_nvjitlink_options(as_bytes=True) # This needs to be a free function not a method, as it's disallowed by contextmanager. @@ -369,7 +402,7 @@ class Linker: """ class _MembersNeededForFinalize: - __slots__ = ("handle", "use_nvjitlink", "const_char_keep_alive") + __slots__ = ("handle", "use_nvjitlink", "const_char_keep_alive", "formatted_options", "option_keys") def __init__(self, program_obj, handle, use_nvjitlink): self.handle = handle @@ -394,14 +427,17 @@ def __init__(self, *object_codes: ObjectCode, options: LinkerOptions = None): self._options = options = check_or_create_options(LinkerOptions, options, "Linker options") with _exception_manager(self): if _nvjitlink: - handle = _nvjitlink.create(len(options.formatted_options), options.formatted_options) + formatted_options = options._prepare_nvjitlink_options(as_bytes=False) + handle = _nvjitlink.create(len(formatted_options), formatted_options) use_nvjitlink = True else: - handle = handle_return( - _driver.cuLinkCreate(len(options.formatted_options), options.option_keys, options.formatted_options) - ) + formatted_options, option_keys = options._prepare_driver_options() + handle = handle_return(_driver.cuLinkCreate(len(formatted_options), option_keys, formatted_options)) use_nvjitlink = False self._mnff = Linker._MembersNeededForFinalize(self, handle, use_nvjitlink) + self._mnff.formatted_options = formatted_options # Store for log access + if not _nvjitlink: + self._mnff.option_keys = option_keys for code in object_codes: assert_type(code, ObjectCode) @@ -508,7 +544,7 @@ def get_error_log(self) -> str: log = bytearray(log_size) _nvjitlink.get_error_log(self._mnff.handle, log) else: - log = self._options.formatted_options[2] + log = self._mnff.formatted_options[2] 
return log.decode("utf-8", errors="backslashreplace") def get_info_log(self) -> str: @@ -524,7 +560,7 @@ def get_info_log(self) -> str: log = bytearray(log_size) _nvjitlink.get_info_log(self._mnff.handle, log) else: - log = self._options.formatted_options[0] + log = self._mnff.formatted_options[0] return log.decode("utf-8", errors="backslashreplace") def _input_type_from_code_type(self, code_type: str): diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py index adce5dbac5..a287131007 100644 --- a/cuda_core/cuda/core/experimental/_program.py +++ b/cuda_core/cuda/core/experimental/_program.py @@ -493,100 +493,6 @@ def _prepare_nvrtc_options(self) -> list[bytes]: options.append("--numba-debug") return list(o.encode() for o in options) - def _prepare_nvjitlink_options(self) -> list[bytes]: - options = [] - - # arch is always set - assert self.arch is not None - options.append(f"-arch={self.arch}") - if self.max_register_count is not None: - options.append(f"-maxrregcount={self.max_register_count}") - if self.time is not None: - options.append("-time") - if self.debug is not None and self.debug: - options.append("-g") - if self.lineinfo is not None and self.lineinfo: - options.append("-lineinfo") - if self.ftz is not None: - options.append(f"-ftz={'true' if self.ftz else 'false'}") - if self.prec_div is not None: - options.append(f"-prec-div={'true' if self.prec_div else 'false'}") - if self.prec_sqrt is not None: - options.append(f"-prec-sqrt={'true' if self.prec_sqrt else 'false'}") - if self.fma is not None: - options.append(f"-fma={'true' if self.fma else 'false'}") - if self.link_time_optimization is not None and self.link_time_optimization: - options.append("-lto") - if self.ptxas_options is not None: - if isinstance(self.ptxas_options, str): - options.append(f"-Xptxas={self.ptxas_options}") - elif is_sequence(self.ptxas_options): - for opt in self.ptxas_options: - options.append(f"-Xptxas={opt}") - if 
self.split_compile is not None: - options.append(f"-split-compile={self.split_compile}") - - # Check for unsupported options and raise error if they are set - unsupported = [] - if self.relocatable_device_code is not None: - unsupported.append("relocatable_device_code") - if self.extensible_whole_program is not None and self.extensible_whole_program: - unsupported.append("extensible_whole_program") - if self.device_code_optimize is not None: - unsupported.append("device_code_optimize") - if self.use_fast_math is not None and self.use_fast_math: - unsupported.append("use_fast_math") - if self.extra_device_vectorization is not None and self.extra_device_vectorization: - unsupported.append("extra_device_vectorization") - if self.gen_opt_lto is not None and self.gen_opt_lto: - unsupported.append("gen_opt_lto") - if self.define_macro is not None: - unsupported.append("define_macro") - if self.undefine_macro is not None: - unsupported.append("undefine_macro") - if self.include_path is not None: - unsupported.append("include_path") - if self.pre_include is not None: - unsupported.append("pre_include") - if self.no_source_include is not None and self.no_source_include: - unsupported.append("no_source_include") - if self.std is not None: - unsupported.append("std") - if self.builtin_move_forward is not None: - unsupported.append("builtin_move_forward") - if self.builtin_initializer_list is not None: - unsupported.append("builtin_initializer_list") - if self.disable_warnings is not None and self.disable_warnings: - unsupported.append("disable_warnings") - if self.restrict is not None and self.restrict: - unsupported.append("restrict") - if self.device_as_default_execution_space is not None and self.device_as_default_execution_space: - unsupported.append("device_as_default_execution_space") - if self.device_int128 is not None and self.device_int128: - unsupported.append("device_int128") - if self.optimization_info is not None: - unsupported.append("optimization_info") - if 
self.no_display_error_number is not None and self.no_display_error_number: - unsupported.append("no_display_error_number") - if self.diag_error is not None: - unsupported.append("diag_error") - if self.diag_suppress is not None: - unsupported.append("diag_suppress") - if self.diag_warn is not None: - unsupported.append("diag_warn") - if self.brief_diagnostics is not None: - unsupported.append("brief_diagnostics") - if self.fdevice_syntax_only is not None and self.fdevice_syntax_only: - unsupported.append("fdevice_syntax_only") - if self.minimal is not None and self.minimal: - unsupported.append("minimal") - if self.numba_debug is not None and self.numba_debug: - unsupported.append("numba_debug") - if unsupported: - raise CUDAError(f"The following options are not supported by nvJitLink backend: {', '.join(unsupported)}") - - return list(o.encode() for o in options) - def _prepare_nvvm_options(self, as_bytes: bool = True) -> Union[list[bytes], list[str]]: options = [] @@ -694,8 +600,7 @@ def as_bytes(self, backend: str) -> list[bytes]: Parameters ---------- backend : str - The compiler backend to prepare options for. Must be either "nvrtc", "nvjitlink", - or "nvvm". + The compiler backend to prepare options for. Must be either "nvrtc" or "nvvm". Returns ------- @@ -717,12 +622,10 @@ def as_bytes(self, backend: str) -> list[bytes]: backend = backend.lower() if backend == "nvrtc": return self._prepare_nvrtc_options() - elif backend == "nvjitlink": - return self._prepare_nvjitlink_options() elif backend == "nvvm": return self._prepare_nvvm_options(as_bytes=True) else: - raise ValueError(f"Unknown backend '{backend}'. Must be one of: 'nvrtc', 'nvjitlink', 'nvvm'") + raise ValueError(f"Unknown backend '{backend}'. 
Must be one of: 'nvrtc', 'nvvm'") def __repr__(self): return f"ProgramOptions(name={self.name!r}, arch={self.arch!r})" @@ -816,15 +719,16 @@ def _translate_program_options(self, options: ProgramOptions) -> LinkerOptions: arch=options.arch, max_register_count=options.max_register_count, time=options.time, + link_time_optimization=options.link_time_optimization, debug=options.debug, lineinfo=options.lineinfo, ftz=options.ftz, prec_div=options.prec_div, prec_sqrt=options.prec_sqrt, fma=options.fma, - link_time_optimization=options.link_time_optimization, split_compile=options.split_compile, ptxas_options=options.ptxas_options, + no_cache=options.no_cache, ) def close(self): diff --git a/cuda_core/tests/test_linker.py b/cuda_core/tests/test_linker.py index e0c8d37b65..b7af4b6ab7 100644 --- a/cuda_core/tests/test_linker.py +++ b/cuda_core/tests/test_linker.py @@ -173,3 +173,36 @@ def test_linker_get_info_log(compile_ptx_functions): linker.link("cubin") log = linker.get_info_log() assert isinstance(log, str) + + +@pytest.mark.skipif(is_culink_backend, reason="as_bytes() only supported for nvjitlink backend") +def test_linker_options_as_bytes_nvjitlink(): + """Test LinkerOptions.as_bytes() for nvJitLink backend""" + options = LinkerOptions(arch="sm_80", debug=True, ftz=True, max_register_count=32) + nvjitlink_options = options.as_bytes("nvjitlink") + + # Should return list of bytes + assert isinstance(nvjitlink_options, list) + assert all(isinstance(opt, bytes) for opt in nvjitlink_options) + + # Decode to check content + options_str = [opt.decode() for opt in nvjitlink_options] + assert "-arch=sm_80" in options_str + assert "-g" in options_str + assert "-ftz=true" in options_str + assert "-maxrregcount=32" in options_str + + +def test_linker_options_as_bytes_invalid_backend(): + """Test LinkerOptions.as_bytes() with invalid backend""" + options = LinkerOptions(arch="sm_80") + with pytest.raises(ValueError, match="only supports 'nvjitlink' backend"): + 
options.as_bytes("invalid") + + +@pytest.mark.skipif(not is_culink_backend, reason="driver backend test") +def test_linker_options_as_bytes_driver_not_supported(): + """Test that as_bytes() is not supported for driver backend""" + options = LinkerOptions(arch="sm_80") + with pytest.raises(ValueError, match="as_bytes\\(\\) only supports 'nvjitlink' backend"): + options.as_bytes("driver") diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py index 1da2372317..d576c9208a 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -501,23 +501,6 @@ def test_program_options_as_bytes_nvrtc(): assert "--ftz=true" in options_str -def test_program_options_as_bytes_nvjitlink(): - """Test ProgramOptions.as_bytes() for nvJitLink backend""" - options = ProgramOptions(arch="sm_80", debug=True, ftz=True, max_register_count=32) - nvjitlink_options = options.as_bytes("nvjitlink") - - # Should return list of bytes - assert isinstance(nvjitlink_options, list) - assert all(isinstance(opt, bytes) for opt in nvjitlink_options) - - # Decode to check content - options_str = [opt.decode() for opt in nvjitlink_options] - assert "-arch=sm_80" in options_str - assert "-g" in options_str - assert "-ftz=true" in options_str - assert "-maxrregcount=32" in options_str - - @nvvm_available def test_program_options_as_bytes_nvvm(): """Test ProgramOptions.as_bytes() for NVVM backend""" @@ -543,13 +526,6 @@ def test_program_options_as_bytes_invalid_backend(): options.as_bytes("invalid") -def test_program_options_as_bytes_nvjitlink_unsupported_option(): - """Test that unsupported options raise CUDAError for nvJitLink backend""" - options = ProgramOptions(arch="sm_80", std="c++17") - with pytest.raises(CUDAError, match="not supported by nvJitLink backend"): - options.as_bytes("nvjitlink") - - @nvvm_available def test_program_options_as_bytes_nvvm_unsupported_option(): """Test that unsupported options raise CUDAError for NVVM backend""" From
bb74621220a526510e14b1d09ee59029dedf0e49 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sun, 14 Dec 2025 20:12:54 +0000 Subject: [PATCH 14/15] fix two NVRTC bugs - the program name is used for pch filename, but on Windows it is problematic - trace.json could not be properly created with NVRTC 12.9 --- cuda_core/cuda/core/experimental/_program.py | 2 +- cuda_core/tests/test_program.py | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py index a287131007..b49ebeb64d 100644 --- a/cuda_core/cuda/core/experimental/_program.py +++ b/cuda_core/cuda/core/experimental/_program.py @@ -294,7 +294,7 @@ class ProgramOptions: Default: False """ - name: str | None = "" + name: str | None = "default_program" arch: str | None = None relocatable_device_code: bool | None = None extensible_whole_program: bool | None = None diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py index d576c9208a..2b0ac5d617 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -231,7 +231,13 @@ def ptx_code_object(): # TODO: Add test for pre_include once we have a suitable header in the test environment # ProgramOptions(pre_include="cuda_runtime.h"), ProgramOptions(no_cache=True), - ProgramOptions(fdevice_time_trace="trace.json"), + pytest.param( + ProgramOptions(fdevice_time_trace="trace.json"), + marks=pytest.mark.skipif( + (_get_nvrtc_version_for_tests() or 0) < 13000, + reason="buggy with NVRTC < 13.0 (File 'trace.json.json' could not be opened)", + ), + ), pytest.param( ProgramOptions(arch="sm_100", device_float128=True), marks=pytest.mark.skipif( From eda06ff711a1b2bc457f17e3f13c332c8b6b53e3 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Mon, 15 Dec 2025 13:10:39 -0500 Subject: [PATCH 15/15] Apply suggestions from code review Co-authored-by: Keith Kraus --- cuda_core/cuda/core/experimental/_program.py | 7 +++---- 1 file changed, 3 
insertions(+), 4 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py index b49ebeb64d..f3ad9af644 100644 --- a/cuda_core/cuda/core/experimental/_program.py +++ b/cuda_core/cuda/core/experimental/_program.py @@ -357,8 +357,7 @@ def __post_init__(self): def _prepare_nvrtc_options(self) -> list[bytes]: # Build NVRTC-specific options - options = [] - options.append(f"-arch={self.arch}") + options = [f"-arch={self.arch}"] if self.relocatable_device_code is not None: options.append(f"--relocatable-device-code={_handle_boolean_option(self.relocatable_device_code)}") if self.extensible_whole_program is not None and self.extensible_whole_program: @@ -491,7 +490,7 @@ def _prepare_nvrtc_options(self) -> list[bytes]: ) if self.numba_debug: options.append("--numba-debug") - return list(o.encode() for o in options) + return [o.encode() for o in options] def _prepare_nvvm_options(self, as_bytes: bool = True) -> Union[list[bytes], list[str]]: options = [] @@ -586,7 +585,7 @@ def _prepare_nvvm_options(self, as_bytes: bool = True) -> Union[list[bytes], lis raise CUDAError(f"The following options are not supported by NVVM backend: {', '.join(unsupported)}") if as_bytes: - return list(o.encode() for o in options) + return [o.encode() for o in options] else: return options