From fb004b6bb4822e388d04a28856109a31c67c11ef Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 9 Dec 2025 23:11:08 +0000
Subject: [PATCH 01/15] Initial plan


From c8ac3ea1f77a3c2629d8c611117b2a75c5a94c4d Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 9 Dec 2025 23:19:56 +0000
Subject: [PATCH 02/15] Add as_bytes() public API with backend-specific option
 preparation

Co-authored-by: leofang <5534781+leofang@users.noreply.github.com>
---
 cuda_core/cuda/core/experimental/_program.py | 350 +++++++++++++++++--
 cuda_core/tests/test_program.py              |  73 ++++
 2 files changed, 398 insertions(+), 25 deletions(-)

diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py
index cdef7c3be6..696d016a28 100644
--- a/cuda_core/cuda/core/experimental/_program.py
+++ b/cuda_core/cuda/core/experimental/_program.py
@@ -18,6 +18,7 @@
 from cuda.core.experimental._module import ObjectCode
 from cuda.core.experimental._utils.clear_error_support import assert_type
 from cuda.core.experimental._utils.cuda_utils import (
+    CUDAError,
     _handle_boolean_option,
     check_or_create_options,
     driver,
@@ -422,7 +423,326 @@ def __post_init__(self):
         if self.numba_debug:
             self._formatted_options.append("--numba-debug")
 
+    def _prepare_nvrtc_options(self) -> list[bytes]:
+        """Prepare options for NVRTC backend.
+        
+        This method transforms the formatted options into bytes suitable for NVRTC compilation.
+        It validates that only NVRTC-compatible options are set and raises CUDAError for
+        unsupported options.
+        
+        Returns
+        -------
+        list[bytes]
+            List of option strings encoded as bytes for NVRTC.
+            
+        Raises
+        ------
+        CUDAError
+            If an option incompatible with NVRTC is set.
+        """
+        # NVRTC uses all the formatted options that were set in __post_init__
+        # All options in _formatted_options are already NVRTC-compatible
+        return list(o.encode() for o in self._formatted_options)
+
+    def _prepare_nvjitlink_options(self) -> list[bytes]:
+        """Prepare options for nvJitLink backend.
+        
+        This method transforms the ProgramOptions into options suitable for nvJitLink linking.
+        It validates that only nvJitLink-compatible options are set and raises CUDAError for
+        unsupported options.
+        
+        Returns
+        -------
+        list[bytes]
+            List of option strings encoded as bytes for nvJitLink.
+            
+        Raises
+        ------
+        CUDAError
+            If an option incompatible with nvJitLink is set.
+        """
+        options = []
+        
+        # Options supported by nvJitLink (subset of ProgramOptions)
+        # Based on LinkerOptions._init_nvjitlink() in _linker.py
+        
+        # arch is always set
+        assert self.arch is not None
+        options.append(f"-arch={self.arch}")
+        
+        if self.max_register_count is not None:
+            options.append(f"-maxrregcount={self.max_register_count}")
+        
+        if self.time is not None:
+            options.append("-time")
+        
+        if self.debug is not None and self.debug:
+            options.append("-g")
+        
+        if self.lineinfo is not None and self.lineinfo:
+            options.append("-lineinfo")
+        
+        if self.ftz is not None:
+            options.append(f"-ftz={'true' if self.ftz else 'false'}")
+        
+        if self.prec_div is not None:
+            options.append(f"-prec-div={'true' if self.prec_div else 'false'}")
+        
+        if self.prec_sqrt is not None:
+            options.append(f"-prec-sqrt={'true' if self.prec_sqrt else 'false'}")
+        
+        if self.fma is not None:
+            options.append(f"-fma={'true' if self.fma else 'false'}")
+        
+        if self.link_time_optimization is not None and self.link_time_optimization:
+            options.append("-lto")
+        
+        if self.ptxas_options is not None:
+            if isinstance(self.ptxas_options, str):
+                options.append(f"-Xptxas={self.ptxas_options}")
+            elif is_sequence(self.ptxas_options):
+                for opt in self.ptxas_options:
+                    options.append(f"-Xptxas={opt}")
+        
+        if self.split_compile is not None:
+            options.append(f"-split-compile={self.split_compile}")
+        
+        # Check for unsupported options and raise error if they are set
+        unsupported = []
+        if self.relocatable_device_code is not None:
+            unsupported.append("relocatable_device_code")
+        if self.extensible_whole_program is not None and self.extensible_whole_program:
+            unsupported.append("extensible_whole_program")
+        if self.device_code_optimize is not None:
+            unsupported.append("device_code_optimize")
+        if self.use_fast_math is not None and self.use_fast_math:
+            unsupported.append("use_fast_math")
+        if self.extra_device_vectorization is not None and self.extra_device_vectorization:
+            unsupported.append("extra_device_vectorization")
+        if self.gen_opt_lto is not None and self.gen_opt_lto:
+            unsupported.append("gen_opt_lto")
+        if self.define_macro is not None:
+            unsupported.append("define_macro")
+        if self.undefine_macro is not None:
+            unsupported.append("undefine_macro")
+        if self.include_path is not None:
+            unsupported.append("include_path")
+        if self.pre_include is not None:
+            unsupported.append("pre_include")
+        if self.no_source_include is not None and self.no_source_include:
+            unsupported.append("no_source_include")
+        if self.std is not None:
+            unsupported.append("std")
+        if self.builtin_move_forward is not None:
+            unsupported.append("builtin_move_forward")
+        if self.builtin_initializer_list is not None:
+            unsupported.append("builtin_initializer_list")
+        if self.disable_warnings is not None and self.disable_warnings:
+            unsupported.append("disable_warnings")
+        if self.restrict is not None and self.restrict:
+            unsupported.append("restrict")
+        if self.device_as_default_execution_space is not None and self.device_as_default_execution_space:
+            unsupported.append("device_as_default_execution_space")
+        if self.device_int128 is not None and self.device_int128:
+            unsupported.append("device_int128")
+        if self.optimization_info is not None:
+            unsupported.append("optimization_info")
+        if self.no_display_error_number is not None and self.no_display_error_number:
+            unsupported.append("no_display_error_number")
+        if self.diag_error is not None:
+            unsupported.append("diag_error")
+        if self.diag_suppress is not None:
+            unsupported.append("diag_suppress")
+        if self.diag_warn is not None:
+            unsupported.append("diag_warn")
+        if self.brief_diagnostics is not None:
+            unsupported.append("brief_diagnostics")
+        if self.fdevice_syntax_only is not None and self.fdevice_syntax_only:
+            unsupported.append("fdevice_syntax_only")
+        if self.minimal is not None and self.minimal:
+            unsupported.append("minimal")
+        if self.numba_debug is not None and self.numba_debug:
+            unsupported.append("numba_debug")
+        
+        if unsupported:
+            raise CUDAError(
+                f"The following options are not supported by nvJitLink backend: {', '.join(unsupported)}"
+            )
+        
+        return list(o.encode() for o in options)
+
+    def _prepare_nvvm_options(self) -> list[str]:
+        """Prepare options for NVVM backend.
+        
+        This method transforms the ProgramOptions into options suitable for NVVM compilation.
+        It validates that only NVVM-compatible options are set and raises CUDAError for
+        unsupported options.
+        
+        Returns
+        -------
+        list[str]
+            List of option strings for NVVM (not encoded as bytes).
+            
+        Raises
+        ------
+        CUDAError
+            If an option incompatible with NVVM is set.
+        """
+        options = []
+        
+        # Options supported by NVVM
+        # Based on _translate_program_options_to_nvvm() method
+        
+        assert self.arch is not None
+        arch = self.arch
+        if arch.startswith("sm_"):
+            arch = f"compute_{arch[3:]}"
+        options.append(f"-arch={arch}")
+        
+        if self.debug is not None and self.debug:
+            options.append("-g")
+        
+        if self.device_code_optimize is False:
+            options.append("-opt=0")
+        elif self.device_code_optimize is True:
+            options.append("-opt=3")
+        
+        # NVVM uses 0/1 instead of true/false for boolean options
+        if self.ftz is not None:
+            options.append(f"-ftz={'1' if self.ftz else '0'}")
+        
+        if self.prec_sqrt is not None:
+            options.append(f"-prec-sqrt={'1' if self.prec_sqrt else '0'}")
+        
+        if self.prec_div is not None:
+            options.append(f"-prec-div={'1' if self.prec_div else '0'}")
+        
+        if self.fma is not None:
+            options.append(f"-fma={'1' if self.fma else '0'}")
+        
+        # Check for unsupported options and raise error if they are set
+        unsupported = []
+        if self.relocatable_device_code is not None:
+            unsupported.append("relocatable_device_code")
+        if self.extensible_whole_program is not None and self.extensible_whole_program:
+            unsupported.append("extensible_whole_program")
+        if self.lineinfo is not None and self.lineinfo:
+            unsupported.append("lineinfo")
+        if self.ptxas_options is not None:
+            unsupported.append("ptxas_options")
+        if self.max_register_count is not None:
+            unsupported.append("max_register_count")
+        if self.use_fast_math is not None and self.use_fast_math:
+            unsupported.append("use_fast_math")
+        if self.extra_device_vectorization is not None and self.extra_device_vectorization:
+            unsupported.append("extra_device_vectorization")
+        if self.link_time_optimization is not None and self.link_time_optimization:
+            unsupported.append("link_time_optimization")
+        if self.gen_opt_lto is not None and self.gen_opt_lto:
+            unsupported.append("gen_opt_lto")
+        if self.define_macro is not None:
+            unsupported.append("define_macro")
+        if self.undefine_macro is not None:
+            unsupported.append("undefine_macro")
+        if self.include_path is not None:
+            unsupported.append("include_path")
+        if self.pre_include is not None:
+            unsupported.append("pre_include")
+        if self.no_source_include is not None and self.no_source_include:
+            unsupported.append("no_source_include")
+        if self.std is not None:
+            unsupported.append("std")
+        if self.builtin_move_forward is not None:
+            unsupported.append("builtin_move_forward")
+        if self.builtin_initializer_list is not None:
+            unsupported.append("builtin_initializer_list")
+        if self.disable_warnings is not None and self.disable_warnings:
+            unsupported.append("disable_warnings")
+        if self.restrict is not None and self.restrict:
+            unsupported.append("restrict")
+        if self.device_as_default_execution_space is not None and self.device_as_default_execution_space:
+            unsupported.append("device_as_default_execution_space")
+        if self.device_int128 is not None and self.device_int128:
+            unsupported.append("device_int128")
+        if self.optimization_info is not None:
+            unsupported.append("optimization_info")
+        if self.no_display_error_number is not None and self.no_display_error_number:
+            unsupported.append("no_display_error_number")
+        if self.diag_error is not None:
+            unsupported.append("diag_error")
+        if self.diag_suppress is not None:
+            unsupported.append("diag_suppress")
+        if self.diag_warn is not None:
+            unsupported.append("diag_warn")
+        if self.brief_diagnostics is not None:
+            unsupported.append("brief_diagnostics")
+        if self.time is not None:
+            unsupported.append("time")
+        if self.split_compile is not None:
+            unsupported.append("split_compile")
+        if self.fdevice_syntax_only is not None and self.fdevice_syntax_only:
+            unsupported.append("fdevice_syntax_only")
+        if self.minimal is not None and self.minimal:
+            unsupported.append("minimal")
+        if self.numba_debug is not None and self.numba_debug:
+            unsupported.append("numba_debug")
+        
+        if unsupported:
+            raise CUDAError(
+                f"The following options are not supported by NVVM backend: {', '.join(unsupported)}"
+            )
+        
+        return options
+
+    def as_bytes(self, backend: str) -> Union[list[bytes], list[str]]:
+        """Convert program options to bytes format for the specified backend.
+        
+        This method transforms the program options into a format suitable for the
+        specified compiler backend. Different backends may use different option names
+        and formats even for the same conceptual options.
+        
+        Parameters
+        ----------
+        backend : str
+            The compiler backend to prepare options for. Must be one of:
+            - "nvrtc": NVIDIA Runtime Compilation (NVRTC)
+            - "nvjitlink": NVIDIA JIT Linker
+            - "nvvm": NVIDIA LLVM-based compiler
+        
+        Returns
+        -------
+        Union[list[bytes], list[str]]
+            For "nvrtc" and "nvjitlink": list of option strings encoded as bytes.
+            For "nvvm": list of option strings (not encoded).
+        
+        Raises
+        ------
+        ValueError
+            If an unknown backend is specified.
+        CUDAError
+            If an option incompatible with the specified backend is set.
+        
+        Examples
+        --------
+        >>> options = ProgramOptions(arch="sm_80", debug=True)
+        >>> nvrtc_options = options.as_bytes("nvrtc")
+        >>> nvjitlink_options = options.as_bytes("nvjitlink")
+        >>> nvvm_options = options.as_bytes("nvvm")
+        """
+        backend = backend.lower()
+        if backend == "nvrtc":
+            return self._prepare_nvrtc_options()
+        elif backend == "nvjitlink":
+            return self._prepare_nvjitlink_options()
+        elif backend == "nvvm":
+            return self._prepare_nvvm_options()
+        else:
+            raise ValueError(
+                f"Unknown backend '{backend}'. Must be one of: 'nvrtc', 'nvjitlink', 'nvvm'"
+            )
+
     def _as_bytes(self):
+        """Private method for backward compatibility. Use as_bytes('nvrtc') instead."""
         # TODO: allow tuples once NVIDIA/cuda-python#72 is resolved
         return list(o.encode() for o in self._formatted_options)
 
@@ -531,31 +851,11 @@ def _translate_program_options(self, options: ProgramOptions) -> LinkerOptions:
         )
 
     def _translate_program_options_to_nvvm(self, options: ProgramOptions) -> list[str]:
-        """Translate ProgramOptions to NVVM-specific compilation options."""
-        nvvm_options = []
-
-        assert options.arch is not None
-        arch = options.arch
-        if arch.startswith("sm_"):
-            arch = f"compute_{arch[3:]}"
-        nvvm_options.append(f"-arch={arch}")
-        if options.debug:
-            nvvm_options.append("-g")
-        if options.device_code_optimize is False:
-            nvvm_options.append("-opt=0")
-        elif options.device_code_optimize is True:
-            nvvm_options.append("-opt=3")
-        # NVVM is not consistent with NVRTC, it uses 0/1 instead...
-        if options.ftz is not None:
-            nvvm_options.append(f"-ftz={'1' if options.ftz else '0'}")
-        if options.prec_sqrt is not None:
-            nvvm_options.append(f"-prec-sqrt={'1' if options.prec_sqrt else '0'}")
-        if options.prec_div is not None:
-            nvvm_options.append(f"-prec-div={'1' if options.prec_div else '0'}")
-        if options.fma is not None:
-            nvvm_options.append(f"-fma={'1' if options.fma else '0'}")
-
-        return nvvm_options
+        """Translate ProgramOptions to NVVM-specific compilation options.
+        
+        This method uses the new _prepare_nvvm_options private method.
+        """
+        return options._prepare_nvvm_options()
 
     def close(self):
         """Destroy this program."""
diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py
index 8a6526fcc2..5b5092b8f6 100644
--- a/cuda_core/tests/test_program.py
+++ b/cuda_core/tests/test_program.py
@@ -411,3 +411,76 @@ def test_nvvm_program_options(init_cuda, nvvm_ir, options):
     assert ".visible .entry simple(" in ptx_text
 
     program.close()
+
+
+def test_program_options_as_bytes_nvrtc():
+    """Test ProgramOptions.as_bytes() for NVRTC backend"""
+    options = ProgramOptions(arch="sm_80", debug=True, lineinfo=True, ftz=True)
+    nvrtc_options = options.as_bytes("nvrtc")
+    
+    # Should return list of bytes
+    assert isinstance(nvrtc_options, list)
+    assert all(isinstance(opt, bytes) for opt in nvrtc_options)
+    
+    # Decode to check content
+    options_str = [opt.decode() for opt in nvrtc_options]
+    assert "-arch=sm_80" in options_str
+    assert "--device-debug" in options_str
+    assert "--generate-line-info" in options_str
+    assert "--ftz=true" in options_str
+
+
+def test_program_options_as_bytes_nvjitlink():
+    """Test ProgramOptions.as_bytes() for nvJitLink backend"""
+    options = ProgramOptions(arch="sm_80", debug=True, ftz=True, max_register_count=32)
+    nvjitlink_options = options.as_bytes("nvjitlink")
+    
+    # Should return list of bytes
+    assert isinstance(nvjitlink_options, list)
+    assert all(isinstance(opt, bytes) for opt in nvjitlink_options)
+    
+    # Decode to check content
+    options_str = [opt.decode() for opt in nvjitlink_options]
+    assert "-arch=sm_80" in options_str
+    assert "-g" in options_str
+    assert "-ftz=true" in options_str
+    assert "-maxrregcount=32" in options_str
+
+
+@nvvm_available
+def test_program_options_as_bytes_nvvm():
+    """Test ProgramOptions.as_bytes() for NVVM backend"""
+    options = ProgramOptions(arch="sm_80", debug=True, ftz=True, device_code_optimize=True)
+    nvvm_options = options.as_bytes("nvvm")
+    
+    # Should return list of strings (not bytes for NVVM)
+    assert isinstance(nvvm_options, list)
+    assert all(isinstance(opt, str) for opt in nvvm_options)
+    
+    # Check content
+    assert "-arch=compute_80" in nvvm_options
+    assert "-g" in nvvm_options
+    assert "-ftz=1" in nvvm_options
+    assert "-opt=3" in nvvm_options
+
+
+def test_program_options_as_bytes_invalid_backend():
+    """Test ProgramOptions.as_bytes() with invalid backend"""
+    options = ProgramOptions(arch="sm_80")
+    with pytest.raises(ValueError, match="Unknown backend 'invalid'"):
+        options.as_bytes("invalid")
+
+
+def test_program_options_as_bytes_nvjitlink_unsupported_option():
+    """Test that unsupported options raise CUDAError for nvJitLink backend"""
+    options = ProgramOptions(arch="sm_80", std="c++17")
+    with pytest.raises(CUDAError, match="not supported by nvJitLink backend"):
+        options.as_bytes("nvjitlink")
+
+
+@nvvm_available
+def test_program_options_as_bytes_nvvm_unsupported_option():
+    """Test that unsupported options raise CUDAError for NVVM backend"""
+    options = ProgramOptions(arch="sm_80", lineinfo=True)
+    with pytest.raises(CUDAError, match="not supported by NVVM backend"):
+        options.as_bytes("nvvm")

From 7a2a36a1be56c7ae3f48617afbf16a04b251d17b Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 10 Dec 2025 02:32:00 +0000
Subject: [PATCH 03/15] Address PR feedback: remove docstrings from private
 methods, make as_bytes always return list[bytes], remove _as_bytes

Co-authored-by: leofang <5534781+leofang@users.noreply.github.com>
---
 cuda_core/cuda/core/experimental/_program.py | 77 +++-----------------
 cuda_core/tests/test_program.py              | 15 ++--
 2 files changed, 19 insertions(+), 73 deletions(-)

diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py
index 696d016a28..45d27fac84 100644
--- a/cuda_core/cuda/core/experimental/_program.py
+++ b/cuda_core/cuda/core/experimental/_program.py
@@ -424,43 +424,11 @@ def __post_init__(self):
             self._formatted_options.append("--numba-debug")
 
     def _prepare_nvrtc_options(self) -> list[bytes]:
-        """Prepare options for NVRTC backend.
-        
-        This method transforms the formatted options into bytes suitable for NVRTC compilation.
-        It validates that only NVRTC-compatible options are set and raises CUDAError for
-        unsupported options.
-        
-        Returns
-        -------
-        list[bytes]
-            List of option strings encoded as bytes for NVRTC.
-            
-        Raises
-        ------
-        CUDAError
-            If an option incompatible with NVRTC is set.
-        """
         # NVRTC uses all the formatted options that were set in __post_init__
         # All options in _formatted_options are already NVRTC-compatible
         return list(o.encode() for o in self._formatted_options)
 
     def _prepare_nvjitlink_options(self) -> list[bytes]:
-        """Prepare options for nvJitLink backend.
-        
-        This method transforms the ProgramOptions into options suitable for nvJitLink linking.
-        It validates that only nvJitLink-compatible options are set and raises CUDAError for
-        unsupported options.
-        
-        Returns
-        -------
-        list[bytes]
-            List of option strings encoded as bytes for nvJitLink.
-            
-        Raises
-        ------
-        CUDAError
-            If an option incompatible with nvJitLink is set.
-        """
         options = []
         
         # Options supported by nvJitLink (subset of ProgramOptions)
@@ -571,23 +539,7 @@ def _prepare_nvjitlink_options(self) -> list[bytes]:
         
         return list(o.encode() for o in options)
 
-    def _prepare_nvvm_options(self) -> list[str]:
-        """Prepare options for NVVM backend.
-        
-        This method transforms the ProgramOptions into options suitable for NVVM compilation.
-        It validates that only NVVM-compatible options are set and raises CUDAError for
-        unsupported options.
-        
-        Returns
-        -------
-        list[str]
-            List of option strings for NVVM (not encoded as bytes).
-            
-        Raises
-        ------
-        CUDAError
-            If an option incompatible with NVVM is set.
-        """
+    def _prepare_nvvm_options(self, as_bytes: bool = True) -> Union[list[bytes], list[str]]:
         options = []
         
         # Options supported by NVVM
@@ -692,9 +644,12 @@ def _prepare_nvvm_options(self) -> list[str]:
                 f"The following options are not supported by NVVM backend: {', '.join(unsupported)}"
             )
         
-        return options
+        if as_bytes:
+            return list(o.encode() for o in options)
+        else:
+            return options
 
-    def as_bytes(self, backend: str) -> Union[list[bytes], list[str]]:
+    def as_bytes(self, backend: str) -> list[bytes]:
         """Convert program options to bytes format for the specified backend.
         
         This method transforms the program options into a format suitable for the
@@ -711,9 +666,8 @@ def as_bytes(self, backend: str) -> Union[list[bytes], list[str]]:
         
         Returns
         -------
-        Union[list[bytes], list[str]]
-            For "nvrtc" and "nvjitlink": list of option strings encoded as bytes.
-            For "nvvm": list of option strings (not encoded).
+        list[bytes]
+            List of option strings encoded as bytes.
         
         Raises
         ------
@@ -735,17 +689,12 @@ def as_bytes(self, backend: str) -> Union[list[bytes], list[str]]:
         elif backend == "nvjitlink":
             return self._prepare_nvjitlink_options()
         elif backend == "nvvm":
-            return self._prepare_nvvm_options()
+            return self._prepare_nvvm_options(as_bytes=True)
         else:
             raise ValueError(
                 f"Unknown backend '{backend}'. Must be one of: 'nvrtc', 'nvjitlink', 'nvvm'"
             )
 
-    def _as_bytes(self):
-        """Private method for backward compatibility. Use as_bytes('nvrtc') instead."""
-        # TODO: allow tuples once NVIDIA/cuda-python#72 is resolved
-        return list(o.encode() for o in self._formatted_options)
-
     def __repr__(self):
         # __TODO__ improve this
         return str(self._formatted_options)
@@ -851,11 +800,7 @@ def _translate_program_options(self, options: ProgramOptions) -> LinkerOptions:
         )
 
     def _translate_program_options_to_nvvm(self, options: ProgramOptions) -> list[str]:
-        """Translate ProgramOptions to NVVM-specific compilation options.
-        
-        This method uses the new _prepare_nvvm_options private method.
-        """
-        return options._prepare_nvvm_options()
+        return options._prepare_nvvm_options(as_bytes=False)
 
     def close(self):
         """Destroy this program."""
@@ -909,7 +854,7 @@ def compile(self, target_type, name_expressions=(), logs=None):
                         nvrtc.nvrtcAddNameExpression(self._mnff.handle, n.encode()),
                         handle=self._mnff.handle,
                     )
-            options = self._options._as_bytes()
+            options = self._options.as_bytes("nvrtc")
             handle_return(
                 nvrtc.nvrtcCompileProgram(self._mnff.handle, len(options), options),
                 handle=self._mnff.handle,
diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py
index 5b5092b8f6..31897ab786 100644
--- a/cuda_core/tests/test_program.py
+++ b/cuda_core/tests/test_program.py
@@ -453,15 +453,16 @@ def test_program_options_as_bytes_nvvm():
     options = ProgramOptions(arch="sm_80", debug=True, ftz=True, device_code_optimize=True)
     nvvm_options = options.as_bytes("nvvm")
     
-    # Should return list of strings (not bytes for NVVM)
+    # Should return list of bytes (same as other backends)
     assert isinstance(nvvm_options, list)
-    assert all(isinstance(opt, str) for opt in nvvm_options)
+    assert all(isinstance(opt, bytes) for opt in nvvm_options)
     
-    # Check content
-    assert "-arch=compute_80" in nvvm_options
-    assert "-g" in nvvm_options
-    assert "-ftz=1" in nvvm_options
-    assert "-opt=3" in nvvm_options
+    # Decode to check content
+    options_str = [opt.decode() for opt in nvvm_options]
+    assert "-arch=compute_80" in options_str
+    assert "-g" in options_str
+    assert "-ftz=1" in options_str
+    assert "-opt=3" in options_str
 
 
 def test_program_options_as_bytes_invalid_backend():

From 2c4a0b620c6a8024c1c43fc75923a8a87acd7cd2 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 10 Dec 2025 02:51:59 +0000
Subject: [PATCH 04/15] Remove wrapper methods and use prepare methods directly

Co-authored-by: leofang <5534781+leofang@users.noreply.github.com>
---
 cuda_core/cuda/core/experimental/_program.py | 44 ++++++++------------
 1 file changed, 18 insertions(+), 26 deletions(-)

diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py
index 45d27fac84..8c2d43e3e7 100644
--- a/cuda_core/cuda/core/experimental/_program.py
+++ b/cuda_core/cuda/core/experimental/_program.py
@@ -431,9 +431,6 @@ def _prepare_nvrtc_options(self) -> list[bytes]:
     def _prepare_nvjitlink_options(self) -> list[bytes]:
         options = []
         
-        # Options supported by nvJitLink (subset of ProgramOptions)
-        # Based on LinkerOptions._init_nvjitlink() in _linker.py
-        
         # arch is always set
         assert self.arch is not None
         options.append(f"-arch={self.arch}")
@@ -543,7 +540,6 @@ def _prepare_nvvm_options(self, as_bytes: bool = True) -> Union[list[bytes], lis
         options = []
         
         # Options supported by NVVM
-        # Based on _translate_program_options_to_nvvm() method
         
         assert self.arch is not None
         arch = self.arch
@@ -760,7 +756,22 @@ def __init__(self, code, code_type, options: ProgramOptions = None):
         elif code_type == "ptx":
             assert_type(code, str)
             self._linker = Linker(
-                ObjectCode._init(code.encode(), code_type), options=self._translate_program_options(options)
+                ObjectCode._init(code.encode(), code_type),
+                options=LinkerOptions(
+                    name=options.name,
+                    arch=options.arch,
+                    max_register_count=options.max_register_count,
+                    time=options.time,
+                    debug=options.debug,
+                    lineinfo=options.lineinfo,
+                    ftz=options.ftz,
+                    prec_div=options.prec_div,
+                    prec_sqrt=options.prec_sqrt,
+                    fma=options.fma,
+                    link_time_optimization=options.link_time_optimization,
+                    split_compile=options.split_compile,
+                    ptxas_options=options.ptxas_options,
+                ),
             )
             self._backend = self._linker.backend
 
@@ -782,26 +793,6 @@ def __init__(self, code, code_type, options: ProgramOptions = None):
             assert code_type not in supported_code_types, f"{code_type=}"
             raise RuntimeError(f"Unsupported {code_type=} ({supported_code_types=})")
 
-    def _translate_program_options(self, options: ProgramOptions) -> LinkerOptions:
-        return LinkerOptions(
-            name=options.name,
-            arch=options.arch,
-            max_register_count=options.max_register_count,
-            time=options.time,
-            debug=options.debug,
-            lineinfo=options.lineinfo,
-            ftz=options.ftz,
-            prec_div=options.prec_div,
-            prec_sqrt=options.prec_sqrt,
-            fma=options.fma,
-            link_time_optimization=options.link_time_optimization,
-            split_compile=options.split_compile,
-            ptxas_options=options.ptxas_options,
-        )
-
-    def _translate_program_options_to_nvvm(self, options: ProgramOptions) -> list[str]:
-        return options._prepare_nvvm_options(as_bytes=False)
-
     def close(self):
         """Destroy this program."""
         if self._linker:
@@ -886,7 +877,8 @@ def compile(self, target_type, name_expressions=(), logs=None):
             if target_type not in ("ptx", "ltoir"):
                 raise ValueError(f'NVVM backend only supports target_type="ptx", "ltoir", got "{target_type}"')
 
-            nvvm_options = self._translate_program_options_to_nvvm(self._options)
+            # TODO: flip to True when NVIDIA/cuda-python#1354 is resolved and CUDA 12 is dropped
+            nvvm_options = self._options._prepare_nvvm_options(as_bytes=False)
             if target_type == "ltoir" and "-gen-lto" not in nvvm_options:
                 nvvm_options.append("-gen-lto")
             nvvm = _get_nvvm_module()

From d8f853c3d09717b4819d28720158411559b400c0 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 10 Dec 2025 03:29:39 +0000
Subject: [PATCH 05/15] Restore _translate_program_options method for PTX code
 path

Co-authored-by: leofang <5534781+leofang@users.noreply.github.com>
---
 cuda_core/cuda/core/experimental/_program.py | 34 +++++++++++---------
 1 file changed, 18 insertions(+), 16 deletions(-)

diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py
index 8c2d43e3e7..51b5312754 100644
--- a/cuda_core/cuda/core/experimental/_program.py
+++ b/cuda_core/cuda/core/experimental/_program.py
@@ -756,22 +756,7 @@ def __init__(self, code, code_type, options: ProgramOptions = None):
         elif code_type == "ptx":
             assert_type(code, str)
             self._linker = Linker(
-                ObjectCode._init(code.encode(), code_type),
-                options=LinkerOptions(
-                    name=options.name,
-                    arch=options.arch,
-                    max_register_count=options.max_register_count,
-                    time=options.time,
-                    debug=options.debug,
-                    lineinfo=options.lineinfo,
-                    ftz=options.ftz,
-                    prec_div=options.prec_div,
-                    prec_sqrt=options.prec_sqrt,
-                    fma=options.fma,
-                    link_time_optimization=options.link_time_optimization,
-                    split_compile=options.split_compile,
-                    ptxas_options=options.ptxas_options,
-                ),
+                ObjectCode._init(code.encode(), code_type), options=self._translate_program_options(options)
             )
             self._backend = self._linker.backend
 
@@ -793,6 +778,23 @@ def __init__(self, code, code_type, options: ProgramOptions = None):
             assert code_type not in supported_code_types, f"{code_type=}"
             raise RuntimeError(f"Unsupported {code_type=} ({supported_code_types=})")
 
+    def _translate_program_options(self, options: ProgramOptions) -> LinkerOptions:
+        return LinkerOptions(
+            name=options.name,
+            arch=options.arch,
+            max_register_count=options.max_register_count,
+            time=options.time,
+            debug=options.debug,
+            lineinfo=options.lineinfo,
+            ftz=options.ftz,
+            prec_div=options.prec_div,
+            prec_sqrt=options.prec_sqrt,
+            fma=options.fma,
+            link_time_optimization=options.link_time_optimization,
+            split_compile=options.split_compile,
+            ptxas_options=options.ptxas_options,
+        )
+
     def close(self):
         """Destroy this program."""
         if self._linker:

From 473e2b9d74987847b437bdc0c6b8155f4a71ad4b Mon Sep 17 00:00:00 2001
From: Leo Fang <leof@nvidia.com>
Date: Tue, 9 Dec 2025 22:32:02 -0500
Subject: [PATCH 06/15] remove redundant docstring examples

---
 cuda_core/cuda/core/experimental/_program.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py
index 51b5312754..b64dbdff3a 100644
--- a/cuda_core/cuda/core/experimental/_program.py
+++ b/cuda_core/cuda/core/experimental/_program.py
@@ -676,8 +676,6 @@ def as_bytes(self, backend: str) -> list[bytes]:
         --------
         >>> options = ProgramOptions(arch="sm_80", debug=True)
         >>> nvrtc_options = options.as_bytes("nvrtc")
-        >>> nvjitlink_options = options.as_bytes("nvjitlink")
-        >>> nvvm_options = options.as_bytes("nvvm")
         """
         backend = backend.lower()
         if backend == "nvrtc":

From 79d25088c249b88d04243c943852f9695ee5b4d0 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 10 Dec 2025 03:37:19 +0000
Subject: [PATCH 07/15] Remove blank lines in private methods to make code
 blocks more compact

Co-authored-by: leofang <5534781+leofang@users.noreply.github.com>
---
 cuda_core/cuda/core/experimental/_program.py | 25 --------------------
 1 file changed, 25 deletions(-)

diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py
index b64dbdff3a..2c8dfde4ad 100644
--- a/cuda_core/cuda/core/experimental/_program.py
+++ b/cuda_core/cuda/core/experimental/_program.py
@@ -430,48 +430,35 @@ def _prepare_nvrtc_options(self) -> list[bytes]:
 
     def _prepare_nvjitlink_options(self) -> list[bytes]:
         options = []
-        
         # arch is always set
         assert self.arch is not None
         options.append(f"-arch={self.arch}")
-        
         if self.max_register_count is not None:
             options.append(f"-maxrregcount={self.max_register_count}")
-        
         if self.time is not None:
             options.append("-time")
-        
         if self.debug is not None and self.debug:
             options.append("-g")
-        
         if self.lineinfo is not None and self.lineinfo:
             options.append("-lineinfo")
-        
         if self.ftz is not None:
             options.append(f"-ftz={'true' if self.ftz else 'false'}")
-        
         if self.prec_div is not None:
             options.append(f"-prec-div={'true' if self.prec_div else 'false'}")
-        
         if self.prec_sqrt is not None:
             options.append(f"-prec-sqrt={'true' if self.prec_sqrt else 'false'}")
-        
         if self.fma is not None:
             options.append(f"-fma={'true' if self.fma else 'false'}")
-        
         if self.link_time_optimization is not None and self.link_time_optimization:
             options.append("-lto")
-        
         if self.ptxas_options is not None:
             if isinstance(self.ptxas_options, str):
                 options.append(f"-Xptxas={self.ptxas_options}")
             elif is_sequence(self.ptxas_options):
                 for opt in self.ptxas_options:
                     options.append(f"-Xptxas={opt}")
-        
         if self.split_compile is not None:
             options.append(f"-split-compile={self.split_compile}")
-        
         # Check for unsupported options and raise error if they are set
         unsupported = []
         if self.relocatable_device_code is not None:
@@ -528,7 +515,6 @@ def _prepare_nvjitlink_options(self) -> list[bytes]:
             unsupported.append("minimal")
         if self.numba_debug is not None and self.numba_debug:
             unsupported.append("numba_debug")
-        
         if unsupported:
             raise CUDAError(
                 f"The following options are not supported by nvJitLink backend: {', '.join(unsupported)}"
@@ -538,36 +524,27 @@ def _prepare_nvjitlink_options(self) -> list[bytes]:
 
     def _prepare_nvvm_options(self, as_bytes: bool = True) -> Union[list[bytes], list[str]]:
         options = []
-        
         # Options supported by NVVM
-        
         assert self.arch is not None
         arch = self.arch
         if arch.startswith("sm_"):
             arch = f"compute_{arch[3:]}"
         options.append(f"-arch={arch}")
-        
         if self.debug is not None and self.debug:
             options.append("-g")
-        
         if self.device_code_optimize is False:
             options.append("-opt=0")
         elif self.device_code_optimize is True:
             options.append("-opt=3")
-        
         # NVVM uses 0/1 instead of true/false for boolean options
         if self.ftz is not None:
             options.append(f"-ftz={'1' if self.ftz else '0'}")
-        
         if self.prec_sqrt is not None:
             options.append(f"-prec-sqrt={'1' if self.prec_sqrt else '0'}")
-        
         if self.prec_div is not None:
             options.append(f"-prec-div={'1' if self.prec_div else '0'}")
-        
         if self.fma is not None:
             options.append(f"-fma={'1' if self.fma else '0'}")
-        
         # Check for unsupported options and raise error if they are set
         unsupported = []
         if self.relocatable_device_code is not None:
@@ -634,12 +611,10 @@ def _prepare_nvvm_options(self, as_bytes: bool = True) -> Union[list[bytes], lis
             unsupported.append("minimal")
         if self.numba_debug is not None and self.numba_debug:
             unsupported.append("numba_debug")
-        
         if unsupported:
             raise CUDAError(
                 f"The following options are not supported by NVVM backend: {', '.join(unsupported)}"
             )
-        
         if as_bytes:
             return list(o.encode() for o in options)
         else:

From 3f658b5b498f7bbb76be4e7e055a68e53debdbbd Mon Sep 17 00:00:00 2001
From: Leo Fang <leof@nvidia.com>
Date: Tue, 9 Dec 2025 22:49:37 -0500
Subject: [PATCH 08/15] nits: restore section spacing, shorten backend docs

---
 cuda_core/cuda/core/experimental/_program.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py
index 2c8dfde4ad..51ae07d57c 100644
--- a/cuda_core/cuda/core/experimental/_program.py
+++ b/cuda_core/cuda/core/experimental/_program.py
@@ -430,6 +430,7 @@ def _prepare_nvrtc_options(self) -> list[bytes]:
 
     def _prepare_nvjitlink_options(self) -> list[bytes]:
         options = []
+
         # arch is always set
         assert self.arch is not None
         options.append(f"-arch={self.arch}")
@@ -459,6 +460,7 @@ def _prepare_nvjitlink_options(self) -> list[bytes]:
                     options.append(f"-Xptxas={opt}")
         if self.split_compile is not None:
             options.append(f"-split-compile={self.split_compile}")
+
         # Check for unsupported options and raise error if they are set
         unsupported = []
         if self.relocatable_device_code is not None:
@@ -524,6 +526,7 @@ def _prepare_nvjitlink_options(self) -> list[bytes]:
 
     def _prepare_nvvm_options(self, as_bytes: bool = True) -> Union[list[bytes], list[str]]:
         options = []
+
         # Options supported by NVVM
         assert self.arch is not None
         arch = self.arch
@@ -545,6 +548,7 @@ def _prepare_nvvm_options(self, as_bytes: bool = True) -> Union[list[bytes], lis
             options.append(f"-prec-div={'1' if self.prec_div else '0'}")
         if self.fma is not None:
             options.append(f"-fma={'1' if self.fma else '0'}")
+
         # Check for unsupported options and raise error if they are set
         unsupported = []
         if self.relocatable_device_code is not None:
@@ -615,6 +619,7 @@ def _prepare_nvvm_options(self, as_bytes: bool = True) -> Union[list[bytes], lis
             raise CUDAError(
                 f"The following options are not supported by NVVM backend: {', '.join(unsupported)}"
             )
+
         if as_bytes:
             return list(o.encode() for o in options)
         else:
@@ -630,10 +635,8 @@ def as_bytes(self, backend: str) -> list[bytes]:
         Parameters
         ----------
         backend : str
-            The compiler backend to prepare options for. Must be one of:
-            - "nvrtc": NVIDIA Runtime Compilation (NVRTC)
-            - "nvjitlink": NVIDIA JIT Linker
-            - "nvvm": NVIDIA LLVM-based compiler
+            The compiler backend to prepare options for. Must be either "nvrtc", "nvjitlink",
+            or "nvvm".
         
         Returns
         -------

From 7b1ecbb621e9b511e2d95438ef4a66542d8f08ec Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 10 Dec 2025 03:55:43 +0000
Subject: [PATCH 09/15] [pre-commit.ci] auto code formatting

---
 cuda_core/cuda/core/experimental/_program.py | 24 ++++++++------------
 cuda_core/tests/test_program.py              | 12 +++++-----
 2 files changed, 15 insertions(+), 21 deletions(-)

diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py
index 51ae07d57c..599ce362c4 100644
--- a/cuda_core/cuda/core/experimental/_program.py
+++ b/cuda_core/cuda/core/experimental/_program.py
@@ -518,10 +518,8 @@ def _prepare_nvjitlink_options(self) -> list[bytes]:
         if self.numba_debug is not None and self.numba_debug:
             unsupported.append("numba_debug")
         if unsupported:
-            raise CUDAError(
-                f"The following options are not supported by nvJitLink backend: {', '.join(unsupported)}"
-            )
-        
+            raise CUDAError(f"The following options are not supported by nvJitLink backend: {', '.join(unsupported)}")
+
         return list(o.encode() for o in options)
 
     def _prepare_nvvm_options(self, as_bytes: bool = True) -> Union[list[bytes], list[str]]:
@@ -616,9 +614,7 @@ def _prepare_nvvm_options(self, as_bytes: bool = True) -> Union[list[bytes], lis
         if self.numba_debug is not None and self.numba_debug:
             unsupported.append("numba_debug")
         if unsupported:
-            raise CUDAError(
-                f"The following options are not supported by NVVM backend: {', '.join(unsupported)}"
-            )
+            raise CUDAError(f"The following options are not supported by NVVM backend: {', '.join(unsupported)}")
 
         if as_bytes:
             return list(o.encode() for o in options)
@@ -627,29 +623,29 @@ def _prepare_nvvm_options(self, as_bytes: bool = True) -> Union[list[bytes], lis
 
     def as_bytes(self, backend: str) -> list[bytes]:
         """Convert program options to bytes format for the specified backend.
-        
+
         This method transforms the program options into a format suitable for the
         specified compiler backend. Different backends may use different option names
         and formats even for the same conceptual options.
-        
+
         Parameters
         ----------
         backend : str
             The compiler backend to prepare options for. Must be either "nvrtc", "nvjitlink",
             or "nvvm".
-        
+
         Returns
         -------
         list[bytes]
             List of option strings encoded as bytes.
-        
+
         Raises
         ------
         ValueError
             If an unknown backend is specified.
         CUDAError
             If an option incompatible with the specified backend is set.
-        
+
         Examples
         --------
         >>> options = ProgramOptions(arch="sm_80", debug=True)
@@ -663,9 +659,7 @@ def as_bytes(self, backend: str) -> list[bytes]:
         elif backend == "nvvm":
             return self._prepare_nvvm_options(as_bytes=True)
         else:
-            raise ValueError(
-                f"Unknown backend '{backend}'. Must be one of: 'nvrtc', 'nvjitlink', 'nvvm'"
-            )
+            raise ValueError(f"Unknown backend '{backend}'. Must be one of: 'nvrtc', 'nvjitlink', 'nvvm'")
 
     def __repr__(self):
         # __TODO__ improve this
diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py
index 31897ab786..540fd94b0b 100644
--- a/cuda_core/tests/test_program.py
+++ b/cuda_core/tests/test_program.py
@@ -417,11 +417,11 @@ def test_program_options_as_bytes_nvrtc():
     """Test ProgramOptions.as_bytes() for NVRTC backend"""
     options = ProgramOptions(arch="sm_80", debug=True, lineinfo=True, ftz=True)
     nvrtc_options = options.as_bytes("nvrtc")
-    
+
     # Should return list of bytes
     assert isinstance(nvrtc_options, list)
     assert all(isinstance(opt, bytes) for opt in nvrtc_options)
-    
+
     # Decode to check content
     options_str = [opt.decode() for opt in nvrtc_options]
     assert "-arch=sm_80" in options_str
@@ -434,11 +434,11 @@ def test_program_options_as_bytes_nvjitlink():
     """Test ProgramOptions.as_bytes() for nvJitLink backend"""
     options = ProgramOptions(arch="sm_80", debug=True, ftz=True, max_register_count=32)
     nvjitlink_options = options.as_bytes("nvjitlink")
-    
+
     # Should return list of bytes
     assert isinstance(nvjitlink_options, list)
     assert all(isinstance(opt, bytes) for opt in nvjitlink_options)
-    
+
     # Decode to check content
     options_str = [opt.decode() for opt in nvjitlink_options]
     assert "-arch=sm_80" in options_str
@@ -452,11 +452,11 @@ def test_program_options_as_bytes_nvvm():
     """Test ProgramOptions.as_bytes() for NVVM backend"""
     options = ProgramOptions(arch="sm_80", debug=True, ftz=True, device_code_optimize=True)
     nvvm_options = options.as_bytes("nvvm")
-    
+
     # Should return list of bytes (same as other backends)
     assert isinstance(nvvm_options, list)
     assert all(isinstance(opt, bytes) for opt in nvvm_options)
-    
+
     # Decode to check content
     options_str = [opt.decode() for opt in nvvm_options]
     assert "-arch=compute_80" in options_str

From f2bb74ef3b61c1494540e24299d9ebe7b819df1d Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 10 Dec 2025 15:24:22 +0000
Subject: [PATCH 10/15] Refactor: move NVRTC option building to lazy evaluation
 in _prepare_nvrtc_options

Co-authored-by: leofang <5534781+leofang@users.noreply.github.com>
---
 cuda_core/cuda/core/experimental/_program.py | 119 +++++++++----------
 1 file changed, 54 insertions(+), 65 deletions(-)

diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py
index 599ce362c4..28e4e12eb2 100644
--- a/cuda_core/cuda/core/experimental/_program.py
+++ b/cuda_core/cuda/core/experimental/_program.py
@@ -303,130 +303,120 @@ class ProgramOptions:
 
     def __post_init__(self):
         self._name = self.name.encode()
-
-        self._formatted_options = []
-        if self.arch is not None:
-            self._formatted_options.append(f"-arch={self.arch}")
-        else:
+        # Set arch to default if not provided
+        if self.arch is None:
             self.arch = f"sm_{Device().arch}"
-            self._formatted_options.append(f"-arch={self.arch}")
+
+    def _prepare_nvrtc_options(self) -> list[bytes]:
+        # Build NVRTC-specific options
+        options = []
+        options.append(f"-arch={self.arch}")
         if self.relocatable_device_code is not None:
-            self._formatted_options.append(
-                f"--relocatable-device-code={_handle_boolean_option(self.relocatable_device_code)}"
-            )
+            options.append(f"--relocatable-device-code={_handle_boolean_option(self.relocatable_device_code)}")
         if self.extensible_whole_program is not None and self.extensible_whole_program:
-            self._formatted_options.append("--extensible-whole-program")
+            options.append("--extensible-whole-program")
         if self.debug is not None and self.debug:
-            self._formatted_options.append("--device-debug")
+            options.append("--device-debug")
         if self.lineinfo is not None and self.lineinfo:
-            self._formatted_options.append("--generate-line-info")
+            options.append("--generate-line-info")
         if self.device_code_optimize is not None and self.device_code_optimize:
-            self._formatted_options.append("--dopt=on")
+            options.append("--dopt=on")
         if self.ptxas_options is not None:
             opt_name = "--ptxas-options"
             if isinstance(self.ptxas_options, str):
-                self._formatted_options.append(f"{opt_name}={self.ptxas_options}")
+                options.append(f"{opt_name}={self.ptxas_options}")
             elif is_sequence(self.ptxas_options):
                 for opt_value in self.ptxas_options:
-                    self._formatted_options.append(f"{opt_name}={opt_value}")
+                    options.append(f"{opt_name}={opt_value}")
         if self.max_register_count is not None:
-            self._formatted_options.append(f"--maxrregcount={self.max_register_count}")
+            options.append(f"--maxrregcount={self.max_register_count}")
         if self.ftz is not None:
-            self._formatted_options.append(f"--ftz={_handle_boolean_option(self.ftz)}")
+            options.append(f"--ftz={_handle_boolean_option(self.ftz)}")
         if self.prec_sqrt is not None:
-            self._formatted_options.append(f"--prec-sqrt={_handle_boolean_option(self.prec_sqrt)}")
+            options.append(f"--prec-sqrt={_handle_boolean_option(self.prec_sqrt)}")
         if self.prec_div is not None:
-            self._formatted_options.append(f"--prec-div={_handle_boolean_option(self.prec_div)}")
+            options.append(f"--prec-div={_handle_boolean_option(self.prec_div)}")
         if self.fma is not None:
-            self._formatted_options.append(f"--fmad={_handle_boolean_option(self.fma)}")
+            options.append(f"--fmad={_handle_boolean_option(self.fma)}")
         if self.use_fast_math is not None and self.use_fast_math:
-            self._formatted_options.append("--use_fast_math")
+            options.append("--use_fast_math")
         if self.extra_device_vectorization is not None and self.extra_device_vectorization:
-            self._formatted_options.append("--extra-device-vectorization")
+            options.append("--extra-device-vectorization")
         if self.link_time_optimization is not None and self.link_time_optimization:
-            self._formatted_options.append("--dlink-time-opt")
+            options.append("--dlink-time-opt")
         if self.gen_opt_lto is not None and self.gen_opt_lto:
-            self._formatted_options.append("--gen-opt-lto")
+            options.append("--gen-opt-lto")
         if self.define_macro is not None:
-            _process_define_macro(self._formatted_options, self.define_macro)
+            _process_define_macro(options, self.define_macro)
         if self.undefine_macro is not None:
             if isinstance(self.undefine_macro, str):
-                self._formatted_options.append(f"--undefine-macro={self.undefine_macro}")
+                options.append(f"--undefine-macro={self.undefine_macro}")
             elif is_sequence(self.undefine_macro):
                 for macro in self.undefine_macro:
-                    self._formatted_options.append(f"--undefine-macro={macro}")
+                    options.append(f"--undefine-macro={macro}")
         if self.include_path is not None:
             if isinstance(self.include_path, str):
-                self._formatted_options.append(f"--include-path={self.include_path}")
+                options.append(f"--include-path={self.include_path}")
             elif is_sequence(self.include_path):
                 for path in self.include_path:
-                    self._formatted_options.append(f"--include-path={path}")
+                    options.append(f"--include-path={path}")
         if self.pre_include is not None:
             if isinstance(self.pre_include, str):
-                self._formatted_options.append(f"--pre-include={self.pre_include}")
+                options.append(f"--pre-include={self.pre_include}")
             elif is_sequence(self.pre_include):
                 for header in self.pre_include:
-                    self._formatted_options.append(f"--pre-include={header}")
-
+                    options.append(f"--pre-include={header}")
         if self.no_source_include is not None and self.no_source_include:
-            self._formatted_options.append("--no-source-include")
+            options.append("--no-source-include")
         if self.std is not None:
-            self._formatted_options.append(f"--std={self.std}")
+            options.append(f"--std={self.std}")
         if self.builtin_move_forward is not None:
-            self._formatted_options.append(
-                f"--builtin-move-forward={_handle_boolean_option(self.builtin_move_forward)}"
-            )
+            options.append(f"--builtin-move-forward={_handle_boolean_option(self.builtin_move_forward)}")
         if self.builtin_initializer_list is not None:
-            self._formatted_options.append(
-                f"--builtin-initializer-list={_handle_boolean_option(self.builtin_initializer_list)}"
-            )
+            options.append(f"--builtin-initializer-list={_handle_boolean_option(self.builtin_initializer_list)}")
         if self.disable_warnings is not None and self.disable_warnings:
-            self._formatted_options.append("--disable-warnings")
+            options.append("--disable-warnings")
         if self.restrict is not None and self.restrict:
-            self._formatted_options.append("--restrict")
+            options.append("--restrict")
         if self.device_as_default_execution_space is not None and self.device_as_default_execution_space:
-            self._formatted_options.append("--device-as-default-execution-space")
+            options.append("--device-as-default-execution-space")
         if self.device_int128 is not None and self.device_int128:
-            self._formatted_options.append("--device-int128")
+            options.append("--device-int128")
         if self.optimization_info is not None:
-            self._formatted_options.append(f"--optimization-info={self.optimization_info}")
+            options.append(f"--optimization-info={self.optimization_info}")
         if self.no_display_error_number is not None and self.no_display_error_number:
-            self._formatted_options.append("--no-display-error-number")
+            options.append("--no-display-error-number")
         if self.diag_error is not None:
             if isinstance(self.diag_error, int):
-                self._formatted_options.append(f"--diag-error={self.diag_error}")
+                options.append(f"--diag-error={self.diag_error}")
             elif is_sequence(self.diag_error):
                 for error in self.diag_error:
-                    self._formatted_options.append(f"--diag-error={error}")
+                    options.append(f"--diag-error={error}")
         if self.diag_suppress is not None:
             if isinstance(self.diag_suppress, int):
-                self._formatted_options.append(f"--diag-suppress={self.diag_suppress}")
+                options.append(f"--diag-suppress={self.diag_suppress}")
             elif is_sequence(self.diag_suppress):
                 for suppress in self.diag_suppress:
-                    self._formatted_options.append(f"--diag-suppress={suppress}")
+                    options.append(f"--diag-suppress={suppress}")
         if self.diag_warn is not None:
             if isinstance(self.diag_warn, int):
-                self._formatted_options.append(f"--diag-warn={self.diag_warn}")
+                options.append(f"--diag-warn={self.diag_warn}")
             elif is_sequence(self.diag_warn):
                 for warn in self.diag_warn:
-                    self._formatted_options.append(f"--diag-warn={warn}")
+                    options.append(f"--diag-warn={warn}")
         if self.brief_diagnostics is not None:
-            self._formatted_options.append(f"--brief-diagnostics={_handle_boolean_option(self.brief_diagnostics)}")
+            options.append(f"--brief-diagnostics={_handle_boolean_option(self.brief_diagnostics)}")
         if self.time is not None:
-            self._formatted_options.append(f"--time={self.time}")
+            options.append(f"--time={self.time}")
         if self.split_compile is not None:
-            self._formatted_options.append(f"--split-compile={self.split_compile}")
+            options.append(f"--split-compile={self.split_compile}")
         if self.fdevice_syntax_only is not None and self.fdevice_syntax_only:
-            self._formatted_options.append("--fdevice-syntax-only")
+            options.append("--fdevice-syntax-only")
         if self.minimal is not None and self.minimal:
-            self._formatted_options.append("--minimal")
+            options.append("--minimal")
         if self.numba_debug:
-            self._formatted_options.append("--numba-debug")
-
-    def _prepare_nvrtc_options(self) -> list[bytes]:
-        # NVRTC uses all the formatted options that were set in __post_init__
-        # All options in _formatted_options are already NVRTC-compatible
-        return list(o.encode() for o in self._formatted_options)
+            options.append("--numba-debug")
+        return list(o.encode() for o in options)
 
     def _prepare_nvjitlink_options(self) -> list[bytes]:
         options = []
@@ -662,8 +652,7 @@ def as_bytes(self, backend: str) -> list[bytes]:
             raise ValueError(f"Unknown backend '{backend}'. Must be one of: 'nvrtc', 'nvjitlink', 'nvvm'")
 
     def __repr__(self):
-        # __TODO__ improve this
-        return str(self._formatted_options)
+        return f"ProgramOptions(name={self.name!r}, arch={self.arch!r})"
 
 
 ProgramHandleT = Union["cuda.bindings.nvrtc.nvrtcProgram", LinkerHandleT]

From 32fa9303990ab28df8dccda44541b48c30e279cf Mon Sep 17 00:00:00 2001
From: Leo Fang <leof@nvidia.com>
Date: Sat, 13 Dec 2025 02:10:53 +0000
Subject: [PATCH 11/15] fix & expand tests

---
 cuda_core/cuda/core/experimental/_program.py | 2 --
 cuda_core/tests/test_program.py              | 9 +++++++++
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py
index 28e4e12eb2..d19756d5dd 100644
--- a/cuda_core/cuda/core/experimental/_program.py
+++ b/cuda_core/cuda/core/experimental/_program.py
@@ -553,8 +553,6 @@ def _prepare_nvvm_options(self, as_bytes: bool = True) -> Union[list[bytes], lis
             unsupported.append("use_fast_math")
         if self.extra_device_vectorization is not None and self.extra_device_vectorization:
             unsupported.append("extra_device_vectorization")
-        if self.link_time_optimization is not None and self.link_time_optimization:
-            unsupported.append("link_time_optimization")
         if self.gen_opt_lto is not None and self.gen_opt_lto:
             unsupported.append("gen_opt_lto")
         if self.define_macro is not None:
diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py
index 540fd94b0b..ae3d5ab559 100644
--- a/cuda_core/tests/test_program.py
+++ b/cuda_core/tests/test_program.py
@@ -220,6 +220,15 @@ def ptx_code_object():
         ProgramOptions(diag_warn=1000),
         ProgramOptions(std="c++11", ptxas_options=["-v"]),
         ProgramOptions(std="c++11", ptxas_options=["-v", "-O2"]),
+        ProgramOptions(brief_diagnostics=True),
+        ProgramOptions(builtin_move_forward=False),
+        ProgramOptions(extensible_whole_program=True),
+        ProgramOptions(fdevice_syntax_only=True),
+        ProgramOptions(gen_opt_lto=True),
+        ProgramOptions(minimal=True),
+        ProgramOptions(no_source_include=True),
+        # TODO: Add test for pre_include once we have a suitable header in the test environment
+        # ProgramOptions(pre_include="cuda_runtime.h"),
     ],
 )
 def test_cpp_program_with_various_options(init_cuda, options):

From e388a9ea3f686a99372e47565ee033f4ff076e91 Mon Sep 17 00:00:00 2001
From: Leo Fang <leof@nvidia.com>
Date: Sat, 13 Dec 2025 02:50:16 +0000
Subject: [PATCH 12/15] cover new NVRTC options

---
 cuda_core/cuda/core/experimental/_program.py | 75 ++++++++++++++++++++
 cuda_core/tests/test_program.py              | 62 ++++++++++++++++
 2 files changed, 137 insertions(+)

diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py
index d19756d5dd..adce5dbac5 100644
--- a/cuda_core/cuda/core/experimental/_program.py
+++ b/cuda_core/cuda/core/experimental/_program.py
@@ -256,6 +256,42 @@ class ProgramOptions:
     minimal : bool, optional
         Omit certain language features to reduce compile time for small programs.
         Default: False
+    no_cache : bool, optional
+        Disable compiler caching.
+        Default: False
+    fdevice_time_trace : str, optional
+        Generate time trace JSON for profiling compilation (NVRTC only).
+        Default: None
+    device_float128 : bool, optional
+        Allow __float128 type in device code (NVRTC only).
+        Default: False
+    frandom_seed : str, optional
+        Set random seed for randomized optimizations (NVRTC only).
+        Default: None
+    ofast_compile : str, optional
+        Fast compilation mode: "0", "min", "mid", or "max" (NVRTC only).
+        Default: None
+    pch : bool, optional
+        Use default precompiled header (NVRTC only, CUDA 12.8+).
+        Default: False
+    create_pch : str, optional
+        Create precompiled header file (NVRTC only, CUDA 12.8+).
+        Default: None
+    use_pch : str, optional
+        Use specific precompiled header file (NVRTC only, CUDA 12.8+).
+        Default: None
+    pch_dir : str, optional
+        PCH directory location (NVRTC only, CUDA 12.8+).
+        Default: None
+    pch_verbose : bool, optional
+        Verbose PCH output (NVRTC only, CUDA 12.8+).
+        Default: False
+    pch_messages : bool, optional
+        Control PCH diagnostic messages (NVRTC only, CUDA 12.8+).
+        Default: False
+    instantiate_templates_in_pch : bool, optional
+        Control template instantiation in PCH (NVRTC only, CUDA 12.8+).
+        Default: False
     """
 
     name: str | None = "<default program>"
@@ -299,6 +335,18 @@ class ProgramOptions:
     split_compile: int | None = None
     fdevice_syntax_only: bool | None = None
     minimal: bool | None = None
+    no_cache: bool | None = None
+    fdevice_time_trace: str | None = None
+    device_float128: bool | None = None
+    frandom_seed: str | None = None
+    ofast_compile: str | None = None
+    pch: bool | None = None
+    create_pch: str | None = None
+    use_pch: str | None = None
+    pch_dir: str | None = None
+    pch_verbose: bool | None = None
+    pch_messages: bool | None = None
+    instantiate_templates_in_pch: bool | None = None
     numba_debug: bool | None = None  # Custom option for Numba debugging
 
     def __post_init__(self):
@@ -382,6 +430,8 @@ def _prepare_nvrtc_options(self) -> list[bytes]:
             options.append("--device-as-default-execution-space")
         if self.device_int128 is not None and self.device_int128:
             options.append("--device-int128")
+        if self.device_float128 is not None and self.device_float128:
+            options.append("--device-float128")
         if self.optimization_info is not None:
             options.append(f"--optimization-info={self.optimization_info}")
         if self.no_display_error_number is not None and self.no_display_error_number:
@@ -414,6 +464,31 @@ def _prepare_nvrtc_options(self) -> list[bytes]:
             options.append("--fdevice-syntax-only")
         if self.minimal is not None and self.minimal:
             options.append("--minimal")
+        if self.no_cache is not None and self.no_cache:
+            options.append("--no-cache")
+        if self.fdevice_time_trace is not None:
+            options.append(f"--fdevice-time-trace={self.fdevice_time_trace}")
+        if self.frandom_seed is not None:
+            options.append(f"--frandom-seed={self.frandom_seed}")
+        if self.ofast_compile is not None:
+            options.append(f"--Ofast-compile={self.ofast_compile}")
+        # PCH options (CUDA 12.8+)
+        if self.pch is not None and self.pch:
+            options.append("--pch")
+        if self.create_pch is not None:
+            options.append(f"--create-pch={self.create_pch}")
+        if self.use_pch is not None:
+            options.append(f"--use-pch={self.use_pch}")
+        if self.pch_dir is not None:
+            options.append(f"--pch-dir={self.pch_dir}")
+        if self.pch_verbose is not None:
+            options.append(f"--pch-verbose={_handle_boolean_option(self.pch_verbose)}")
+        if self.pch_messages is not None:
+            options.append(f"--pch-messages={_handle_boolean_option(self.pch_messages)}")
+        if self.instantiate_templates_in_pch is not None:
+            options.append(
+                f"--instantiate-templates-in-pch={_handle_boolean_option(self.instantiate_templates_in_pch)}"
+            )
         if self.numba_debug:
             options.append("--numba-debug")
         return list(o.encode() for o in options)
diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py
index ae3d5ab559..1da2372317 100644
--- a/cuda_core/tests/test_program.py
+++ b/cuda_core/tests/test_program.py
@@ -7,6 +7,7 @@
 
 import pytest
 from cuda.core.experimental import _linker
+from cuda.core.experimental._device import Device
 from cuda.core.experimental._module import Kernel, ObjectCode
 from cuda.core.experimental._program import Program, ProgramOptions
 from cuda.core.experimental._utils.cuda_utils import CUDAError, driver, handle_return
@@ -229,6 +230,67 @@ def ptx_code_object():
         ProgramOptions(no_source_include=True),
         # TODO: Add test for pre_include once we have a suitable header in the test environment
         # ProgramOptions(pre_include="cuda_runtime.h"),
+        ProgramOptions(no_cache=True),
+        ProgramOptions(fdevice_time_trace="trace.json"),
+        pytest.param(
+            ProgramOptions(arch="sm_100", device_float128=True),
+            marks=pytest.mark.skipif(
+                Device().compute_capability < (10, 0),
+                reason="device_float128 requires sm_100 or later",
+            ),
+        ),
+        ProgramOptions(frandom_seed="12345"),
+        ProgramOptions(ofast_compile="min"),
+        pytest.param(
+            ProgramOptions(pch=True),
+            marks=pytest.mark.skipif(
+                (_get_nvrtc_version_for_tests() or 0) < 12800,
+                reason="PCH requires NVRTC >= 12.8",
+            ),
+        ),
+        pytest.param(
+            ProgramOptions(create_pch="test.pch"),
+            marks=pytest.mark.skipif(
+                (_get_nvrtc_version_for_tests() or 0) < 12800,
+                reason="PCH requires NVRTC >= 12.8",
+            ),
+        ),
+        pytest.param(
+            ProgramOptions(use_pch="test.pch"),
+            marks=pytest.mark.skipif(
+                (_get_nvrtc_version_for_tests() or 0) < 12800,
+                reason="PCH requires NVRTC >= 12.8",
+            ),
+        ),
+        # TODO: pch_dir requires actual PCH directory to exist - needs integration test
+        # pytest.param(
+        #     ProgramOptions(pch_dir="/tmp/pch"),
+        #     marks=pytest.mark.skipif(
+        #         (_get_nvrtc_version_for_tests() or 0) < 12800,
+        #         reason="PCH requires NVRTC >= 12.8",
+        #     ),
+        # ),
+        pytest.param(
+            ProgramOptions(pch_verbose=True),
+            marks=pytest.mark.skipif(
+                (_get_nvrtc_version_for_tests() or 0) < 12800,
+                reason="PCH requires NVRTC >= 12.8",
+            ),
+        ),
+        pytest.param(
+            ProgramOptions(pch_messages=False),
+            marks=pytest.mark.skipif(
+                (_get_nvrtc_version_for_tests() or 0) < 12800,
+                reason="PCH requires NVRTC >= 12.8",
+            ),
+        ),
+        pytest.param(
+            ProgramOptions(instantiate_templates_in_pch=True),
+            marks=pytest.mark.skipif(
+                (_get_nvrtc_version_for_tests() or 0) < 12800,
+                reason="PCH requires NVRTC >= 12.8",
+            ),
+        ),
     ],
 )
 def test_cpp_program_with_various_options(init_cuda, options):

From 15f4b6a52712c0d7af5cb51fa9dd175bf7e22705 Mon Sep 17 00:00:00 2001
From: Leo Fang <leof@nvidia.com>
Date: Sat, 13 Dec 2025 04:59:53 +0000
Subject: [PATCH 13/15] fix linker options handling

---
 cuda_core/cuda/core/experimental/_linker.py  | 150 ++++++++++++-------
 cuda_core/cuda/core/experimental/_program.py | 104 +------------
 cuda_core/tests/test_linker.py               |  33 ++++
 cuda_core/tests/test_program.py              |  24 ---
 4 files changed, 130 insertions(+), 181 deletions(-)

diff --git a/cuda_core/cuda/core/experimental/_linker.py b/cuda_core/cuda/core/experimental/_linker.py
index 5c54a88c8c..2c94fb9b02 100644
--- a/cuda_core/cuda/core/experimental/_linker.py
+++ b/cuda_core/cuda/core/experimental/_linker.py
@@ -202,74 +202,78 @@ class LinkerOptions:
     def __post_init__(self):
         _lazy_init()
         self._name = self.name.encode()
-        self.formatted_options = []
-        if _nvjitlink:
-            self._init_nvjitlink()
-        else:
-            self._init_driver()
 
-    def _init_nvjitlink(self):
+    def _prepare_nvjitlink_options(self, as_bytes: bool = False) -> Union[list[bytes], list[str]]:
+        options = []
+
         if self.arch is not None:
-            self.formatted_options.append(f"-arch={self.arch}")
+            options.append(f"-arch={self.arch}")
         else:
-            self.formatted_options.append("-arch=sm_" + "".join(f"{i}" for i in Device().compute_capability))
+            options.append("-arch=sm_" + "".join(f"{i}" for i in Device().compute_capability))
         if self.max_register_count is not None:
-            self.formatted_options.append(f"-maxrregcount={self.max_register_count}")
+            options.append(f"-maxrregcount={self.max_register_count}")
         if self.time is not None:
-            self.formatted_options.append("-time")
+            options.append("-time")
         if self.verbose:
-            self.formatted_options.append("-verbose")
+            options.append("-verbose")
         if self.link_time_optimization:
-            self.formatted_options.append("-lto")
+            options.append("-lto")
         if self.ptx:
-            self.formatted_options.append("-ptx")
+            options.append("-ptx")
         if self.optimization_level is not None:
-            self.formatted_options.append(f"-O{self.optimization_level}")
+            options.append(f"-O{self.optimization_level}")
         if self.debug:
-            self.formatted_options.append("-g")
+            options.append("-g")
         if self.lineinfo:
-            self.formatted_options.append("-lineinfo")
+            options.append("-lineinfo")
         if self.ftz is not None:
-            self.formatted_options.append(f"-ftz={'true' if self.ftz else 'false'}")
+            options.append(f"-ftz={'true' if self.ftz else 'false'}")
         if self.prec_div is not None:
-            self.formatted_options.append(f"-prec-div={'true' if self.prec_div else 'false'}")
+            options.append(f"-prec-div={'true' if self.prec_div else 'false'}")
         if self.prec_sqrt is not None:
-            self.formatted_options.append(f"-prec-sqrt={'true' if self.prec_sqrt else 'false'}")
+            options.append(f"-prec-sqrt={'true' if self.prec_sqrt else 'false'}")
         if self.fma is not None:
-            self.formatted_options.append(f"-fma={'true' if self.fma else 'false'}")
+            options.append(f"-fma={'true' if self.fma else 'false'}")
         if self.kernels_used is not None:
             if isinstance(self.kernels_used, str):
-                self.formatted_options.append(f"-kernels-used={self.kernels_used}")
+                options.append(f"-kernels-used={self.kernels_used}")
             elif isinstance(self.kernels_used, list):
                 for kernel in self.kernels_used:
-                    self.formatted_options.append(f"-kernels-used={kernel}")
+                    options.append(f"-kernels-used={kernel}")
         if self.variables_used is not None:
             if isinstance(self.variables_used, str):
-                self.formatted_options.append(f"-variables-used={self.variables_used}")
+                options.append(f"-variables-used={self.variables_used}")
             elif isinstance(self.variables_used, list):
                 for variable in self.variables_used:
-                    self.formatted_options.append(f"-variables-used={variable}")
+                    options.append(f"-variables-used={variable}")
         if self.optimize_unused_variables is not None:
-            self.formatted_options.append("-optimize-unused-variables")
+            options.append("-optimize-unused-variables")
         if self.ptxas_options is not None:
             if isinstance(self.ptxas_options, str):
-                self.formatted_options.append(f"-Xptxas={self.ptxas_options}")
+                options.append(f"-Xptxas={self.ptxas_options}")
             elif is_sequence(self.ptxas_options):
                 for opt in self.ptxas_options:
-                    self.formatted_options.append(f"-Xptxas={opt}")
+                    options.append(f"-Xptxas={opt}")
         if self.split_compile is not None:
-            self.formatted_options.append(f"-split-compile={self.split_compile}")
+            options.append(f"-split-compile={self.split_compile}")
         if self.split_compile_extended is not None:
-            self.formatted_options.append(f"-split-compile-extended={self.split_compile_extended}")
+            options.append(f"-split-compile-extended={self.split_compile_extended}")
         if self.no_cache is True:
-            self.formatted_options.append("-no-cache")
+            options.append("-no-cache")
+
+        if as_bytes:
+            return [o.encode() for o in options]
+        else:
+            return options
+
+    def _prepare_driver_options(self) -> tuple[list, list]:
+        formatted_options = []
+        option_keys = []
 
-    def _init_driver(self):
-        self.option_keys = []
         # allocate 4 KiB each for info/error logs
         size = 4194304
-        self.formatted_options.extend((bytearray(size), size, bytearray(size), size))
-        self.option_keys.extend(
+        formatted_options.extend((bytearray(size), size, bytearray(size), size))
+        option_keys.extend(
             (
                 _driver.CUjit_option.CU_JIT_INFO_LOG_BUFFER,
                 _driver.CUjit_option.CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES,
@@ -280,30 +284,30 @@ def _init_driver(self):
 
         if self.arch is not None:
             arch = self.arch.split("_")[-1].upper()
-            self.formatted_options.append(getattr(_driver.CUjit_target, f"CU_TARGET_COMPUTE_{arch}"))
-            self.option_keys.append(_driver.CUjit_option.CU_JIT_TARGET)
+            formatted_options.append(getattr(_driver.CUjit_target, f"CU_TARGET_COMPUTE_{arch}"))
+            option_keys.append(_driver.CUjit_option.CU_JIT_TARGET)
         if self.max_register_count is not None:
-            self.formatted_options.append(self.max_register_count)
-            self.option_keys.append(_driver.CUjit_option.CU_JIT_MAX_REGISTERS)
+            formatted_options.append(self.max_register_count)
+            option_keys.append(_driver.CUjit_option.CU_JIT_MAX_REGISTERS)
         if self.time is not None:
             raise ValueError("time option is not supported by the driver API")
         if self.verbose:
-            self.formatted_options.append(1)
-            self.option_keys.append(_driver.CUjit_option.CU_JIT_LOG_VERBOSE)
+            formatted_options.append(1)
+            option_keys.append(_driver.CUjit_option.CU_JIT_LOG_VERBOSE)
         if self.link_time_optimization:
-            self.formatted_options.append(1)
-            self.option_keys.append(_driver.CUjit_option.CU_JIT_LTO)
+            formatted_options.append(1)
+            option_keys.append(_driver.CUjit_option.CU_JIT_LTO)
         if self.ptx:
             raise ValueError("ptx option is not supported by the driver API")
         if self.optimization_level is not None:
-            self.formatted_options.append(self.optimization_level)
-            self.option_keys.append(_driver.CUjit_option.CU_JIT_OPTIMIZATION_LEVEL)
+            formatted_options.append(self.optimization_level)
+            option_keys.append(_driver.CUjit_option.CU_JIT_OPTIMIZATION_LEVEL)
         if self.debug:
-            self.formatted_options.append(1)
-            self.option_keys.append(_driver.CUjit_option.CU_JIT_GENERATE_DEBUG_INFO)
+            formatted_options.append(1)
+            option_keys.append(_driver.CUjit_option.CU_JIT_GENERATE_DEBUG_INFO)
         if self.lineinfo:
-            self.formatted_options.append(1)
-            self.option_keys.append(_driver.CUjit_option.CU_JIT_GENERATE_LINE_INFO)
+            formatted_options.append(1)
+            option_keys.append(_driver.CUjit_option.CU_JIT_GENERATE_LINE_INFO)
         if self.ftz is not None:
             warn("ftz option is deprecated in the driver API", DeprecationWarning, stacklevel=3)
         if self.prec_div is not None:
@@ -325,8 +329,37 @@ def _init_driver(self):
         if self.split_compile_extended is not None:
             raise ValueError("split_compile_extended option is not supported by the driver API")
         if self.no_cache is True:
-            self.formatted_options.append(_driver.CUjit_cacheMode.CU_JIT_CACHE_OPTION_NONE)
-            self.option_keys.append(_driver.CUjit_option.CU_JIT_CACHE_MODE)
+            formatted_options.append(_driver.CUjit_cacheMode.CU_JIT_CACHE_OPTION_NONE)
+            option_keys.append(_driver.CUjit_option.CU_JIT_CACHE_MODE)
+
+        return formatted_options, option_keys
+
+    def as_bytes(self, backend: str = "nvjitlink") -> list[bytes]:
+        """Return the nvJitLink option strings for these linker options, encoded as bytes.
+
+        Parameters
+        ----------
+        backend : str, optional
+            The linker backend, compared case-insensitively. Only "nvjitlink" is supported (the default).
+
+        Returns
+        -------
+        list[bytes]
+            List of option strings encoded as bytes.
+
+        Raises
+        ------
+        ValueError
+            If ``backend`` is anything other than "nvjitlink" (this includes "driver").
+        RuntimeError
+            If "nvjitlink" is requested but the nvJitLink backend is not available.
+        """
+        backend = backend.lower()
+        if backend != "nvjitlink":
+            raise ValueError(f"as_bytes() only supports 'nvjitlink' backend, got '{backend}'")
+        if not _nvjitlink:
+            raise RuntimeError("nvJitLink backend is not available")
+        return self._prepare_nvjitlink_options(as_bytes=True)
 
 
 # This needs to be a free function not a method, as it's disallowed by contextmanager.
@@ -369,7 +402,7 @@ class Linker:
     """
 
     class _MembersNeededForFinalize:
-        __slots__ = ("handle", "use_nvjitlink", "const_char_keep_alive")
+        __slots__ = ("handle", "use_nvjitlink", "const_char_keep_alive", "formatted_options", "option_keys")
 
         def __init__(self, program_obj, handle, use_nvjitlink):
             self.handle = handle
@@ -394,14 +427,17 @@ def __init__(self, *object_codes: ObjectCode, options: LinkerOptions = None):
         self._options = options = check_or_create_options(LinkerOptions, options, "Linker options")
         with _exception_manager(self):
             if _nvjitlink:
-                handle = _nvjitlink.create(len(options.formatted_options), options.formatted_options)
+                formatted_options = options._prepare_nvjitlink_options(as_bytes=False)
+                handle = _nvjitlink.create(len(formatted_options), formatted_options)
                 use_nvjitlink = True
             else:
-                handle = handle_return(
-                    _driver.cuLinkCreate(len(options.formatted_options), options.option_keys, options.formatted_options)
-                )
+                formatted_options, option_keys = options._prepare_driver_options()
+                handle = handle_return(_driver.cuLinkCreate(len(formatted_options), option_keys, formatted_options))
                 use_nvjitlink = False
         self._mnff = Linker._MembersNeededForFinalize(self, handle, use_nvjitlink)
+        self._mnff.formatted_options = formatted_options  # Store for log access
+        if not _nvjitlink:
+            self._mnff.option_keys = option_keys
 
         for code in object_codes:
             assert_type(code, ObjectCode)
@@ -508,7 +544,7 @@ def get_error_log(self) -> str:
             log = bytearray(log_size)
             _nvjitlink.get_error_log(self._mnff.handle, log)
         else:
-            log = self._options.formatted_options[2]
+            log = self._mnff.formatted_options[2]
         return log.decode("utf-8", errors="backslashreplace")
 
     def get_info_log(self) -> str:
@@ -524,7 +560,7 @@ def get_info_log(self) -> str:
             log = bytearray(log_size)
             _nvjitlink.get_info_log(self._mnff.handle, log)
         else:
-            log = self._options.formatted_options[0]
+            log = self._mnff.formatted_options[0]
         return log.decode("utf-8", errors="backslashreplace")
 
     def _input_type_from_code_type(self, code_type: str):
diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py
index adce5dbac5..a287131007 100644
--- a/cuda_core/cuda/core/experimental/_program.py
+++ b/cuda_core/cuda/core/experimental/_program.py
@@ -493,100 +493,6 @@ def _prepare_nvrtc_options(self) -> list[bytes]:
             options.append("--numba-debug")
         return list(o.encode() for o in options)
 
-    def _prepare_nvjitlink_options(self) -> list[bytes]:
-        options = []
-
-        # arch is always set
-        assert self.arch is not None
-        options.append(f"-arch={self.arch}")
-        if self.max_register_count is not None:
-            options.append(f"-maxrregcount={self.max_register_count}")
-        if self.time is not None:
-            options.append("-time")
-        if self.debug is not None and self.debug:
-            options.append("-g")
-        if self.lineinfo is not None and self.lineinfo:
-            options.append("-lineinfo")
-        if self.ftz is not None:
-            options.append(f"-ftz={'true' if self.ftz else 'false'}")
-        if self.prec_div is not None:
-            options.append(f"-prec-div={'true' if self.prec_div else 'false'}")
-        if self.prec_sqrt is not None:
-            options.append(f"-prec-sqrt={'true' if self.prec_sqrt else 'false'}")
-        if self.fma is not None:
-            options.append(f"-fma={'true' if self.fma else 'false'}")
-        if self.link_time_optimization is not None and self.link_time_optimization:
-            options.append("-lto")
-        if self.ptxas_options is not None:
-            if isinstance(self.ptxas_options, str):
-                options.append(f"-Xptxas={self.ptxas_options}")
-            elif is_sequence(self.ptxas_options):
-                for opt in self.ptxas_options:
-                    options.append(f"-Xptxas={opt}")
-        if self.split_compile is not None:
-            options.append(f"-split-compile={self.split_compile}")
-
-        # Check for unsupported options and raise error if they are set
-        unsupported = []
-        if self.relocatable_device_code is not None:
-            unsupported.append("relocatable_device_code")
-        if self.extensible_whole_program is not None and self.extensible_whole_program:
-            unsupported.append("extensible_whole_program")
-        if self.device_code_optimize is not None:
-            unsupported.append("device_code_optimize")
-        if self.use_fast_math is not None and self.use_fast_math:
-            unsupported.append("use_fast_math")
-        if self.extra_device_vectorization is not None and self.extra_device_vectorization:
-            unsupported.append("extra_device_vectorization")
-        if self.gen_opt_lto is not None and self.gen_opt_lto:
-            unsupported.append("gen_opt_lto")
-        if self.define_macro is not None:
-            unsupported.append("define_macro")
-        if self.undefine_macro is not None:
-            unsupported.append("undefine_macro")
-        if self.include_path is not None:
-            unsupported.append("include_path")
-        if self.pre_include is not None:
-            unsupported.append("pre_include")
-        if self.no_source_include is not None and self.no_source_include:
-            unsupported.append("no_source_include")
-        if self.std is not None:
-            unsupported.append("std")
-        if self.builtin_move_forward is not None:
-            unsupported.append("builtin_move_forward")
-        if self.builtin_initializer_list is not None:
-            unsupported.append("builtin_initializer_list")
-        if self.disable_warnings is not None and self.disable_warnings:
-            unsupported.append("disable_warnings")
-        if self.restrict is not None and self.restrict:
-            unsupported.append("restrict")
-        if self.device_as_default_execution_space is not None and self.device_as_default_execution_space:
-            unsupported.append("device_as_default_execution_space")
-        if self.device_int128 is not None and self.device_int128:
-            unsupported.append("device_int128")
-        if self.optimization_info is not None:
-            unsupported.append("optimization_info")
-        if self.no_display_error_number is not None and self.no_display_error_number:
-            unsupported.append("no_display_error_number")
-        if self.diag_error is not None:
-            unsupported.append("diag_error")
-        if self.diag_suppress is not None:
-            unsupported.append("diag_suppress")
-        if self.diag_warn is not None:
-            unsupported.append("diag_warn")
-        if self.brief_diagnostics is not None:
-            unsupported.append("brief_diagnostics")
-        if self.fdevice_syntax_only is not None and self.fdevice_syntax_only:
-            unsupported.append("fdevice_syntax_only")
-        if self.minimal is not None and self.minimal:
-            unsupported.append("minimal")
-        if self.numba_debug is not None and self.numba_debug:
-            unsupported.append("numba_debug")
-        if unsupported:
-            raise CUDAError(f"The following options are not supported by nvJitLink backend: {', '.join(unsupported)}")
-
-        return list(o.encode() for o in options)
-
     def _prepare_nvvm_options(self, as_bytes: bool = True) -> Union[list[bytes], list[str]]:
         options = []
 
@@ -694,8 +600,7 @@ def as_bytes(self, backend: str) -> list[bytes]:
         Parameters
         ----------
         backend : str
-            The compiler backend to prepare options for. Must be either "nvrtc", "nvjitlink",
-            or "nvvm".
+            The compiler backend to prepare options for. Must be either "nvrtc" or "nvvm".
 
         Returns
         -------
@@ -717,12 +622,10 @@ def as_bytes(self, backend: str) -> list[bytes]:
         backend = backend.lower()
         if backend == "nvrtc":
             return self._prepare_nvrtc_options()
-        elif backend == "nvjitlink":
-            return self._prepare_nvjitlink_options()
         elif backend == "nvvm":
             return self._prepare_nvvm_options(as_bytes=True)
         else:
-            raise ValueError(f"Unknown backend '{backend}'. Must be one of: 'nvrtc', 'nvjitlink', 'nvvm'")
+            raise ValueError(f"Unknown backend '{backend}'. Must be one of: 'nvrtc', 'nvvm'")
 
     def __repr__(self):
         return f"ProgramOptions(name={self.name!r}, arch={self.arch!r})"
@@ -816,15 +719,16 @@ def _translate_program_options(self, options: ProgramOptions) -> LinkerOptions:
             arch=options.arch,
             max_register_count=options.max_register_count,
             time=options.time,
+            link_time_optimization=options.link_time_optimization,
             debug=options.debug,
             lineinfo=options.lineinfo,
             ftz=options.ftz,
             prec_div=options.prec_div,
             prec_sqrt=options.prec_sqrt,
             fma=options.fma,
-            link_time_optimization=options.link_time_optimization,
             split_compile=options.split_compile,
             ptxas_options=options.ptxas_options,
+            no_cache=options.no_cache,
         )
 
     def close(self):
diff --git a/cuda_core/tests/test_linker.py b/cuda_core/tests/test_linker.py
index e0c8d37b65..b7af4b6ab7 100644
--- a/cuda_core/tests/test_linker.py
+++ b/cuda_core/tests/test_linker.py
@@ -173,3 +173,36 @@ def test_linker_get_info_log(compile_ptx_functions):
     linker.link("cubin")
     log = linker.get_info_log()
     assert isinstance(log, str)
+
+
+@pytest.mark.skipif(is_culink_backend, reason="as_bytes() only supported for nvjitlink backend")
+def test_linker_options_as_bytes_nvjitlink():
+    """Test LinkerOptions.as_bytes() for nvJitLink backend"""
+    options = LinkerOptions(arch="sm_80", debug=True, ftz=True, max_register_count=32)
+    nvjitlink_options = options.as_bytes("nvjitlink")
+
+    # Should return list of bytes
+    assert isinstance(nvjitlink_options, list)
+    assert all(isinstance(opt, bytes) for opt in nvjitlink_options)
+
+    # Decode to check content
+    options_str = [opt.decode() for opt in nvjitlink_options]
+    assert "-arch=sm_80" in options_str
+    assert "-g" in options_str
+    assert "-ftz=true" in options_str
+    assert "-maxrregcount=32" in options_str
+
+
+def test_linker_options_as_bytes_invalid_backend():
+    """Test LinkerOptions.as_bytes() with invalid backend"""
+    options = LinkerOptions(arch="sm_80")
+    with pytest.raises(ValueError, match="only supports 'nvjitlink' backend"):
+        options.as_bytes("invalid")
+
+
+@pytest.mark.skipif(not is_culink_backend, reason="driver backend test")
+def test_linker_options_as_bytes_driver_not_supported():
+    """Test that as_bytes() rejects the "driver" backend name with ValueError"""
+    options = LinkerOptions(arch="sm_80")
+    with pytest.raises(ValueError, match="as_bytes\\(\\) only supports 'nvjitlink' backend"):
+        options.as_bytes("driver")
diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py
index 1da2372317..d576c9208a 100644
--- a/cuda_core/tests/test_program.py
+++ b/cuda_core/tests/test_program.py
@@ -501,23 +501,6 @@ def test_program_options_as_bytes_nvrtc():
     assert "--ftz=true" in options_str
 
 
-def test_program_options_as_bytes_nvjitlink():
-    """Test ProgramOptions.as_bytes() for nvJitLink backend"""
-    options = ProgramOptions(arch="sm_80", debug=True, ftz=True, max_register_count=32)
-    nvjitlink_options = options.as_bytes("nvjitlink")
-
-    # Should return list of bytes
-    assert isinstance(nvjitlink_options, list)
-    assert all(isinstance(opt, bytes) for opt in nvjitlink_options)
-
-    # Decode to check content
-    options_str = [opt.decode() for opt in nvjitlink_options]
-    assert "-arch=sm_80" in options_str
-    assert "-g" in options_str
-    assert "-ftz=true" in options_str
-    assert "-maxrregcount=32" in options_str
-
-
 @nvvm_available
 def test_program_options_as_bytes_nvvm():
     """Test ProgramOptions.as_bytes() for NVVM backend"""
@@ -543,13 +526,6 @@ def test_program_options_as_bytes_invalid_backend():
         options.as_bytes("invalid")
 
 
-def test_program_options_as_bytes_nvjitlink_unsupported_option():
-    """Test that unsupported options raise CUDAError for nvJitLink backend"""
-    options = ProgramOptions(arch="sm_80", std="c++17")
-    with pytest.raises(CUDAError, match="not supported by nvJitLink backend"):
-        options.as_bytes("nvjitlink")
-
-
 @nvvm_available
 def test_program_options_as_bytes_nvvm_unsupported_option():
     """Test that unsupported options raise CUDAError for NVVM backend"""

From bb74621220a526510e14b1d09ee59029dedf0e49 Mon Sep 17 00:00:00 2001
From: Leo Fang <leof@nvidia.com>
Date: Sun, 14 Dec 2025 20:12:54 +0000
Subject: [PATCH 14/15] fix two NVRTC bugs

- the program name is used for pch filename, but on Windows it is problematic
- trace.json could not be properly created with NVRTC 12.9
---
 cuda_core/cuda/core/experimental/_program.py | 2 +-
 cuda_core/tests/test_program.py              | 8 +++++++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py
index a287131007..b49ebeb64d 100644
--- a/cuda_core/cuda/core/experimental/_program.py
+++ b/cuda_core/cuda/core/experimental/_program.py
@@ -294,7 +294,7 @@ class ProgramOptions:
         Default: False
     """
 
-    name: str | None = "<default program>"
+    name: str | None = "default_program"
     arch: str | None = None
     relocatable_device_code: bool | None = None
     extensible_whole_program: bool | None = None
diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py
index d576c9208a..2b0ac5d617 100644
--- a/cuda_core/tests/test_program.py
+++ b/cuda_core/tests/test_program.py
@@ -231,7 +231,13 @@ def ptx_code_object():
         # TODO: Add test for pre_include once we have a suitable header in the test environment
         # ProgramOptions(pre_include="cuda_runtime.h"),
         ProgramOptions(no_cache=True),
-        ProgramOptions(fdevice_time_trace="trace.json"),
+        pytest.param(
+            ProgramOptions(fdevice_time_trace="trace.json"),
+            marks=pytest.mark.skipif(
+                (_get_nvrtc_version_for_tests() or 0) < 13000,
+                reason="buggy with NVRTC < 13.0 (File 'trace.json.json' could not be opened)",
+            ),
+        ),
         pytest.param(
             ProgramOptions(arch="sm_100", device_float128=True),
             marks=pytest.mark.skipif(

From eda06ff711a1b2bc457f17e3f13c332c8b6b53e3 Mon Sep 17 00:00:00 2001
From: Leo Fang <leof@nvidia.com>
Date: Mon, 15 Dec 2025 13:10:39 -0500
Subject: [PATCH 15/15] Apply suggestions from code review

Co-authored-by: Keith Kraus <keith.j.kraus@gmail.com>
---
 cuda_core/cuda/core/experimental/_program.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py
index b49ebeb64d..f3ad9af644 100644
--- a/cuda_core/cuda/core/experimental/_program.py
+++ b/cuda_core/cuda/core/experimental/_program.py
@@ -357,8 +357,7 @@ def __post_init__(self):
 
     def _prepare_nvrtc_options(self) -> list[bytes]:
         # Build NVRTC-specific options
-        options = []
-        options.append(f"-arch={self.arch}")
+        options = [f"-arch={self.arch}"]
         if self.relocatable_device_code is not None:
             options.append(f"--relocatable-device-code={_handle_boolean_option(self.relocatable_device_code)}")
         if self.extensible_whole_program is not None and self.extensible_whole_program:
@@ -491,7 +490,7 @@ def _prepare_nvrtc_options(self) -> list[bytes]:
             )
         if self.numba_debug:
             options.append("--numba-debug")
-        return list(o.encode() for o in options)
+        return [o.encode() for o in options]
 
     def _prepare_nvvm_options(self, as_bytes: bool = True) -> Union[list[bytes], list[str]]:
         options = []
@@ -586,7 +585,7 @@ def _prepare_nvvm_options(self, as_bytes: bool = True) -> Union[list[bytes], lis
             raise CUDAError(f"The following options are not supported by NVVM backend: {', '.join(unsupported)}")
 
         if as_bytes:
-            return list(o.encode() for o in options)
+            return [o.encode() for o in options]
         else:
             return options