From afb4bd8a58eb27d612135c61933738ce16d9c1aa Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Fri, 22 Aug 2025 02:02:15 +0000
Subject: [PATCH 01/12] Initial plan


From b2bbf37a579c864e74f3dcd09e9e2e87f3d09ad0 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Fri, 22 Aug 2025 02:08:49 +0000
Subject: [PATCH 02/12] Add comprehensive tests for ObjectCode from_ltoir,
 from_fatbin, from_object, and from_library constructors

Co-authored-by: leofang <5534781+leofang@users.noreply.github.com>
---
 cuda_core/tests/test_module.py | 114 +++++++++++++++++++++++++++++++++
 1 file changed, 114 insertions(+)
diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py
index 2b0fc265eb..6fef43890f 100644
--- a/cuda_core/tests/test_module.py
+++ b/cuda_core/tests/test_module.py
@@ -180,6 +180,120 @@ def test_object_code_handle(get_saxpy_object_code):
     assert mod.handle is not None
 
 
+@pytest.fixture(scope="function")
+def get_ltoir_object_code(init_cuda):
+    # Create LTOIR code using link-time optimization
+    prog = Program(SAXPY_KERNEL, code_type="c++", options=ProgramOptions(link_time_optimization=True))
+    mod = prog.compile("ltoir", name_expressions=("saxpy<float>", "saxpy<double>"))
+    return mod
+
+
+def test_object_code_load_ltoir(get_ltoir_object_code):
+    mod = get_ltoir_object_code
+    ltoir = mod._module
+    sym_map = mod._sym_map
+    assert isinstance(ltoir, bytes)
+    mod_obj = ObjectCode.from_ltoir(ltoir, symbol_mapping=sym_map)
+    assert mod_obj.code == ltoir
+    assert mod_obj._code_type == "ltoir"
+    # ltoir doesn't support kernel retrieval directly as it's used for linking
+    assert mod_obj._handle is None  # Should only be loaded when needed
+
+
+def test_object_code_load_ltoir_from_file(get_ltoir_object_code, tmp_path):
+    mod = get_ltoir_object_code
+    ltoir = mod._module
+    sym_map = mod._sym_map
+    assert isinstance(ltoir, bytes)
+    ltoir_file = tmp_path / "test.ltoir"
+    ltoir_file.write_bytes(ltoir)
+    mod_obj = ObjectCode.from_ltoir(str(ltoir_file), symbol_mapping=sym_map)
+    assert mod_obj.code == str(ltoir_file)
+    assert mod_obj._code_type == "ltoir"
+    assert mod_obj._handle is None  # Should only be loaded when needed
+
+
+def test_object_code_load_fatbin(get_saxpy_kernel):
+    # Use cubin as a substitute for fatbin since they have similar structure
+    _, mod = get_saxpy_kernel
+    cubin = mod._module
+    sym_map = mod._sym_map
+    assert isinstance(cubin, bytes)
+    mod_obj = ObjectCode.from_fatbin(cubin, symbol_mapping=sym_map)
+    assert mod_obj.code == cubin
+    assert mod_obj._code_type == "fatbin"
+    # fatbin supports kernel retrieval
+    mod_obj.get_kernel("saxpy<double>")  # force loading
+
+
+def test_object_code_load_fatbin_from_file(get_saxpy_kernel, tmp_path):
+    # Use cubin as a substitute for fatbin since they have similar structure
+    _, mod = get_saxpy_kernel
+    cubin = mod._module
+    sym_map = mod._sym_map
+    assert isinstance(cubin, bytes)
+    fatbin_file = tmp_path / "test.fatbin"
+    fatbin_file.write_bytes(cubin)
+    mod_obj = ObjectCode.from_fatbin(str(fatbin_file), symbol_mapping=sym_map)
+    assert mod_obj.code == str(fatbin_file)
+    assert mod_obj._code_type == "fatbin"
+    mod_obj.get_kernel("saxpy<double>")  # force loading
+
+
+def test_object_code_load_object(get_saxpy_kernel):
+    # Use cubin as a substitute for object code since they're binary formats
+    _, mod = get_saxpy_kernel
+    cubin = mod._module
+    sym_map = mod._sym_map
+    assert isinstance(cubin, bytes)
+    mod_obj = ObjectCode.from_object(cubin, symbol_mapping=sym_map)
+    assert mod_obj.code == cubin
+    assert mod_obj._code_type == "object"
+    # object code doesn't support direct kernel retrieval
+    assert mod_obj._handle is None  # Should only be loaded when needed
+
+
+def test_object_code_load_object_from_file(get_saxpy_kernel, tmp_path):
+    # Use cubin as a substitute for object code since they're binary formats
+    _, mod = get_saxpy_kernel
+    cubin = mod._module
+    sym_map = mod._sym_map
+    assert isinstance(cubin, bytes)
+    object_file = tmp_path / "test.o"
+    object_file.write_bytes(cubin)
+    mod_obj = ObjectCode.from_object(str(object_file), symbol_mapping=sym_map)
+    assert mod_obj.code == str(object_file)
+    assert mod_obj._code_type == "object"
+    assert mod_obj._handle is None  # Should only be loaded when needed
+
+
+def test_object_code_load_library(get_saxpy_kernel):
+    # Use cubin as a substitute for library since they're binary formats
+    _, mod = get_saxpy_kernel
+    cubin = mod._module
+    sym_map = mod._sym_map
+    assert isinstance(cubin, bytes)
+    mod_obj = ObjectCode.from_library(cubin, symbol_mapping=sym_map)
+    assert mod_obj.code == cubin
+    assert mod_obj._code_type == "library"
+    # library code doesn't support direct kernel retrieval
+    assert mod_obj._handle is None  # Should only be loaded when needed
+
+
+def test_object_code_load_library_from_file(get_saxpy_kernel, tmp_path):
+    # Use cubin as a substitute for library since they're binary formats
+    _, mod = get_saxpy_kernel
+    cubin = mod._module
+    sym_map = mod._sym_map
+    assert isinstance(cubin, bytes)
+    library_file = tmp_path / "test.a"
+    library_file.write_bytes(cubin)
+    mod_obj = ObjectCode.from_library(str(library_file), symbol_mapping=sym_map)
+    assert mod_obj.code == str(library_file)
+    assert mod_obj._code_type == "library"
+    assert mod_obj._handle is None  # Should only be loaded when needed
+
+
 def test_saxpy_arguments(get_saxpy_kernel, cuda12_4_prerequisite_check):
     krn, _ = get_saxpy_kernel
 

From abbb07a9231bdfd31d7a466bd14f18827122ca9a Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Fri, 22 Aug 2025 02:10:52 +0000
Subject: [PATCH 03/12] Enhance ObjectCode constructor tests with error
 handling and default value validation

Co-authored-by: leofang <5534781+leofang@users.noreply.github.com>
---
 cuda_core/tests/test_module.py | 57 ++++++++++++++++++++++++++++++++++
 1 file changed, 57 insertions(+)

diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py
index 6fef43890f..ce5f82abca 100644
--- a/cuda_core/tests/test_module.py
+++ b/cuda_core/tests/test_module.py
@@ -198,6 +198,9 @@ def test_object_code_load_ltoir(get_ltoir_object_code):
     assert mod_obj._code_type == "ltoir"
     # ltoir doesn't support kernel retrieval directly as it's used for linking
     assert mod_obj._handle is None  # Should only be loaded when needed
+    # Test that get_kernel fails for unsupported code type
+    with pytest.raises(RuntimeError, match=r'Unsupported code type "ltoir"'):
+        mod_obj.get_kernel("saxpy<float>")
 
 
 def test_object_code_load_ltoir_from_file(get_ltoir_object_code, tmp_path):
@@ -251,6 +254,9 @@ def test_object_code_load_object(get_saxpy_kernel):
     assert mod_obj._code_type == "object"
     # object code doesn't support direct kernel retrieval
     assert mod_obj._handle is None  # Should only be loaded when needed
+    # Test that get_kernel fails for unsupported code type
+    with pytest.raises(RuntimeError, match=r'Unsupported code type "object"'):
+        mod_obj.get_kernel("saxpy<float>")
 
 
 def test_object_code_load_object_from_file(get_saxpy_kernel, tmp_path):
@@ -278,6 +284,9 @@ def test_object_code_load_library(get_saxpy_kernel):
     assert mod_obj._code_type == "library"
     # library code doesn't support direct kernel retrieval
     assert mod_obj._handle is None  # Should only be loaded when needed
+    # Test that get_kernel fails for unsupported code type
+    with pytest.raises(RuntimeError, match=r'Unsupported code type "library"'):
+        mod_obj.get_kernel("saxpy<float>")
 
 
 def test_object_code_load_library_from_file(get_saxpy_kernel, tmp_path):
@@ -294,6 +303,54 @@ def test_object_code_load_library_from_file(get_saxpy_kernel, tmp_path):
     assert mod_obj._handle is None  # Should only be loaded when needed
 
 
+def test_object_code_constructors_with_name_and_symbol_mapping():
+    """Test that all from_* constructors properly set name and symbol_mapping"""
+    # Dummy data for testing
+    dummy_bytes = b"dummy_code_data"
+    test_name = "test_object"
+    test_sym_map = {"kernel1": "mangled_kernel1", "kernel2": "mangled_kernel2"}
+    
+    # Test all constructors
+    constructors = [
+        (ObjectCode.from_cubin, "cubin"),
+        (ObjectCode.from_ptx, "ptx"),
+        (ObjectCode.from_ltoir, "ltoir"),
+        (ObjectCode.from_fatbin, "fatbin"),
+        (ObjectCode.from_object, "object"),
+        (ObjectCode.from_library, "library"),
+    ]
+    
+    for constructor, code_type in constructors:
+        obj = constructor(dummy_bytes, name=test_name, symbol_mapping=test_sym_map)
+        assert obj.name == test_name
+        assert obj._sym_map == test_sym_map
+        assert obj._code_type == code_type
+        assert obj.code == dummy_bytes
+
+
+def test_object_code_constructors_default_values():
+    """Test that all from_* constructors handle default values correctly"""
+    # Dummy data for testing
+    dummy_bytes = b"dummy_code_data"
+    
+    # Test all constructors with defaults
+    constructors = [
+        (ObjectCode.from_cubin, "cubin"),
+        (ObjectCode.from_ptx, "ptx"),
+        (ObjectCode.from_ltoir, "ltoir"),
+        (ObjectCode.from_fatbin, "fatbin"),
+        (ObjectCode.from_object, "object"),
+        (ObjectCode.from_library, "library"),
+    ]
+    
+    for constructor, code_type in constructors:
+        obj = constructor(dummy_bytes)  # Use defaults
+        assert obj.name == ""  # Default name should be empty string
+        assert obj._sym_map == {}  # Default symbol mapping should be empty dict
+        assert obj._code_type == code_type
+        assert obj.code == dummy_bytes
+
+
 def test_saxpy_arguments(get_saxpy_kernel, cuda12_4_prerequisite_check):
     krn, _ = get_saxpy_kernel
 

From 9ee5bfc8f6146d34877a0069ba3522e493ff0ab8 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sun, 24 Aug 2025 05:48:56 +0000
Subject: [PATCH 04/12] Fix linker to handle file paths for ObjectCode
 constructors

Co-authored-by: leofang <5534781+leofang@users.noreply.github.com>
---
 cuda_core/cuda/core/experimental/_linker.py | 62 ++++++++++++++------
 cuda_core/tests/test_module.py              | 65 +++++++++++++++++++++
 2 files changed, 108 insertions(+), 19 deletions(-)

diff --git a/cuda_core/cuda/core/experimental/_linker.py b/cuda_core/cuda/core/experimental/_linker.py
index c3528a14e2..028c497f4e 100644
--- a/cuda_core/cuda/core/experimental/_linker.py
+++ b/cuda_core/cuda/core/experimental/_linker.py
@@ -395,32 +395,56 @@ def __init__(self, *object_codes: ObjectCode, options: LinkerOptions = None):
 
     def _add_code_object(self, object_code: ObjectCode):
         data = object_code._module
-        assert_type(data, bytes)
         with _exception_manager(self):
             name_str = f"{object_code.name}"
-            if _nvjitlink:
-                _nvjitlink.add_data(
-                    self._mnff.handle,
-                    self._input_type_from_code_type(object_code._code_type),
-                    data,
-                    len(data),
-                    name_str,
-                )
-            else:
-                name_bytes = name_str.encode()
-                handle_return(
-                    _driver.cuLinkAddData(
+            if isinstance(data, str):
+                # Handle file path input
+                if _nvjitlink:
+                    _nvjitlink.add_file(
+                        self._mnff.handle,
+                        self._input_type_from_code_type(object_code._code_type),
+                        data,
+                    )
+                else:
+                    name_bytes = name_str.encode()
+                    handle_return(
+                        _driver.cuLinkAddFile(
+                            self._mnff.handle,
+                            self._input_type_from_code_type(object_code._code_type),
+                            data.encode(),
+                            0,
+                            None,
+                            None,
+                        )
+                    )
+                    self._mnff.const_char_keep_alive.append(name_bytes)
+            elif isinstance(data, bytes):
+                # Handle bytes input (existing logic)
+                if _nvjitlink:
+                    _nvjitlink.add_data(
                         self._mnff.handle,
                         self._input_type_from_code_type(object_code._code_type),
                         data,
                         len(data),
-                        name_bytes,
-                        0,
-                        None,
-                        None,
+                        name_str,
                     )
-                )
-                self._mnff.const_char_keep_alive.append(name_bytes)
+                else:
+                    name_bytes = name_str.encode()
+                    handle_return(
+                        _driver.cuLinkAddData(
+                            self._mnff.handle,
+                            self._input_type_from_code_type(object_code._code_type),
+                            data,
+                            len(data),
+                            name_bytes,
+                            0,
+                            None,
+                            None,
+                        )
+                    )
+                    self._mnff.const_char_keep_alive.append(name_bytes)
+            else:
+                raise TypeError(f"Expected bytes or str, but got {type(data).__name__}")
 
     def link(self, target_type) -> ObjectCode:
         """
diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py
index ce5f82abca..3a2ac9c314 100644
--- a/cuda_core/tests/test_module.py
+++ b/cuda_core/tests/test_module.py
@@ -351,6 +351,71 @@ def test_object_code_constructors_default_values():
         assert obj.code == dummy_bytes
 
 
+def test_object_code_file_path_linker_integration(get_saxpy_kernel, tmp_path):
+    """Test that ObjectCode created from file paths works with the Linker"""
+    _, mod = get_saxpy_kernel
+    cubin = mod._module
+    assert isinstance(cubin, bytes)
+    
+    # Create temporary files for different code types
+    test_files = {}
+    for code_type in ["cubin", "ptx", "ltoir", "fatbin", "object", "library"]:
+        file_path = tmp_path / f"test.{code_type}"
+        file_path.write_bytes(cubin)  # Use cubin bytes as proxy for all types
+        test_files[code_type] = str(file_path)
+    
+    # Create ObjectCode instances from file paths
+    file_based_objects = []
+    for code_type, file_path in test_files.items():
+        if code_type == "cubin":
+            obj = ObjectCode.from_cubin(file_path, name=f"file_{code_type}")
+        elif code_type == "ptx":
+            obj = ObjectCode.from_ptx(file_path, name=f"file_{code_type}")
+        elif code_type == "ltoir":
+            obj = ObjectCode.from_ltoir(file_path, name=f"file_{code_type}")
+        elif code_type == "fatbin":
+            obj = ObjectCode.from_fatbin(file_path, name=f"file_{code_type}")
+        elif code_type == "object":
+            obj = ObjectCode.from_object(file_path, name=f"file_{code_type}")
+        elif code_type == "library":
+            obj = ObjectCode.from_library(file_path, name=f"file_{code_type}")
+        
+        # Verify the ObjectCode was created correctly
+        assert obj.code == file_path
+        assert obj._code_type == code_type
+        assert obj.name == f"file_{code_type}"
+        assert isinstance(obj._module, str)  # Should store the file path
+        file_based_objects.append(obj)
+    
+    # Test that these ObjectCode instances can be used with Linker
+    # Note: We can't actually link most of these types together in practice,
+    # but we can verify the linker accepts them and handles the file path correctly
+    from cuda.core.experimental import Linker, LinkerOptions
+    
+    # Test with ptx which should be linkable (use only PTX for actual linking)
+    ptx_obj = None
+    for obj in file_based_objects:
+        if obj._code_type == "ptx":
+            ptx_obj = obj
+            break
+    
+    if ptx_obj is not None:
+        # Create a simple linker test - this will test that _add_code_object
+        # handles file paths correctly by not crashing on the file path
+        try:
+            arch = "sm_" + "".join(f"{i}" for i in Device().compute_capability)
+            options = LinkerOptions(arch=arch)
+            # This should not crash - it should handle the file path in _add_code_object
+            linker = Linker(ptx_obj, options=options)
+            # We don't need to actually link since that might fail due to content,
+            # but creating the linker tests our file path handling
+            assert linker is not None
+        except Exception as e:
+            # If it fails, it should be due to content issues, not file path handling
+            # The key is that it should not fail with "Expected type bytes, but got str"
+            assert "Expected type bytes, but got str" not in str(e), f"File path handling failed: {e}"
+
+
 def test_saxpy_arguments(get_saxpy_kernel, cuda12_4_prerequisite_check):
     krn, _ = get_saxpy_kernel
 

From f3aad444cef8735404785b465b85658f5378b412 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 25 Aug 2025 00:27:04 +0000
Subject: [PATCH 05/12] Flatten nested if statements in linker _add_code_object
 method

Co-authored-by: leofang <5534781+leofang@users.noreply.github.com>
---
 cuda_core/cuda/core/experimental/_linker.py | 78 ++++++++++-----------
 1 file changed, 39 insertions(+), 39 deletions(-)

diff --git a/cuda_core/cuda/core/experimental/_linker.py b/cuda_core/cuda/core/experimental/_linker.py
index 028c497f4e..ca498a6ad2 100644
--- a/cuda_core/cuda/core/experimental/_linker.py
+++ b/cuda_core/cuda/core/experimental/_linker.py
@@ -397,52 +397,52 @@ def _add_code_object(self, object_code: ObjectCode):
         data = object_code._module
         with _exception_manager(self):
             name_str = f"{object_code.name}"
-            if isinstance(data, str):
-                # Handle file path input
-                if _nvjitlink:
-                    _nvjitlink.add_file(
-                        self._mnff.handle,
-                        self._input_type_from_code_type(object_code._code_type),
-                        data,
-                    )
-                else:
-                    name_bytes = name_str.encode()
-                    handle_return(
-                        _driver.cuLinkAddFile(
-                            self._mnff.handle,
-                            self._input_type_from_code_type(object_code._code_type),
-                            data.encode(),
-                            0,
-                            None,
-                            None,
-                        )
-                    )
-                    self._mnff.const_char_keep_alive.append(name_bytes)
+            if _nvjitlink and isinstance(data, bytes):
+                # Handle bytes input with nvjitlink
+                _nvjitlink.add_data(
+                    self._mnff.handle,
+                    self._input_type_from_code_type(object_code._code_type),
+                    data,
+                    len(data),
+                    name_str,
+                )
+            elif _nvjitlink and isinstance(data, str):
+                # Handle file path input with nvjitlink
+                _nvjitlink.add_file(
+                    self._mnff.handle,
+                    self._input_type_from_code_type(object_code._code_type),
+                    data,
+                )
             elif isinstance(data, bytes):
-                # Handle bytes input (existing logic)
-                if _nvjitlink:
-                    _nvjitlink.add_data(
+                # Handle bytes input with driver API
+                name_bytes = name_str.encode()
+                handle_return(
+                    _driver.cuLinkAddData(
                         self._mnff.handle,
                         self._input_type_from_code_type(object_code._code_type),
                         data,
                         len(data),
-                        name_str,
+                        name_bytes,
+                        0,
+                        None,
+                        None,
                     )
-                else:
-                    name_bytes = name_str.encode()
-                    handle_return(
-                        _driver.cuLinkAddData(
-                            self._mnff.handle,
-                            self._input_type_from_code_type(object_code._code_type),
-                            data,
-                            len(data),
-                            name_bytes,
-                            0,
-                            None,
-                            None,
-                        )
+                )
+                self._mnff.const_char_keep_alive.append(name_bytes)
+            elif isinstance(data, str):
+                # Handle file path input with driver API
+                name_bytes = name_str.encode()
+                handle_return(
+                    _driver.cuLinkAddFile(
+                        self._mnff.handle,
+                        self._input_type_from_code_type(object_code._code_type),
+                        data.encode(),
+                        0,
+                        None,
+                        None,
                     )
-                    self._mnff.const_char_keep_alive.append(name_bytes)
+                )
+                self._mnff.const_char_keep_alive.append(name_bytes)
             else:
                 raise TypeError(f"Expected bytes or str, but got {type(data).__name__}")
 

From 5609cb165d3fcf7ca0222054590c08c2b322c76c Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 25 Aug 2025 00:57:30 +0000
Subject: [PATCH 06/12] Address review comments: reorganize tests, add
 NVCC-based testing, fix fixture naming

Co-authored-by: leofang <5534781+leofang@users.noreply.github.com>
---
 cuda_core/tests/test_module.py | 446 ++++++++++++++++++++++-----------
 1 file changed, 306 insertions(+), 140 deletions(-)

diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py
index 3a2ac9c314..5376fc1ae3 100644
--- a/cuda_core/tests/test_module.py
+++ b/cuda_core/tests/test_module.py
@@ -94,6 +94,14 @@ def get_saxpy_object_code(init_cuda):
     return mod
 
 
+@pytest.fixture(scope="function")
+def get_saxpy_kernel_ltoir(init_cuda):
+    # Create LTOIR code using link-time optimization
+    prog = Program(SAXPY_KERNEL, code_type="c++", options=ProgramOptions(link_time_optimization=True))
+    mod = prog.compile("ltoir", name_expressions=("saxpy<float>", "saxpy<double>"))
+    return mod
+
+
 def test_get_kernel(init_cuda):
     kernel = """extern "C" __global__ void ABC() { }"""
 
@@ -163,6 +171,20 @@ def test_object_code_load_ptx(get_saxpy_kernel_ptx):
     mod_obj.get_kernel("saxpy<double>")  # force loading
 
 
+def test_object_code_load_ptx_from_file(get_saxpy_kernel_ptx, tmp_path):
+    ptx, mod = get_saxpy_kernel_ptx
+    sym_map = mod._sym_map
+    assert isinstance(ptx, str)
+    ptx_file = tmp_path / "test.ptx"
+    ptx_file.write_text(ptx)
+    mod_obj = ObjectCode.from_ptx(str(ptx_file), symbol_mapping=sym_map)
+    assert mod_obj.code == str(ptx_file)
+    assert mod_obj._code_type == "ptx"
+    if not Program._can_load_generated_ptx():
+        pytest.skip("PTX version too new for current driver")
+    mod_obj.get_kernel("saxpy<double>")  # force loading
+
+
 def test_object_code_load_cubin_from_file(get_saxpy_kernel, tmp_path):
     _, mod = get_saxpy_kernel
     cubin = mod._module
@@ -180,16 +202,8 @@ def test_object_code_handle(get_saxpy_object_code):
     assert mod.handle is not None
 
 
-@pytest.fixture(scope="function")
-def get_ltoir_object_code(init_cuda):
-    # Create LTOIR code using link-time optimization
-    prog = Program(SAXPY_KERNEL, code_type="c++", options=ProgramOptions(link_time_optimization=True))
-    mod = prog.compile("ltoir", name_expressions=("saxpy<float>", "saxpy<double>"))
-    return mod
-
-
-def test_object_code_load_ltoir(get_ltoir_object_code):
-    mod = get_ltoir_object_code
+def test_object_code_load_ltoir(get_saxpy_kernel_ltoir):
+    mod = get_saxpy_kernel_ltoir
     ltoir = mod._module
     sym_map = mod._sym_map
     assert isinstance(ltoir, bytes)
@@ -203,8 +217,8 @@ def test_object_code_load_ltoir(get_ltoir_object_code):
         mod_obj.get_kernel("saxpy<float>")
 
 
-def test_object_code_load_ltoir_from_file(get_ltoir_object_code, tmp_path):
-    mod = get_ltoir_object_code
+def test_object_code_load_ltoir_from_file(get_saxpy_kernel_ltoir, tmp_path):
+    mod = get_saxpy_kernel_ltoir
     ltoir = mod._module
     sym_map = mod._sym_map
     assert isinstance(ltoir, bytes)
@@ -216,139 +230,291 @@ def test_object_code_load_ltoir_from_file(get_ltoir_object_code, tmp_path):
     assert mod_obj._handle is None  # Should only be loaded when needed
 
 
-def test_object_code_load_fatbin(get_saxpy_kernel):
-    # Use cubin as a substitute for fatbin since they have similar structure
-    _, mod = get_saxpy_kernel
-    cubin = mod._module
-    sym_map = mod._sym_map
-    assert isinstance(cubin, bytes)
-    mod_obj = ObjectCode.from_fatbin(cubin, symbol_mapping=sym_map)
-    assert mod_obj.code == cubin
-    assert mod_obj._code_type == "fatbin"
-    # fatbin supports kernel retrieval
-    mod_obj.get_kernel("saxpy<double>")  # force loading
-
-
-def test_object_code_load_fatbin_from_file(get_saxpy_kernel, tmp_path):
-    # Use cubin as a substitute for fatbin since they have similar structure
-    _, mod = get_saxpy_kernel
-    cubin = mod._module
-    sym_map = mod._sym_map
-    assert isinstance(cubin, bytes)
-    fatbin_file = tmp_path / "test.fatbin"
-    fatbin_file.write_bytes(cubin)
-    mod_obj = ObjectCode.from_fatbin(str(fatbin_file), symbol_mapping=sym_map)
-    assert mod_obj.code == str(fatbin_file)
-    assert mod_obj._code_type == "fatbin"
-    mod_obj.get_kernel("saxpy<double>")  # force loading
-
-
-def test_object_code_load_object(get_saxpy_kernel):
-    # Use cubin as a substitute for object code since they're binary formats
-    _, mod = get_saxpy_kernel
-    cubin = mod._module
-    sym_map = mod._sym_map
-    assert isinstance(cubin, bytes)
-    mod_obj = ObjectCode.from_object(cubin, symbol_mapping=sym_map)
-    assert mod_obj.code == cubin
-    assert mod_obj._code_type == "object"
+def test_object_code_load_fatbin(get_saxpy_kernel_ltoir, tmp_path):
+    """
+    Test fatbin loading using NVCC-generated fatbins.
+    TODO: Can drop NVCC from test dependency once #156 is resolved.
+    """
+    import shutil
+    import subprocess
+    
+    # Check if NVCC is available
+    if not shutil.which("nvcc"):
+        pytest.skip("NVCC not available in PATH")
+    
+    # Create a simple CUDA kernel file
+    kernel_source = '''
+extern "C" __global__ void simple_kernel(float* data) {
+    int idx = threadIdx.x + blockIdx.x * blockDim.x;
+    data[idx] = data[idx] * 2.0f;
+}
+'''
+    
+    cu_file = tmp_path / "kernel.cu"
+    cu_file.write_text(kernel_source)
+    
+    # Get current device architecture
+    from cuda.core.experimental import Device
+    current_arch = "sm_" + "".join(f"{i}" for i in Device().compute_capability)
+    
+    # Generate fatbin for multiple architectures
+    archs = ["sm_75", "sm_90", current_arch]
+    arch_flags = " ".join(f"--gpu-architecture={arch}" for arch in set(archs))
+    
+    fatbin_file = tmp_path / "kernel.fatbin"
+    
+    try:
+        # Generate fatbin using nvcc
+        cmd = f"nvcc --fatbin {arch_flags} -o {fatbin_file} {cu_file}"
+        subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True)
+    except subprocess.CalledProcessError as e:
+        pytest.skip(f"Failed to generate fatbin with nvcc: {e}")
+    
+    # Test loading fatbin from bytes (in-memory)
+    fatbin_bytes = fatbin_file.read_bytes()
+    mod_obj_mem = ObjectCode.from_fatbin(fatbin_bytes, name="fatbin_memory")
+    assert mod_obj_mem.code == fatbin_bytes
+    assert mod_obj_mem._code_type == "fatbin"
+    assert mod_obj_mem.name == "fatbin_memory"
+
+
+def test_object_code_load_fatbin_from_file(get_saxpy_kernel_ltoir, tmp_path):
+    """
+    Test fatbin loading from file path using NVCC-generated fatbins.
+    TODO: Can drop NVCC from test dependency once #156 is resolved.
+    """
+    import shutil
+    import subprocess
+    
+    # Check if NVCC is available
+    if not shutil.which("nvcc"):
+        pytest.skip("NVCC not available in PATH")
+    
+    # Create a simple CUDA kernel file
+    kernel_source = '''
+extern "C" __global__ void simple_kernel(float* data) {
+    int idx = threadIdx.x + blockIdx.x * blockDim.x;
+    data[idx] = data[idx] * 2.0f;
+}
+'''
+    
+    cu_file = tmp_path / "kernel.cu"
+    cu_file.write_text(kernel_source)
+    
+    # Get current device architecture
+    from cuda.core.experimental import Device
+    current_arch = "sm_" + "".join(f"{i}" for i in Device().compute_capability)
+    
+    # Generate fatbin for multiple architectures
+    archs = ["sm_75", "sm_90", current_arch]
+    arch_flags = " ".join(f"--gpu-architecture={arch}" for arch in set(archs))
+    
+    fatbin_file = tmp_path / "kernel.fatbin"
+    
+    try:
+        # Generate fatbin using nvcc
+        cmd = f"nvcc --fatbin {arch_flags} -o {fatbin_file} {cu_file}"
+        subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True)
+    except subprocess.CalledProcessError as e:
+        pytest.skip(f"Failed to generate fatbin with nvcc: {e}")
+    
+    # Test loading fatbin from file path
+    mod_obj_file = ObjectCode.from_fatbin(str(fatbin_file), name="fatbin_file")
+    assert mod_obj_file.code == str(fatbin_file)
+    assert mod_obj_file._code_type == "fatbin"
+    assert mod_obj_file.name == "fatbin_file"
+
+
+def test_object_code_load_object(get_saxpy_kernel_ltoir, tmp_path):
+    """
+    Test object code loading using NVCC-generated object files.
+    TODO: Can drop NVCC from test dependency once #156 is resolved.
+    """
+    import shutil
+    import subprocess
+    
+    # Check if NVCC is available
+    if not shutil.which("nvcc"):
+        pytest.skip("NVCC not available in PATH")
+    
+    # Create a simple CUDA kernel file
+    kernel_source = '''
+extern "C" __global__ void simple_kernel(float* data) {
+    int idx = threadIdx.x + blockIdx.x * blockDim.x;
+    data[idx] = data[idx] * 2.0f;
+}
+'''
+    
+    cu_file = tmp_path / "kernel.cu"
+    cu_file.write_text(kernel_source)
+    
+    # Get current device architecture
+    from cuda.core.experimental import Device
+    current_arch = "sm_" + "".join(f"{i}" for i in Device().compute_capability)
+    
+    object_file = tmp_path / "kernel.o"
+    
+    try:
+        # Generate object file using nvcc
+        cmd = f"nvcc --device-c --gpu-architecture={current_arch} -o {object_file} {cu_file}"
+        subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True)
+    except subprocess.CalledProcessError as e:
+        pytest.skip(f"Failed to generate object file with nvcc: {e}")
+    
+    # Test loading object from bytes (in-memory)
+    object_bytes = object_file.read_bytes()
+    mod_obj_mem = ObjectCode.from_object(object_bytes, name="object_memory")
+    assert mod_obj_mem.code == object_bytes
+    assert mod_obj_mem._code_type == "object"
+    assert mod_obj_mem.name == "object_memory"
     # object code doesn't support direct kernel retrieval
-    assert mod_obj._handle is None  # Should only be loaded when needed
+    assert mod_obj_mem._handle is None  # Should only be loaded when needed
     # Test that get_kernel fails for unsupported code type
     with pytest.raises(RuntimeError, match=r'Unsupported code type "object"'):
-        mod_obj.get_kernel("saxpy<float>")
-
-
-def test_object_code_load_object_from_file(get_saxpy_kernel, tmp_path):
-    # Use cubin as a substitute for object code since they're binary formats
-    _, mod = get_saxpy_kernel
-    cubin = mod._module
-    sym_map = mod._sym_map
-    assert isinstance(cubin, bytes)
-    object_file = tmp_path / "test.o"
-    object_file.write_bytes(cubin)
-    mod_obj = ObjectCode.from_object(str(object_file), symbol_mapping=sym_map)
-    assert mod_obj.code == str(object_file)
-    assert mod_obj._code_type == "object"
-    assert mod_obj._handle is None  # Should only be loaded when needed
+        mod_obj_mem.get_kernel("simple_kernel")
 
 
-def test_object_code_load_library(get_saxpy_kernel):
-    # Use cubin as a substitute for library since they're binary formats
-    _, mod = get_saxpy_kernel
-    cubin = mod._module
-    sym_map = mod._sym_map
-    assert isinstance(cubin, bytes)
-    mod_obj = ObjectCode.from_library(cubin, symbol_mapping=sym_map)
-    assert mod_obj.code == cubin
-    assert mod_obj._code_type == "library"
-    # library code doesn't support direct kernel retrieval
-    assert mod_obj._handle is None  # Should only be loaded when needed
-    # Test that get_kernel fails for unsupported code type
-    with pytest.raises(RuntimeError, match=r'Unsupported code type "library"'):
-        mod_obj.get_kernel("saxpy<float>")
-
-
-def test_object_code_load_library_from_file(get_saxpy_kernel, tmp_path):
-    # Use cubin as a substitute for library since they're binary formats
-    _, mod = get_saxpy_kernel
-    cubin = mod._module
-    sym_map = mod._sym_map
-    assert isinstance(cubin, bytes)
-    library_file = tmp_path / "test.a"
-    library_file.write_bytes(cubin)
-    mod_obj = ObjectCode.from_library(str(library_file), symbol_mapping=sym_map)
-    assert mod_obj.code == str(library_file)
-    assert mod_obj._code_type == "library"
-    assert mod_obj._handle is None  # Should only be loaded when needed
-
-
-def test_object_code_constructors_with_name_and_symbol_mapping():
-    """Test that all from_* constructors properly set name and symbol_mapping"""
-    # Dummy data for testing
-    dummy_bytes = b"dummy_code_data"
-    test_name = "test_object"
-    test_sym_map = {"kernel1": "mangled_kernel1", "kernel2": "mangled_kernel2"}
-    
-    # Test all constructors
-    constructors = [
-        (ObjectCode.from_cubin, "cubin"),
-        (ObjectCode.from_ptx, "ptx"),
-        (ObjectCode.from_ltoir, "ltoir"),
-        (ObjectCode.from_fatbin, "fatbin"),
-        (ObjectCode.from_object, "object"),
-        (ObjectCode.from_library, "library"),
-    ]
-    
-    for constructor, code_type in constructors:
-        obj = constructor(dummy_bytes, name=test_name, symbol_mapping=test_sym_map)
-        assert obj.name == test_name
-        assert obj._sym_map == test_sym_map
-        assert obj._code_type == code_type
-        assert obj.code == dummy_bytes
-
-
-def test_object_code_constructors_default_values():
-    """Test that all from_* constructors handle default values correctly"""
-    # Dummy data for testing
-    dummy_bytes = b"dummy_code_data"
-    
-    # Test all constructors with defaults
-    constructors = [
-        (ObjectCode.from_cubin, "cubin"),
-        (ObjectCode.from_ptx, "ptx"),
-        (ObjectCode.from_ltoir, "ltoir"),
-        (ObjectCode.from_fatbin, "fatbin"),
-        (ObjectCode.from_object, "object"),
-        (ObjectCode.from_library, "library"),
-    ]
-    
-    for constructor, code_type in constructors:
-        obj = constructor(dummy_bytes)  # Use defaults
-        assert obj.name == ""  # Default name should be empty string
-        assert obj._sym_map == {}  # Default symbol mapping should be empty dict
-        assert obj._code_type == code_type
-        assert obj.code == dummy_bytes
+def test_object_code_load_object_from_file(get_saxpy_kernel_ltoir, tmp_path):
+    """
+    Test object code loading from file path using NVCC-generated object files.
+    TODO: Can drop NVCC from test dependency once #156 is resolved.
+    """
+    import shutil
+    import subprocess
+    
+    # Check if NVCC is available
+    if not shutil.which("nvcc"):
+        pytest.skip("NVCC not available in PATH")
+    
+    # Create a simple CUDA kernel file
+    kernel_source = '''
+extern "C" __global__ void simple_kernel(float* data) {
+    int idx = threadIdx.x + blockIdx.x * blockDim.x;
+    data[idx] = data[idx] * 2.0f;
+}
+'''
+    
+    cu_file = tmp_path / "kernel.cu"
+    cu_file.write_text(kernel_source)
+    
+    # Get current device architecture
+    from cuda.core.experimental import Device
+    current_arch = "sm_" + "".join(f"{i}" for i in Device().compute_capability)
+    
+    object_file = tmp_path / "kernel.o"
+    
+    try:
+        # Generate object file using nvcc
+        cmd = f"nvcc --device-c --gpu-architecture={current_arch} -o {object_file} {cu_file}"
+        subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True)
+    except subprocess.CalledProcessError as e:
+        pytest.skip(f"Failed to generate object file with nvcc: {e}")
+    
+    # Test loading object from file path
+    mod_obj_file = ObjectCode.from_object(str(object_file), name="object_file")
+    assert mod_obj_file.code == str(object_file)
+    assert mod_obj_file._code_type == "object"
+    assert mod_obj_file.name == "object_file"
+    assert mod_obj_file._handle is None  # Should only be loaded when needed
+
+
+def test_object_code_load_library(get_saxpy_kernel_ltoir, tmp_path):
+    """
+    Test library loading using NVCC-generated library files.
+    TODO: Can drop NVCC from test dependency once #156 is resolved.
+    """
+    import shutil
+    import subprocess
+    
+    # Check if NVCC is available
+    if not shutil.which("nvcc"):
+        pytest.skip("NVCC not available in PATH")
+    
+    # Create a simple CUDA kernel file
+    kernel_source = '''
+extern "C" __global__ void simple_kernel(float* data) {
+    int idx = threadIdx.x + blockIdx.x * blockDim.x;
+    data[idx] = data[idx] * 2.0f;
+}
+'''
+    
+    cu_file = tmp_path / "kernel.cu"
+    cu_file.write_text(kernel_source)
+    
+    # Get current device architecture
+    from cuda.core.experimental import Device
+    current_arch = "sm_" + "".join(f"{i}" for i in Device().compute_capability)
+    
+    object_file = tmp_path / "kernel.o"
+    library_file = tmp_path / "libkernel.a"
+    
+    try:
+        # Generate object file first
+        cmd = f"nvcc --device-c --gpu-architecture={current_arch} -o {object_file} {cu_file}"
+        subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True)
+        
+        # Create library from object file
+        cmd = f"ar rcs {library_file} {object_file}"
+        subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True)
+    except subprocess.CalledProcessError as e:
+        pytest.skip(f"Failed to generate library with nvcc/ar: {e}")
+    
+    # Test loading library from bytes (in-memory)
+    library_bytes = library_file.read_bytes()
+    mod_obj_mem = ObjectCode.from_library(library_bytes, name="library_memory")
+    assert mod_obj_mem.code == library_bytes
+    assert mod_obj_mem._code_type == "library"
+    assert mod_obj_mem.name == "library_memory"
+    assert mod_obj_mem._handle is None  # Should only be loaded when needed
+
+
+def test_object_code_load_library_from_file(get_saxpy_kernel_ltoir, tmp_path):
+    """
+    Test library loading from file path using NVCC-generated library files.
+    TODO: Can drop NVCC from test dependency once #156 is resolved.
+    """
+    import shutil
+    import subprocess
+    
+    # Check if NVCC is available
+    if not shutil.which("nvcc"):
+        pytest.skip("NVCC not available in PATH")
+    
+    # Create a simple CUDA kernel file
+    kernel_source = '''
+extern "C" __global__ void simple_kernel(float* data) {
+    int idx = threadIdx.x + blockIdx.x * blockDim.x;
+    data[idx] = data[idx] * 2.0f;
+}
+'''
+    
+    cu_file = tmp_path / "kernel.cu"
+    cu_file.write_text(kernel_source)
+    
+    # Get current device architecture
+    from cuda.core.experimental import Device
+    current_arch = "sm_" + "".join(f"{i}" for i in Device().compute_capability)
+    
+    object_file = tmp_path / "kernel.o"
+    library_file = tmp_path / "libkernel.a"
+    
+    try:
+        # Generate object file first
+        cmd = f"nvcc --device-c --gpu-architecture={current_arch} -o {object_file} {cu_file}"
+        subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True)
+        
+        # Create library from object file
+        cmd = f"ar rcs {library_file} {object_file}"
+        subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True)
+    except subprocess.CalledProcessError as e:
+        pytest.skip(f"Failed to generate library with nvcc/ar: {e}")
+    
+    # Test loading library from file path
+    mod_obj_file = ObjectCode.from_library(str(library_file), name="library_file")
+    assert mod_obj_file.code == str(library_file)
+    assert mod_obj_file._code_type == "library"
+    assert mod_obj_file.name == "library_file"
+    assert mod_obj_file._handle is None  # Should only be loaded when needed
 
 
 def test_object_code_file_path_linker_integration(get_saxpy_kernel, tmp_path):

From 19fbe2bacf9083660df33bd3463eeb54bbf1e4fb Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 11 Sep 2025 14:34:51 +0000
Subject: [PATCH 07/12] Remove self-explanatory comments and clarify if
 conditions in linker

Co-authored-by: leofang <5534781+leofang@users.noreply.github.com>
---
 cuda_core/cuda/core/experimental/_linker.py | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/cuda_core/cuda/core/experimental/_linker.py b/cuda_core/cuda/core/experimental/_linker.py
index c2e9c57fec..97af99be6a 100644
--- a/cuda_core/cuda/core/experimental/_linker.py
+++ b/cuda_core/cuda/core/experimental/_linker.py
@@ -398,7 +398,6 @@ def _add_code_object(self, object_code: ObjectCode):
         with _exception_manager(self):
             name_str = f"{object_code.name}"
             if _nvjitlink and isinstance(data, bytes):
-                # Handle bytes input with nvjitlink
                 _nvjitlink.add_data(
                     self._mnff.handle,
                     self._input_type_from_code_type(object_code._code_type),
@@ -407,14 +406,12 @@ def _add_code_object(self, object_code: ObjectCode):
                     name_str,
                 )
             elif _nvjitlink and isinstance(data, str):
-                # Handle file path input with nvjitlink
                 _nvjitlink.add_file(
                     self._mnff.handle,
                     self._input_type_from_code_type(object_code._code_type),
                     data,
                 )
-            elif isinstance(data, bytes):
-                # Handle bytes input with driver API
+            elif (not _nvjitlink) and isinstance(data, bytes):
                 name_bytes = name_str.encode()
                 handle_return(
                     _driver.cuLinkAddData(
@@ -429,8 +426,7 @@ def _add_code_object(self, object_code: ObjectCode):
                     )
                 )
                 self._mnff.const_char_keep_alive.append(name_bytes)
-            elif isinstance(data, str):
-                # Handle file path input with driver API
+            elif (not _nvjitlink) and isinstance(data, str):
                 name_bytes = name_str.encode()
                 handle_return(
                     _driver.cuLinkAddFile(

From 68003ba5a53216d2be1d9febc1b5cc2035a7fbc9 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 17 Sep 2025 23:23:31 +0000
Subject: [PATCH 08/12] [pre-commit.ci] auto code formatting

---
 cuda_core/tests/test_module.py | 134 +++++++++++++++++----------------
 1 file changed, 70 insertions(+), 64 deletions(-)

diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py
index 5376fc1ae3..b9622a435c 100644
--- a/cuda_core/tests/test_module.py
+++ b/cuda_core/tests/test_module.py
@@ -237,39 +237,40 @@ def test_object_code_load_fatbin(get_saxpy_kernel_ltoir, tmp_path):
     """
     import shutil
     import subprocess
-    
+
     # Check if NVCC is available
     if not shutil.which("nvcc"):
         pytest.skip("NVCC not available in PATH")
-    
+
     # Create a simple CUDA kernel file
-    kernel_source = '''
+    kernel_source = """
 extern "C" __global__ void simple_kernel(float* data) {
     int idx = threadIdx.x + blockIdx.x * blockDim.x;
     data[idx] = data[idx] * 2.0f;
 }
-'''
-    
+"""
+
     cu_file = tmp_path / "kernel.cu"
     cu_file.write_text(kernel_source)
-    
+
     # Get current device architecture
     from cuda.core.experimental import Device
+
     current_arch = "sm_" + "".join(f"{i}" for i in Device().compute_capability)
-    
+
     # Generate fatbin for multiple architectures
     archs = ["sm_75", "sm_90", current_arch]
     arch_flags = " ".join(f"--gpu-architecture={arch}" for arch in set(archs))
-    
+
     fatbin_file = tmp_path / "kernel.fatbin"
-    
+
     try:
         # Generate fatbin using nvcc
         cmd = f"nvcc --fatbin {arch_flags} -o {fatbin_file} {cu_file}"
         subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True)
     except subprocess.CalledProcessError as e:
         pytest.skip(f"Failed to generate fatbin with nvcc: {e}")
-    
+
     # Test loading fatbin from bytes (in-memory)
     fatbin_bytes = fatbin_file.read_bytes()
     mod_obj_mem = ObjectCode.from_fatbin(fatbin_bytes, name="fatbin_memory")
@@ -285,39 +286,40 @@ def test_object_code_load_fatbin_from_file(get_saxpy_kernel_ltoir, tmp_path):
     """
     import shutil
     import subprocess
-    
+
     # Check if NVCC is available
     if not shutil.which("nvcc"):
         pytest.skip("NVCC not available in PATH")
-    
+
     # Create a simple CUDA kernel file
-    kernel_source = '''
+    kernel_source = """
 extern "C" __global__ void simple_kernel(float* data) {
     int idx = threadIdx.x + blockIdx.x * blockDim.x;
     data[idx] = data[idx] * 2.0f;
 }
-'''
-    
+"""
+
     cu_file = tmp_path / "kernel.cu"
     cu_file.write_text(kernel_source)
-    
+
     # Get current device architecture
     from cuda.core.experimental import Device
+
     current_arch = "sm_" + "".join(f"{i}" for i in Device().compute_capability)
-    
+
     # Generate fatbin for multiple architectures
     archs = ["sm_75", "sm_90", current_arch]
     arch_flags = " ".join(f"--gpu-architecture={arch}" for arch in set(archs))
-    
+
     fatbin_file = tmp_path / "kernel.fatbin"
-    
+
     try:
         # Generate fatbin using nvcc
         cmd = f"nvcc --fatbin {arch_flags} -o {fatbin_file} {cu_file}"
         subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True)
     except subprocess.CalledProcessError as e:
         pytest.skip(f"Failed to generate fatbin with nvcc: {e}")
-    
+
     # Test loading fatbin from file path
     mod_obj_file = ObjectCode.from_fatbin(str(fatbin_file), name="fatbin_file")
     assert mod_obj_file.code == str(fatbin_file)
@@ -332,35 +334,36 @@ def test_object_code_load_object(get_saxpy_kernel_ltoir, tmp_path):
     """
     import shutil
     import subprocess
-    
+
     # Check if NVCC is available
     if not shutil.which("nvcc"):
         pytest.skip("NVCC not available in PATH")
-    
+
     # Create a simple CUDA kernel file
-    kernel_source = '''
+    kernel_source = """
 extern "C" __global__ void simple_kernel(float* data) {
     int idx = threadIdx.x + blockIdx.x * blockDim.x;
     data[idx] = data[idx] * 2.0f;
 }
-'''
-    
+"""
+
     cu_file = tmp_path / "kernel.cu"
     cu_file.write_text(kernel_source)
-    
+
     # Get current device architecture
     from cuda.core.experimental import Device
+
     current_arch = "sm_" + "".join(f"{i}" for i in Device().compute_capability)
-    
+
     object_file = tmp_path / "kernel.o"
-    
+
     try:
         # Generate object file using nvcc
         cmd = f"nvcc --device-c --gpu-architecture={current_arch} -o {object_file} {cu_file}"
         subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True)
     except subprocess.CalledProcessError as e:
         pytest.skip(f"Failed to generate object file with nvcc: {e}")
-    
+
     # Test loading object from bytes (in-memory)
     object_bytes = object_file.read_bytes()
     mod_obj_mem = ObjectCode.from_object(object_bytes, name="object_memory")
@@ -381,35 +384,36 @@ def test_object_code_load_object_from_file(get_saxpy_kernel_ltoir, tmp_path):
     """
     import shutil
     import subprocess
-    
+
     # Check if NVCC is available
     if not shutil.which("nvcc"):
         pytest.skip("NVCC not available in PATH")
-    
+
     # Create a simple CUDA kernel file
-    kernel_source = '''
+    kernel_source = """
 extern "C" __global__ void simple_kernel(float* data) {
     int idx = threadIdx.x + blockIdx.x * blockDim.x;
     data[idx] = data[idx] * 2.0f;
 }
-'''
-    
+"""
+
     cu_file = tmp_path / "kernel.cu"
     cu_file.write_text(kernel_source)
-    
+
     # Get current device architecture
     from cuda.core.experimental import Device
+
     current_arch = "sm_" + "".join(f"{i}" for i in Device().compute_capability)
-    
+
     object_file = tmp_path / "kernel.o"
-    
+
     try:
         # Generate object file using nvcc
         cmd = f"nvcc --device-c --gpu-architecture={current_arch} -o {object_file} {cu_file}"
         subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True)
     except subprocess.CalledProcessError as e:
         pytest.skip(f"Failed to generate object file with nvcc: {e}")
-    
+
     # Test loading object from file path
     mod_obj_file = ObjectCode.from_object(str(object_file), name="object_file")
     assert mod_obj_file.code == str(object_file)
@@ -425,40 +429,41 @@ def test_object_code_load_library(get_saxpy_kernel_ltoir, tmp_path):
     """
     import shutil
     import subprocess
-    
+
     # Check if NVCC is available
     if not shutil.which("nvcc"):
         pytest.skip("NVCC not available in PATH")
-    
+
     # Create a simple CUDA kernel file
-    kernel_source = '''
+    kernel_source = """
 extern "C" __global__ void simple_kernel(float* data) {
     int idx = threadIdx.x + blockIdx.x * blockDim.x;
     data[idx] = data[idx] * 2.0f;
 }
-'''
-    
+"""
+
     cu_file = tmp_path / "kernel.cu"
     cu_file.write_text(kernel_source)
-    
+
     # Get current device architecture
     from cuda.core.experimental import Device
+
     current_arch = "sm_" + "".join(f"{i}" for i in Device().compute_capability)
-    
+
     object_file = tmp_path / "kernel.o"
     library_file = tmp_path / "libkernel.a"
-    
+
     try:
         # Generate object file first
         cmd = f"nvcc --device-c --gpu-architecture={current_arch} -o {object_file} {cu_file}"
         subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True)
-        
+
         # Create library from object file
         cmd = f"ar rcs {library_file} {object_file}"
         subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True)
     except subprocess.CalledProcessError as e:
         pytest.skip(f"Failed to generate library with nvcc/ar: {e}")
-    
+
     # Test loading library from bytes (in-memory)
     library_bytes = library_file.read_bytes()
     mod_obj_mem = ObjectCode.from_library(library_bytes, name="library_memory")
@@ -475,40 +480,41 @@ def test_object_code_load_library_from_file(get_saxpy_kernel_ltoir, tmp_path):
     """
     import shutil
     import subprocess
-    
+
     # Check if NVCC is available
     if not shutil.which("nvcc"):
         pytest.skip("NVCC not available in PATH")
-    
+
     # Create a simple CUDA kernel file
-    kernel_source = '''
+    kernel_source = """
 extern "C" __global__ void simple_kernel(float* data) {
     int idx = threadIdx.x + blockIdx.x * blockDim.x;
     data[idx] = data[idx] * 2.0f;
 }
-'''
-    
+"""
+
     cu_file = tmp_path / "kernel.cu"
     cu_file.write_text(kernel_source)
-    
+
     # Get current device architecture
     from cuda.core.experimental import Device
+
     current_arch = "sm_" + "".join(f"{i}" for i in Device().compute_capability)
-    
+
     object_file = tmp_path / "kernel.o"
     library_file = tmp_path / "libkernel.a"
-    
+
     try:
         # Generate object file first
         cmd = f"nvcc --device-c --gpu-architecture={current_arch} -o {object_file} {cu_file}"
         subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True)
-        
+
         # Create library from object file
         cmd = f"ar rcs {library_file} {object_file}"
         subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True)
     except subprocess.CalledProcessError as e:
         pytest.skip(f"Failed to generate library with nvcc/ar: {e}")
-    
+
     # Test loading library from file path
     mod_obj_file = ObjectCode.from_library(str(library_file), name="library_file")
     assert mod_obj_file.code == str(library_file)
@@ -522,14 +528,14 @@ def test_object_code_file_path_linker_integration(get_saxpy_kernel, tmp_path):
     _, mod = get_saxpy_kernel
     cubin = mod._module
     assert isinstance(cubin, bytes)
-    
+
     # Create temporary files for different code types
     test_files = {}
     for code_type in ["cubin", "ptx", "ltoir", "fatbin", "object", "library"]:
         file_path = tmp_path / f"test.{code_type}"
         file_path.write_bytes(cubin)  # Use cubin bytes as proxy for all types
         test_files[code_type] = str(file_path)
-    
+
     # Create ObjectCode instances from file paths
     file_based_objects = []
     for code_type, file_path in test_files.items():
@@ -545,26 +551,26 @@ def test_object_code_file_path_linker_integration(get_saxpy_kernel, tmp_path):
             obj = ObjectCode.from_object(file_path, name=f"file_{code_type}")
         elif code_type == "library":
             obj = ObjectCode.from_library(file_path, name=f"file_{code_type}")
-        
+
         # Verify the ObjectCode was created correctly
         assert obj.code == file_path
         assert obj._code_type == code_type
         assert obj.name == f"file_{code_type}"
         assert isinstance(obj._module, str)  # Should store the file path
         file_based_objects.append(obj)
-    
+
     # Test that these ObjectCode instances can be used with Linker
     # Note: We can't actually link most of these types together in practice,
     # but we can verify the linker accepts them and handles the file path correctly
     from cuda.core.experimental import Linker, LinkerOptions
-    
+
     # Test with ptx which should be linkable (use only PTX for actual linking)
     ptx_obj = None
     for obj in file_based_objects:
         if obj._code_type == "ptx":
             ptx_obj = obj
             break
-    
+
     if ptx_obj is not None:
         # Create a simple linker test - this will test that _add_code_object
         # handles file paths correctly by not crashing on the file path

From 62e240d9b7ea85ee5d9cf6642a226b877a208787 Mon Sep 17 00:00:00 2001
From: Leo Fang <leof@nvidia.com>
Date: Mon, 6 Oct 2025 01:03:59 +0000
Subject: [PATCH 09/12] nit: reorder test to make it easier to follow

---
 cuda_core/tests/test_module.py | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py
index 51d660d781..27425dc722 100644
--- a/cuda_core/tests/test_module.py
+++ b/cuda_core/tests/test_module.py
@@ -150,16 +150,6 @@ def test_read_only_kernel_attributes(get_saxpy_kernel, attr, expected_type):
     assert isinstance(value, expected_type), f"Expected {attr} to be of type {expected_type}, but got {type(value)}"
 
 
-def test_object_code_load_cubin(get_saxpy_kernel):
-    _, mod = get_saxpy_kernel
-    cubin = mod._module
-    sym_map = mod._sym_map
-    assert isinstance(cubin, bytes)
-    mod = ObjectCode.from_cubin(cubin, symbol_mapping=sym_map)
-    assert mod.code == cubin
-    mod.get_kernel("saxpy<double>")  # force loading
-
-
 def test_object_code_load_ptx(get_saxpy_kernel_ptx):
     ptx, mod = get_saxpy_kernel_ptx
     sym_map = mod._sym_map
@@ -184,6 +174,16 @@ def test_object_code_load_ptx_from_file(get_saxpy_kernel_ptx, tmp_path):
     mod_obj.get_kernel("saxpy<double>")  # force loading
 
 
+def test_object_code_load_cubin(get_saxpy_kernel):
+    _, mod = get_saxpy_kernel
+    cubin = mod._module
+    sym_map = mod._sym_map
+    assert isinstance(cubin, bytes)
+    mod = ObjectCode.from_cubin(cubin, symbol_mapping=sym_map)
+    assert mod.code == cubin
+    mod.get_kernel("saxpy<double>")  # force loading
+
+
 def test_object_code_load_cubin_from_file(get_saxpy_kernel, tmp_path):
     _, mod = get_saxpy_kernel
     cubin = mod._module

From 45d0b244a17270ec90132a9e2370049aa29d058e Mon Sep 17 00:00:00 2001
From: Leo Fang <leof@nvidia.com>
Date: Mon, 6 Oct 2025 01:12:50 +0000
Subject: [PATCH 10/12] rename fixture for clarity + purge
 object/library/fatbin tests for now

---
 cuda_core/tests/test_module.py | 420 +++------------------------------
 1 file changed, 27 insertions(+), 393 deletions(-)

diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py
index 27425dc722..be215c92c8 100644
--- a/cuda_core/tests/test_module.py
+++ b/cuda_core/tests/test_module.py
@@ -60,7 +60,7 @@ def test_object_code_init_disabled():
 
 
 @pytest.fixture(scope="function")
-def get_saxpy_kernel(init_cuda):
+def get_saxpy_kernel_cubin(init_cuda):
     # prepare program
     prog = Program(SAXPY_KERNEL, code_type="c++")
     mod = prog.compile(
@@ -74,6 +74,7 @@ def get_saxpy_kernel(init_cuda):
 
 @pytest.fixture(scope="function")
 def get_saxpy_kernel_ptx(init_cuda):
+    # prepare program
     prog = Program(SAXPY_KERNEL, code_type="c++")
     mod = prog.compile(
         "ptx",
@@ -83,16 +84,6 @@ def get_saxpy_kernel_ptx(init_cuda):
     return ptx, mod
 
 
-@pytest.fixture(scope="function")
-def get_saxpy_object_code(init_cuda):
-    prog = Program(SAXPY_KERNEL, code_type="c++")
-    mod = prog.compile(
-        "cubin",
-        name_expressions=("saxpy<float>", "saxpy<double>"),
-    )
-    return mod
-
-
 @pytest.fixture(scope="function")
 def get_saxpy_kernel_ltoir(init_cuda):
     # Create LTOIR code using link-time optimization
@@ -137,8 +128,8 @@ def test_get_kernel(init_cuda):
         ("cluster_scheduling_policy_preference", int),
     ],
 )
-def test_read_only_kernel_attributes(get_saxpy_kernel, attr, expected_type):
-    kernel, _ = get_saxpy_kernel
+def test_read_only_kernel_attributes(get_saxpy_kernel_cubin, attr, expected_type):
+    kernel, _ = get_saxpy_kernel_cubin
     method = getattr(kernel.attributes, attr)
     # get the value without providing a device ordinal
     value = method()
@@ -174,8 +165,8 @@ def test_object_code_load_ptx_from_file(get_saxpy_kernel_ptx, tmp_path):
     mod_obj.get_kernel("saxpy<double>")  # force loading
 
 
-def test_object_code_load_cubin(get_saxpy_kernel):
-    _, mod = get_saxpy_kernel
+def test_object_code_load_cubin(get_saxpy_kernel_cubin):
+    _, mod = get_saxpy_kernel_cubin
     cubin = mod._module
     sym_map = mod._sym_map
     assert isinstance(cubin, bytes)
@@ -184,8 +175,8 @@ def test_object_code_load_cubin(get_saxpy_kernel):
     mod.get_kernel("saxpy<double>")  # force loading
 
 
-def test_object_code_load_cubin_from_file(get_saxpy_kernel, tmp_path):
-    _, mod = get_saxpy_kernel
+def test_object_code_load_cubin_from_file(get_saxpy_kernel_cubin, tmp_path):
+    _, mod = get_saxpy_kernel_cubin
     cubin = mod._module
     sym_map = mod._sym_map
     assert isinstance(cubin, bytes)
@@ -210,7 +201,7 @@ def test_object_code_load_ltoir(get_saxpy_kernel_ltoir):
     assert mod_obj.code == ltoir
     assert mod_obj._code_type == "ltoir"
     # ltoir doesn't support kernel retrieval directly as it's used for linking
-    assert mod_obj._handle is None  # Should only be loaded when needed
+    assert mod_obj._handle is None
     # Test that get_kernel fails for unsupported code type
     with pytest.raises(RuntimeError, match=r'Unsupported code type "ltoir"'):
         mod_obj.get_kernel("saxpy<float>")
@@ -226,369 +217,12 @@ def test_object_code_load_ltoir_from_file(get_saxpy_kernel_ltoir, tmp_path):
     mod_obj = ObjectCode.from_ltoir(str(ltoir_file), symbol_mapping=sym_map)
     assert mod_obj.code == str(ltoir_file)
     assert mod_obj._code_type == "ltoir"
-    assert mod_obj._handle is None  # Should only be loaded when needed
-
-
-def test_object_code_load_fatbin(get_saxpy_kernel_ltoir, tmp_path):
-    """
-    Test fatbin loading using NVCC-generated fatbins.
-    TODO: Can drop NVCC from test dependency once #156 is resolved.
-    """
-    import shutil
-    import subprocess
-
-    # Check if NVCC is available
-    if not shutil.which("nvcc"):
-        pytest.skip("NVCC not available in PATH")
-
-    # Create a simple CUDA kernel file
-    kernel_source = """
-extern "C" __global__ void simple_kernel(float* data) {
-    int idx = threadIdx.x + blockIdx.x * blockDim.x;
-    data[idx] = data[idx] * 2.0f;
-}
-"""
-
-    cu_file = tmp_path / "kernel.cu"
-    cu_file.write_text(kernel_source)
-
-    # Get current device architecture
-    from cuda.core.experimental import Device
-
-    current_arch = "sm_" + "".join(f"{i}" for i in Device().compute_capability)
-
-    # Generate fatbin for multiple architectures
-    archs = ["sm_75", "sm_90", current_arch]
-    arch_flags = " ".join(f"--gpu-architecture={arch}" for arch in set(archs))
-
-    fatbin_file = tmp_path / "kernel.fatbin"
-
-    try:
-        # Generate fatbin using nvcc
-        cmd = f"nvcc --fatbin {arch_flags} -o {fatbin_file} {cu_file}"
-        subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True)
-    except subprocess.CalledProcessError as e:
-        pytest.skip(f"Failed to generate fatbin with nvcc: {e}")
-
-    # Test loading fatbin from bytes (in-memory)
-    fatbin_bytes = fatbin_file.read_bytes()
-    mod_obj_mem = ObjectCode.from_fatbin(fatbin_bytes, name="fatbin_memory")
-    assert mod_obj_mem.code == fatbin_bytes
-    assert mod_obj_mem._code_type == "fatbin"
-    assert mod_obj_mem.name == "fatbin_memory"
-
-
-def test_object_code_load_fatbin_from_file(get_saxpy_kernel_ltoir, tmp_path):
-    """
-    Test fatbin loading from file path using NVCC-generated fatbins.
-    TODO: Can drop NVCC from test dependency once #156 is resolved.
-    """
-    import shutil
-    import subprocess
-
-    # Check if NVCC is available
-    if not shutil.which("nvcc"):
-        pytest.skip("NVCC not available in PATH")
-
-    # Create a simple CUDA kernel file
-    kernel_source = """
-extern "C" __global__ void simple_kernel(float* data) {
-    int idx = threadIdx.x + blockIdx.x * blockDim.x;
-    data[idx] = data[idx] * 2.0f;
-}
-"""
-
-    cu_file = tmp_path / "kernel.cu"
-    cu_file.write_text(kernel_source)
-
-    # Get current device architecture
-    from cuda.core.experimental import Device
-
-    current_arch = "sm_" + "".join(f"{i}" for i in Device().compute_capability)
-
-    # Generate fatbin for multiple architectures
-    archs = ["sm_75", "sm_90", current_arch]
-    arch_flags = " ".join(f"--gpu-architecture={arch}" for arch in set(archs))
-
-    fatbin_file = tmp_path / "kernel.fatbin"
-
-    try:
-        # Generate fatbin using nvcc
-        cmd = f"nvcc --fatbin {arch_flags} -o {fatbin_file} {cu_file}"
-        subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True)
-    except subprocess.CalledProcessError as e:
-        pytest.skip(f"Failed to generate fatbin with nvcc: {e}")
-
-    # Test loading fatbin from file path
-    mod_obj_file = ObjectCode.from_fatbin(str(fatbin_file), name="fatbin_file")
-    assert mod_obj_file.code == str(fatbin_file)
-    assert mod_obj_file._code_type == "fatbin"
-    assert mod_obj_file.name == "fatbin_file"
-
-
-def test_object_code_load_object(get_saxpy_kernel_ltoir, tmp_path):
-    """
-    Test object code loading using NVCC-generated object files.
-    TODO: Can drop NVCC from test dependency once #156 is resolved.
-    """
-    import shutil
-    import subprocess
-
-    # Check if NVCC is available
-    if not shutil.which("nvcc"):
-        pytest.skip("NVCC not available in PATH")
-
-    # Create a simple CUDA kernel file
-    kernel_source = """
-extern "C" __global__ void simple_kernel(float* data) {
-    int idx = threadIdx.x + blockIdx.x * blockDim.x;
-    data[idx] = data[idx] * 2.0f;
-}
-"""
-
-    cu_file = tmp_path / "kernel.cu"
-    cu_file.write_text(kernel_source)
-
-    # Get current device architecture
-    from cuda.core.experimental import Device
-
-    current_arch = "sm_" + "".join(f"{i}" for i in Device().compute_capability)
-
-    object_file = tmp_path / "kernel.o"
-
-    try:
-        # Generate object file using nvcc
-        cmd = f"nvcc --device-c --gpu-architecture={current_arch} -o {object_file} {cu_file}"
-        subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True)
-    except subprocess.CalledProcessError as e:
-        pytest.skip(f"Failed to generate object file with nvcc: {e}")
-
-    # Test loading object from bytes (in-memory)
-    object_bytes = object_file.read_bytes()
-    mod_obj_mem = ObjectCode.from_object(object_bytes, name="object_memory")
-    assert mod_obj_mem.code == object_bytes
-    assert mod_obj_mem._code_type == "object"
-    assert mod_obj_mem.name == "object_memory"
-    # object code doesn't support direct kernel retrieval
-    assert mod_obj_mem._handle is None  # Should only be loaded when needed
-    # Test that get_kernel fails for unsupported code type
-    with pytest.raises(RuntimeError, match=r'Unsupported code type "object"'):
-        mod_obj_mem.get_kernel("simple_kernel")
-
-
-def test_object_code_load_object_from_file(get_saxpy_kernel_ltoir, tmp_path):
-    """
-    Test object code loading from file path using NVCC-generated object files.
-    TODO: Can drop NVCC from test dependency once #156 is resolved.
-    """
-    import shutil
-    import subprocess
-
-    # Check if NVCC is available
-    if not shutil.which("nvcc"):
-        pytest.skip("NVCC not available in PATH")
-
-    # Create a simple CUDA kernel file
-    kernel_source = """
-extern "C" __global__ void simple_kernel(float* data) {
-    int idx = threadIdx.x + blockIdx.x * blockDim.x;
-    data[idx] = data[idx] * 2.0f;
-}
-"""
-
-    cu_file = tmp_path / "kernel.cu"
-    cu_file.write_text(kernel_source)
-
-    # Get current device architecture
-    from cuda.core.experimental import Device
-
-    current_arch = "sm_" + "".join(f"{i}" for i in Device().compute_capability)
-
-    object_file = tmp_path / "kernel.o"
-
-    try:
-        # Generate object file using nvcc
-        cmd = f"nvcc --device-c --gpu-architecture={current_arch} -o {object_file} {cu_file}"
-        subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True)
-    except subprocess.CalledProcessError as e:
-        pytest.skip(f"Failed to generate object file with nvcc: {e}")
-
-    # Test loading object from file path
-    mod_obj_file = ObjectCode.from_object(str(object_file), name="object_file")
-    assert mod_obj_file.code == str(object_file)
-    assert mod_obj_file._code_type == "object"
-    assert mod_obj_file.name == "object_file"
-    assert mod_obj_file._handle is None  # Should only be loaded when needed
-
-
-def test_object_code_load_library(get_saxpy_kernel_ltoir, tmp_path):
-    """
-    Test library loading using NVCC-generated library files.
-    TODO: Can drop NVCC from test dependency once #156 is resolved.
-    """
-    import shutil
-    import subprocess
-
-    # Check if NVCC is available
-    if not shutil.which("nvcc"):
-        pytest.skip("NVCC not available in PATH")
-
-    # Create a simple CUDA kernel file
-    kernel_source = """
-extern "C" __global__ void simple_kernel(float* data) {
-    int idx = threadIdx.x + blockIdx.x * blockDim.x;
-    data[idx] = data[idx] * 2.0f;
-}
-"""
-
-    cu_file = tmp_path / "kernel.cu"
-    cu_file.write_text(kernel_source)
-
-    # Get current device architecture
-    from cuda.core.experimental import Device
-
-    current_arch = "sm_" + "".join(f"{i}" for i in Device().compute_capability)
-
-    object_file = tmp_path / "kernel.o"
-    library_file = tmp_path / "libkernel.a"
-
-    try:
-        # Generate object file first
-        cmd = f"nvcc --device-c --gpu-architecture={current_arch} -o {object_file} {cu_file}"
-        subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True)
-
-        # Create library from object file
-        cmd = f"ar rcs {library_file} {object_file}"
-        subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True)
-    except subprocess.CalledProcessError as e:
-        pytest.skip(f"Failed to generate library with nvcc/ar: {e}")
-
-    # Test loading library from bytes (in-memory)
-    library_bytes = library_file.read_bytes()
-    mod_obj_mem = ObjectCode.from_library(library_bytes, name="library_memory")
-    assert mod_obj_mem.code == library_bytes
-    assert mod_obj_mem._code_type == "library"
-    assert mod_obj_mem.name == "library_memory"
-    assert mod_obj_mem._handle is None  # Should only be loaded when needed
-
-
-def test_object_code_load_library_from_file(get_saxpy_kernel_ltoir, tmp_path):
-    """
-    Test library loading from file path using NVCC-generated library files.
-    TODO: Can drop NVCC from test dependency once #156 is resolved.
-    """
-    import shutil
-    import subprocess
-
-    # Check if NVCC is available
-    if not shutil.which("nvcc"):
-        pytest.skip("NVCC not available in PATH")
-
-    # Create a simple CUDA kernel file
-    kernel_source = """
-extern "C" __global__ void simple_kernel(float* data) {
-    int idx = threadIdx.x + blockIdx.x * blockDim.x;
-    data[idx] = data[idx] * 2.0f;
-}
-"""
-
-    cu_file = tmp_path / "kernel.cu"
-    cu_file.write_text(kernel_source)
-
-    # Get current device architecture
-    from cuda.core.experimental import Device
-
-    current_arch = "sm_" + "".join(f"{i}" for i in Device().compute_capability)
-
-    object_file = tmp_path / "kernel.o"
-    library_file = tmp_path / "libkernel.a"
-
-    try:
-        # Generate object file first
-        cmd = f"nvcc --device-c --gpu-architecture={current_arch} -o {object_file} {cu_file}"
-        subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True)
-
-        # Create library from object file
-        cmd = f"ar rcs {library_file} {object_file}"
-        subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True)
-    except subprocess.CalledProcessError as e:
-        pytest.skip(f"Failed to generate library with nvcc/ar: {e}")
-
-    # Test loading library from file path
-    mod_obj_file = ObjectCode.from_library(str(library_file), name="library_file")
-    assert mod_obj_file.code == str(library_file)
-    assert mod_obj_file._code_type == "library"
-    assert mod_obj_file.name == "library_file"
-    assert mod_obj_file._handle is None  # Should only be loaded when needed
-
+    # ltoir doesn't support kernel retrieval directly as it's used for linking
+    assert mod_obj._handle is None
 
-def test_object_code_file_path_linker_integration(get_saxpy_kernel, tmp_path):
-    """Test that ObjectCode created from file paths works with the Linker"""
-    _, mod = get_saxpy_kernel
-    cubin = mod._module
-    assert isinstance(cubin, bytes)
 
-    # Create temporary files for different code types
-    test_files = {}
-    for code_type in ["cubin", "ptx", "ltoir", "fatbin", "object", "library"]:
-        file_path = tmp_path / f"test.{code_type}"
-        file_path.write_bytes(cubin)  # Use cubin bytes as proxy for all types
-        test_files[code_type] = str(file_path)
-
-    # Create ObjectCode instances from file paths
-    file_based_objects = []
-    for code_type, file_path in test_files.items():
-        if code_type == "cubin":
-            obj = ObjectCode.from_cubin(file_path, name=f"file_{code_type}")
-        elif code_type == "ptx":
-            obj = ObjectCode.from_ptx(file_path, name=f"file_{code_type}")
-        elif code_type == "ltoir":
-            obj = ObjectCode.from_ltoir(file_path, name=f"file_{code_type}")
-        elif code_type == "fatbin":
-            obj = ObjectCode.from_fatbin(file_path, name=f"file_{code_type}")
-        elif code_type == "object":
-            obj = ObjectCode.from_object(file_path, name=f"file_{code_type}")
-        elif code_type == "library":
-            obj = ObjectCode.from_library(file_path, name=f"file_{code_type}")
-
-        # Verify the ObjectCode was created correctly
-        assert obj.code == file_path
-        assert obj._code_type == code_type
-        assert obj.name == f"file_{code_type}"
-        assert isinstance(obj._module, str)  # Should store the file path
-        file_based_objects.append(obj)
-
-    # Test that these ObjectCode instances can be used with Linker
-    # Note: We can't actually link most of these types together in practice,
-    # but we can verify the linker accepts them and handles the file path correctly
-    from cuda.core.experimental import Linker, LinkerOptions
-
-    # Test with ptx which should be linkable (use only PTX for actual linking)
-    ptx_obj = None
-    for obj in file_based_objects:
-        if obj._code_type == "ptx":
-            ptx_obj = obj
-            break
-
-    if ptx_obj is not None:
-        # Create a simple linker test - this will test that _add_code_object
-        # handles file paths correctly by not crashing on the file path
-        try:
-            arch = "sm_" + "".join(f"{i}" for i in Device().compute_capability)
-            options = LinkerOptions(arch=arch)
-            # This should not crash - it should handle the file path in _add_code_object
-            linker = Linker(ptx_obj, options=options)
-            # We don't need to actually link since that might fail due to content,
-            # but creating the linker tests our file path handling
-            assert linker is not None
-        except Exception as e:
-            # If it fails, it should be due to content issues, not file path handling
-            # The key is that it should not fail with "Expected type bytes, but got str"
-            assert "Expected type bytes, but got str" not in str(e), f"File path handling failed: {e}"
-
-
-def test_saxpy_arguments(get_saxpy_kernel, cuda12_4_prerequisite_check):
-    krn, _ = get_saxpy_kernel
+def test_saxpy_arguments(get_saxpy_kernel_cubin, cuda12_4_prerequisite_check):
+    krn, _ = get_saxpy_kernel_cubin
 
     if cuda12_4_prerequisite_check:
         assert krn.num_arguments == 5
@@ -666,8 +300,8 @@ def test_num_args_error_handling(deinit_all_contexts_function, cuda12_4_prerequi
 
 @pytest.mark.parametrize("block_size", [32, 64, 96, 120, 128, 256])
 @pytest.mark.parametrize("smem_size_per_block", [0, 32, 4096])
-def test_occupancy_max_active_block_per_multiprocessor(get_saxpy_kernel, block_size, smem_size_per_block):
-    kernel, _ = get_saxpy_kernel
+def test_occupancy_max_active_block_per_multiprocessor(get_saxpy_kernel_cubin, block_size, smem_size_per_block):
+    kernel, _ = get_saxpy_kernel_cubin
     dev_props = Device().properties
     assert block_size <= dev_props.max_threads_per_block
     assert smem_size_per_block <= dev_props.max_shared_memory_per_block
@@ -683,9 +317,9 @@ def test_occupancy_max_active_block_per_multiprocessor(get_saxpy_kernel, block_s
 
 @pytest.mark.parametrize("block_size_limit", [32, 64, 96, 120, 128, 256, 0])
 @pytest.mark.parametrize("smem_size_per_block", [0, 32, 4096])
-def test_occupancy_max_potential_block_size_constant(get_saxpy_kernel, block_size_limit, smem_size_per_block):
+def test_occupancy_max_potential_block_size_constant(get_saxpy_kernel_cubin, block_size_limit, smem_size_per_block):
     """Tests use case when shared memory needed is independent on the block size"""
-    kernel, _ = get_saxpy_kernel
+    kernel, _ = get_saxpy_kernel_cubin
     dev_props = Device().properties
     assert block_size_limit <= dev_props.max_threads_per_block
     assert smem_size_per_block <= dev_props.max_shared_memory_per_block
@@ -710,9 +344,9 @@ def test_occupancy_max_potential_block_size_constant(get_saxpy_kernel, block_siz
 
 @pytest.mark.skipif(numba is None, reason="Test requires numba to be installed")
 @pytest.mark.parametrize("block_size_limit", [32, 64, 96, 120, 128, 277, 0])
-def test_occupancy_max_potential_block_size_b2dsize(get_saxpy_kernel, block_size_limit):
+def test_occupancy_max_potential_block_size_b2dsize(get_saxpy_kernel_cubin, block_size_limit):
     """Tests use case when shared memory needed depends on the block size"""
-    kernel, _ = get_saxpy_kernel
+    kernel, _ = get_saxpy_kernel_cubin
 
     def shared_memory_needed(block_size: numba.intc) -> numba.size_t:
         "Size of dynamic shared memory needed by kernel of this block size"
@@ -737,8 +371,8 @@ def shared_memory_needed(block_size: numba.intc) -> numba.size_t:
 
 
 @pytest.mark.parametrize("num_blocks_per_sm, block_size", [(4, 32), (2, 64), (2, 96), (3, 120), (2, 128), (1, 256)])
-def test_occupancy_available_dynamic_shared_memory_per_block(get_saxpy_kernel, num_blocks_per_sm, block_size):
-    kernel, _ = get_saxpy_kernel
+def test_occupancy_available_dynamic_shared_memory_per_block(get_saxpy_kernel_cubin, num_blocks_per_sm, block_size):
+    kernel, _ = get_saxpy_kernel_cubin
     dev_props = Device().properties
     assert block_size <= dev_props.max_threads_per_block
     assert num_blocks_per_sm * block_size <= dev_props.max_threads_per_multiprocessor
@@ -748,8 +382,8 @@ def test_occupancy_available_dynamic_shared_memory_per_block(get_saxpy_kernel, n
 
 
 @pytest.mark.parametrize("cluster", [None, 2])
-def test_occupancy_max_active_clusters(get_saxpy_kernel, cluster):
-    kernel, _ = get_saxpy_kernel
+def test_occupancy_max_active_clusters(get_saxpy_kernel_cubin, cluster):
+    kernel, _ = get_saxpy_kernel_cubin
     dev = Device()
     if dev.compute_capability < (9, 0):
         pytest.skip("Device with compute capability 90 or higher is required for cluster support")
@@ -763,8 +397,8 @@ def test_occupancy_max_active_clusters(get_saxpy_kernel, cluster):
     assert max_active_clusters >= 0
 
 
-def test_occupancy_max_potential_cluster_size(get_saxpy_kernel):
-    kernel, _ = get_saxpy_kernel
+def test_occupancy_max_potential_cluster_size(get_saxpy_kernel_cubin):
+    kernel, _ = get_saxpy_kernel_cubin
     dev = Device()
     if dev.compute_capability < (9, 0):
         pytest.skip("Device with compute capability 90 or higher is required for cluster support")
@@ -778,8 +412,8 @@ def test_occupancy_max_potential_cluster_size(get_saxpy_kernel):
     assert max_potential_cluster_size >= 0
 
 
-def test_module_serialization_roundtrip(get_saxpy_kernel):
-    _, objcode = get_saxpy_kernel
+def test_module_serialization_roundtrip(get_saxpy_kernel_cubin):
+    _, objcode = get_saxpy_kernel_cubin
     result = pickle.loads(pickle.dumps(objcode))  # noqa: S403, S301
 
     assert isinstance(result, ObjectCode)

From 3665e4189271bd559d95a89cc99eca38ace981c4 Mon Sep 17 00:00:00 2001
From: Leo Fang <leof@nvidia.com>
Date: Mon, 6 Oct 2025 01:21:23 +0000
Subject: [PATCH 11/12] minor fixes

---
 cuda_core/tests/test_module.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py
index be215c92c8..e3198e7902 100644
--- a/cuda_core/tests/test_module.py
+++ b/cuda_core/tests/test_module.py
@@ -67,7 +67,6 @@ def get_saxpy_kernel_cubin(init_cuda):
         "cubin",
         name_expressions=("saxpy<float>", "saxpy<double>"),
     )
-
     # run in single precision
     return mod.get_kernel("saxpy<float>"), mod
 
@@ -154,9 +153,9 @@ def test_object_code_load_ptx(get_saxpy_kernel_ptx):
 def test_object_code_load_ptx_from_file(get_saxpy_kernel_ptx, tmp_path):
     ptx, mod = get_saxpy_kernel_ptx
     sym_map = mod._sym_map
-    assert isinstance(ptx, str)
+    assert isinstance(ptx, bytes)
     ptx_file = tmp_path / "test.ptx"
-    ptx_file.write_text(ptx)
+    ptx_file.write_bytes(ptx)
     mod_obj = ObjectCode.from_ptx(str(ptx_file), symbol_mapping=sym_map)
     assert mod_obj.code == str(ptx_file)
     assert mod_obj._code_type == "ptx"
@@ -187,8 +186,8 @@ def test_object_code_load_cubin_from_file(get_saxpy_kernel_cubin, tmp_path):
     mod.get_kernel("saxpy<double>")  # force loading
 
 
-def test_object_code_handle(get_saxpy_object_code):
-    mod = get_saxpy_object_code
+def test_object_code_handle(get_saxpy_kernel_cubin):
+    _, mod = get_saxpy_kernel_cubin
     assert mod.handle is not None
 
 

From 727ead575ec530c6f935a2dbabe8629d979a2d88 Mon Sep 17 00:00:00 2001
From: Leo Fang <leof@nvidia.com>
Date: Mon, 6 Oct 2025 01:30:28 +0000
Subject: [PATCH 12/12] make code_type public

---
 cuda_core/cuda/core/experimental/_module.py   | 5 +++++
 cuda_core/docs/source/release/0.X.Y-notes.rst | 1 +
 cuda_core/tests/test_module.py                | 8 ++++----
 3 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/cuda_core/cuda/core/experimental/_module.py b/cuda_core/cuda/core/experimental/_module.py
index 71293be4d1..2c7ea3a156 100644
--- a/cuda_core/cuda/core/experimental/_module.py
+++ b/cuda_core/cuda/core/experimental/_module.py
@@ -666,6 +666,11 @@ def name(self) -> str:
         """Return a human-readable name of this code object."""
         return self._name
 
+    @property
+    def code_type(self) -> str:
+        """Return the type of the underlying code object."""
+        return self._code_type
+
     @property
     @precondition(_lazy_load_module)
     def handle(self):
diff --git a/cuda_core/docs/source/release/0.X.Y-notes.rst b/cuda_core/docs/source/release/0.X.Y-notes.rst
index e87cbdee31..2fb4093214 100644
--- a/cuda_core/docs/source/release/0.X.Y-notes.rst
+++ b/cuda_core/docs/source/release/0.X.Y-notes.rst
@@ -32,6 +32,7 @@ New features
 - CUDA 13.x testing support through new ``test-cu13`` dependency group.
 - Stream-ordered memory allocation can now be shared on Linux via :class:`DeviceMemoryResource`.
 - Added NVVM IR support to :class:`Program`. NVVM IR is now understood with ``code_type="nvvm"``.
+- Added an :attr:`ObjectCode.code_type` attribute for querying the code type.
 
 
 New examples
diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py
index e3198e7902..49df966c08 100644
--- a/cuda_core/tests/test_module.py
+++ b/cuda_core/tests/test_module.py
@@ -158,7 +158,7 @@ def test_object_code_load_ptx_from_file(get_saxpy_kernel_ptx, tmp_path):
     ptx_file.write_bytes(ptx)
     mod_obj = ObjectCode.from_ptx(str(ptx_file), symbol_mapping=sym_map)
     assert mod_obj.code == str(ptx_file)
-    assert mod_obj._code_type == "ptx"
+    assert mod_obj.code_type == "ptx"
     if not Program._can_load_generated_ptx():
         pytest.skip("PTX version too new for current driver")
     mod_obj.get_kernel("saxpy<double>")  # force loading
@@ -198,7 +198,7 @@ def test_object_code_load_ltoir(get_saxpy_kernel_ltoir):
     assert isinstance(ltoir, bytes)
     mod_obj = ObjectCode.from_ltoir(ltoir, symbol_mapping=sym_map)
     assert mod_obj.code == ltoir
-    assert mod_obj._code_type == "ltoir"
+    assert mod_obj.code_type == "ltoir"
     # ltoir doesn't support kernel retrieval directly as it's used for linking
     assert mod_obj._handle is None
     # Test that get_kernel fails for unsupported code type
@@ -215,7 +215,7 @@ def test_object_code_load_ltoir_from_file(get_saxpy_kernel_ltoir, tmp_path):
     ltoir_file.write_bytes(ltoir)
     mod_obj = ObjectCode.from_ltoir(str(ltoir_file), symbol_mapping=sym_map)
     assert mod_obj.code == str(ltoir_file)
-    assert mod_obj._code_type == "ltoir"
+    assert mod_obj.code_type == "ltoir"
     # ltoir doesn't support kernel retrieval directly as it's used for linking
     assert mod_obj._handle is None
 
@@ -418,4 +418,4 @@ def test_module_serialization_roundtrip(get_saxpy_kernel_cubin):
     assert isinstance(result, ObjectCode)
     assert objcode.code == result.code
     assert objcode._sym_map == result._sym_map
-    assert objcode._code_type == result._code_type
+    assert objcode.code_type == result.code_type