From afb4bd8a58eb27d612135c61933738ce16d9c1aa Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 22 Aug 2025 02:02:15 +0000 Subject: [PATCH 01/12] Initial plan From b2bbf37a579c864e74f3dcd09e9e2e87f3d09ad0 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 22 Aug 2025 02:08:49 +0000 Subject: [PATCH 02/12] Add comprehensive tests for ObjectCode from_ltoir, from_fatbin, from_object, and from_library constructors Co-authored-by: leofang <5534781+leofang@users.noreply.github.com> --- cuda_core/tests/test_module.py | 114 +++++++++++++++++++++++++++++++++ 1 file changed, 114 insertions(+) diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py index 2b0fc265eb..6fef43890f 100644 --- a/cuda_core/tests/test_module.py +++ b/cuda_core/tests/test_module.py @@ -180,6 +180,120 @@ def test_object_code_handle(get_saxpy_object_code): assert mod.handle is not None +@pytest.fixture(scope="function") +def get_ltoir_object_code(init_cuda): + # Create LTOIR code using link-time optimization + prog = Program(SAXPY_KERNEL, code_type="c++", options=ProgramOptions(link_time_optimization=True)) + mod = prog.compile("ltoir", name_expressions=("saxpy", "saxpy")) + return mod + + +def test_object_code_load_ltoir(get_ltoir_object_code): + mod = get_ltoir_object_code + ltoir = mod._module + sym_map = mod._sym_map + assert isinstance(ltoir, bytes) + mod_obj = ObjectCode.from_ltoir(ltoir, symbol_mapping=sym_map) + assert mod_obj.code == ltoir + assert mod_obj._code_type == "ltoir" + # ltoir doesn't support kernel retrieval directly as it's used for linking + assert mod_obj._handle is None # Should only be loaded when needed + + +def test_object_code_load_ltoir_from_file(get_ltoir_object_code, tmp_path): + mod = get_ltoir_object_code + ltoir = mod._module + sym_map = mod._sym_map + assert isinstance(ltoir, bytes) + ltoir_file = tmp_path / "test.ltoir" + ltoir_file.write_bytes(ltoir) + mod_obj = ObjectCode.from_ltoir(str(ltoir_file), symbol_mapping=sym_map) + assert mod_obj.code == str(ltoir_file) + assert mod_obj._code_type == "ltoir" + assert mod_obj._handle is None # Should only be loaded when needed + + +def test_object_code_load_fatbin(get_saxpy_kernel): + # Use cubin as a substitute for fatbin since they have similar structure + _, mod = get_saxpy_kernel + cubin = mod._module + sym_map = mod._sym_map + assert isinstance(cubin, bytes) + mod_obj = ObjectCode.from_fatbin(cubin, symbol_mapping=sym_map) + assert mod_obj.code == cubin + assert mod_obj._code_type == "fatbin" + # fatbin supports kernel retrieval + mod_obj.get_kernel("saxpy") # force loading + + +def test_object_code_load_fatbin_from_file(get_saxpy_kernel, tmp_path): + # Use cubin as a substitute for fatbin since they have similar structure + _, mod = get_saxpy_kernel + cubin = mod._module + sym_map = mod._sym_map + assert isinstance(cubin, bytes) + fatbin_file = tmp_path / "test.fatbin" + fatbin_file.write_bytes(cubin) + mod_obj = ObjectCode.from_fatbin(str(fatbin_file), symbol_mapping=sym_map) + assert mod_obj.code == str(fatbin_file) + assert mod_obj._code_type == "fatbin" + mod_obj.get_kernel("saxpy") # force loading + + +def test_object_code_load_object(get_saxpy_kernel): + # Use cubin as a substitute for object code since they're binary formats + _, mod = get_saxpy_kernel + cubin = mod._module + sym_map = mod._sym_map + assert isinstance(cubin, bytes) + mod_obj = ObjectCode.from_object(cubin, symbol_mapping=sym_map) + assert mod_obj.code == cubin + assert mod_obj._code_type == "object" + # object code doesn't support direct kernel retrieval + assert mod_obj._handle is None # Should only be loaded when needed + + +def test_object_code_load_object_from_file(get_saxpy_kernel, tmp_path): + # Use cubin as a substitute for object code since they're binary formats + _, mod = get_saxpy_kernel + cubin = mod._module + sym_map = mod._sym_map + assert isinstance(cubin, bytes) + object_file = tmp_path / "test.o" + object_file.write_bytes(cubin) + mod_obj = ObjectCode.from_object(str(object_file), symbol_mapping=sym_map) + assert mod_obj.code == str(object_file) + assert mod_obj._code_type == "object" + assert mod_obj._handle is None # Should only be loaded when needed + + +def test_object_code_load_library(get_saxpy_kernel): + # Use cubin as a substitute for library since they're binary formats + _, mod = get_saxpy_kernel + cubin = mod._module + sym_map = mod._sym_map + assert isinstance(cubin, bytes) + mod_obj = ObjectCode.from_library(cubin, symbol_mapping=sym_map) + assert mod_obj.code == cubin + assert mod_obj._code_type == "library" + # library code doesn't support direct kernel retrieval + assert mod_obj._handle is None # Should only be loaded when needed + + +def test_object_code_load_library_from_file(get_saxpy_kernel, tmp_path): + # Use cubin as a substitute for library since they're binary formats + _, mod = get_saxpy_kernel + cubin = mod._module + sym_map = mod._sym_map + assert isinstance(cubin, bytes) + library_file = tmp_path / "test.a" + library_file.write_bytes(cubin) + mod_obj = ObjectCode.from_library(str(library_file), symbol_mapping=sym_map) + assert mod_obj.code == str(library_file) + assert mod_obj._code_type == "library" + assert mod_obj._handle is None # Should only be loaded when needed + + def test_saxpy_arguments(get_saxpy_kernel, cuda12_4_prerequisite_check): krn, _ = get_saxpy_kernel From abbb07a9231bdfd31d7a466bd14f18827122ca9a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 22 Aug 2025 02:10:52 +0000 Subject: [PATCH 03/12] Enhance ObjectCode constructor tests with error handling and default value validation Co-authored-by: leofang <5534781+leofang@users.noreply.github.com> --- cuda_core/tests/test_module.py | 57 ++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py index 6fef43890f..ce5f82abca 100644 --- a/cuda_core/tests/test_module.py +++ b/cuda_core/tests/test_module.py @@ -198,6 +198,9 @@ def test_object_code_load_ltoir(get_ltoir_object_code): assert mod_obj._code_type == "ltoir" # ltoir doesn't support kernel retrieval directly as it's used for linking assert mod_obj._handle is None # Should only be loaded when needed + # Test that get_kernel fails for unsupported code type + with pytest.raises(RuntimeError, match=r'Unsupported code type "ltoir"'): + mod_obj.get_kernel("saxpy") def test_object_code_load_ltoir_from_file(get_ltoir_object_code, tmp_path): @@ -251,6 +254,9 @@ def test_object_code_load_object(get_saxpy_kernel): assert mod_obj._code_type == "object" # object code doesn't support direct kernel retrieval assert mod_obj._handle is None # Should only be loaded when needed + # Test that get_kernel fails for unsupported code type + with pytest.raises(RuntimeError, match=r'Unsupported code type "object"'): + mod_obj.get_kernel("saxpy") def test_object_code_load_object_from_file(get_saxpy_kernel, tmp_path): @@ -278,6 +284,9 @@ def test_object_code_load_library(get_saxpy_kernel): assert mod_obj._code_type == "library" # library code doesn't support direct kernel retrieval assert mod_obj._handle is None # Should only be loaded when needed + # Test that get_kernel fails for unsupported code type + with pytest.raises(RuntimeError, match=r'Unsupported code type "library"'): + mod_obj.get_kernel("saxpy") def test_object_code_load_library_from_file(get_saxpy_kernel, tmp_path): @@ -294,6 +303,54 @@ def test_object_code_load_library_from_file(get_saxpy_kernel, tmp_path): assert mod_obj._handle is None # Should only be loaded when needed +def test_object_code_constructors_with_name_and_symbol_mapping(): + """Test that all from_* constructors properly set name and symbol_mapping""" + # Dummy data for testing + dummy_bytes = b"dummy_code_data" + test_name = "test_object" + test_sym_map = {"kernel1": "mangled_kernel1", "kernel2": "mangled_kernel2"} + + # Test all constructors + constructors = [ + (ObjectCode.from_cubin, "cubin"), + (ObjectCode.from_ptx, "ptx"), + (ObjectCode.from_ltoir, "ltoir"), + (ObjectCode.from_fatbin, "fatbin"), + (ObjectCode.from_object, "object"), + (ObjectCode.from_library, "library"), + ] + + for constructor, code_type in constructors: + obj = constructor(dummy_bytes, name=test_name, symbol_mapping=test_sym_map) + assert obj.name == test_name + assert obj._sym_map == test_sym_map + assert obj._code_type == code_type + assert obj.code == dummy_bytes + + +def test_object_code_constructors_default_values(): + """Test that all from_* constructors handle default values correctly""" + # Dummy data for testing + dummy_bytes = b"dummy_code_data" + + # Test all constructors with defaults + constructors = [ + (ObjectCode.from_cubin, "cubin"), + (ObjectCode.from_ptx, "ptx"), + (ObjectCode.from_ltoir, "ltoir"), + (ObjectCode.from_fatbin, "fatbin"), + (ObjectCode.from_object, "object"), + (ObjectCode.from_library, "library"), + ] + + for constructor, code_type in constructors: + obj = constructor(dummy_bytes) # Use defaults + assert obj.name == "" # Default name should be empty string + assert obj._sym_map == {} # Default symbol mapping should be empty dict + assert obj._code_type == code_type + assert obj.code == dummy_bytes + + def test_saxpy_arguments(get_saxpy_kernel, cuda12_4_prerequisite_check): krn, _ = get_saxpy_kernel From 9ee5bfc8f6146d34877a0069ba3522e493ff0ab8 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 24 Aug 2025 05:48:56 +0000 Subject: [PATCH 04/12] Fix linker to handle file paths for ObjectCode constructors Co-authored-by: leofang <5534781+leofang@users.noreply.github.com> --- cuda_core/cuda/core/experimental/_linker.py | 62 ++++++++++++++------ cuda_core/tests/test_module.py | 65 +++++++++++++++++++++ 2 files changed, 108 insertions(+), 19 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_linker.py b/cuda_core/cuda/core/experimental/_linker.py index c3528a14e2..028c497f4e 100644 --- a/cuda_core/cuda/core/experimental/_linker.py +++ b/cuda_core/cuda/core/experimental/_linker.py @@ -395,32 +395,56 @@ def __init__(self, *object_codes: ObjectCode, options: LinkerOptions = None): def _add_code_object(self, object_code: ObjectCode): data = object_code._module - assert_type(data, bytes) with _exception_manager(self): name_str = f"{object_code.name}" - if _nvjitlink: - _nvjitlink.add_data( - self._mnff.handle, - self._input_type_from_code_type(object_code._code_type), - data, - len(data), - name_str, - ) - else: - name_bytes = name_str.encode() - handle_return( - _driver.cuLinkAddData( + if isinstance(data, str): + # Handle file path input + if _nvjitlink: + _nvjitlink.add_file( + self._mnff.handle, + self._input_type_from_code_type(object_code._code_type), + data, + ) + else: + name_bytes = name_str.encode() + handle_return( + _driver.cuLinkAddFile( + self._mnff.handle, + self._input_type_from_code_type(object_code._code_type), + data.encode(), + 0, + None, + None, + ) + ) + self._mnff.const_char_keep_alive.append(name_bytes) + elif isinstance(data, bytes): + # Handle bytes input (existing logic) + if _nvjitlink: + _nvjitlink.add_data( self._mnff.handle, self._input_type_from_code_type(object_code._code_type), data, len(data), - name_bytes, - 0, - None, - None, + name_str, ) - ) - self._mnff.const_char_keep_alive.append(name_bytes) + else: + name_bytes = name_str.encode() + handle_return( + _driver.cuLinkAddData( + self._mnff.handle, + self._input_type_from_code_type(object_code._code_type), + data, + len(data), + name_bytes, + 0, + None, + None, + ) + ) + self._mnff.const_char_keep_alive.append(name_bytes) + else: + raise TypeError(f"Expected bytes or str, but got {type(data).__name__}") def link(self, target_type) -> ObjectCode: """ diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py index ce5f82abca..3a2ac9c314 100644 --- a/cuda_core/tests/test_module.py +++ b/cuda_core/tests/test_module.py @@ -351,6 +351,71 @@ def test_object_code_constructors_default_values(): assert obj.code == dummy_bytes +def test_object_code_file_path_linker_integration(get_saxpy_kernel, tmp_path): + """Test that ObjectCode created from file paths works with the Linker""" + _, mod = get_saxpy_kernel + cubin = mod._module + assert isinstance(cubin, bytes) + + # Create temporary files for different code types + test_files = {} + for code_type in ["cubin", "ptx", "ltoir", "fatbin", "object", "library"]: + file_path = tmp_path / f"test.{code_type}" + file_path.write_bytes(cubin) # Use cubin bytes as proxy for all types + test_files[code_type] = str(file_path) + + # Create ObjectCode instances from file paths + file_based_objects = [] + for code_type, file_path in test_files.items(): + if code_type == "cubin": + obj = ObjectCode.from_cubin(file_path, name=f"file_{code_type}") + elif code_type == "ptx": + obj = ObjectCode.from_ptx(file_path, name=f"file_{code_type}") + elif code_type == "ltoir": + obj = ObjectCode.from_ltoir(file_path, name=f"file_{code_type}") + elif code_type == "fatbin": + obj = ObjectCode.from_fatbin(file_path, name=f"file_{code_type}") + elif code_type == "object": + obj = ObjectCode.from_object(file_path, name=f"file_{code_type}") + elif code_type == "library": + obj = ObjectCode.from_library(file_path, name=f"file_{code_type}") + + # Verify the ObjectCode was created correctly + assert obj.code == file_path + assert obj._code_type == code_type + assert obj.name == f"file_{code_type}" + assert isinstance(obj._module, str) # Should store the file path + file_based_objects.append(obj) + + # Test that these ObjectCode instances can be used with Linker + # Note: We can't actually link most of these types together in practice, + # but we can verify the linker accepts them and handles the file path correctly + from cuda.core.experimental import Linker, LinkerOptions + + # Test with ptx which should be linkable (use only PTX for actual linking) + ptx_obj = None + for obj in file_based_objects: + if obj._code_type == "ptx": + ptx_obj = obj + break + + if ptx_obj is not None: + # Create a simple linker test - this will test that _add_code_object + # handles file paths correctly by not crashing on the file path + try: + arch = "sm_" + "".join(f"{i}" for i in Device().compute_capability) + options = LinkerOptions(arch=arch) + # This should not crash - it should handle the file path in _add_code_object + linker = Linker(ptx_obj, options=options) + # We don't need to actually link since that might fail due to content, + # but creating the linker tests our file path handling + assert linker is not None + except Exception as e: + # If it fails, it should be due to content issues, not file path handling + # The key is that it should not fail with "Expected type bytes, but got str" + assert "Expected type bytes, but got str" not in str(e), f"File path handling failed: {e}" + + def test_saxpy_arguments(get_saxpy_kernel, cuda12_4_prerequisite_check): krn, _ = get_saxpy_kernel From f3aad444cef8735404785b465b85658f5378b412 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 25 Aug 2025 00:27:04 +0000 Subject: [PATCH 05/12] Flatten nested if statements in linker _add_code_object method Co-authored-by: leofang <5534781+leofang@users.noreply.github.com> --- cuda_core/cuda/core/experimental/_linker.py | 78 ++++++++++----------- 1 file changed, 39 insertions(+), 39 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_linker.py b/cuda_core/cuda/core/experimental/_linker.py index 028c497f4e..ca498a6ad2 100644 --- a/cuda_core/cuda/core/experimental/_linker.py +++ b/cuda_core/cuda/core/experimental/_linker.py @@ -397,52 +397,52 @@ def _add_code_object(self, object_code: ObjectCode): data = object_code._module with _exception_manager(self): name_str = f"{object_code.name}" - if isinstance(data, str): - # Handle file path input - if _nvjitlink: - _nvjitlink.add_file( - self._mnff.handle, - self._input_type_from_code_type(object_code._code_type), - data, - ) - else: - name_bytes = name_str.encode() - handle_return( - _driver.cuLinkAddFile( - self._mnff.handle, - self._input_type_from_code_type(object_code._code_type), - data.encode(), - 0, - None, - None, - ) - ) - self._mnff.const_char_keep_alive.append(name_bytes) + if _nvjitlink and isinstance(data, bytes): + # Handle bytes input with nvjitlink + _nvjitlink.add_data( + self._mnff.handle, + self._input_type_from_code_type(object_code._code_type), + data, + len(data), + name_str, + ) + elif _nvjitlink and isinstance(data, str): + # Handle file path input with nvjitlink + _nvjitlink.add_file( + self._mnff.handle, + self._input_type_from_code_type(object_code._code_type), + data, + ) elif isinstance(data, bytes): - # Handle bytes input (existing logic) - if _nvjitlink: - _nvjitlink.add_data( + # Handle bytes input with driver API + name_bytes = name_str.encode() + handle_return( + _driver.cuLinkAddData( self._mnff.handle, self._input_type_from_code_type(object_code._code_type), data, len(data), - name_str, + name_bytes, + 0, + None, + None, ) - else: - name_bytes = name_str.encode() - handle_return( - _driver.cuLinkAddData( - self._mnff.handle, - self._input_type_from_code_type(object_code._code_type), - data, - len(data), - name_bytes, - 0, - None, - None, - ) + ) + self._mnff.const_char_keep_alive.append(name_bytes) + elif isinstance(data, str): + # Handle file path input with driver API + name_bytes = name_str.encode() + handle_return( + _driver.cuLinkAddFile( + self._mnff.handle, + self._input_type_from_code_type(object_code._code_type), + data.encode(), + 0, + None, + None, ) - self._mnff.const_char_keep_alive.append(name_bytes) + ) + self._mnff.const_char_keep_alive.append(name_bytes) else: raise TypeError(f"Expected bytes or str, but got {type(data).__name__}") From 5609cb165d3fcf7ca0222054590c08c2b322c76c Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 25 Aug 2025 00:57:30 +0000 Subject: [PATCH 06/12] Address review comments: reorganize tests, add NVCC-based testing, fix fixture naming Co-authored-by: leofang <5534781+leofang@users.noreply.github.com> --- cuda_core/tests/test_module.py | 446 ++++++++++++++++++++++----------- 1 file changed, 306 insertions(+), 140 deletions(-) diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py index 3a2ac9c314..5376fc1ae3 100644 --- a/cuda_core/tests/test_module.py +++ b/cuda_core/tests/test_module.py @@ -94,6 +94,14 @@ def get_saxpy_object_code(init_cuda): return mod +@pytest.fixture(scope="function") +def get_saxpy_kernel_ltoir(init_cuda): + # Create LTOIR code using link-time optimization + prog = Program(SAXPY_KERNEL, code_type="c++", options=ProgramOptions(link_time_optimization=True)) + mod = prog.compile("ltoir", name_expressions=("saxpy", "saxpy")) + return mod + + def test_get_kernel(init_cuda): kernel = """extern "C" __global__ void ABC() { }""" @@ -163,6 +171,20 @@ def test_object_code_load_ptx(get_saxpy_kernel_ptx): mod_obj.get_kernel("saxpy") # force loading +def test_object_code_load_ptx_from_file(get_saxpy_kernel_ptx, tmp_path): + ptx, mod = get_saxpy_kernel_ptx + sym_map = mod._sym_map + assert isinstance(ptx, str) + ptx_file = tmp_path / "test.ptx" + ptx_file.write_text(ptx) + mod_obj = ObjectCode.from_ptx(str(ptx_file), symbol_mapping=sym_map) + assert mod_obj.code == str(ptx_file) + assert mod_obj._code_type == "ptx" + if not Program._can_load_generated_ptx(): + pytest.skip("PTX version too new for current driver") + mod_obj.get_kernel("saxpy") # force loading + + def test_object_code_load_cubin_from_file(get_saxpy_kernel, tmp_path): _, mod = get_saxpy_kernel cubin = mod._module @@ -180,16 +202,8 @@ def test_object_code_handle(get_saxpy_object_code): assert mod.handle is not None -@pytest.fixture(scope="function") -def get_ltoir_object_code(init_cuda): - # Create LTOIR code using link-time optimization - prog = Program(SAXPY_KERNEL, code_type="c++", options=ProgramOptions(link_time_optimization=True)) - mod = prog.compile("ltoir", name_expressions=("saxpy", "saxpy")) - return mod - - -def test_object_code_load_ltoir(get_ltoir_object_code): - mod = get_ltoir_object_code +def test_object_code_load_ltoir(get_saxpy_kernel_ltoir): + mod = get_saxpy_kernel_ltoir ltoir = mod._module sym_map = mod._sym_map assert isinstance(ltoir, bytes) @@ -203,8 +217,8 @@ def test_object_code_load_ltoir(get_ltoir_object_code): mod_obj.get_kernel("saxpy") -def test_object_code_load_ltoir_from_file(get_ltoir_object_code, tmp_path): - mod = get_ltoir_object_code +def test_object_code_load_ltoir_from_file(get_saxpy_kernel_ltoir, tmp_path): + mod = get_saxpy_kernel_ltoir ltoir = mod._module sym_map = mod._sym_map assert isinstance(ltoir, bytes) @@ -216,139 +230,291 @@ def test_object_code_load_ltoir_from_file(get_ltoir_object_code, tmp_path): assert mod_obj._handle is None # Should only be loaded when needed -def test_object_code_load_fatbin(get_saxpy_kernel): - # Use cubin as a substitute for fatbin since they have similar structure - _, mod = get_saxpy_kernel - cubin = mod._module - sym_map = mod._sym_map - assert isinstance(cubin, bytes) - mod_obj = ObjectCode.from_fatbin(cubin, symbol_mapping=sym_map) - assert mod_obj.code == cubin - assert mod_obj._code_type == "fatbin" - # fatbin supports kernel retrieval - mod_obj.get_kernel("saxpy") # force loading - - -def test_object_code_load_fatbin_from_file(get_saxpy_kernel, tmp_path): - # Use cubin as a substitute for fatbin since they have similar structure - _, mod = get_saxpy_kernel - cubin = mod._module - sym_map = mod._sym_map - assert isinstance(cubin, bytes) - fatbin_file = tmp_path / "test.fatbin" - fatbin_file.write_bytes(cubin) - mod_obj = ObjectCode.from_fatbin(str(fatbin_file), symbol_mapping=sym_map) - assert mod_obj.code == str(fatbin_file) - assert mod_obj._code_type == "fatbin" - mod_obj.get_kernel("saxpy") # force loading - - -def test_object_code_load_object(get_saxpy_kernel): - # Use cubin as a substitute for object code since they're binary formats - _, mod = get_saxpy_kernel - cubin = mod._module - sym_map = mod._sym_map - assert isinstance(cubin, bytes) - mod_obj = ObjectCode.from_object(cubin, symbol_mapping=sym_map) - assert mod_obj.code == cubin - assert mod_obj._code_type == "object" +def test_object_code_load_fatbin(get_saxpy_kernel_ltoir, tmp_path): + """ + Test fatbin loading using NVCC-generated fatbins. + TODO: Can drop NVCC from test dependency once #156 is resolved. + """ + import shutil + import subprocess + + # Check if NVCC is available + if not shutil.which("nvcc"): + pytest.skip("NVCC not available in PATH") + + # Create a simple CUDA kernel file + kernel_source = ''' +extern "C" __global__ void simple_kernel(float* data) { + int idx = threadIdx.x + blockIdx.x * blockDim.x; + data[idx] = data[idx] * 2.0f; +} +''' + + cu_file = tmp_path / "kernel.cu" + cu_file.write_text(kernel_source) + + # Get current device architecture + from cuda.core.experimental import Device + current_arch = "sm_" + "".join(f"{i}" for i in Device().compute_capability) + + # Generate fatbin for multiple architectures + archs = ["sm_75", "sm_90", current_arch] + arch_flags = " ".join(f"--gpu-architecture={arch}" for arch in set(archs)) + + fatbin_file = tmp_path / "kernel.fatbin" + + try: + # Generate fatbin using nvcc + cmd = f"nvcc --fatbin {arch_flags} -o {fatbin_file} {cu_file}" + subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True) + except subprocess.CalledProcessError as e: + pytest.skip(f"Failed to generate fatbin with nvcc: {e}") + + # Test loading fatbin from bytes (in-memory) + fatbin_bytes = fatbin_file.read_bytes() + mod_obj_mem = ObjectCode.from_fatbin(fatbin_bytes, name="fatbin_memory") + assert mod_obj_mem.code == fatbin_bytes + assert mod_obj_mem._code_type == "fatbin" + assert mod_obj_mem.name == "fatbin_memory" + + +def test_object_code_load_fatbin_from_file(get_saxpy_kernel_ltoir, tmp_path): + """ + Test fatbin loading from file path using NVCC-generated fatbins. + TODO: Can drop NVCC from test dependency once #156 is resolved. + """ + import shutil + import subprocess + + # Check if NVCC is available + if not shutil.which("nvcc"): + pytest.skip("NVCC not available in PATH") + + # Create a simple CUDA kernel file + kernel_source = ''' +extern "C" __global__ void simple_kernel(float* data) { + int idx = threadIdx.x + blockIdx.x * blockDim.x; + data[idx] = data[idx] * 2.0f; +} +''' + + cu_file = tmp_path / "kernel.cu" + cu_file.write_text(kernel_source) + + # Get current device architecture + from cuda.core.experimental import Device + current_arch = "sm_" + "".join(f"{i}" for i in Device().compute_capability) + + # Generate fatbin for multiple architectures + archs = ["sm_75", "sm_90", current_arch] + arch_flags = " ".join(f"--gpu-architecture={arch}" for arch in set(archs)) + + fatbin_file = tmp_path / "kernel.fatbin" + + try: + # Generate fatbin using nvcc + cmd = f"nvcc --fatbin {arch_flags} -o {fatbin_file} {cu_file}" + subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True) + except subprocess.CalledProcessError as e: + pytest.skip(f"Failed to generate fatbin with nvcc: {e}") + + # Test loading fatbin from file path + mod_obj_file = ObjectCode.from_fatbin(str(fatbin_file), name="fatbin_file") + assert mod_obj_file.code == str(fatbin_file) + assert mod_obj_file._code_type == "fatbin" + assert mod_obj_file.name == "fatbin_file" + + +def test_object_code_load_object(get_saxpy_kernel_ltoir, tmp_path): + """ + Test object code loading using NVCC-generated object files. + TODO: Can drop NVCC from test dependency once #156 is resolved. + """ + import shutil + import subprocess + + # Check if NVCC is available + if not shutil.which("nvcc"): + pytest.skip("NVCC not available in PATH") + + # Create a simple CUDA kernel file + kernel_source = ''' +extern "C" __global__ void simple_kernel(float* data) { + int idx = threadIdx.x + blockIdx.x * blockDim.x; + data[idx] = data[idx] * 2.0f; +} +''' + + cu_file = tmp_path / "kernel.cu" + cu_file.write_text(kernel_source) + + # Get current device architecture + from cuda.core.experimental import Device + current_arch = "sm_" + "".join(f"{i}" for i in Device().compute_capability) + + object_file = tmp_path / "kernel.o" + + try: + # Generate object file using nvcc + cmd = f"nvcc --device-c --gpu-architecture={current_arch} -o {object_file} {cu_file}" + subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True) + except subprocess.CalledProcessError as e: + pytest.skip(f"Failed to generate object file with nvcc: {e}") + + # Test loading object from bytes (in-memory) + object_bytes = object_file.read_bytes() + mod_obj_mem = ObjectCode.from_object(object_bytes, name="object_memory") + assert mod_obj_mem.code == object_bytes + assert mod_obj_mem._code_type == "object" + assert mod_obj_mem.name == "object_memory" # object code doesn't support direct kernel retrieval - assert mod_obj._handle is None # Should only be loaded when needed + assert mod_obj_mem._handle is None # Should only be loaded when needed # Test that get_kernel fails for unsupported code type with pytest.raises(RuntimeError, match=r'Unsupported code type "object"'): - mod_obj.get_kernel("saxpy") - - -def test_object_code_load_object_from_file(get_saxpy_kernel, tmp_path): - # Use cubin as a substitute for object code since they're binary formats - _, mod = get_saxpy_kernel - cubin = mod._module - sym_map = mod._sym_map - assert isinstance(cubin, bytes) - object_file = tmp_path / "test.o" - object_file.write_bytes(cubin) - mod_obj = ObjectCode.from_object(str(object_file), symbol_mapping=sym_map) - assert mod_obj.code == str(object_file) - assert mod_obj._code_type == "object" - assert mod_obj._handle is None # Should only be loaded when needed + mod_obj_mem.get_kernel("simple_kernel") -def test_object_code_load_library(get_saxpy_kernel): - # Use cubin as a substitute for library since they're binary formats - _, mod = get_saxpy_kernel - cubin = mod._module - sym_map = mod._sym_map - assert isinstance(cubin, bytes) - mod_obj = ObjectCode.from_library(cubin, symbol_mapping=sym_map) - assert mod_obj.code == cubin - assert mod_obj._code_type == "library" - # library code doesn't support direct kernel retrieval - assert mod_obj._handle is None # Should only be loaded when needed - # Test that get_kernel fails for unsupported code type - with pytest.raises(RuntimeError, match=r'Unsupported code type "library"'): - mod_obj.get_kernel("saxpy") - - -def test_object_code_load_library_from_file(get_saxpy_kernel, tmp_path): - # Use cubin as a substitute for library since they're binary formats - _, mod = get_saxpy_kernel - cubin = mod._module - sym_map = mod._sym_map - assert isinstance(cubin, bytes) - library_file = tmp_path / "test.a" - library_file.write_bytes(cubin) - mod_obj = ObjectCode.from_library(str(library_file), symbol_mapping=sym_map) - assert mod_obj.code == str(library_file) - assert mod_obj._code_type == "library" - assert mod_obj._handle is None # Should only be loaded when needed - - -def test_object_code_constructors_with_name_and_symbol_mapping(): - """Test that all from_* constructors properly set name and symbol_mapping""" - # Dummy data for testing - dummy_bytes = b"dummy_code_data" - test_name = "test_object" - test_sym_map = {"kernel1": "mangled_kernel1", "kernel2": "mangled_kernel2"} - - # Test all constructors - constructors = [ - (ObjectCode.from_cubin, "cubin"), - (ObjectCode.from_ptx, "ptx"), - (ObjectCode.from_ltoir, "ltoir"), - (ObjectCode.from_fatbin, "fatbin"), - (ObjectCode.from_object, "object"), - (ObjectCode.from_library, "library"), - ] - - for constructor, code_type in constructors: - obj = constructor(dummy_bytes, name=test_name, symbol_mapping=test_sym_map) - assert obj.name == test_name - assert obj._sym_map == test_sym_map - assert obj._code_type == code_type - assert obj.code == dummy_bytes - - -def test_object_code_constructors_default_values(): - """Test that all from_* constructors handle default values correctly""" - # Dummy data for testing - dummy_bytes = b"dummy_code_data" - - # Test all constructors with defaults - constructors = [ - (ObjectCode.from_cubin, "cubin"), - (ObjectCode.from_ptx, "ptx"), - (ObjectCode.from_ltoir, "ltoir"), - (ObjectCode.from_fatbin, "fatbin"), - (ObjectCode.from_object, "object"), - (ObjectCode.from_library, "library"), - ] - - for constructor, code_type in constructors: - obj = constructor(dummy_bytes) # Use defaults - assert obj.name == "" # Default name should be empty string - assert obj._sym_map == {} # Default symbol mapping should be empty dict - assert obj._code_type == code_type - assert obj.code == dummy_bytes +def test_object_code_load_object_from_file(get_saxpy_kernel_ltoir, tmp_path): + """ + Test object code loading from file path using NVCC-generated object files. + TODO: Can drop NVCC from test dependency once #156 is resolved. + """ + import shutil + import subprocess + + # Check if NVCC is available + if not shutil.which("nvcc"): + pytest.skip("NVCC not available in PATH") + + # Create a simple CUDA kernel file + kernel_source = ''' +extern "C" __global__ void simple_kernel(float* data) { + int idx = threadIdx.x + blockIdx.x * blockDim.x; + data[idx] = data[idx] * 2.0f; +} +''' + + cu_file = tmp_path / "kernel.cu" + cu_file.write_text(kernel_source) + + # Get current device architecture + from cuda.core.experimental import Device + current_arch = "sm_" + "".join(f"{i}" for i in Device().compute_capability) + + object_file = tmp_path / "kernel.o" + + try: + # Generate object file using nvcc + cmd = f"nvcc --device-c --gpu-architecture={current_arch} -o {object_file} {cu_file}" + subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True) + except subprocess.CalledProcessError as e: + pytest.skip(f"Failed to generate object file with nvcc: {e}") + + # Test loading object from file path + mod_obj_file = ObjectCode.from_object(str(object_file), name="object_file") + assert mod_obj_file.code == str(object_file) + assert mod_obj_file._code_type == "object" + assert mod_obj_file.name == "object_file" + assert mod_obj_file._handle is None # Should only be loaded when needed + + +def test_object_code_load_library(get_saxpy_kernel_ltoir, tmp_path): + """ + Test library loading using NVCC-generated library files. + TODO: Can drop NVCC from test dependency once #156 is resolved. + """ + import shutil + import subprocess + + # Check if NVCC is available + if not shutil.which("nvcc"): + pytest.skip("NVCC not available in PATH") + + # Create a simple CUDA kernel file + kernel_source = ''' +extern "C" __global__ void simple_kernel(float* data) { + int idx = threadIdx.x + blockIdx.x * blockDim.x; + data[idx] = data[idx] * 2.0f; +} +''' + + cu_file = tmp_path / "kernel.cu" + cu_file.write_text(kernel_source) + + # Get current device architecture + from cuda.core.experimental import Device + current_arch = "sm_" + "".join(f"{i}" for i in Device().compute_capability) + + object_file = tmp_path / "kernel.o" + library_file = tmp_path / "libkernel.a" + + try: + # Generate object file first + cmd = f"nvcc --device-c --gpu-architecture={current_arch} -o {object_file} {cu_file}" + subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True) + + # Create library from object file + cmd = f"ar rcs {library_file} {object_file}" + subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True) + except subprocess.CalledProcessError as e: + pytest.skip(f"Failed to generate library with nvcc/ar: {e}") + + # Test loading library from bytes (in-memory) + library_bytes = library_file.read_bytes() + mod_obj_mem = ObjectCode.from_library(library_bytes, name="library_memory") + assert mod_obj_mem.code == library_bytes + assert mod_obj_mem._code_type == "library" + assert mod_obj_mem.name == "library_memory" + assert mod_obj_mem._handle is None # Should only be loaded when needed + + +def test_object_code_load_library_from_file(get_saxpy_kernel_ltoir, tmp_path): + """ + Test library loading from file path using NVCC-generated library files. + TODO: Can drop NVCC from test dependency once #156 is resolved. + """ + import shutil + import subprocess + + # Check if NVCC is available + if not shutil.which("nvcc"): + pytest.skip("NVCC not available in PATH") + + # Create a simple CUDA kernel file + kernel_source = ''' +extern "C" __global__ void simple_kernel(float* data) { + int idx = threadIdx.x + blockIdx.x * blockDim.x; + data[idx] = data[idx] * 2.0f; +} +''' + + cu_file = tmp_path / "kernel.cu" + cu_file.write_text(kernel_source) + + # Get current device architecture + from cuda.core.experimental import Device + current_arch = "sm_" + "".join(f"{i}" for i in Device().compute_capability) + + object_file = tmp_path / "kernel.o" + library_file = tmp_path / "libkernel.a" + + try: + # Generate object file first + cmd = f"nvcc --device-c --gpu-architecture={current_arch} -o {object_file} {cu_file}" + subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True) + + # Create library from object file + cmd = f"ar rcs {library_file} {object_file}" + subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True) + except subprocess.CalledProcessError as e: + pytest.skip(f"Failed to generate library with nvcc/ar: {e}") + + # Test loading library from file path + mod_obj_file = ObjectCode.from_library(str(library_file), name="library_file") + assert mod_obj_file.code == str(library_file) + assert mod_obj_file._code_type == "library" + assert mod_obj_file.name == "library_file" + assert mod_obj_file._handle is None # Should only be loaded when needed def test_object_code_file_path_linker_integration(get_saxpy_kernel, tmp_path): From 19fbe2bacf9083660df33bd3463eeb54bbf1e4fb Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 11 Sep 2025 14:34:51 +0000 Subject: [PATCH 07/12] Remove self-explanatory comments and clarify if conditions in linker Co-authored-by: leofang <5534781+leofang@users.noreply.github.com> --- cuda_core/cuda/core/experimental/_linker.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_linker.py b/cuda_core/cuda/core/experimental/_linker.py index c2e9c57fec..97af99be6a 100644 --- a/cuda_core/cuda/core/experimental/_linker.py +++ b/cuda_core/cuda/core/experimental/_linker.py @@ -398,7 +398,6 @@ def _add_code_object(self, object_code: ObjectCode): with _exception_manager(self): name_str = f"{object_code.name}" if _nvjitlink and isinstance(data, bytes): - # Handle bytes input with nvjitlink _nvjitlink.add_data( self._mnff.handle, self._input_type_from_code_type(object_code._code_type), @@ -407,14 +406,12 @@ def _add_code_object(self, object_code: ObjectCode): name_str, ) elif _nvjitlink and isinstance(data, str): - # Handle file path input with nvjitlink _nvjitlink.add_file( self._mnff.handle, self._input_type_from_code_type(object_code._code_type), data, ) - elif isinstance(data, bytes): - # Handle bytes input with driver API + elif (not _nvjitlink) and isinstance(data, bytes): name_bytes = name_str.encode() handle_return( _driver.cuLinkAddData( @@ -429,8 +426,7 @@ def _add_code_object(self, object_code: ObjectCode): ) ) self._mnff.const_char_keep_alive.append(name_bytes) - elif isinstance(data, str): - # Handle file path input with driver API + elif (not _nvjitlink) and isinstance(data, str): name_bytes = name_str.encode() handle_return( _driver.cuLinkAddFile( From 68003ba5a53216d2be1d9febc1b5cc2035a7fbc9 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 17 Sep 2025 23:23:31 +0000 Subject: [PATCH 08/12] [pre-commit.ci] auto code formatting --- cuda_core/tests/test_module.py | 134 +++++++++++++++++---------------- 1 file changed, 70 insertions(+), 64 deletions(-) diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py index 5376fc1ae3..b9622a435c 100644 --- a/cuda_core/tests/test_module.py +++ b/cuda_core/tests/test_module.py @@ -237,39 +237,40 @@ def test_object_code_load_fatbin(get_saxpy_kernel_ltoir, tmp_path): """ import shutil import subprocess - + # Check if NVCC is available if not shutil.which("nvcc"): pytest.skip("NVCC not available in PATH") - + # Create a simple CUDA kernel file - kernel_source = ''' + kernel_source = """ extern "C" __global__ void simple_kernel(float* data) { int idx = threadIdx.x + blockIdx.x * blockDim.x; data[idx] = data[idx] * 2.0f; } -''' - +""" + cu_file = tmp_path / "kernel.cu" cu_file.write_text(kernel_source) - + # Get current device architecture from cuda.core.experimental import Device + current_arch = "sm_" + "".join(f"{i}" for i in Device().compute_capability) - + # Generate fatbin for multiple architectures archs = ["sm_75", "sm_90", current_arch] arch_flags = " ".join(f"--gpu-architecture={arch}" for arch in set(archs)) - + fatbin_file = tmp_path / "kernel.fatbin" - + try: # Generate fatbin using nvcc cmd = f"nvcc --fatbin {arch_flags} -o {fatbin_file} {cu_file}" subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True) except subprocess.CalledProcessError as e: pytest.skip(f"Failed to generate fatbin with nvcc: {e}") - + # Test loading fatbin from bytes (in-memory) fatbin_bytes = fatbin_file.read_bytes() mod_obj_mem = ObjectCode.from_fatbin(fatbin_bytes, name="fatbin_memory") @@ -285,39 +286,40 @@ def test_object_code_load_fatbin_from_file(get_saxpy_kernel_ltoir, tmp_path): """ import shutil import subprocess - + # Check if NVCC is available if not shutil.which("nvcc"): pytest.skip("NVCC not available in PATH") - + # Create a simple CUDA kernel file - kernel_source = ''' + kernel_source = """ extern "C" __global__ void simple_kernel(float* data) { int idx = threadIdx.x + blockIdx.x * blockDim.x; data[idx] = data[idx] * 2.0f; } -''' - +""" + cu_file = tmp_path / "kernel.cu" cu_file.write_text(kernel_source) - + # Get current device architecture from cuda.core.experimental import Device + current_arch = "sm_" + "".join(f"{i}" for i in Device().compute_capability) - + # Generate fatbin for multiple architectures archs = ["sm_75", "sm_90", current_arch] arch_flags = " ".join(f"--gpu-architecture={arch}" for arch in set(archs)) - + fatbin_file = tmp_path / "kernel.fatbin" - + try: # Generate fatbin using nvcc cmd = f"nvcc --fatbin {arch_flags} -o {fatbin_file} {cu_file}" subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True) except subprocess.CalledProcessError as e: pytest.skip(f"Failed to generate fatbin with nvcc: {e}") - + # Test loading fatbin from file path mod_obj_file = ObjectCode.from_fatbin(str(fatbin_file), name="fatbin_file") assert mod_obj_file.code == str(fatbin_file) @@ -332,35 +334,36 @@ def test_object_code_load_object(get_saxpy_kernel_ltoir, tmp_path): """ import shutil import subprocess - + # Check if NVCC is available if not shutil.which("nvcc"): pytest.skip("NVCC not available in PATH") - + # Create a simple CUDA kernel file - kernel_source = ''' + kernel_source = """ extern "C" __global__ void simple_kernel(float* data) { int idx = threadIdx.x + blockIdx.x * blockDim.x; data[idx] = data[idx] * 2.0f; } -''' - +""" + cu_file = tmp_path / "kernel.cu" cu_file.write_text(kernel_source) - + # Get current device architecture from cuda.core.experimental import Device + current_arch = "sm_" + "".join(f"{i}" for i in Device().compute_capability) - + object_file = tmp_path / "kernel.o" - + try: # Generate object file using nvcc cmd = f"nvcc --device-c --gpu-architecture={current_arch} -o {object_file} {cu_file}" subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True) except subprocess.CalledProcessError as e: pytest.skip(f"Failed to generate object file with nvcc: {e}") - + # Test loading object from bytes (in-memory) object_bytes = object_file.read_bytes() mod_obj_mem = ObjectCode.from_object(object_bytes, name="object_memory") @@ -381,35 +384,36 @@ def test_object_code_load_object_from_file(get_saxpy_kernel_ltoir, tmp_path): """ import shutil import subprocess - + # Check if NVCC is available if not shutil.which("nvcc"): pytest.skip("NVCC not available in PATH") - + # Create a simple CUDA kernel file - kernel_source = ''' + kernel_source = """ extern "C" __global__ void simple_kernel(float* data) { int idx = threadIdx.x + blockIdx.x * blockDim.x; data[idx] = data[idx] * 2.0f; } -''' - +""" + cu_file = tmp_path / "kernel.cu" cu_file.write_text(kernel_source) - + # Get current device architecture from cuda.core.experimental import Device + current_arch = "sm_" + "".join(f"{i}" for i in Device().compute_capability) - + object_file = tmp_path / "kernel.o" - + try: # Generate object file using nvcc cmd = f"nvcc --device-c --gpu-architecture={current_arch} -o {object_file} {cu_file}" subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True) except subprocess.CalledProcessError as e: pytest.skip(f"Failed to generate object file with nvcc: {e}") - + # Test loading object from file path mod_obj_file = ObjectCode.from_object(str(object_file), name="object_file") assert mod_obj_file.code == str(object_file) @@ -425,40 +429,41 @@ def test_object_code_load_library(get_saxpy_kernel_ltoir, tmp_path): """ import shutil import subprocess - + # Check if NVCC is available if not shutil.which("nvcc"): pytest.skip("NVCC not available in PATH") - + # Create a simple CUDA kernel file - kernel_source = ''' + kernel_source = """ extern "C" __global__ void simple_kernel(float* data) { int idx = threadIdx.x + blockIdx.x * blockDim.x; data[idx] = data[idx] * 2.0f; } -''' - +""" + cu_file = tmp_path / "kernel.cu" cu_file.write_text(kernel_source) - + # Get current device architecture from cuda.core.experimental import Device + current_arch = "sm_" + "".join(f"{i}" for i in Device().compute_capability) - + object_file = tmp_path / "kernel.o" library_file = tmp_path / "libkernel.a" - + try: # Generate object file first cmd = f"nvcc --device-c --gpu-architecture={current_arch} -o {object_file} {cu_file}" subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True) - + # Create library from object file cmd = f"ar rcs {library_file} {object_file}" subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True) except subprocess.CalledProcessError as e: pytest.skip(f"Failed to generate library with nvcc/ar: {e}") - + # Test loading library from bytes (in-memory) library_bytes = library_file.read_bytes() mod_obj_mem = ObjectCode.from_library(library_bytes, name="library_memory") @@ -475,40 +480,41 @@ def test_object_code_load_library_from_file(get_saxpy_kernel_ltoir, tmp_path): """ import shutil import subprocess - + # Check if NVCC is available if not shutil.which("nvcc"): pytest.skip("NVCC not available in PATH") - + # Create a simple CUDA kernel file - kernel_source = ''' + kernel_source = """ extern "C" __global__ void simple_kernel(float* data) { int idx = threadIdx.x + blockIdx.x * blockDim.x; data[idx] = data[idx] * 2.0f; } -''' - +""" + cu_file = tmp_path / "kernel.cu" cu_file.write_text(kernel_source) - + # Get current device architecture from cuda.core.experimental import Device + current_arch = "sm_" + "".join(f"{i}" for i in Device().compute_capability) - + object_file = tmp_path / "kernel.o" library_file = tmp_path / "libkernel.a" - + try: # Generate object file first cmd = f"nvcc --device-c --gpu-architecture={current_arch} -o {object_file} {cu_file}" subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True) - + # Create library from object file cmd = f"ar rcs {library_file} {object_file}" subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True) except subprocess.CalledProcessError as e: pytest.skip(f"Failed to generate library with nvcc/ar: {e}") - + # Test loading library from file path mod_obj_file = ObjectCode.from_library(str(library_file), name="library_file") assert mod_obj_file.code == str(library_file) @@ -522,14 +528,14 @@ def test_object_code_file_path_linker_integration(get_saxpy_kernel, tmp_path): _, mod = get_saxpy_kernel cubin = mod._module assert isinstance(cubin, bytes) - + # Create temporary files for different code types test_files = {} for code_type in ["cubin", "ptx", "ltoir", "fatbin", "object", "library"]: file_path = tmp_path / f"test.{code_type}" file_path.write_bytes(cubin) # Use cubin bytes as proxy for all types test_files[code_type] = str(file_path) - + # Create ObjectCode instances from file paths file_based_objects = [] for code_type, file_path in test_files.items(): @@ -545,26 +551,26 @@ def test_object_code_file_path_linker_integration(get_saxpy_kernel, tmp_path): obj = ObjectCode.from_object(file_path, name=f"file_{code_type}") elif code_type == "library": obj = ObjectCode.from_library(file_path, name=f"file_{code_type}") - + # Verify the ObjectCode was created correctly assert obj.code == file_path assert obj._code_type == code_type assert obj.name == f"file_{code_type}" assert isinstance(obj._module, str) # Should store the file path file_based_objects.append(obj) - + # Test that these ObjectCode instances can be used with Linker # Note: We can't actually link most of these types together in practice, # but we can verify the linker accepts them and handles the file path correctly from cuda.core.experimental import Linker, LinkerOptions - + # Test with ptx which should be linkable (use only PTX for actual linking) ptx_obj = None for obj in file_based_objects: if obj._code_type == "ptx": ptx_obj = obj break - + if ptx_obj is not None: # Create a simple linker test - this will test that _add_code_object # handles file paths correctly by not crashing on the file path From 62e240d9b7ea85ee5d9cf6642a226b877a208787 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Mon, 6 Oct 2025 01:03:59 +0000 Subject: [PATCH 09/12] nit: reorder test to make it easier to follow --- cuda_core/tests/test_module.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py index 51d660d781..27425dc722 100644 --- a/cuda_core/tests/test_module.py +++ b/cuda_core/tests/test_module.py @@ -150,16 +150,6 @@ def test_read_only_kernel_attributes(get_saxpy_kernel, attr, expected_type): assert isinstance(value, expected_type), f"Expected {attr} to be of type {expected_type}, but got {type(value)}" -def test_object_code_load_cubin(get_saxpy_kernel): - _, mod = get_saxpy_kernel - cubin = mod._module - sym_map = mod._sym_map - assert isinstance(cubin, bytes) - mod = ObjectCode.from_cubin(cubin, symbol_mapping=sym_map) - assert mod.code == cubin - mod.get_kernel("saxpy") # force loading - - def test_object_code_load_ptx(get_saxpy_kernel_ptx): ptx, mod = get_saxpy_kernel_ptx sym_map = mod._sym_map @@ -184,6 +174,16 @@ def test_object_code_load_ptx_from_file(get_saxpy_kernel_ptx, tmp_path): mod_obj.get_kernel("saxpy") # force loading +def test_object_code_load_cubin(get_saxpy_kernel): + _, mod = get_saxpy_kernel + cubin = mod._module + sym_map = mod._sym_map + assert isinstance(cubin, bytes) + mod = ObjectCode.from_cubin(cubin, symbol_mapping=sym_map) + assert mod.code == cubin + mod.get_kernel("saxpy") # force loading + + def test_object_code_load_cubin_from_file(get_saxpy_kernel, tmp_path): _, mod = get_saxpy_kernel cubin = mod._module From 45d0b244a17270ec90132a9e2370049aa29d058e Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Mon, 6 Oct 2025 01:12:50 +0000 Subject: [PATCH 10/12] rename fixture for clarity + purge object/library/fatbin tests for now --- cuda_core/tests/test_module.py | 420 +++------------------------------ 1 file changed, 27 insertions(+), 393 deletions(-) diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py index 27425dc722..be215c92c8 100644 --- a/cuda_core/tests/test_module.py +++ b/cuda_core/tests/test_module.py @@ -60,7 +60,7 @@ def test_object_code_init_disabled(): @pytest.fixture(scope="function") -def get_saxpy_kernel(init_cuda): +def get_saxpy_kernel_cubin(init_cuda): # prepare program prog = Program(SAXPY_KERNEL, code_type="c++") mod = prog.compile( @@ -74,6 +74,7 @@ def get_saxpy_kernel(init_cuda): @pytest.fixture(scope="function") def get_saxpy_kernel_ptx(init_cuda): + # prepare program prog = Program(SAXPY_KERNEL, code_type="c++") mod = prog.compile( "ptx", @@ -83,16 +84,6 @@ def get_saxpy_kernel_ptx(init_cuda): return ptx, mod -@pytest.fixture(scope="function") -def get_saxpy_object_code(init_cuda): - prog = Program(SAXPY_KERNEL, code_type="c++") - mod = prog.compile( - "cubin", - name_expressions=("saxpy", "saxpy"), - ) - return mod - - @pytest.fixture(scope="function") def get_saxpy_kernel_ltoir(init_cuda): # Create LTOIR code using link-time optimization @@ -137,8 +128,8 @@ def test_get_kernel(init_cuda): ("cluster_scheduling_policy_preference", int), ], ) -def test_read_only_kernel_attributes(get_saxpy_kernel, attr, expected_type): - kernel, _ = get_saxpy_kernel +def test_read_only_kernel_attributes(get_saxpy_kernel_cubin, attr, expected_type): + kernel, _ = get_saxpy_kernel_cubin method = getattr(kernel.attributes, attr) # get the value without providing a device ordinal value = method() @@ -174,8 +165,8 @@ def test_object_code_load_ptx_from_file(get_saxpy_kernel_ptx, tmp_path): mod_obj.get_kernel("saxpy") # force loading -def test_object_code_load_cubin(get_saxpy_kernel): - _, mod = get_saxpy_kernel +def test_object_code_load_cubin(get_saxpy_kernel_cubin): + _, mod = get_saxpy_kernel_cubin cubin = mod._module sym_map = mod._sym_map assert isinstance(cubin, bytes) @@ -184,8 +175,8 @@ def test_object_code_load_cubin(get_saxpy_kernel): mod.get_kernel("saxpy") # force loading -def test_object_code_load_cubin_from_file(get_saxpy_kernel, tmp_path): - _, mod = get_saxpy_kernel +def test_object_code_load_cubin_from_file(get_saxpy_kernel_cubin, tmp_path): + _, mod = get_saxpy_kernel_cubin cubin = mod._module sym_map = mod._sym_map assert isinstance(cubin, bytes) @@ -210,7 +201,7 @@ def test_object_code_load_ltoir(get_saxpy_kernel_ltoir): assert mod_obj.code == ltoir assert mod_obj._code_type == "ltoir" # ltoir doesn't support kernel retrieval directly as it's used for linking - assert mod_obj._handle is None # Should only be loaded when needed + assert mod_obj._handle is None # Test that get_kernel fails for unsupported code type with pytest.raises(RuntimeError, match=r'Unsupported code type "ltoir"'): mod_obj.get_kernel("saxpy") @@ -226,369 +217,12 @@ def test_object_code_load_ltoir_from_file(get_saxpy_kernel_ltoir, tmp_path): mod_obj = ObjectCode.from_ltoir(str(ltoir_file), symbol_mapping=sym_map) assert mod_obj.code == str(ltoir_file) assert mod_obj._code_type == "ltoir" - assert mod_obj._handle is None # Should only be loaded when needed - - -def test_object_code_load_fatbin(get_saxpy_kernel_ltoir, tmp_path): - """ - Test fatbin loading using NVCC-generated fatbins. - TODO: Can drop NVCC from test dependency once #156 is resolved. - """ - import shutil - import subprocess - - # Check if NVCC is available - if not shutil.which("nvcc"): - pytest.skip("NVCC not available in PATH") - - # Create a simple CUDA kernel file - kernel_source = """ -extern "C" __global__ void simple_kernel(float* data) { - int idx = threadIdx.x + blockIdx.x * blockDim.x; - data[idx] = data[idx] * 2.0f; -} -""" - - cu_file = tmp_path / "kernel.cu" - cu_file.write_text(kernel_source) - - # Get current device architecture - from cuda.core.experimental import Device - - current_arch = "sm_" + "".join(f"{i}" for i in Device().compute_capability) - - # Generate fatbin for multiple architectures - archs = ["sm_75", "sm_90", current_arch] - arch_flags = " ".join(f"--gpu-architecture={arch}" for arch in set(archs)) - - fatbin_file = tmp_path / "kernel.fatbin" - - try: - # Generate fatbin using nvcc - cmd = f"nvcc --fatbin {arch_flags} -o {fatbin_file} {cu_file}" - subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True) - except subprocess.CalledProcessError as e: - pytest.skip(f"Failed to generate fatbin with nvcc: {e}") - - # Test loading fatbin from bytes (in-memory) - fatbin_bytes = fatbin_file.read_bytes() - mod_obj_mem = ObjectCode.from_fatbin(fatbin_bytes, name="fatbin_memory") - assert mod_obj_mem.code == fatbin_bytes - assert mod_obj_mem._code_type == "fatbin" - assert mod_obj_mem.name == "fatbin_memory" - - -def test_object_code_load_fatbin_from_file(get_saxpy_kernel_ltoir, tmp_path): - """ - Test fatbin loading from file path using NVCC-generated fatbins. - TODO: Can drop NVCC from test dependency once #156 is resolved. - """ - import shutil - import subprocess - - # Check if NVCC is available - if not shutil.which("nvcc"): - pytest.skip("NVCC not available in PATH") - - # Create a simple CUDA kernel file - kernel_source = """ -extern "C" __global__ void simple_kernel(float* data) { - int idx = threadIdx.x + blockIdx.x * blockDim.x; - data[idx] = data[idx] * 2.0f; -} -""" - - cu_file = tmp_path / "kernel.cu" - cu_file.write_text(kernel_source) - - # Get current device architecture - from cuda.core.experimental import Device - - current_arch = "sm_" + "".join(f"{i}" for i in Device().compute_capability) - - # Generate fatbin for multiple architectures - archs = ["sm_75", "sm_90", current_arch] - arch_flags = " ".join(f"--gpu-architecture={arch}" for arch in set(archs)) - - fatbin_file = tmp_path / "kernel.fatbin" - - try: - # Generate fatbin using nvcc - cmd = f"nvcc --fatbin {arch_flags} -o {fatbin_file} {cu_file}" - subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True) - except subprocess.CalledProcessError as e: - pytest.skip(f"Failed to generate fatbin with nvcc: {e}") - - # Test loading fatbin from file path - mod_obj_file = ObjectCode.from_fatbin(str(fatbin_file), name="fatbin_file") - assert mod_obj_file.code == str(fatbin_file) - assert mod_obj_file._code_type == "fatbin" - assert mod_obj_file.name == "fatbin_file" - - -def test_object_code_load_object(get_saxpy_kernel_ltoir, tmp_path): - """ - Test object code loading using NVCC-generated object files. - TODO: Can drop NVCC from test dependency once #156 is resolved. - """ - import shutil - import subprocess - - # Check if NVCC is available - if not shutil.which("nvcc"): - pytest.skip("NVCC not available in PATH") - - # Create a simple CUDA kernel file - kernel_source = """ -extern "C" __global__ void simple_kernel(float* data) { - int idx = threadIdx.x + blockIdx.x * blockDim.x; - data[idx] = data[idx] * 2.0f; -} -""" - - cu_file = tmp_path / "kernel.cu" - cu_file.write_text(kernel_source) - - # Get current device architecture - from cuda.core.experimental import Device - - current_arch = "sm_" + "".join(f"{i}" for i in Device().compute_capability) - - object_file = tmp_path / "kernel.o" - - try: - # Generate object file using nvcc - cmd = f"nvcc --device-c --gpu-architecture={current_arch} -o {object_file} {cu_file}" - subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True) - except subprocess.CalledProcessError as e: - pytest.skip(f"Failed to generate object file with nvcc: {e}") - - # Test loading object from bytes (in-memory) - object_bytes = object_file.read_bytes() - mod_obj_mem = ObjectCode.from_object(object_bytes, name="object_memory") - assert mod_obj_mem.code == object_bytes - assert mod_obj_mem._code_type == "object" - assert mod_obj_mem.name == "object_memory" - # object code doesn't support direct kernel retrieval - assert mod_obj_mem._handle is None # Should only be loaded when needed - # Test that get_kernel fails for unsupported code type - with pytest.raises(RuntimeError, match=r'Unsupported code type "object"'): - mod_obj_mem.get_kernel("simple_kernel") - - -def test_object_code_load_object_from_file(get_saxpy_kernel_ltoir, tmp_path): - """ - Test object code loading from file path using NVCC-generated object files. - TODO: Can drop NVCC from test dependency once #156 is resolved. - """ - import shutil - import subprocess - - # Check if NVCC is available - if not shutil.which("nvcc"): - pytest.skip("NVCC not available in PATH") - - # Create a simple CUDA kernel file - kernel_source = """ -extern "C" __global__ void simple_kernel(float* data) { - int idx = threadIdx.x + blockIdx.x * blockDim.x; - data[idx] = data[idx] * 2.0f; -} -""" - - cu_file = tmp_path / "kernel.cu" - cu_file.write_text(kernel_source) - - # Get current device architecture - from cuda.core.experimental import Device - - current_arch = "sm_" + "".join(f"{i}" for i in Device().compute_capability) - - object_file = tmp_path / "kernel.o" - - try: - # Generate object file using nvcc - cmd = f"nvcc --device-c --gpu-architecture={current_arch} -o {object_file} {cu_file}" - subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True) - except subprocess.CalledProcessError as e: - pytest.skip(f"Failed to generate object file with nvcc: {e}") - - # Test loading object from file path - mod_obj_file = ObjectCode.from_object(str(object_file), name="object_file") - assert mod_obj_file.code == str(object_file) - assert mod_obj_file._code_type == "object" - assert mod_obj_file.name == "object_file" - assert mod_obj_file._handle is None # Should only be loaded when needed - - -def test_object_code_load_library(get_saxpy_kernel_ltoir, tmp_path): - """ - Test library loading using NVCC-generated library files. - TODO: Can drop NVCC from test dependency once #156 is resolved. - """ - import shutil - import subprocess - - # Check if NVCC is available - if not shutil.which("nvcc"): - pytest.skip("NVCC not available in PATH") - - # Create a simple CUDA kernel file - kernel_source = """ -extern "C" __global__ void simple_kernel(float* data) { - int idx = threadIdx.x + blockIdx.x * blockDim.x; - data[idx] = data[idx] * 2.0f; -} -""" - - cu_file = tmp_path / "kernel.cu" - cu_file.write_text(kernel_source) - - # Get current device architecture - from cuda.core.experimental import Device - - current_arch = "sm_" + "".join(f"{i}" for i in Device().compute_capability) - - object_file = tmp_path / "kernel.o" - library_file = tmp_path / "libkernel.a" - - try: - # Generate object file first - cmd = f"nvcc --device-c --gpu-architecture={current_arch} -o {object_file} {cu_file}" - subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True) - - # Create library from object file - cmd = f"ar rcs {library_file} {object_file}" - subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True) - except subprocess.CalledProcessError as e: - pytest.skip(f"Failed to generate library with nvcc/ar: {e}") - - # Test loading library from bytes (in-memory) - library_bytes = library_file.read_bytes() - mod_obj_mem = ObjectCode.from_library(library_bytes, name="library_memory") - assert mod_obj_mem.code == library_bytes - assert mod_obj_mem._code_type == "library" - assert mod_obj_mem.name == "library_memory" - assert mod_obj_mem._handle is None # Should only be loaded when needed - - -def test_object_code_load_library_from_file(get_saxpy_kernel_ltoir, tmp_path): - """ - Test library loading from file path using NVCC-generated library files. - TODO: Can drop NVCC from test dependency once #156 is resolved. - """ - import shutil - import subprocess - - # Check if NVCC is available - if not shutil.which("nvcc"): - pytest.skip("NVCC not available in PATH") - - # Create a simple CUDA kernel file - kernel_source = """ -extern "C" __global__ void simple_kernel(float* data) { - int idx = threadIdx.x + blockIdx.x * blockDim.x; - data[idx] = data[idx] * 2.0f; -} -""" - - cu_file = tmp_path / "kernel.cu" - cu_file.write_text(kernel_source) - - # Get current device architecture - from cuda.core.experimental import Device - - current_arch = "sm_" + "".join(f"{i}" for i in Device().compute_capability) - - object_file = tmp_path / "kernel.o" - library_file = tmp_path / "libkernel.a" - - try: - # Generate object file first - cmd = f"nvcc --device-c --gpu-architecture={current_arch} -o {object_file} {cu_file}" - subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True) - - # Create library from object file - cmd = f"ar rcs {library_file} {object_file}" - subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True) - except subprocess.CalledProcessError as e: - pytest.skip(f"Failed to generate library with nvcc/ar: {e}") - - # Test loading library from file path - mod_obj_file = ObjectCode.from_library(str(library_file), name="library_file") - assert mod_obj_file.code == str(library_file) - assert mod_obj_file._code_type == "library" - assert mod_obj_file.name == "library_file" - assert mod_obj_file._handle is None # Should only be loaded when needed - + # ltoir doesn't support kernel retrieval directly as it's used for linking + assert mod_obj._handle is None -def test_object_code_file_path_linker_integration(get_saxpy_kernel, tmp_path): - """Test that ObjectCode created from file paths works with the Linker""" - _, mod = get_saxpy_kernel - cubin = mod._module - assert isinstance(cubin, bytes) - # Create temporary files for different code types - test_files = {} - for code_type in ["cubin", "ptx", "ltoir", "fatbin", "object", "library"]: - file_path = tmp_path / f"test.{code_type}" - file_path.write_bytes(cubin) # Use cubin bytes as proxy for all types - test_files[code_type] = str(file_path) - - # Create ObjectCode instances from file paths - file_based_objects = [] - for code_type, file_path in test_files.items(): - if code_type == "cubin": - obj = ObjectCode.from_cubin(file_path, name=f"file_{code_type}") - elif code_type == "ptx": - obj = ObjectCode.from_ptx(file_path, name=f"file_{code_type}") - elif code_type == "ltoir": - obj = ObjectCode.from_ltoir(file_path, name=f"file_{code_type}") - elif code_type == "fatbin": - obj = ObjectCode.from_fatbin(file_path, name=f"file_{code_type}") - elif code_type == "object": - obj = ObjectCode.from_object(file_path, name=f"file_{code_type}") - elif code_type == "library": - obj = ObjectCode.from_library(file_path, name=f"file_{code_type}") - - # Verify the ObjectCode was created correctly - assert obj.code == file_path - assert obj._code_type == code_type - assert obj.name == f"file_{code_type}" - assert isinstance(obj._module, str) # Should store the file path - file_based_objects.append(obj) - - # Test that these ObjectCode instances can be used with Linker - # Note: We can't actually link most of these types together in practice, - # but we can verify the linker accepts them and handles the file path correctly - from cuda.core.experimental import Linker, LinkerOptions - - # Test with ptx which should be linkable (use only PTX for actual linking) - ptx_obj = None - for obj in file_based_objects: - if obj._code_type == "ptx": - ptx_obj = obj - break - - if ptx_obj is not None: - # Create a simple linker test - this will test that _add_code_object - # handles file paths correctly by not crashing on the file path - try: - arch = "sm_" + "".join(f"{i}" for i in Device().compute_capability) - options = LinkerOptions(arch=arch) - # This should not crash - it should handle the file path in _add_code_object - linker = Linker(ptx_obj, options=options) - # We don't need to actually link since that might fail due to content, - # but creating the linker tests our file path handling - assert linker is not None - except Exception as e: - # If it fails, it should be due to content issues, not file path handling - # The key is that it should not fail with "Expected type bytes, but got str" - assert "Expected type bytes, but got str" not in str(e), f"File path handling failed: {e}" - - -def test_saxpy_arguments(get_saxpy_kernel, cuda12_4_prerequisite_check): - krn, _ = get_saxpy_kernel +def test_saxpy_arguments(get_saxpy_kernel_cubin, cuda12_4_prerequisite_check): + krn, _ = get_saxpy_kernel_cubin if cuda12_4_prerequisite_check: assert krn.num_arguments == 5 @@ -666,8 +300,8 @@ def test_num_args_error_handling(deinit_all_contexts_function, cuda12_4_prerequi @pytest.mark.parametrize("block_size", [32, 64, 96, 120, 128, 256]) @pytest.mark.parametrize("smem_size_per_block", [0, 32, 4096]) -def test_occupancy_max_active_block_per_multiprocessor(get_saxpy_kernel, block_size, smem_size_per_block): - kernel, _ = get_saxpy_kernel +def test_occupancy_max_active_block_per_multiprocessor(get_saxpy_kernel_cubin, block_size, smem_size_per_block): + kernel, _ = get_saxpy_kernel_cubin dev_props = Device().properties assert block_size <= dev_props.max_threads_per_block assert smem_size_per_block <= dev_props.max_shared_memory_per_block @@ -683,9 +317,9 @@ def test_occupancy_max_active_block_per_multiprocessor(get_saxpy_kernel, block_s @pytest.mark.parametrize("block_size_limit", [32, 64, 96, 120, 128, 256, 0]) @pytest.mark.parametrize("smem_size_per_block", [0, 32, 4096]) -def test_occupancy_max_potential_block_size_constant(get_saxpy_kernel, block_size_limit, smem_size_per_block): +def test_occupancy_max_potential_block_size_constant(get_saxpy_kernel_cubin, block_size_limit, smem_size_per_block): """Tests use case when shared memory needed is independent on the block size""" - kernel, _ = get_saxpy_kernel + kernel, _ = get_saxpy_kernel_cubin dev_props = Device().properties assert block_size_limit <= dev_props.max_threads_per_block assert smem_size_per_block <= dev_props.max_shared_memory_per_block @@ -710,9 +344,9 @@ def test_occupancy_max_potential_block_size_constant(get_saxpy_kernel, block_siz @pytest.mark.skipif(numba is None, reason="Test requires numba to be installed") @pytest.mark.parametrize("block_size_limit", [32, 64, 96, 120, 128, 277, 0]) -def test_occupancy_max_potential_block_size_b2dsize(get_saxpy_kernel, block_size_limit): +def test_occupancy_max_potential_block_size_b2dsize(get_saxpy_kernel_cubin, block_size_limit): """Tests use case when shared memory needed depends on the block size""" - kernel, _ = get_saxpy_kernel + kernel, _ = get_saxpy_kernel_cubin def shared_memory_needed(block_size: numba.intc) -> numba.size_t: "Size of dynamic shared memory needed by kernel of this block size" @@ -737,8 +371,8 @@ def shared_memory_needed(block_size: numba.intc) -> numba.size_t: @pytest.mark.parametrize("num_blocks_per_sm, block_size", [(4, 32), (2, 64), (2, 96), (3, 120), (2, 128), (1, 256)]) -def test_occupancy_available_dynamic_shared_memory_per_block(get_saxpy_kernel, num_blocks_per_sm, block_size): - kernel, _ = get_saxpy_kernel +def test_occupancy_available_dynamic_shared_memory_per_block(get_saxpy_kernel_cubin, num_blocks_per_sm, block_size): + kernel, _ = get_saxpy_kernel_cubin dev_props = Device().properties assert block_size <= dev_props.max_threads_per_block assert num_blocks_per_sm * block_size <= dev_props.max_threads_per_multiprocessor @@ -748,8 +382,8 @@ def test_occupancy_available_dynamic_shared_memory_per_block(get_saxpy_kernel, n @pytest.mark.parametrize("cluster", [None, 2]) -def test_occupancy_max_active_clusters(get_saxpy_kernel, cluster): - kernel, _ = get_saxpy_kernel +def test_occupancy_max_active_clusters(get_saxpy_kernel_cubin, cluster): + kernel, _ = get_saxpy_kernel_cubin dev = Device() if dev.compute_capability < (9, 0): pytest.skip("Device with compute capability 90 or higher is required for cluster support") @@ -763,8 +397,8 @@ def test_occupancy_max_active_clusters(get_saxpy_kernel, cluster): assert max_active_clusters >= 0 -def test_occupancy_max_potential_cluster_size(get_saxpy_kernel): - kernel, _ = get_saxpy_kernel +def test_occupancy_max_potential_cluster_size(get_saxpy_kernel_cubin): + kernel, _ = get_saxpy_kernel_cubin dev = Device() if dev.compute_capability < (9, 0): pytest.skip("Device with compute capability 90 or higher is required for cluster support") @@ -778,8 +412,8 @@ def test_occupancy_max_potential_cluster_size(get_saxpy_kernel): assert max_potential_cluster_size >= 0 -def test_module_serialization_roundtrip(get_saxpy_kernel): - _, objcode = get_saxpy_kernel +def test_module_serialization_roundtrip(get_saxpy_kernel_cubin): + _, objcode = get_saxpy_kernel_cubin result = pickle.loads(pickle.dumps(objcode)) # noqa: S403, S301 assert isinstance(result, ObjectCode) From 3665e4189271bd559d95a89cc99eca38ace981c4 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Mon, 6 Oct 2025 01:21:23 +0000 Subject: [PATCH 11/12] minor fixes --- cuda_core/tests/test_module.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py index be215c92c8..e3198e7902 100644 --- a/cuda_core/tests/test_module.py +++ b/cuda_core/tests/test_module.py @@ -67,7 +67,6 @@ def get_saxpy_kernel_cubin(init_cuda): "cubin", name_expressions=("saxpy", "saxpy"), ) - # run in single precision return mod.get_kernel("saxpy"), mod @@ -154,9 +153,9 @@ def test_object_code_load_ptx(get_saxpy_kernel_ptx): def test_object_code_load_ptx_from_file(get_saxpy_kernel_ptx, tmp_path): ptx, mod = get_saxpy_kernel_ptx sym_map = mod._sym_map - assert isinstance(ptx, str) + assert isinstance(ptx, bytes) ptx_file = tmp_path / "test.ptx" - ptx_file.write_text(ptx) + ptx_file.write_bytes(ptx) mod_obj = ObjectCode.from_ptx(str(ptx_file), symbol_mapping=sym_map) assert mod_obj.code == str(ptx_file) assert mod_obj._code_type == "ptx" @@ -187,8 +186,8 @@ def test_object_code_load_cubin_from_file(get_saxpy_kernel_cubin, tmp_path): mod.get_kernel("saxpy") # force loading -def test_object_code_handle(get_saxpy_object_code): - mod = get_saxpy_object_code +def test_object_code_handle(get_saxpy_kernel_cubin): + _, mod = get_saxpy_kernel_cubin assert mod.handle is not None From 727ead575ec530c6f935a2dbabe8629d979a2d88 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Mon, 6 Oct 2025 01:30:28 +0000 Subject: [PATCH 12/12] make code_type public --- cuda_core/cuda/core/experimental/_module.py | 5 +++++ cuda_core/docs/source/release/0.X.Y-notes.rst | 1 + cuda_core/tests/test_module.py | 8 ++++---- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_module.py b/cuda_core/cuda/core/experimental/_module.py index 71293be4d1..2c7ea3a156 100644 --- a/cuda_core/cuda/core/experimental/_module.py +++ b/cuda_core/cuda/core/experimental/_module.py @@ -666,6 +666,11 @@ def name(self) -> str: """Return a human-readable name of this code object.""" return self._name + @property + def code_type(self) -> str: + """Return the type of the underlying code object.""" + return self._code_type + @property @precondition(_lazy_load_module) def handle(self): diff --git a/cuda_core/docs/source/release/0.X.Y-notes.rst b/cuda_core/docs/source/release/0.X.Y-notes.rst index e87cbdee31..2fb4093214 100644 --- a/cuda_core/docs/source/release/0.X.Y-notes.rst +++ b/cuda_core/docs/source/release/0.X.Y-notes.rst @@ -32,6 +32,7 @@ New features - CUDA 13.x testing support through new ``test-cu13`` dependency group. - Stream-ordered memory allocation can now be shared on Linux via :class:`DeviceMemoryResource`. - Added NVVM IR support to :class:`Program`. NVVM IR is now understood with ``code_type="nvvm"``. +- Added an :attr:`ObjectCode.code_type` attribute for querying the code type. New examples diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py index e3198e7902..49df966c08 100644 --- a/cuda_core/tests/test_module.py +++ b/cuda_core/tests/test_module.py @@ -158,7 +158,7 @@ def test_object_code_load_ptx_from_file(get_saxpy_kernel_ptx, tmp_path): ptx_file.write_bytes(ptx) mod_obj = ObjectCode.from_ptx(str(ptx_file), symbol_mapping=sym_map) assert mod_obj.code == str(ptx_file) - assert mod_obj._code_type == "ptx" + assert mod_obj.code_type == "ptx" if not Program._can_load_generated_ptx(): pytest.skip("PTX version too new for current driver") mod_obj.get_kernel("saxpy") # force loading @@ -198,7 +198,7 @@ def test_object_code_load_ltoir(get_saxpy_kernel_ltoir): assert isinstance(ltoir, bytes) mod_obj = ObjectCode.from_ltoir(ltoir, symbol_mapping=sym_map) assert mod_obj.code == ltoir - assert mod_obj._code_type == "ltoir" + assert mod_obj.code_type == "ltoir" # ltoir doesn't support kernel retrieval directly as it's used for linking assert mod_obj._handle is None # Test that get_kernel fails for unsupported code type @@ -215,7 +215,7 @@ def test_object_code_load_ltoir_from_file(get_saxpy_kernel_ltoir, tmp_path): ltoir_file.write_bytes(ltoir) mod_obj = ObjectCode.from_ltoir(str(ltoir_file), symbol_mapping=sym_map) assert mod_obj.code == str(ltoir_file) - assert mod_obj._code_type == "ltoir" + assert mod_obj.code_type == "ltoir" # ltoir doesn't support kernel retrieval directly as it's used for linking assert mod_obj._handle is None @@ -418,4 +418,4 @@ def test_module_serialization_roundtrip(get_saxpy_kernel_cubin): assert isinstance(result, ObjectCode) assert objcode.code == result.code assert objcode._sym_map == result._sym_map - assert objcode._code_type == result._code_type + assert objcode.code_type == result.code_type