From e06bee67b3d7f64fb8867fce75a40bb0c685eb7c Mon Sep 17 00:00:00 2001 From: ksimpson Date: Tue, 4 Feb 2025 14:21:51 -0800 Subject: [PATCH 1/8] remove jit options from lazy module load --- cuda_core/cuda/core/experimental/_module.py | 26 ++------------------- 1 file changed, 2 insertions(+), 24 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_module.py b/cuda_core/cuda/core/experimental/_module.py index 36178f5d71..4ac0c33b7f 100644 --- a/cuda_core/cuda/core/experimental/_module.py +++ b/cuda_core/cuda/core/experimental/_module.py @@ -128,37 +128,15 @@ def __init__(self, module, code_type, jit_options=None, *, symbol_mapping=None): def _lazy_load_module(self, *args, **kwargs): if self._handle is not None: return - jit_options = self._jit_options module = self._module if isinstance(module, str): - # TODO: this option is only taken by the new library APIs, but we have - # a bug that we can't easily support it just yet (NVIDIA/cuda-python#73). - if jit_options is not None: - raise ValueError self._handle = handle_return(self._loader["file"](module)) else: assert isinstance(module, bytes) - if jit_options is None: - jit_options = {} if self._backend_version == "new": - args = ( - module, - list(jit_options.keys()), - list(jit_options.values()), - len(jit_options), - # TODO: support library options - [], - [], - 0, - ) + self._handle = handle_return(self._loader["data"](module, [], [], 0, [], [], 0)) else: # "old" backend - args = ( - module, - len(jit_options), - list(jit_options.keys()), - list(jit_options.values()), - ) - self._handle = handle_return(self._loader["data"](*args)) + self._handle = handle_return(self._loader["data"](module, 0, [], [])) @precondition(_lazy_load_module) def get_kernel(self, name): From f2d3d85a0bfdda009835f0d0faaa3d7f25674aa0 Mon Sep 17 00:00:00 2001 From: ksimpson Date: Tue, 4 Feb 2025 14:34:20 -0800 Subject: [PATCH 2/8] add note to release notes --- cuda_core/docs/source/release/0.2.0-notes.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cuda_core/docs/source/release/0.2.0-notes.rst b/cuda_core/docs/source/release/0.2.0-notes.rst index 0a34f825a9..4fdb7dbfa7 100644 --- a/cuda_core/docs/source/release/0.2.0-notes.rst +++ b/cuda_core/docs/source/release/0.2.0-notes.rst @@ -3,7 +3,7 @@ ``cuda.core`` 0.2.0 Release Notes ================================= -Released on , 2024 +Released on , 2025 Highlights ---------- @@ -20,3 +20,4 @@ Breaking Changes - Change ``__cuda_stream__`` from attribute to method - The :meth:`~Program.compile` method no longer accepts the `options` argument. Instead, you can optionally pass an instance of :class:`~ProgramOptions` to the constructor of :obj:`~Program`. +- The internal constructor of :class:`~ObjectCode` no longer accepts the jit_options argument. Options are provided to upstream :class:`~ProgramOptions` or :class:`~LinkerOptions` instead. From 9bb4651c6c1c040eef9b1577a5c52059464f1709 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sun, 16 Feb 2025 05:55:08 +0000 Subject: [PATCH 3/8] organize ObjectCode __init__ --- cuda_core/cuda/core/experimental/_linker.py | 2 +- cuda_core/cuda/core/experimental/_module.py | 47 ++++++++++++-------- cuda_core/cuda/core/experimental/_program.py | 4 +- 3 files changed, 32 insertions(+), 21 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_linker.py b/cuda_core/cuda/core/experimental/_linker.py index 7736d7b2d2..2f84f3502a 100644 --- a/cuda_core/cuda/core/experimental/_linker.py +++ b/cuda_core/cuda/core/experimental/_linker.py @@ -439,7 +439,7 @@ def link(self, target_type) -> ObjectCode: addr, size = handle_return(_driver.cuLinkComplete(self._mnff.handle)) code = (ctypes.c_char * size).from_address(addr) - return ObjectCode(bytes(code), target_type) + return ObjectCode._init(bytes(code), target_type) def get_error_log(self) -> str: """Get the error log generated by the linker. diff --git a/cuda_core/cuda/core/experimental/_module.py b/cuda_core/cuda/core/experimental/_module.py index 0687dc69e5..d713eb9fca 100644 --- a/cuda_core/cuda/core/experimental/_module.py +++ b/cuda_core/cuda/core/experimental/_module.py @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE - +from typing import Union from warnings import warn from cuda.core.experimental._utils import driver, get_binding_version, handle_return, precondition @@ -220,6 +220,12 @@ class ObjectCode: Loads the module library with specified module code and JIT options. + Note + ---- + The public constructor assumes that ``module`` is of code type "cubin". + For all other possible code types (ex: "ptx"), only :class:`~cuda.core.experimental.Program` + accepts them and returns an `ObjectCode` instance with its ``compile`` method. + Note ---- Usage under CUDA 11.x will only load to the current device @@ -228,32 +234,32 @@ class ObjectCode: Parameters ---------- module : Union[bytes, str] - Either a bytes object containing the module to load, or - a file path string containing that module for loading. - code_type : Any - String of the compiled type. - Supported options are "ptx", "cubin", "ltoir" and "fatbin". - jit_options : Optional - Mapping of JIT options to use during module loading. - (Default to no options) - symbol_mapping : Optional - Keyword argument dictionary specifying how symbol names - should be mapped before trying to retrieve them. - (Default to no mappings) - + Either a bytes object containing the cubin to load, or + a file path string pointing to the cubin to load. """ - __slots__ = ("_handle", "_backend_version", "_jit_options", "_code_type", "_module", "_loader", "_sym_map") + __slots__ = ("_handle", "_backend_version", "_code_type", "_module", "_loader", "_sym_map") _supported_code_type = ("cubin", "ptx", "ltoir", "fatbin") - def __init__(self, module, code_type, jit_options=None, *, symbol_mapping=None): + def __init__(self, module: Union[bytes, str]): + _lazy_init() + + # handle is assigned during _lazy_load + self._handle = None + self._backend_version = "new" if (_py_major_ver >= 12 and _driver_ver >= 12000) else "old" + self._loader = _backend[self._backend_version] + self._code_type = "cubin" + self._module = module + self._sym_map = {} + + def _init(module, code_type, *, symbol_mapping=None): + self = ObjectCode.__new__(ObjectCode) if code_type not in self._supported_code_type: raise ValueError _lazy_init() # handle is assigned during _lazy_load self._handle = None - self._jit_options = jit_options self._backend_version = "new" if (_py_major_ver >= 12 and _driver_ver >= 12000) else "old" self._loader = _backend[self._backend_version] @@ -262,6 +268,8 @@ def __init__(self, module, code_type, jit_options=None, *, symbol_mapping=None): self._module = module self._sym_map = {} if symbol_mapping is None else symbol_mapping + return self + # TODO: do we want to unload in a finalizer? Probably not.. def _lazy_load_module(self, *args, **kwargs): @@ -269,7 +277,10 @@ def _lazy_load_module(self, *args, **kwargs): return module = self._module if isinstance(module, str): - self._handle = handle_return(self._loader["file"](module)) + if self._backend_version == "new": + self._handle = handle_return(self._loader["file"](module, [], [], 0, [], [], 0)) + else: # "old" backend + self._handle = handle_return(self._loader["file"](module)) else: assert isinstance(module, bytes) if self._backend_version == "new": diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py index f938895ed7..b1fb0d90f9 100644 --- a/cuda_core/cuda/core/experimental/_program.py +++ b/cuda_core/cuda/core/experimental/_program.py @@ -386,7 +386,7 @@ def __init__(self, code, code_type, options: ProgramOptions = None): if not isinstance(code, str): raise TypeError("ptx Program expects code argument to be a string") self._linker = Linker( - ObjectCode(code.encode(), code_type), options=self._translate_program_options(options) + ObjectCode._init(code.encode(), code_type), options=self._translate_program_options(options) ) self._backend = "linker" else: @@ -472,7 +472,7 @@ def compile(self, target_type, name_expressions=(), logs=None): handle_return(nvrtc.nvrtcGetProgramLog(self._mnff.handle, log), handle=self._mnff.handle) logs.write(log.decode()) - return ObjectCode(data, target_type, symbol_mapping=symbol_mapping) + return ObjectCode._init(data, target_type, symbol_mapping=symbol_mapping) if self._backend == "linker": return self._linker.link(target_type) From 5349bd19b43f11efcb1bbd3936aaf009636fd6b9 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sun, 16 Feb 2025 06:30:32 +0000 Subject: [PATCH 4/8] expose ObjectCode to public + fix file loading --- cuda_core/cuda/core/experimental/__init__.py | 1 + cuda_core/cuda/core/experimental/_module.py | 16 +++++++----- cuda_core/docs/source/release/0.2.0-notes.rst | 18 ++++++++----- cuda_core/tests/test_module.py | 26 ++++++++++++++++--- 4 files changed, 45 insertions(+), 16 deletions(-) diff --git a/cuda_core/cuda/core/experimental/__init__.py b/cuda_core/cuda/core/experimental/__init__.py index 3db9e8abb4..6e289d49b3 100644 --- a/cuda_core/cuda/core/experimental/__init__.py +++ b/cuda_core/cuda/core/experimental/__init__.py @@ -7,6 +7,7 @@ from cuda.core.experimental._event import EventOptions from cuda.core.experimental._launcher import LaunchConfig, launch from cuda.core.experimental._linker import Linker, LinkerOptions +from cuda.core.experimental._module import ObjectCode from cuda.core.experimental._program import Program, ProgramOptions from cuda.core.experimental._stream import Stream, StreamOptions from cuda.core.experimental._system import System diff --git a/cuda_core/cuda/core/experimental/_module.py b/cuda_core/cuda/core/experimental/_module.py index d713eb9fca..3c55bccdb9 100644 --- a/cuda_core/cuda/core/experimental/_module.py +++ b/cuda_core/cuda/core/experimental/_module.py @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE -from typing import Union +from typing import Optional, Union from warnings import warn from cuda.core.experimental._utils import driver, get_binding_version, handle_return, precondition @@ -236,12 +236,16 @@ class ObjectCode: module : Union[bytes, str] Either a bytes object containing the cubin to load, or a file path string pointing to the cubin to load. + symbol_mapping : Optional[dict] + A dictionary specifying how the unmangled symbol names (as keys) + should be mapped to the mangled names before trying to retrieve + them (default to no mappings). """ __slots__ = ("_handle", "_backend_version", "_code_type", "_module", "_loader", "_sym_map") _supported_code_type = ("cubin", "ptx", "ltoir", "fatbin") - def __init__(self, module: Union[bytes, str]): + def __init__(self, module: Union[bytes, str], *, symbol_mapping: Optional[dict]=None): _lazy_init() # handle is assigned during _lazy_load @@ -250,9 +254,9 @@ def __init__(self, module: Union[bytes, str]): self._loader = _backend[self._backend_version] self._code_type = "cubin" self._module = module - self._sym_map = {} + self._sym_map = {} if symbol_mapping is None else symbol_mapping - def _init(module, code_type, *, symbol_mapping=None): + def _init(module, code_type, *, symbol_mapping: Optional[dict]=None): self = ObjectCode.__new__(ObjectCode) if code_type not in self._supported_code_type: raise ValueError @@ -278,9 +282,9 @@ def _lazy_load_module(self, *args, **kwargs): module = self._module if isinstance(module, str): if self._backend_version == "new": - self._handle = handle_return(self._loader["file"](module, [], [], 0, [], [], 0)) + self._handle = handle_return(self._loader["file"](module.encode(), [], [], 0, [], [], 0)) else: # "old" backend - self._handle = handle_return(self._loader["file"](module)) + self._handle = handle_return(self._loader["file"](module.encode())) else: assert isinstance(module, bytes) if self._backend_version == "new": diff --git a/cuda_core/docs/source/release/0.2.0-notes.rst b/cuda_core/docs/source/release/0.2.0-notes.rst index bf5705839a..47217453b0 100644 --- a/cuda_core/docs/source/release/0.2.0-notes.rst +++ b/cuda_core/docs/source/release/0.2.0-notes.rst @@ -12,15 +12,19 @@ Highlights - Add :class:`~DeviceProperties` to provide pythonic access to device properties. - Add kernel attributes to :class:`~Kernel` -Limitations ------------ - -- - Breaking Changes ---------------- - Change ``__cuda_stream__`` from attribute to method - The :meth:`~Program.compile` method no longer accepts the `options` argument. Instead, you can optionally pass an instance of :class:`~ProgramOptions` to the constructor of :obj:`~Program`. -- The internal constructor of :class:`~ObjectCode` no longer accepts the jit_options argument. Options are provided to upstream :class:`~ProgramOptions` or :class:`~LinkerOptions` instead. -- :meth: `~Device.properties` now provides an instance of :class:`~DeviceProperties` instead of a dictionary. +- :meth: `~Device.properties` now provides an instance of :class:`~DeviceProperties` instead of a dictionary. + +New features +------------ + +- Expose :class:`ObjectCode` as a public API, which allows loading cubins from memory or disk. For loading other kinds of code types, please continue using :class:`Program`. + +Limitations +----------- + +- diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py index 9f126fa179..33501999df 100644 --- a/cuda_core/tests/test_module.py +++ b/cuda_core/tests/test_module.py @@ -10,7 +10,7 @@ import pytest from conftest import can_load_generated_ptx -from cuda.core.experimental import Program, ProgramOptions, system +from cuda.core.experimental import ObjectCode, Program, ProgramOptions, system @pytest.fixture(scope="function") @@ -37,7 +37,7 @@ def get_saxpy_kernel(init_cuda): ) # run in single precision - return mod.get_kernel("saxpy") + return mod.get_kernel("saxpy"), mod @pytest.mark.xfail(not can_load_generated_ptx(), reason="PTX version too new") @@ -72,7 +72,7 @@ def test_get_kernel(init_cuda): ], ) def test_read_only_kernel_attributes(get_saxpy_kernel, attr, expected_type): - kernel = get_saxpy_kernel + kernel, _ = get_saxpy_kernel method = getattr(kernel.attributes, attr) # get the value without providing a device ordinal value = method() @@ -82,3 +82,23 @@ def test_read_only_kernel_attributes(get_saxpy_kernel, attr, expected_type): for device in system.devices: value = method(device.device_id) assert isinstance(value, expected_type), f"Expected {attr} to be of type {expected_type}, but got {type(value)}" + + +def test_object_code_load_cubin(get_saxpy_kernel): + _, mod = get_saxpy_kernel + cubin = mod._module + sym_map = mod._sym_map + assert isinstance(cubin, bytes) + mod = ObjectCode(cubin, symbol_mapping=sym_map) + ker = mod.get_kernel("saxpy") + + +def test_object_code_load_cubin_from_file(get_saxpy_kernel, tmp_path): + _, mod = get_saxpy_kernel + cubin = mod._module + sym_map = mod._sym_map + assert isinstance(cubin, bytes) + cubin_file = tmp_path / "test.cubin" + cubin_file.write_bytes(cubin) + mod = ObjectCode(str(cubin_file), symbol_mapping=sym_map) + ker = mod.get_kernel("saxpy") From 57a9b19026b3d15b4ad4af5bce1ccf3ebc05096d Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sun, 16 Feb 2025 06:31:01 +0000 Subject: [PATCH 5/8] update multi-gpu example --- .../examples/simple_multi_gpu_example.py | 20 ++++--------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/cuda_core/examples/simple_multi_gpu_example.py b/cuda_core/examples/simple_multi_gpu_example.py index 7b83d844c6..98969e8fab 100644 --- a/cuda_core/examples/simple_multi_gpu_example.py +++ b/cuda_core/examples/simple_multi_gpu_example.py @@ -34,14 +34,8 @@ } """ arch0 = "".join(f"{i}" for i in dev0.compute_capability) -prog_add = Program(code_add, code_type="c++") -mod_add = prog_add.compile( - "cubin", - options=( - "-std=c++17", - "-arch=sm_" + arch0, - ), -) +prog_add = Program(code_add, code_type="c++", options={"std": "c++17", "arch": f"sm_{arch0}"}) +mod_add = prog_add.compile("cubin") ker_add = mod_add.get_kernel("vector_add") # Set GPU 1 @@ -63,14 +57,8 @@ } """ arch1 = "".join(f"{i}" for i in dev1.compute_capability) -prog_sub = Program(code_sub, code_type="c++") -mod_sub = prog_sub.compile( - "cubin", - options=( - "-std=c++17", - "-arch=sm_" + arch1, - ), -) +prog_sub = Program(code_sub, code_type="c++", options={"std": "c++17", "arch": f"sm_{arch0}"}) +mod_sub = prog_sub.compile("cubin") ker_sub = mod_sub.get_kernel("vector_sub") From 1d4576747cf4372ceaa27b5a3362b54817314c6a Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sun, 16 Feb 2025 06:33:51 +0000 Subject: [PATCH 6/8] make linter happy --- cuda_core/cuda/core/experimental/_module.py | 4 ++-- cuda_core/tests/test_module.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_module.py b/cuda_core/cuda/core/experimental/_module.py index 3c55bccdb9..7119583af2 100644 --- a/cuda_core/cuda/core/experimental/_module.py +++ b/cuda_core/cuda/core/experimental/_module.py @@ -245,7 +245,7 @@ class ObjectCode: __slots__ = ("_handle", "_backend_version", "_code_type", "_module", "_loader", "_sym_map") _supported_code_type = ("cubin", "ptx", "ltoir", "fatbin") - def __init__(self, module: Union[bytes, str], *, symbol_mapping: Optional[dict]=None): + def __init__(self, module: Union[bytes, str], *, symbol_mapping: Optional[dict] = None): _lazy_init() # handle is assigned during _lazy_load @@ -256,7 +256,7 @@ def __init__(self, module: Union[bytes, str], *, symbol_mapping: Optional[dict]= self._module = module self._sym_map = {} if symbol_mapping is None else symbol_mapping - def _init(module, code_type, *, symbol_mapping: Optional[dict]=None): + def _init(module, code_type, *, symbol_mapping: Optional[dict] = None): self = ObjectCode.__new__(ObjectCode) if code_type not in self._supported_code_type: raise ValueError diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py index 33501999df..814a9e517e 100644 --- a/cuda_core/tests/test_module.py +++ b/cuda_core/tests/test_module.py @@ -90,7 +90,7 @@ def test_object_code_load_cubin(get_saxpy_kernel): sym_map = mod._sym_map assert isinstance(cubin, bytes) mod = ObjectCode(cubin, symbol_mapping=sym_map) - ker = mod.get_kernel("saxpy") + mod.get_kernel("saxpy") # force loading def test_object_code_load_cubin_from_file(get_saxpy_kernel, tmp_path): @@ -101,4 +101,4 @@ def test_object_code_load_cubin_from_file(get_saxpy_kernel, tmp_path): cubin_file = tmp_path / "test.cubin" cubin_file.write_bytes(cubin) mod = ObjectCode(str(cubin_file), symbol_mapping=sym_map) - ker = mod.get_kernel("saxpy") + mod.get_kernel("saxpy") # force loading From 87c157175620d2240403f974e105da96661435e5 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Tue, 18 Feb 2025 21:15:21 +0000 Subject: [PATCH 7/8] implement from_cubin; add docs; ensure get_kernel cannot be called with lto-ir --- cuda_core/cuda/core/experimental/_event.py | 4 +- cuda_core/cuda/core/experimental/_module.py | 63 +++++++++++---------- cuda_core/docs/source/api.rst | 1 + cuda_core/tests/test_module.py | 4 +- 4 files changed, 36 insertions(+), 36 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_event.py b/cuda_core/cuda/core/experimental/_event.py index 07e87fb664..06005f95ae 100644 --- a/cuda_core/cuda/core/experimental/_event.py +++ b/cuda_core/cuda/core/experimental/_event.py @@ -65,9 +65,7 @@ def close(self): __slots__ = ("__weakref__", "_mnff", "_timing_disabled", "_busy_waited") def __init__(self): - raise NotImplementedError( - "directly creating an Event object can be ambiguous. Please call call Stream.record()." - ) + raise NotImplementedError("directly creating an Event object can be ambiguous. Please call Stream.record().") @staticmethod def _init(options: Optional[EventOptions] = None): diff --git a/cuda_core/cuda/core/experimental/_module.py b/cuda_core/cuda/core/experimental/_module.py index 7119583af2..c4535c2799 100644 --- a/cuda_core/cuda/core/experimental/_module.py +++ b/cuda_core/cuda/core/experimental/_module.py @@ -213,53 +213,38 @@ def attributes(self): class ObjectCode: - """Represent a compiled program that was loaded onto the device. + """Represent a compiled program to be loaded onto the device. This object provides a unified interface for different types of - compiled programs that are loaded onto the device. - - Loads the module library with specified module code and JIT options. + compiled programs that will be loaded onto the device. Note ---- - The public constructor assumes that ``module`` is of code type "cubin". - For all other possible code types (ex: "ptx"), only :class:`~cuda.core.experimental.Program` - accepts them and returns an `ObjectCode` instance with its ``compile`` method. + This class has no default constructor. If you already have a cubin that you would + like to load, use the :meth:`from_cubin` alternative constructor. For all other + possible code types (ex: "ptx"), only :class:`~cuda.core.experimental.Program` + accepts them and returns an :class:`ObjectCode` instance with its + :meth:`~cuda.core.experimental.Program.compile` method. Note ---- Usage under CUDA 11.x will only load to the current device context. - - Parameters - ---------- - module : Union[bytes, str] - Either a bytes object containing the cubin to load, or - a file path string pointing to the cubin to load. - symbol_mapping : Optional[dict] - A dictionary specifying how the unmangled symbol names (as keys) - should be mapped to the mangled names before trying to retrieve - them (default to no mappings). """ __slots__ = ("_handle", "_backend_version", "_code_type", "_module", "_loader", "_sym_map") _supported_code_type = ("cubin", "ptx", "ltoir", "fatbin") - def __init__(self, module: Union[bytes, str], *, symbol_mapping: Optional[dict] = None): - _lazy_init() - - # handle is assigned during _lazy_load - self._handle = None - self._backend_version = "new" if (_py_major_ver >= 12 and _driver_ver >= 12000) else "old" - self._loader = _backend[self._backend_version] - self._code_type = "cubin" - self._module = module - self._sym_map = {} if symbol_mapping is None else symbol_mapping + def __init__(self): + raise NotImplementedError( + "directly creating an ObjectCode object can be ambiguous. Please either call Program.compile() " + "or one of the ObjectCode.from_*() constructors" + ) + @staticmethod def _init(module, code_type, *, symbol_mapping: Optional[dict] = None): self = ObjectCode.__new__(ObjectCode) - if code_type not in self._supported_code_type: - raise ValueError + assert code_type in self._supported_code_type, f"{code_type=} is not supported" _lazy_init() # handle is assigned during _lazy_load @@ -274,6 +259,22 @@ def _init(module, code_type, *, symbol_mapping: Optional[dict] = None): return self + @staticmethod + def from_cubin(module: Union[bytes, str], *, symbol_mapping: Optional[dict] = None) -> "ObjectCode": + """Create an :class:`ObjectCode` instance from an existing cubin. + + Parameters + ---------- + module : Union[bytes, str] + Either a bytes object containing the in-memory cubin to load, or + a file path string pointing to the on-disk cubin to load. + symbol_mapping : Optional[dict] + A dictionary specifying how the unmangled symbol names (as keys) + should be mapped to the mangled names before trying to retrieve + them (default to no mappings). + """ + return ObjectCode._init(module, "cubin", symbol_mapping=symbol_mapping) + # TODO: do we want to unload in a finalizer? Probably not.. def _lazy_load_module(self, *args, **kwargs): @@ -307,6 +308,8 @@ def get_kernel(self, name): Newly created kernel object. """ + if self._code_type not in ("cubin", "ptx", "fatbin"): + raise RuntimeError(f"get_kernel() is not supported for {self._code_type}") try: name = self._sym_map[name] except KeyError: @@ -314,5 +317,3 @@ def get_kernel(self, name): data = handle_return(self._loader["kernel"](self._handle, name)) return Kernel._from_obj(data, self) - - # TODO: implement from_handle() diff --git a/cuda_core/docs/source/api.rst b/cuda_core/docs/source/api.rst index f5ee30c1af..b52fda55d0 100644 --- a/cuda_core/docs/source/api.rst +++ b/cuda_core/docs/source/api.rst @@ -32,6 +32,7 @@ CUDA compilation toolchain Program Linker + ObjectCode :template: dataclass.rst diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py index 814a9e517e..f859142c97 100644 --- a/cuda_core/tests/test_module.py +++ b/cuda_core/tests/test_module.py @@ -89,7 +89,7 @@ def test_object_code_load_cubin(get_saxpy_kernel): cubin = mod._module sym_map = mod._sym_map assert isinstance(cubin, bytes) - mod = ObjectCode(cubin, symbol_mapping=sym_map) + mod = ObjectCode.from_cubin(cubin, symbol_mapping=sym_map) mod.get_kernel("saxpy") # force loading @@ -100,5 +100,5 @@ def test_object_code_load_cubin_from_file(get_saxpy_kernel, tmp_path): assert isinstance(cubin, bytes) cubin_file = tmp_path / "test.cubin" cubin_file.write_bytes(cubin) - mod = ObjectCode(str(cubin_file), symbol_mapping=sym_map) + mod = ObjectCode.from_cubin(str(cubin_file), symbol_mapping=sym_map) mod.get_kernel("saxpy") # force loading From 380896cce8db4d8c8f1bdee3441437714303202b Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Tue, 18 Feb 2025 17:58:13 -0500 Subject: [PATCH 8/8] Apply suggestions from code review Co-authored-by: Vladislav Zhurba <53052066+vzhurba01@users.noreply.github.com> --- cuda_core/docs/source/release/0.2.0-notes.rst | 2 +- cuda_core/examples/simple_multi_gpu_example.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cuda_core/docs/source/release/0.2.0-notes.rst b/cuda_core/docs/source/release/0.2.0-notes.rst index 47217453b0..81e4e12924 100644 --- a/cuda_core/docs/source/release/0.2.0-notes.rst +++ b/cuda_core/docs/source/release/0.2.0-notes.rst @@ -17,7 +17,7 @@ Breaking Changes - Change ``__cuda_stream__`` from attribute to method - The :meth:`~Program.compile` method no longer accepts the `options` argument. Instead, you can optionally pass an instance of :class:`~ProgramOptions` to the constructor of :obj:`~Program`. -- :meth: `~Device.properties` now provides an instance of :class:`~DeviceProperties` instead of a dictionary. +- :meth:`~Device.properties` now provides an instance of :class:`~DeviceProperties` instead of a dictionary. New features ------------ diff --git a/cuda_core/examples/simple_multi_gpu_example.py b/cuda_core/examples/simple_multi_gpu_example.py index 98969e8fab..baa9547773 100644 --- a/cuda_core/examples/simple_multi_gpu_example.py +++ b/cuda_core/examples/simple_multi_gpu_example.py @@ -57,7 +57,7 @@ } """ arch1 = "".join(f"{i}" for i in dev1.compute_capability) -prog_sub = Program(code_sub, code_type="c++", options={"std": "c++17", "arch": f"sm_{arch0}"}) +prog_sub = Program(code_sub, code_type="c++", options={"std": "c++17", "arch": f"sm_{arch1}"}) mod_sub = prog_sub.compile("cubin") ker_sub = mod_sub.get_kernel("vector_sub")