From 2d8c99a89e487942ec14ae440d4c68bd250b69c6 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Mon, 12 Jan 2026 13:54:50 -0800 Subject: [PATCH 1/2] initial localized test --- .../cuda/bindings/_internal/nvfatbin.pxd | 22 ++ .../bindings/_internal/nvfatbin_linux.pyx | 242 ++++++++++++++++++ .../bindings/_internal/nvfatbin_windows.pyx | 233 +++++++++++++++++ cuda_bindings/cuda/bindings/cynvfatbin.pxd | 53 ++++ cuda_bindings/cuda/bindings/cynvfatbin.pyx | 38 +++ cuda_bindings/cuda/bindings/nvfatbin.pxd | 37 +++ cuda_bindings/cuda/bindings/nvfatbin.pyx | 194 ++++++++++++++ cuda_bindings/tests/test_nvfatbin.py | 89 +++++++ 8 files changed, 908 insertions(+) create mode 100644 cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd create mode 100644 cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx create mode 100644 cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx create mode 100644 cuda_bindings/cuda/bindings/cynvfatbin.pxd create mode 100644 cuda_bindings/cuda/bindings/cynvfatbin.pyx create mode 100644 cuda_bindings/cuda/bindings/nvfatbin.pxd create mode 100644 cuda_bindings/cuda/bindings/nvfatbin.pyx create mode 100644 cuda_bindings/tests/test_nvfatbin.py diff --git a/cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd b/cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd new file mode 100644 index 0000000000..14a8a6d608 --- /dev/null +++ b/cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd @@ -0,0 +1,22 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +# +# This code was automatically generated with version 13.0.0. Do not modify it directly. + +from ..cynvfatbin cimport * + + +############################################################################### +# Wrapper functions +############################################################################### + +cdef nvFatbinResult _nvFatbinCreate(nvFatbinHandle* handle_indirect, const char** options, size_t optionsCount) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult _nvFatbinDestroy(nvFatbinHandle* handle_indirect) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult _nvFatbinAddPTX(nvFatbinHandle handle, const char* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult _nvFatbinSize(nvFatbinHandle handle, size_t* size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult _nvFatbinGet(nvFatbinHandle handle, void* buffer) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult _nvFatbinVersion(unsigned int* major, unsigned int* minor) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil + + + diff --git a/cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx new file mode 100644 index 0000000000..06143d9031 --- /dev/null +++ b/cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx @@ -0,0 +1,242 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +# +# This code was automatically generated with version 13.0.0. Do not modify it directly. + +from libc.stdint cimport intptr_t, uintptr_t + +import threading +from .utils import FunctionNotFoundError, NotSupportedError + +from cuda.pathfinder import load_nvidia_dynamic_lib + + +############################################################################### +# Extern +############################################################################### + +# You must 'from .utils import NotSupportedError' before using this template + +cdef extern from "" nogil: + void* dlopen(const char*, int) + char* dlerror() + void* dlsym(void*, const char*) + int dlclose(void*) + + enum: + RTLD_LAZY + RTLD_NOW + RTLD_GLOBAL + RTLD_LOCAL + + const void* RTLD_DEFAULT 'RTLD_DEFAULT' + +cdef int get_cuda_version(): + cdef void* handle = NULL + cdef int err, driver_ver = 0 + + # Load driver to check version + handle = dlopen('libcuda.so.1', RTLD_NOW | RTLD_GLOBAL) + if handle == NULL: + err_msg = dlerror() + raise NotSupportedError(f'CUDA driver is not found ({err_msg.decode()})') + cuDriverGetVersion = dlsym(handle, "cuDriverGetVersion") + if cuDriverGetVersion == NULL: + raise RuntimeError('Did not find cuDriverGetVersion symbol in libcuda.so.1') + err = (cuDriverGetVersion)(&driver_ver) + if err != 0: + raise RuntimeError(f'cuDriverGetVersion returned error code {err}') + + return driver_ver + + + +############################################################################### +# Wrapper init +############################################################################### + +cdef object __symbol_lock = threading.Lock() +cdef bint __py_nvfatbin_init = False + +cdef void* __nvFatbinCreate = NULL +cdef void* __nvFatbinDestroy = NULL +cdef void* __nvFatbinAddPTX = NULL +cdef void* __nvFatbinSize = NULL +cdef void* __nvFatbinGet = NULL +cdef void* __nvFatbinVersion = NULL + + +cdef void* load_library() except* with gil: + cdef uintptr_t handle = load_nvidia_dynamic_lib("nvfatbin")._handle_uint + return handle + + +cdef int _init_nvfatbin() except -1 nogil: + global __py_nvfatbin_init + + cdef void* handle = NULL + + with gil, __symbol_lock: + # Recheck the flag after obtaining the locks + if __py_nvfatbin_init: + return 0 + + # Load function + global __nvFatbinCreate + __nvFatbinCreate = dlsym(RTLD_DEFAULT, 'nvFatbinCreate') + if __nvFatbinCreate == NULL: + if handle == NULL: + handle = load_library() + __nvFatbinCreate = dlsym(handle, 'nvFatbinCreate') + + global __nvFatbinDestroy + __nvFatbinDestroy = dlsym(RTLD_DEFAULT, 'nvFatbinDestroy') + if __nvFatbinDestroy == NULL: + if handle == NULL: + handle = load_library() + __nvFatbinDestroy = dlsym(handle, 'nvFatbinDestroy') + + global __nvFatbinAddPTX + __nvFatbinAddPTX = dlsym(RTLD_DEFAULT, 'nvFatbinAddPTX') + if __nvFatbinAddPTX == NULL: + if handle == NULL: + handle = load_library() + __nvFatbinAddPTX = dlsym(handle, 'nvFatbinAddPTX') + + global __nvFatbinSize + __nvFatbinSize = dlsym(RTLD_DEFAULT, 'nvFatbinSize') + if __nvFatbinSize == NULL: + if handle == NULL: + handle = load_library() + __nvFatbinSize = dlsym(handle, 'nvFatbinSize') + + global __nvFatbinGet + __nvFatbinGet = dlsym(RTLD_DEFAULT, 'nvFatbinGet') + if __nvFatbinGet == NULL: + if handle == NULL: + handle = load_library() + __nvFatbinGet = dlsym(handle, 'nvFatbinGet') + + global __nvFatbinVersion + __nvFatbinVersion = dlsym(RTLD_DEFAULT, 'nvFatbinVersion') + if __nvFatbinVersion == NULL: + if handle == NULL: + handle = load_library() + __nvFatbinVersion = dlsym(handle, 'nvFatbinVersion') + + __py_nvfatbin_init = True + return 0 + + +cdef inline int _check_or_init_nvfatbin() except -1 nogil: + if __py_nvfatbin_init: + return 0 + + return _init_nvfatbin() + +cdef dict func_ptrs = None + + +cpdef dict _inspect_function_pointers(): + global func_ptrs + if func_ptrs is not None: + return func_ptrs + + _check_or_init_nvfatbin() + cdef dict data = {} + + global __nvFatbinCreate + data["__nvFatbinCreate"] = __nvFatbinCreate + + global __nvFatbinDestroy + data["__nvFatbinDestroy"] = __nvFatbinDestroy + + global __nvFatbinAddPTX + data["__nvFatbinAddPTX"] = __nvFatbinAddPTX + + global __nvFatbinSize + data["__nvFatbinSize"] = __nvFatbinSize + + global __nvFatbinGet + data["__nvFatbinGet"] = __nvFatbinGet + + global __nvFatbinVersion + data["__nvFatbinVersion"] = __nvFatbinVersion + + func_ptrs = data + return data + + +cpdef _inspect_function_pointer(str name): + global func_ptrs + if func_ptrs is None: + func_ptrs = _inspect_function_pointers() + return func_ptrs[name] + + +############################################################################### +# Wrapper functions +############################################################################### + +cdef nvFatbinResult _nvFatbinCreate(nvFatbinHandle* handle_indirect, const char** options, size_t optionsCount) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinCreate + _check_or_init_nvfatbin() + if __nvFatbinCreate == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinCreate is not found") + return (__nvFatbinCreate)( + handle_indirect, options, optionsCount) + + +cdef nvFatbinResult _nvFatbinDestroy(nvFatbinHandle* handle_indirect) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinDestroy + _check_or_init_nvfatbin() + if __nvFatbinDestroy == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinDestroy is not found") + return (__nvFatbinDestroy)( + handle_indirect) + + +cdef nvFatbinResult _nvFatbinAddPTX(nvFatbinHandle handle, const char* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinAddPTX + _check_or_init_nvfatbin() + if __nvFatbinAddPTX == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinAddPTX is not found") + return (__nvFatbinAddPTX)( + handle, code, size, arch, identifier, optionsCmdLine) + + +cdef nvFatbinResult _nvFatbinSize(nvFatbinHandle handle, size_t* size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinSize + _check_or_init_nvfatbin() + if __nvFatbinSize == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinSize is not found") + return (__nvFatbinSize)( + handle, size) + + +cdef nvFatbinResult _nvFatbinGet(nvFatbinHandle handle, void* buffer) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinGet + _check_or_init_nvfatbin() + if __nvFatbinGet == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinGet is not found") + return (__nvFatbinGet)( + handle, buffer) + + +cdef nvFatbinResult _nvFatbinVersion(unsigned int* major, unsigned int* minor) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinVersion + _check_or_init_nvfatbin() + if __nvFatbinVersion == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinVersion is not found") + return (__nvFatbinVersion)( + major, minor) + + + diff --git a/cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx new file mode 100644 index 0000000000..cc1824bf43 --- /dev/null +++ b/cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx @@ -0,0 +1,233 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +# +# This code was automatically generated with version 13.0.0. Do not modify it directly. + +from libc.stdint cimport intptr_t + +import threading +from .utils import FunctionNotFoundError, NotSupportedError + +from cuda.pathfinder import load_nvidia_dynamic_lib + +from libc.stddef cimport wchar_t +from libc.stdint cimport uintptr_t +from cpython cimport PyUnicode_AsWideCharString, PyMem_Free + +# You must 'from .utils import NotSupportedError' before using this template + +cdef extern from "windows.h" nogil: + ctypedef void* HMODULE + ctypedef void* HANDLE + ctypedef void* FARPROC + ctypedef unsigned long DWORD + ctypedef const wchar_t *LPCWSTR + ctypedef const char *LPCSTR + + cdef DWORD LOAD_LIBRARY_SEARCH_SYSTEM32 = 0x00000800 + cdef DWORD LOAD_LIBRARY_SEARCH_DEFAULT_DIRS = 0x00001000 + cdef DWORD LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR = 0x00000100 + + HMODULE _LoadLibraryExW "LoadLibraryExW"( + LPCWSTR lpLibFileName, + HANDLE hFile, + DWORD dwFlags + ) + + FARPROC _GetProcAddress "GetProcAddress"(HMODULE hModule, LPCSTR lpProcName) + +cdef inline uintptr_t LoadLibraryExW(str path, HANDLE hFile, DWORD dwFlags): + cdef uintptr_t result + cdef wchar_t* wpath = PyUnicode_AsWideCharString(path, NULL) + with nogil: + result = _LoadLibraryExW( + wpath, + hFile, + dwFlags + ) + PyMem_Free(wpath) + return result + +cdef inline void *GetProcAddress(uintptr_t hModule, const char* lpProcName) nogil: + return _GetProcAddress(hModule, lpProcName) + +cdef int get_cuda_version(): + cdef int err, driver_ver = 0 + + # Load driver to check version + handle = LoadLibraryExW("nvcuda.dll", NULL, LOAD_LIBRARY_SEARCH_SYSTEM32) + if handle == 0: + raise NotSupportedError('CUDA driver is not found') + cuDriverGetVersion = GetProcAddress(handle, 'cuDriverGetVersion') + if cuDriverGetVersion == NULL: + raise RuntimeError('Did not find cuDriverGetVersion symbol in nvcuda.dll') + err = (cuDriverGetVersion)(&driver_ver) + if err != 0: + raise RuntimeError(f'cuDriverGetVersion returned error code {err}') + + return driver_ver + + + +############################################################################### +# Wrapper init +############################################################################### + +cdef object __symbol_lock = threading.Lock() +cdef bint __py_nvfatbin_init = False + +cdef void* __nvFatbinCreate = NULL +cdef void* __nvFatbinDestroy = NULL +cdef void* __nvFatbinAddPTX = NULL +cdef void* __nvFatbinSize = NULL +cdef void* __nvFatbinGet = NULL +cdef void* __nvFatbinVersion = NULL + + +cdef int _init_nvfatbin() except -1 nogil: + global __py_nvfatbin_init + + with gil, __symbol_lock: + # Recheck the flag after obtaining the locks + if __py_nvfatbin_init: + return 0 + + # Load library + handle = load_nvidia_dynamic_lib("nvfatbin")._handle_uint + + # Load function + global __nvFatbinCreate + __nvFatbinCreate = GetProcAddress(handle, 'nvFatbinCreate') + + global __nvFatbinDestroy + __nvFatbinDestroy = GetProcAddress(handle, 'nvFatbinDestroy') + + global __nvFatbinAddPTX + __nvFatbinAddPTX = GetProcAddress(handle, 'nvFatbinAddPTX') + + global __nvFatbinSize + __nvFatbinSize = GetProcAddress(handle, 'nvFatbinSize') + + global __nvFatbinGet + __nvFatbinGet = GetProcAddress(handle, 'nvFatbinGet') + + global __nvFatbinVersion + __nvFatbinVersion = GetProcAddress(handle, 'nvFatbinVersion') + + __py_nvfatbin_init = True + return 0 + + +cdef inline int _check_or_init_nvfatbin() except -1 nogil: + if __py_nvfatbin_init: + return 0 + + return _init_nvfatbin() + + +cdef dict func_ptrs = None + + +cpdef dict _inspect_function_pointers(): + global func_ptrs + if func_ptrs is not None: + return func_ptrs + + _check_or_init_nvfatbin() + cdef dict data = {} + + global __nvFatbinCreate + data["__nvFatbinCreate"] = __nvFatbinCreate + + global __nvFatbinDestroy + data["__nvFatbinDestroy"] = __nvFatbinDestroy + + global __nvFatbinAddPTX + data["__nvFatbinAddPTX"] = __nvFatbinAddPTX + + global __nvFatbinSize + data["__nvFatbinSize"] = __nvFatbinSize + + global __nvFatbinGet + data["__nvFatbinGet"] = __nvFatbinGet + + global __nvFatbinVersion + data["__nvFatbinVersion"] = __nvFatbinVersion + + func_ptrs = data + return data + + +cpdef _inspect_function_pointer(str name): + global func_ptrs + if func_ptrs is None: + func_ptrs = _inspect_function_pointers() + return func_ptrs[name] + + +############################################################################### +# Wrapper functions +############################################################################### + +cdef nvFatbinResult _nvFatbinCreate(nvFatbinHandle* handle_indirect, const char** options, size_t optionsCount) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinCreate + _check_or_init_nvfatbin() + if __nvFatbinCreate == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinCreate is not found") + return (__nvFatbinCreate)( + handle_indirect, options, optionsCount) + + +cdef nvFatbinResult _nvFatbinDestroy(nvFatbinHandle* handle_indirect) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinDestroy + _check_or_init_nvfatbin() + if __nvFatbinDestroy == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinDestroy is not found") + return (__nvFatbinDestroy)( + handle_indirect) + + +cdef nvFatbinResult _nvFatbinAddPTX(nvFatbinHandle handle, const char* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinAddPTX + _check_or_init_nvfatbin() + if __nvFatbinAddPTX == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinAddPTX is not found") + return (__nvFatbinAddPTX)( + handle, code, size, arch, identifier, optionsCmdLine) + + +cdef nvFatbinResult _nvFatbinSize(nvFatbinHandle handle, size_t* size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinSize + _check_or_init_nvfatbin() + if __nvFatbinSize == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinSize is not found") + return (__nvFatbinSize)( + handle, size) + + +cdef nvFatbinResult _nvFatbinGet(nvFatbinHandle handle, void* buffer) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinGet + _check_or_init_nvfatbin() + if __nvFatbinGet == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinGet is not found") + return (__nvFatbinGet)( + handle, buffer) + + +cdef nvFatbinResult _nvFatbinVersion(unsigned int* major, unsigned int* minor) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinVersion + _check_or_init_nvfatbin() + if __nvFatbinVersion == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinVersion is not found") + return (__nvFatbinVersion)( + major, minor) + + + diff --git a/cuda_bindings/cuda/bindings/cynvfatbin.pxd b/cuda_bindings/cuda/bindings/cynvfatbin.pxd new file mode 100644 index 0000000000..651aa27152 --- /dev/null +++ b/cuda_bindings/cuda/bindings/cynvfatbin.pxd @@ -0,0 +1,53 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +# +# This code was automatically generated with version 13.0.0. Do not modify it directly. + +from libc.stdint cimport intptr_t, uint32_t + + +############################################################################### +# Types (structs, enums, ...) +############################################################################### + +# enums +ctypedef enum nvFatbinResult "nvFatbinResult": + NVFATBIN_SUCCESS "NVFATBIN_SUCCESS" = 0 + NVFATBIN_ERROR_INTERNAL "NVFATBIN_ERROR_INTERNAL" + NVFATBIN_ERROR_ELF_ARCH_MISMATCH "NVFATBIN_ERROR_ELF_ARCH_MISMATCH" + NVFATBIN_ERROR_ELF_SIZE_MISMATCH "NVFATBIN_ERROR_ELF_SIZE_MISMATCH" + NVFATBIN_ERROR_MISSING_PTX_VERSION "NVFATBIN_ERROR_MISSING_PTX_VERSION" + NVFATBIN_ERROR_NULL_POINTER "NVFATBIN_ERROR_NULL_POINTER" + NVFATBIN_ERROR_COMPRESSION_FAILED "NVFATBIN_ERROR_COMPRESSION_FAILED" + NVFATBIN_ERROR_COMPRESSED_SIZE_EXCEEDED "NVFATBIN_ERROR_COMPRESSED_SIZE_EXCEEDED" + NVFATBIN_ERROR_UNRECOGNIZED_OPTION "NVFATBIN_ERROR_UNRECOGNIZED_OPTION" + NVFATBIN_ERROR_INVALID_ARCH "NVFATBIN_ERROR_INVALID_ARCH" + NVFATBIN_ERROR_INVALID_NVVM "NVFATBIN_ERROR_INVALID_NVVM" + NVFATBIN_ERROR_EMPTY_INPUT "NVFATBIN_ERROR_EMPTY_INPUT" + NVFATBIN_ERROR_MISSING_PTX_ARCH "NVFATBIN_ERROR_MISSING_PTX_ARCH" + NVFATBIN_ERROR_PTX_ARCH_MISMATCH "NVFATBIN_ERROR_PTX_ARCH_MISMATCH" + NVFATBIN_ERROR_MISSING_FATBIN "NVFATBIN_ERROR_MISSING_FATBIN" + NVFATBIN_ERROR_INVALID_INDEX "NVFATBIN_ERROR_INVALID_INDEX" + NVFATBIN_ERROR_IDENTIFIER_REUSE "NVFATBIN_ERROR_IDENTIFIER_REUSE" + NVFATBIN_ERROR_INTERNAL_PTX_OPTION "NVFATBIN_ERROR_INTERNAL_PTX_OPTION" + _NVFATBINRESULT_INTERNAL_LOADING_ERROR "_NVFATBINRESULT_INTERNAL_LOADING_ERROR" = -42 + + +# types +ctypedef void* nvFatbinHandle 'nvFatbinHandle' + + +############################################################################### +# Functions +############################################################################### + +cdef nvFatbinResult nvFatbinCreate(nvFatbinHandle* handle_indirect, const char** options, size_t optionsCount) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult nvFatbinDestroy(nvFatbinHandle* handle_indirect) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult nvFatbinAddPTX(nvFatbinHandle handle, const char* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult nvFatbinSize(nvFatbinHandle handle, size_t* size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult nvFatbinGet(nvFatbinHandle handle, void* buffer) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult nvFatbinVersion(unsigned int* major, unsigned int* minor) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil + + + diff --git a/cuda_bindings/cuda/bindings/cynvfatbin.pyx b/cuda_bindings/cuda/bindings/cynvfatbin.pyx new file mode 100644 index 0000000000..13c9ac2cc1 --- /dev/null +++ b/cuda_bindings/cuda/bindings/cynvfatbin.pyx @@ -0,0 +1,38 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +# +# This code was automatically generated with version 13.0.0. Do not modify it directly. + +from ._internal cimport nvfatbin as _nvfatbin + + +############################################################################### +# Wrapper functions +############################################################################### + +cdef nvFatbinResult nvFatbinCreate(nvFatbinHandle* handle_indirect, const char** options, size_t optionsCount) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + return _nvfatbin._nvFatbinCreate(handle_indirect, options, optionsCount) + + +cdef nvFatbinResult nvFatbinDestroy(nvFatbinHandle* handle_indirect) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + return _nvfatbin._nvFatbinDestroy(handle_indirect) + + +cdef nvFatbinResult nvFatbinAddPTX(nvFatbinHandle handle, const char* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + return _nvfatbin._nvFatbinAddPTX(handle, code, size, arch, identifier, optionsCmdLine) + + +cdef nvFatbinResult nvFatbinSize(nvFatbinHandle handle, size_t* size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + return _nvfatbin._nvFatbinSize(handle, size) + + +cdef nvFatbinResult nvFatbinGet(nvFatbinHandle handle, void* buffer) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + return _nvfatbin._nvFatbinGet(handle, buffer) + + +cdef nvFatbinResult nvFatbinVersion(unsigned int* major, unsigned int* minor) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + return _nvfatbin._nvFatbinVersion(major, minor) + + + diff --git a/cuda_bindings/cuda/bindings/nvfatbin.pxd b/cuda_bindings/cuda/bindings/nvfatbin.pxd new file mode 100644 index 0000000000..1350d0ed52 --- /dev/null +++ b/cuda_bindings/cuda/bindings/nvfatbin.pxd @@ -0,0 +1,37 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +# +# This code was automatically generated with version 13.0.0. Do not modify it directly. + +from libc.stdint cimport intptr_t, uint32_t + +from .cynvfatbin cimport * + + +############################################################################### +# Types +############################################################################### + +ctypedef nvFatbinHandle Handle + + +############################################################################### +# Enum +############################################################################### + +ctypedef nvFatbinResult _Result + + +############################################################################### +# Functions +############################################################################### + +cpdef intptr_t create(options, size_t options_count) except -1 +cpdef add_ptx(intptr_t handle, code, size_t size, arch, identifier, options_cmd_line) +cpdef size_t size(intptr_t handle) except? 0 +cpdef get(intptr_t handle, buffer) +cpdef tuple version() + + + diff --git a/cuda_bindings/cuda/bindings/nvfatbin.pyx b/cuda_bindings/cuda/bindings/nvfatbin.pyx new file mode 100644 index 0000000000..dcc669797e --- /dev/null +++ b/cuda_bindings/cuda/bindings/nvfatbin.pyx @@ -0,0 +1,194 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +# +# This code was automatically generated with version 13.0.0. Do not modify it directly. + +cimport cython # NOQA + +from ._internal.utils cimport (get_resource_ptr, get_nested_resource_ptr, nested_resource, nullable_unique_ptr, + get_buffer_pointer, get_resource_ptrs) + +from enum import IntEnum as _IntEnum +from libcpp.vector cimport vector + + +############################################################################### +# Enum +############################################################################### + +class Result(_IntEnum): + """See `nvFatbinResult`.""" + SUCCESS = NVFATBIN_SUCCESS + ERROR_INTERNAL = NVFATBIN_ERROR_INTERNAL + ERROR_ELF_ARCH_MISMATCH = NVFATBIN_ERROR_ELF_ARCH_MISMATCH + ERROR_ELF_SIZE_MISMATCH = NVFATBIN_ERROR_ELF_SIZE_MISMATCH + ERROR_MISSING_PTX_VERSION = NVFATBIN_ERROR_MISSING_PTX_VERSION + ERROR_NULL_POINTER = NVFATBIN_ERROR_NULL_POINTER + ERROR_COMPRESSION_FAILED = NVFATBIN_ERROR_COMPRESSION_FAILED + ERROR_COMPRESSED_SIZE_EXCEEDED = NVFATBIN_ERROR_COMPRESSED_SIZE_EXCEEDED + ERROR_UNRECOGNIZED_OPTION = NVFATBIN_ERROR_UNRECOGNIZED_OPTION + ERROR_INVALID_ARCH = NVFATBIN_ERROR_INVALID_ARCH + ERROR_INVALID_NVVM = NVFATBIN_ERROR_INVALID_NVVM + ERROR_EMPTY_INPUT = NVFATBIN_ERROR_EMPTY_INPUT + ERROR_MISSING_PTX_ARCH = NVFATBIN_ERROR_MISSING_PTX_ARCH + ERROR_PTX_ARCH_MISMATCH = NVFATBIN_ERROR_PTX_ARCH_MISMATCH + ERROR_MISSING_FATBIN = NVFATBIN_ERROR_MISSING_FATBIN + ERROR_INVALID_INDEX = NVFATBIN_ERROR_INVALID_INDEX + ERROR_IDENTIFIER_REUSE = NVFATBIN_ERROR_IDENTIFIER_REUSE + ERROR_INTERNAL_PTX_OPTION = NVFATBIN_ERROR_INTERNAL_PTX_OPTION + + +############################################################################### +# Error handling +############################################################################### + +class nvfatbinError(Exception): + + def __init__(self, status): + self.status = status + s = Result(status) + cdef str err = f"{s.name} ({s.value})" + super(nvfatbinError, self).__init__(err) + + def __reduce__(self): + return (type(self), (self.status,)) + + +@cython.profile(False) +cdef int check_status(int status) except 1 nogil: + if status != 0: + with gil: + raise nvfatbinError(status) + return status + + +############################################################################### +# Wrapper functions +############################################################################### + +cpdef destroy(intptr_t handle): + """nvFatbinDestroy frees the memory associated with the given handle. + + Args: + handle (intptr_t): nvFatbin handle. + + .. seealso:: `nvFatbinDestroy` + """ + cdef Handle h = handle + with nogil: + status = nvFatbinDestroy(&h) + check_status(status) + + +cpdef intptr_t create(options, size_t options_count) except -1: + """nvFatbinCreate creates a new handle. + + Args: + options (object): An array of strings, each containing a single option. It can be: + + - an :class:`int` as the pointer address to the nested sequence, or + - a Python sequence of :class:`int`\s, each of which is a pointer address + to a valid sequence of 'char', or + - a nested Python sequence of ``str``. + + options_count (size_t): Number of options. + + Returns: + intptr_t: Address of nvFatbin handle. + + .. seealso:: `nvFatbinCreate` + """ + cdef nested_resource[ char ] _options_ + get_nested_resource_ptr[char](_options_, options, NULL) + cdef Handle handle_indirect + with nogil: + __status__ = nvFatbinCreate(&handle_indirect, (_options_.ptrs.data()), options_count) + check_status(__status__) + return handle_indirect + + +cpdef add_ptx(intptr_t handle, code, size_t size, arch, identifier, options_cmd_line): + """nvFatbinAddPTX adds PTX to the fatbinary. + + Args: + handle (intptr_t): nvFatbin handle. + code (bytes): The PTX code. + size (size_t): The size of the PTX code. + arch (str): The numerical architecture that this PTX is for (the XX of any sm_XX, lto_XX, or compute_XX). + identifier (str): Name of the PTX, useful when extracting the fatbin with tools like cuobjdump. + options_cmd_line (str): Options used during JIT compilation. + + .. seealso:: `nvFatbinAddPTX` + """ + cdef void* _code_ = get_buffer_pointer(code, size, readonly=True) + if not isinstance(arch, str): + raise TypeError("arch must be a Python str") + cdef bytes _temp_arch_ = (arch).encode() + cdef char* _arch_ = _temp_arch_ + if not isinstance(identifier, str): + raise TypeError("identifier must be a Python str") + cdef bytes _temp_identifier_ = (identifier).encode() + cdef char* _identifier_ = _temp_identifier_ + if not isinstance(options_cmd_line, str): + raise TypeError("options_cmd_line must be a Python str") + cdef bytes _temp_options_cmd_line_ = (options_cmd_line).encode() + cdef char* _options_cmd_line_ = _temp_options_cmd_line_ + with nogil: + __status__ = nvFatbinAddPTX(handle, _code_, size, _arch_, _identifier_, _options_cmd_line_) + check_status(__status__) + + +cpdef size_t size(intptr_t handle) except? 0: + """nvFatbinSize returns the fatbinary's size. + + Args: + handle (intptr_t): nvFatbin handle. + + Returns: + size_t: The fatbinary's size. + + .. seealso:: `nvFatbinSize` + """ + cdef size_t size + with nogil: + __status__ = nvFatbinSize(handle, &size) + check_status(__status__) + return size + + +cpdef get(intptr_t handle, buffer): + """nvFatbinGet returns the completed fatbinary. + + Args: + handle (intptr_t): nvFatbin handle. + buffer (bytes): memory to store fatbinary. + + .. seealso:: `nvFatbinGet` + """ + cdef void* _buffer_ = get_buffer_pointer(buffer, -1, readonly=False) + with nogil: + __status__ = nvFatbinGet(handle, _buffer_) + check_status(__status__) + + +cpdef tuple version(): + """nvFatbinVersion returns the current version of nvFatbin. + + Returns: + A 2-tuple containing: + + - unsigned int: The major version. + - unsigned int: The minor version. + + .. seealso:: `nvFatbinVersion` + """ + cdef unsigned int major + cdef unsigned int minor + with nogil: + __status__ = nvFatbinVersion(&major, &minor) + check_status(__status__) + return (major, minor) + + + diff --git a/cuda_bindings/tests/test_nvfatbin.py b/cuda_bindings/tests/test_nvfatbin.py new file mode 100644 index 0000000000..627bd300e4 --- /dev/null +++ b/cuda_bindings/tests/test_nvfatbin.py @@ -0,0 +1,89 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + +from cuda.bindings import nvfatbin + +import pytest + +ARCHITECTURES = ["sm_75", "sm_80", "sm_90", "sm_100"] +PTX_VERSIONS = ["6.4", "7.0", "8.5", "8.8"] + +PTX_TEMPLATE = """ +.version {PTX_VERSION} +.target {ARCH} +.address_size 64 + + // .globl _Z6kernelPi + +.visible .entry _Z6kernelPi( + .param .u64 _Z6kernelPi_param_0 +) +{{ + .reg .b32 %r<7>; + .reg .b64 %rd<5>; + + + ld.param.u64 %rd1, [_Z6kernelPi_param_0]; + cvta.to.global.u64 %rd2, %rd1; + mov.u32 %r1, %tid.x; + mov.u32 %r2, %ctaid.x; + mov.u32 %r3, %ntid.x; + mad.lo.s32 %r4, %r2, %r3, %r1; + mul.wide.s32 %rd3, %r4, 4; + add.s64 %rd4, %rd2, %rd3; + ld.global.u32 %r5, [%rd4]; + add.s32 %r6, %r5, 1; + st.global.u32 [%rd4], %r6; + ret; + +}} +""" + +@pytest.fixture(params=ARCHITECTURES) +def arch(request): + return request.param + +@pytest.fixture(params=PTX_VERSIONS) +def ptx_version(request): + return request.param + +@pytest.fixture +def PTX(arch, ptx_version): + return PTX_TEMPLATE.format(PTX_VERSION=ptx_version, ARCH=arch) + +def test_nvfatbin_get_version(): + major, minor = nvfatbin.version() + assert major is not None + assert minor is not None + +def test_nvfatbin_empty_create_and_destroy(): + handle = nvfatbin.create([], 0) + assert handle is not None + nvfatbin.destroy(handle) + +def test_nvfatbin_invalid_input_create(): + with pytest.raises(nvfatbin.nvfatbinError, match="ERROR_UNRECOGNIZED_OPTION"): + nvfatbin.create(["--unsupported_option"], 1) + + +def test_nvfatbin_get_empty(): + handle = nvfatbin.create([], 0) + size = nvfatbin.size(handle) + + buffer = bytearray(size) + nvfatbin.get(handle, buffer) + + nvfatbin.destroy(handle) + + +def test_nvfatbin_add_ptx(PTX, arch): + arch_numeric = arch.split("_")[1] + + handle = nvfatbin.create([], 0) + nvfatbin.add_ptx(handle, PTX.encode(), len(PTX), arch_numeric, "add", f"-arch={arch}") + + buffer = bytearray(nvfatbin.size(handle)) + + nvfatbin.get(handle, buffer) + nvfatbin.destroy(handle) + From 9b1a5590a1b4f34e91cbd78991154707615a7171 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Mon, 12 Jan 2026 20:08:30 -0800 Subject: [PATCH 2/2] add rest of APIs --- .../cuda/bindings/_internal/nvfatbin.pxd | 5 + .../bindings/_internal/nvfatbin_linux.pyx | 65 +++++++++ .../bindings/_internal/nvfatbin_windows.pyx | 53 ++++++++ cuda_bindings/cuda/bindings/cynvfatbin.pxd | 5 + cuda_bindings/cuda/bindings/cynvfatbin.pyx | 14 ++ cuda_bindings/cuda/bindings/nvfatbin.pxd | 5 + cuda_bindings/cuda/bindings/nvfatbin.pyx | 75 ++++++++++ cuda_bindings/tests/test_nvfatbin.py | 128 +++++++++++++++++- 8 files changed, 349 insertions(+), 1 deletion(-) diff --git a/cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd b/cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd index 14a8a6d608..d421e8c21e 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd +++ b/cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd @@ -14,9 +14,14 @@ from ..cynvfatbin cimport * cdef nvFatbinResult _nvFatbinCreate(nvFatbinHandle* handle_indirect, const char** options, size_t optionsCount) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult _nvFatbinDestroy(nvFatbinHandle* handle_indirect) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult _nvFatbinAddPTX(nvFatbinHandle handle, const char* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult _nvFatbinAddCubin(nvFatbinHandle handle, const void* code, size_t size, const char* arch, const char* identifier) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult _nvFatbinAddLTOIR(nvFatbinHandle handle, const void* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult _nvFatbinAddReloc(nvFatbinHandle handle, const void* code, size_t size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult _nvFatbinSize(nvFatbinHandle handle, size_t* size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult _nvFatbinGet(nvFatbinHandle handle, void* buffer) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult _nvFatbinVersion(unsigned int* major, unsigned int* minor) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil + + diff --git a/cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx index 06143d9031..097043f69a 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx @@ -62,6 +62,9 @@ cdef bint __py_nvfatbin_init = False cdef void* __nvFatbinCreate = NULL cdef void* __nvFatbinDestroy = NULL cdef void* __nvFatbinAddPTX = NULL +cdef void* __nvFatbinAddCubin = NULL +cdef void* __nvFatbinAddLTOIR = NULL +cdef void* __nvFatbinAddReloc = NULL cdef void* __nvFatbinSize = NULL cdef void* __nvFatbinGet = NULL cdef void* __nvFatbinVersion = NULL @@ -104,6 +107,27 @@ cdef int _init_nvfatbin() except -1 nogil: handle = load_library() __nvFatbinAddPTX = dlsym(handle, 'nvFatbinAddPTX') + global __nvFatbinAddCubin + __nvFatbinAddCubin = dlsym(RTLD_DEFAULT, 'nvFatbinAddCubin') + if __nvFatbinAddCubin == NULL: + if handle == NULL: + handle = load_library() + __nvFatbinAddCubin = dlsym(handle, 'nvFatbinAddCubin') + + global __nvFatbinAddLTOIR + __nvFatbinAddLTOIR = dlsym(RTLD_DEFAULT, 'nvFatbinAddLTOIR') + if __nvFatbinAddLTOIR == NULL: + if handle == NULL: + handle = load_library() + __nvFatbinAddLTOIR = dlsym(handle, 'nvFatbinAddLTOIR') + + global __nvFatbinAddReloc + __nvFatbinAddReloc = dlsym(RTLD_DEFAULT, 'nvFatbinAddReloc') + if __nvFatbinAddReloc == NULL: + if handle == NULL: + handle = load_library() + __nvFatbinAddReloc = dlsym(handle, 'nvFatbinAddReloc') + global __nvFatbinSize __nvFatbinSize = dlsym(RTLD_DEFAULT, 'nvFatbinSize') if __nvFatbinSize == NULL: @@ -155,6 +179,15 @@ cpdef dict _inspect_function_pointers(): global __nvFatbinAddPTX data["__nvFatbinAddPTX"] = __nvFatbinAddPTX + global __nvFatbinAddCubin + data["__nvFatbinAddCubin"] = __nvFatbinAddCubin + + global __nvFatbinAddLTOIR + data["__nvFatbinAddLTOIR"] = __nvFatbinAddLTOIR + + global __nvFatbinAddReloc + data["__nvFatbinAddReloc"] = __nvFatbinAddReloc + global __nvFatbinSize data["__nvFatbinSize"] = __nvFatbinSize @@ -209,6 +242,36 @@ cdef nvFatbinResult _nvFatbinAddPTX(nvFatbinHandle handle, const char* code, siz handle, code, size, arch, identifier, optionsCmdLine) +cdef nvFatbinResult _nvFatbinAddCubin(nvFatbinHandle handle, const void* code, size_t size, const char* arch, const char* identifier) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinAddCubin + _check_or_init_nvfatbin() + if __nvFatbinAddCubin == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinAddCubin is not found") + return (__nvFatbinAddCubin)( + handle, code, size, arch, identifier) + + +cdef nvFatbinResult _nvFatbinAddLTOIR(nvFatbinHandle handle, const void* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinAddLTOIR + _check_or_init_nvfatbin() + if __nvFatbinAddLTOIR == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinAddLTOIR is not found") + return (__nvFatbinAddLTOIR)( + handle, code, size, arch, identifier, optionsCmdLine) + + +cdef nvFatbinResult _nvFatbinAddReloc(nvFatbinHandle handle, const void* code, size_t size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinAddReloc + _check_or_init_nvfatbin() + if __nvFatbinAddReloc == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinAddReloc is not found") + return (__nvFatbinAddReloc)( + handle, code, size) + + cdef nvFatbinResult _nvFatbinSize(nvFatbinHandle handle, size_t* size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: global __nvFatbinSize _check_or_init_nvfatbin() @@ -240,3 +303,5 @@ cdef nvFatbinResult _nvFatbinVersion(unsigned int* major, unsigned int* minor) e + + diff --git a/cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx index cc1824bf43..a499637f0d 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx @@ -80,6 +80,9 @@ cdef bint __py_nvfatbin_init = False cdef void* __nvFatbinCreate = NULL cdef void* __nvFatbinDestroy = NULL cdef void* __nvFatbinAddPTX = NULL +cdef void* __nvFatbinAddCubin = NULL +cdef void* __nvFatbinAddLTOIR = NULL +cdef void* __nvFatbinAddReloc = NULL cdef void* __nvFatbinSize = NULL cdef void* __nvFatbinGet = NULL cdef void* __nvFatbinVersion = NULL @@ -106,6 +109,15 @@ cdef int _init_nvfatbin() except -1 nogil: global __nvFatbinAddPTX __nvFatbinAddPTX = GetProcAddress(handle, 'nvFatbinAddPTX') + global __nvFatbinAddCubin + __nvFatbinAddCubin = GetProcAddress(handle, 'nvFatbinAddCubin') + + global __nvFatbinAddLTOIR + __nvFatbinAddLTOIR = GetProcAddress(handle, 'nvFatbinAddLTOIR') + + global __nvFatbinAddReloc + __nvFatbinAddReloc = GetProcAddress(handle, 'nvFatbinAddReloc') + global __nvFatbinSize __nvFatbinSize = GetProcAddress(handle, 'nvFatbinSize') @@ -146,6 +158,15 @@ cpdef dict _inspect_function_pointers(): global __nvFatbinAddPTX data["__nvFatbinAddPTX"] = __nvFatbinAddPTX + global __nvFatbinAddCubin + data["__nvFatbinAddCubin"] = __nvFatbinAddCubin + + global __nvFatbinAddLTOIR + data["__nvFatbinAddLTOIR"] = __nvFatbinAddLTOIR + + global __nvFatbinAddReloc + data["__nvFatbinAddReloc"] = __nvFatbinAddReloc + global __nvFatbinSize data["__nvFatbinSize"] = __nvFatbinSize @@ -200,6 +221,36 @@ cdef nvFatbinResult _nvFatbinAddPTX(nvFatbinHandle handle, const char* code, siz handle, code, size, arch, identifier, optionsCmdLine) +cdef nvFatbinResult _nvFatbinAddCubin(nvFatbinHandle handle, const void* code, size_t size, const char* arch, const char* identifier) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinAddCubin + _check_or_init_nvfatbin() + if __nvFatbinAddCubin == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinAddCubin is not found") + return (__nvFatbinAddCubin)( + handle, code, size, arch, identifier) + + +cdef nvFatbinResult _nvFatbinAddLTOIR(nvFatbinHandle handle, const void* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinAddLTOIR + _check_or_init_nvfatbin() + if __nvFatbinAddLTOIR == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinAddLTOIR is not found") + return (__nvFatbinAddLTOIR)( + handle, code, size, arch, identifier, optionsCmdLine) + + +cdef nvFatbinResult _nvFatbinAddReloc(nvFatbinHandle handle, const void* code, size_t size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinAddReloc + _check_or_init_nvfatbin() + if __nvFatbinAddReloc == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinAddReloc is not found") + return (__nvFatbinAddReloc)( + handle, code, size) + + cdef nvFatbinResult _nvFatbinSize(nvFatbinHandle handle, size_t* size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: global __nvFatbinSize _check_or_init_nvfatbin() @@ -231,3 +282,5 @@ cdef nvFatbinResult _nvFatbinVersion(unsigned int* major, unsigned int* minor) e + + diff --git a/cuda_bindings/cuda/bindings/cynvfatbin.pxd b/cuda_bindings/cuda/bindings/cynvfatbin.pxd index 651aa27152..55d8c83c1a 100644 --- a/cuda_bindings/cuda/bindings/cynvfatbin.pxd +++ b/cuda_bindings/cuda/bindings/cynvfatbin.pxd @@ -45,9 +45,14 @@ ctypedef void* nvFatbinHandle 'nvFatbinHandle' cdef nvFatbinResult nvFatbinCreate(nvFatbinHandle* handle_indirect, const char** options, size_t optionsCount) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult nvFatbinDestroy(nvFatbinHandle* handle_indirect) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult nvFatbinAddPTX(nvFatbinHandle handle, const char* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult nvFatbinAddCubin(nvFatbinHandle handle, const void* code, size_t size, const char* arch, const char* identifier) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult nvFatbinAddLTOIR(nvFatbinHandle handle, const void* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult nvFatbinAddReloc(nvFatbinHandle handle, const void* code, size_t size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult nvFatbinSize(nvFatbinHandle handle, size_t* size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult nvFatbinGet(nvFatbinHandle handle, void* buffer) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil cdef nvFatbinResult nvFatbinVersion(unsigned int* major, unsigned int* minor) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil + + diff --git a/cuda_bindings/cuda/bindings/cynvfatbin.pyx b/cuda_bindings/cuda/bindings/cynvfatbin.pyx index 13c9ac2cc1..142f374c1b 100644 --- a/cuda_bindings/cuda/bindings/cynvfatbin.pyx +++ b/cuda_bindings/cuda/bindings/cynvfatbin.pyx @@ -23,6 +23,18 @@ cdef nvFatbinResult nvFatbinAddPTX(nvFatbinHandle handle, const char* code, size return _nvfatbin._nvFatbinAddPTX(handle, code, size, arch, identifier, optionsCmdLine) +cdef nvFatbinResult nvFatbinAddCubin(nvFatbinHandle handle, const void* code, size_t size, const char* arch, const char* identifier) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + return _nvfatbin._nvFatbinAddCubin(handle, code, size, arch, identifier) + + +cdef nvFatbinResult nvFatbinAddLTOIR(nvFatbinHandle handle, const void* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + return _nvfatbin._nvFatbinAddLTOIR(handle, code, size, arch, identifier, optionsCmdLine) + + +cdef nvFatbinResult nvFatbinAddReloc(nvFatbinHandle handle, const void* code, size_t size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + return _nvfatbin._nvFatbinAddReloc(handle, code, size) + + cdef nvFatbinResult nvFatbinSize(nvFatbinHandle handle, size_t* size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: return _nvfatbin._nvFatbinSize(handle, size) @@ -36,3 +48,5 @@ cdef nvFatbinResult nvFatbinVersion(unsigned int* major, unsigned int* minor) ex + + diff --git a/cuda_bindings/cuda/bindings/nvfatbin.pxd b/cuda_bindings/cuda/bindings/nvfatbin.pxd index 1350d0ed52..54c793962b 100644 --- a/cuda_bindings/cuda/bindings/nvfatbin.pxd +++ b/cuda_bindings/cuda/bindings/nvfatbin.pxd @@ -29,9 +29,14 @@ ctypedef nvFatbinResult _Result cpdef intptr_t create(options, size_t options_count) except -1 cpdef add_ptx(intptr_t handle, code, size_t size, arch, identifier, options_cmd_line) +cpdef add_cubin(intptr_t handle, code, size_t size, arch, identifier) +cpdef add_ltoir(intptr_t handle, code, size_t size, arch, identifier, options_cmd_line) +cpdef add_reloc(intptr_t handle, code, size_t size) cpdef size_t size(intptr_t handle) except? 0 cpdef get(intptr_t handle, buffer) cpdef tuple version() + + diff --git a/cuda_bindings/cuda/bindings/nvfatbin.pyx b/cuda_bindings/cuda/bindings/nvfatbin.pyx index dcc669797e..92db285f8a 100644 --- a/cuda_bindings/cuda/bindings/nvfatbin.pyx +++ b/cuda_bindings/cuda/bindings/nvfatbin.pyx @@ -139,6 +139,79 @@ cpdef add_ptx(intptr_t handle, code, size_t size, arch, identifier, options_cmd_ check_status(__status__) +cpdef add_cubin(intptr_t handle, code, size_t size, arch, identifier): + """nvFatbinAddCubin adds a CUDA binary to the fatbinary. + + Args: + handle (intptr_t): nvFatbin handle. + code (bytes): The cubin. + size (size_t): The size of the cubin. + arch (str): The numerical architecture that this cubin is for (the XX of any sm_XX, lto_XX, or compute_XX). + identifier (str): Name of the cubin, useful when extracting the fatbin with tools like cuobjdump. + + .. seealso:: `nvFatbinAddCubin` + """ + cdef void* _code_ = get_buffer_pointer(code, size, readonly=True) + if not isinstance(arch, str): + raise TypeError("arch must be a Python str") + cdef bytes _temp_arch_ = (arch).encode() + cdef char* _arch_ = _temp_arch_ + if not isinstance(identifier, str): + raise TypeError("identifier must be a Python str") + cdef bytes _temp_identifier_ = (identifier).encode() + cdef char* _identifier_ = _temp_identifier_ + with nogil: + __status__ = nvFatbinAddCubin(handle, _code_, size, _arch_, _identifier_) + check_status(__status__) + + +cpdef add_ltoir(intptr_t handle, code, size_t size, arch, identifier, options_cmd_line): + """nvFatbinAddLTOIR adds LTOIR to the fatbinary. + + Args: + handle (intptr_t): nvFatbin handle. + code (bytes): The LTOIR code. + size (size_t): The size of the LTOIR code. + arch (str): The numerical architecture that this LTOIR is for (the XX of any sm_XX, lto_XX, or compute_XX). + identifier (str): Name of the LTOIR, useful when extracting the fatbin with tools like cuobjdump. + options_cmd_line (str): Options used during JIT compilation. + + .. seealso:: `nvFatbinAddLTOIR` + """ + cdef void* _code_ = get_buffer_pointer(code, size, readonly=True) + if not isinstance(arch, str): + raise TypeError("arch must be a Python str") + cdef bytes _temp_arch_ = (arch).encode() + cdef char* _arch_ = _temp_arch_ + if not isinstance(identifier, str): + raise TypeError("identifier must be a Python str") + cdef bytes _temp_identifier_ = (identifier).encode() + cdef char* _identifier_ = _temp_identifier_ + if not isinstance(options_cmd_line, str): + raise TypeError("options_cmd_line must be a Python str") + cdef bytes _temp_options_cmd_line_ = (options_cmd_line).encode() + cdef char* _options_cmd_line_ = _temp_options_cmd_line_ + with nogil: + __status__ = nvFatbinAddLTOIR(handle, _code_, size, _arch_, _identifier_, _options_cmd_line_) + check_status(__status__) + + +cpdef add_reloc(intptr_t handle, code, size_t size): + """nvFatbinAddReloc adds relocatable PTX entries from a host object to the fatbinary. + + Args: + handle (intptr_t): nvFatbin handle. + code (bytes): The host object image. + size (size_t): The size of the host object image code. + + .. seealso:: `nvFatbinAddReloc` + """ + cdef void* _code_ = get_buffer_pointer(code, size, readonly=True) + with nogil: + __status__ = nvFatbinAddReloc(handle, _code_, size) + check_status(__status__) + + cpdef size_t size(intptr_t handle) except? 0: """nvFatbinSize returns the fatbinary's size. @@ -192,3 +265,5 @@ cpdef tuple version(): + + diff --git a/cuda_bindings/tests/test_nvfatbin.py b/cuda_bindings/tests/test_nvfatbin.py index 627bd300e4..3e893852b5 100644 --- a/cuda_bindings/tests/test_nvfatbin.py +++ b/cuda_bindings/tests/test_nvfatbin.py @@ -1,7 +1,9 @@ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE -from cuda.bindings import nvfatbin +import subprocess + +from cuda.bindings import nvfatbin, nvrtc import pytest @@ -39,6 +41,12 @@ }} """ +CODE = """ +int __device__ inc(int x) { + return x + 1; +} +""" + @pytest.fixture(params=ARCHITECTURES) def arch(request): return request.param @@ -51,6 +59,63 @@ def ptx_version(request): def PTX(arch, ptx_version): return PTX_TEMPLATE.format(PTX_VERSION=ptx_version, ARCH=arch) +@pytest.fixture +def CUBIN(arch): + def CHECK_NVRTC(err): + if err != nvrtc.nvrtcResult.NVRTC_SUCCESS: + raise RuntimeError(repr(err)) + + err, program_handle = nvrtc.nvrtcCreateProgram(CODE.encode(), b"", 0, [], []) + CHECK_NVRTC(err) + err = nvrtc.nvrtcCompileProgram(program_handle, 1, [f"-arch={arch}".encode()])[0] + CHECK_NVRTC(err) + err, size = nvrtc.nvrtcGetCUBINSize(program_handle) + CHECK_NVRTC(err) + cubin = b" " * size + (err,) = nvrtc.nvrtcGetCUBIN(program_handle, cubin) + CHECK_NVRTC(err) + (err,) = nvrtc.nvrtcDestroyProgram(program_handle) + CHECK_NVRTC(err) + return cubin + +# create a valid LTOIR input for testing +@pytest.fixture +def LTOIR(arch): + arch = arch.replace("sm", "compute") + def CHECK_NVRTC(err): + if err != nvrtc.nvrtcResult.NVRTC_SUCCESS: + raise RuntimeError(repr(err)) + + empty_cplusplus_kernel = "__global__ void A() {}" + err, program_handle = nvrtc.nvrtcCreateProgram(empty_cplusplus_kernel.encode(), b"", 0, [], []) + CHECK_NVRTC(err) + err = nvrtc.nvrtcCompileProgram(program_handle, 1, [b"-dlto", f"-arch={arch}".encode()])[0] + CHECK_NVRTC(err) + err, size = nvrtc.nvrtcGetLTOIRSize(program_handle) + CHECK_NVRTC(err) + empty_kernel_ltoir = b" " * size + (err,) = nvrtc.nvrtcGetLTOIR(program_handle, empty_kernel_ltoir) + CHECK_NVRTC(err) + (err,) = nvrtc.nvrtcDestroyProgram(program_handle) + CHECK_NVRTC(err) + return empty_kernel_ltoir + +@pytest.fixture +def OBJECT(arch, tmpdir): + if arch == "sm_100": + pytest.skip("sm_100 is not supported on local system.") + + empty_cplusplus_kernel = "__global__ void A() {} int main() { return 0; }" + with open(tmpdir / "object.cu", "w") as f: + f.write(empty_cplusplus_kernel) + + subprocess.check_output(["nvcc", "-arch", arch, "-o", str(tmpdir / "object.o"), str(tmpdir / "object.cu")]) + with open(tmpdir / "object.o", "rb") as f: + object = f.read() + + return object + + def test_nvfatbin_get_version(): major, minor = nvfatbin.version() assert major is not None @@ -87,3 +152,64 @@ def test_nvfatbin_add_ptx(PTX, arch): nvfatbin.get(handle, buffer) nvfatbin.destroy(handle) + +@pytest.mark.parametrize("arch", ["sm_80"], indirect=True) +def test_nvfatbin_add_cubin_ELF_SIZE_MISMATCH(CUBIN, arch): + handle = nvfatbin.create([], 0) + with pytest.raises(nvfatbin.nvfatbinError, match="ERROR_ELF_ARCH_MISMATCH"): + nvfatbin.add_cubin(handle, CUBIN, len(CUBIN), "75", "inc") + + nvfatbin.destroy(handle) + + +def test_nvfatbin_add_cubin(CUBIN, arch): + arch_numeric = arch.split("_")[1] + + handle = nvfatbin.create([], 0) + nvfatbin.add_cubin(handle, CUBIN, len(CUBIN), arch_numeric, "inc") + + buffer = bytearray(nvfatbin.size(handle)) + + nvfatbin.get(handle, buffer) + nvfatbin.destroy(handle) + + +@pytest.mark.parametrize("arch", ["sm_80"], indirect=True) +def test_nvfatbin_add_cubin_ELF_ARCH_MISMATCH(CUBIN, arch): + handle = nvfatbin.create([], 0) + with pytest.raises(nvfatbin.nvfatbinError, match="ERROR_ELF_ARCH_MISMATCH"): + nvfatbin.add_cubin(handle, CUBIN, len(CUBIN), "75", "inc") + + nvfatbin.destroy(handle) + + +def test_nvdfatbin_add_ltoir(LTOIR, arch): + arch_numeric = arch.split("_")[1] + + handle = nvfatbin.create([], 0) + nvfatbin.add_ltoir(handle, LTOIR, len(LTOIR), arch_numeric, "inc", "") + + buffer = bytearray(nvfatbin.size(handle)) + + nvfatbin.get(handle, buffer) + nvfatbin.destroy(handle) + + +@pytest.mark.parametrize("arch", ["sm_80"], indirect=True) +def test_nvdfatbin_add_ltoir_ELF_ARCH_MISMATCH(LTOIR, arch): + pytest.skip() + handle = nvfatbin.create([], 0) + with pytest.raises(nvfatbin.nvfatbinError, match="ERROR_ELF_ARCH_MISMATCH"): + nvfatbin.add_ltoir(handle, LTOIR, len(LTOIR), "75", "inc", "") + + nvfatbin.destroy(handle) + + +def test_nvfatbin_add_reloc(OBJECT): + handle = nvfatbin.create([], 0) + nvfatbin.add_reloc(handle, OBJECT, len(OBJECT)) + + buffer = bytearray(nvfatbin.size(handle)) + + nvfatbin.get(handle, buffer) + nvfatbin.destroy(handle) \ No newline at end of file