From 2d8c99a89e487942ec14ae440d4c68bd250b69c6 Mon Sep 17 00:00:00 2001
From: Michael Wang <isVoid@users.noreply.github.com>
Date: Mon, 12 Jan 2026 13:54:50 -0800
Subject: [PATCH 1/2] initial localized test

---
 .../cuda/bindings/_internal/nvfatbin.pxd      |  22 ++
 .../bindings/_internal/nvfatbin_linux.pyx     | 242 ++++++++++++++++++
 .../bindings/_internal/nvfatbin_windows.pyx   | 233 +++++++++++++++++
 cuda_bindings/cuda/bindings/cynvfatbin.pxd    |  53 ++++
 cuda_bindings/cuda/bindings/cynvfatbin.pyx    |  38 +++
 cuda_bindings/cuda/bindings/nvfatbin.pxd      |  37 +++
 cuda_bindings/cuda/bindings/nvfatbin.pyx      | 194 ++++++++++++++
 cuda_bindings/tests/test_nvfatbin.py          |  89 +++++++
 8 files changed, 908 insertions(+)
 create mode 100644 cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd
 create mode 100644 cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx
 create mode 100644 cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx
 create mode 100644 cuda_bindings/cuda/bindings/cynvfatbin.pxd
 create mode 100644 cuda_bindings/cuda/bindings/cynvfatbin.pyx
 create mode 100644 cuda_bindings/cuda/bindings/nvfatbin.pxd
 create mode 100644 cuda_bindings/cuda/bindings/nvfatbin.pyx
 create mode 100644 cuda_bindings/tests/test_nvfatbin.py

diff --git a/cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd b/cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd
new file mode 100644
index 0000000000..14a8a6d608
--- /dev/null
+++ b/cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd
@@ -0,0 +1,22 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
+#
+# This code was automatically generated with version 13.0.0. Do not modify it directly.
+
+from ..cynvfatbin cimport *
+
+
+###############################################################################
+# Wrapper functions
+###############################################################################
+# Declarations only; this patch defines the bodies in nvfatbin_linux.pyx and nvfatbin_windows.pyx.
+cdef nvFatbinResult _nvFatbinCreate(nvFatbinHandle* handle_indirect, const char** options, size_t optionsCount) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil
+cdef nvFatbinResult _nvFatbinDestroy(nvFatbinHandle* handle_indirect) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil
+cdef nvFatbinResult _nvFatbinAddPTX(nvFatbinHandle handle, const char* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil
+cdef nvFatbinResult _nvFatbinSize(nvFatbinHandle handle, size_t* size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil
+cdef nvFatbinResult _nvFatbinGet(nvFatbinHandle handle, void* buffer) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil
+cdef nvFatbinResult _nvFatbinVersion(unsigned int* major, unsigned int* minor) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil
+
+
+
diff --git a/cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx
new file mode 100644
index 0000000000..06143d9031
--- /dev/null
+++ b/cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx
@@ -0,0 +1,242 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
+#
+# This code was automatically generated with version 13.0.0. Do not modify it directly.
+
+from libc.stdint cimport intptr_t, uintptr_t
+
+import threading
+from .utils import FunctionNotFoundError, NotSupportedError
+
+from cuda.pathfinder import load_nvidia_dynamic_lib
+
+
+###############################################################################
+# Extern
+###############################################################################
+
+# You must 'from .utils import NotSupportedError' before using this template
+# Subset of <dlfcn.h> that this module uses to open libraries and resolve symbols at runtime.
+cdef extern from "<dlfcn.h>" nogil:
+    void* dlopen(const char*, int)
+    char* dlerror()
+    void* dlsym(void*, const char*)
+    int dlclose(void*)
+
+    enum:
+        RTLD_LAZY
+        RTLD_NOW
+        RTLD_GLOBAL
+        RTLD_LOCAL
+
+    const void* RTLD_DEFAULT 'RTLD_DEFAULT'
+
+cdef int get_cuda_version():
+    cdef void* handle = NULL
+    cdef int err, driver_ver = 0
+    # Returns the value written by cuDriverGetVersion; raises if the driver library or the symbol is missing.
+    # Load driver to check version
+    handle = dlopen('libcuda.so.1', RTLD_NOW | RTLD_GLOBAL)
+    if handle == NULL:
+        err_msg = dlerror()
+        raise NotSupportedError(f'CUDA driver is not found ({err_msg.decode()})')
+    cuDriverGetVersion = dlsym(handle, "cuDriverGetVersion")
+    if cuDriverGetVersion == NULL:
+        raise RuntimeError('Did not find cuDriverGetVersion symbol in libcuda.so.1')
+    err = (<int (*)(int*) noexcept nogil>cuDriverGetVersion)(&driver_ver)
+    if err != 0:
+        raise RuntimeError(f'cuDriverGetVersion returned error code {err}')
+
+    return driver_ver  # NOTE(review): the dlopen handle is not passed to dlclose on any path here
+
+
+
+###############################################################################
+# Wrapper init
+###############################################################################
+# One-time initialisation state: a lock, a done-flag, and one resolved pointer per nvFatbin entry point.
+cdef object __symbol_lock = threading.Lock()
+cdef bint __py_nvfatbin_init = False
+
+cdef void* __nvFatbinCreate = NULL
+cdef void* __nvFatbinDestroy = NULL
+cdef void* __nvFatbinAddPTX = NULL
+cdef void* __nvFatbinSize = NULL
+cdef void* __nvFatbinGet = NULL
+cdef void* __nvFatbinVersion = NULL
+
+# Ask cuda.pathfinder for the "nvfatbin" library and return its integer handle as a raw pointer.
+cdef void* load_library() except* with gil:
+    cdef uintptr_t handle = load_nvidia_dynamic_lib("nvfatbin")._handle_uint
+    return <void*>handle
+
+# Resolve every nvFatbin entry point once, under the GIL and __symbol_lock, then set the init flag.
+cdef int _init_nvfatbin() except -1 nogil:
+    global __py_nvfatbin_init
+
+    cdef void* handle = NULL
+
+    with gil, __symbol_lock:
+        # Recheck the flag after obtaining the locks
+        if __py_nvfatbin_init:
+            return 0
+
+        # Load function: dlsym(RTLD_DEFAULT) first; fall back to the library handle, loaded at most once
+        global __nvFatbinCreate
+        __nvFatbinCreate = dlsym(RTLD_DEFAULT, 'nvFatbinCreate')
+        if __nvFatbinCreate == NULL:
+            if handle == NULL:
+                handle = load_library()
+            __nvFatbinCreate = dlsym(handle, 'nvFatbinCreate')
+
+        global __nvFatbinDestroy
+        __nvFatbinDestroy = dlsym(RTLD_DEFAULT, 'nvFatbinDestroy')
+        if __nvFatbinDestroy == NULL:
+            if handle == NULL:
+                handle = load_library()
+            __nvFatbinDestroy = dlsym(handle, 'nvFatbinDestroy')
+
+        global __nvFatbinAddPTX
+        __nvFatbinAddPTX = dlsym(RTLD_DEFAULT, 'nvFatbinAddPTX')
+        if __nvFatbinAddPTX == NULL:
+            if handle == NULL:
+                handle = load_library()
+            __nvFatbinAddPTX = dlsym(handle, 'nvFatbinAddPTX')
+
+        global __nvFatbinSize
+        __nvFatbinSize = dlsym(RTLD_DEFAULT, 'nvFatbinSize')
+        if __nvFatbinSize == NULL:
+            if handle == NULL:
+                handle = load_library()
+            __nvFatbinSize = dlsym(handle, 'nvFatbinSize')
+
+        global __nvFatbinGet
+        __nvFatbinGet = dlsym(RTLD_DEFAULT, 'nvFatbinGet')
+        if __nvFatbinGet == NULL:
+            if handle == NULL:
+                handle = load_library()
+            __nvFatbinGet = dlsym(handle, 'nvFatbinGet')
+
+        global __nvFatbinVersion
+        __nvFatbinVersion = dlsym(RTLD_DEFAULT, 'nvFatbinVersion')
+        if __nvFatbinVersion == NULL:
+            if handle == NULL:
+                handle = load_library()
+            __nvFatbinVersion = dlsym(handle, 'nvFatbinVersion')
+
+        __py_nvfatbin_init = True  # set even if a pointer is still NULL; each wrapper checks its own pointer
+        return 0
+
+# Fast path: return 0 at once when the init flag is already set, otherwise run _init_nvfatbin().
+cdef inline int _check_or_init_nvfatbin() except -1 nogil:
+    if __py_nvfatbin_init:
+        return 0
+
+    return _init_nvfatbin()
+
+cdef dict func_ptrs = None  # cache filled by the first _inspect_function_pointers() call
+
+# Return (and cache in func_ptrs) a dict of "__nvFatbinX" -> resolved address as an int; 0 means unresolved.
+cpdef dict _inspect_function_pointers():
+    global func_ptrs
+    if func_ptrs is not None:
+        return func_ptrs
+
+    _check_or_init_nvfatbin()
+    cdef dict data = {}
+
+    global __nvFatbinCreate
+    data["__nvFatbinCreate"] = <intptr_t>__nvFatbinCreate
+
+    global __nvFatbinDestroy
+    data["__nvFatbinDestroy"] = <intptr_t>__nvFatbinDestroy
+
+    global __nvFatbinAddPTX
+    data["__nvFatbinAddPTX"] = <intptr_t>__nvFatbinAddPTX
+
+    global __nvFatbinSize
+    data["__nvFatbinSize"] = <intptr_t>__nvFatbinSize
+
+    global __nvFatbinGet
+    data["__nvFatbinGet"] = <intptr_t>__nvFatbinGet
+
+    global __nvFatbinVersion
+    data["__nvFatbinVersion"] = <intptr_t>__nvFatbinVersion
+
+    func_ptrs = data
+    return data
+
+# Look up one entry (keys look like "__nvFatbinCreate") in the cached table; unknown names raise KeyError.
+cpdef _inspect_function_pointer(str name):
+    global func_ptrs
+    if func_ptrs is None:
+        func_ptrs = _inspect_function_pointers()
+    return func_ptrs[name]
+
+
+###############################################################################
+# Wrapper functions
+###############################################################################
+# Call through the resolved nvFatbinCreate pointer; raises FunctionNotFoundError if it is still NULL.
+cdef nvFatbinResult _nvFatbinCreate(nvFatbinHandle* handle_indirect, const char** options, size_t optionsCount) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil:
+    global __nvFatbinCreate
+    _check_or_init_nvfatbin()
+    if __nvFatbinCreate == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvFatbinCreate is not found")
+    return (<nvFatbinResult (*)(nvFatbinHandle*, const char**, size_t) noexcept nogil>__nvFatbinCreate)(
+        handle_indirect, options, optionsCount)
+
+# Call through the resolved nvFatbinDestroy pointer; raises FunctionNotFoundError if it is still NULL.
+cdef nvFatbinResult _nvFatbinDestroy(nvFatbinHandle* handle_indirect) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil:
+    global __nvFatbinDestroy
+    _check_or_init_nvfatbin()
+    if __nvFatbinDestroy == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvFatbinDestroy is not found")
+    return (<nvFatbinResult (*)(nvFatbinHandle*) noexcept nogil>__nvFatbinDestroy)(
+        handle_indirect)
+
+# Call through the resolved nvFatbinAddPTX pointer; raises FunctionNotFoundError if it is still NULL.
+cdef nvFatbinResult _nvFatbinAddPTX(nvFatbinHandle handle, const char* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil:
+    global __nvFatbinAddPTX
+    _check_or_init_nvfatbin()
+    if __nvFatbinAddPTX == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvFatbinAddPTX is not found")
+    return (<nvFatbinResult (*)(nvFatbinHandle, const char*, size_t, const char*, const char*, const char*) noexcept nogil>__nvFatbinAddPTX)(
+        handle, code, size, arch, identifier, optionsCmdLine)
+
+# Call through the resolved nvFatbinSize pointer; raises FunctionNotFoundError if it is still NULL.
+cdef nvFatbinResult _nvFatbinSize(nvFatbinHandle handle, size_t* size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil:
+    global __nvFatbinSize
+    _check_or_init_nvfatbin()
+    if __nvFatbinSize == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvFatbinSize is not found")
+    return (<nvFatbinResult (*)(nvFatbinHandle, size_t*) noexcept nogil>__nvFatbinSize)(
+        handle, size)
+
+# Call through the resolved nvFatbinGet pointer; raises FunctionNotFoundError if it is still NULL.
+cdef nvFatbinResult _nvFatbinGet(nvFatbinHandle handle, void* buffer) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil:
+    global __nvFatbinGet
+    _check_or_init_nvfatbin()
+    if __nvFatbinGet == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvFatbinGet is not found")
+    return (<nvFatbinResult (*)(nvFatbinHandle, void*) noexcept nogil>__nvFatbinGet)(
+        handle, buffer)
+
+# Call through the resolved nvFatbinVersion pointer; raises FunctionNotFoundError if it is still NULL.
+cdef nvFatbinResult _nvFatbinVersion(unsigned int* major, unsigned int* minor) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil:
+    global __nvFatbinVersion
+    _check_or_init_nvfatbin()
+    if __nvFatbinVersion == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvFatbinVersion is not found")
+    return (<nvFatbinResult (*)(unsigned int*, unsigned int*) noexcept nogil>__nvFatbinVersion)(
+        major, minor)
+
+
+
diff --git a/cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx
new file mode 100644
index 0000000000..cc1824bf43
--- /dev/null
+++ b/cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx
@@ -0,0 +1,233 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
+#
+# This code was automatically generated with version 13.0.0. Do not modify it directly.
+
+from libc.stdint cimport intptr_t
+
+import threading
+from .utils import FunctionNotFoundError, NotSupportedError
+
+from cuda.pathfinder import load_nvidia_dynamic_lib
+
+from libc.stddef cimport wchar_t
+from libc.stdint cimport uintptr_t
+from cpython cimport PyUnicode_AsWideCharString, PyMem_Free
+
+# You must 'from .utils import NotSupportedError' before using this template
+# Win32 types, search flags and the two functions (LoadLibraryExW, GetProcAddress) used for runtime loading.
+cdef extern from "windows.h" nogil:
+    ctypedef void* HMODULE
+    ctypedef void* HANDLE
+    ctypedef void* FARPROC
+    ctypedef unsigned long DWORD
+    ctypedef const wchar_t *LPCWSTR
+    ctypedef const char *LPCSTR
+
+    cdef DWORD LOAD_LIBRARY_SEARCH_SYSTEM32 = 0x00000800
+    cdef DWORD LOAD_LIBRARY_SEARCH_DEFAULT_DIRS = 0x00001000
+    cdef DWORD LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR = 0x00000100
+
+    HMODULE _LoadLibraryExW "LoadLibraryExW"(
+        LPCWSTR lpLibFileName,
+        HANDLE hFile,
+        DWORD dwFlags
+    )
+
+    FARPROC _GetProcAddress "GetProcAddress"(HMODULE hModule, LPCSTR lpProcName)
+
+cdef inline uintptr_t LoadLibraryExW(str path, HANDLE hFile, DWORD dwFlags):
+    cdef uintptr_t result  # module handle as an integer; get_cuda_version() treats 0 as "not found"
+    cdef wchar_t* wpath = PyUnicode_AsWideCharString(path, NULL)  # wide-char copy, released below
+    with nogil:
+        result = <uintptr_t>_LoadLibraryExW(
+            wpath,
+            hFile,
+            dwFlags
+        )
+    PyMem_Free(wpath)
+    return result
+
+cdef inline void *GetProcAddress(uintptr_t hModule, const char* lpProcName) nogil:
+    return _GetProcAddress(<HMODULE>hModule, lpProcName)  # cast the integer handle back to HMODULE
+
+cdef int get_cuda_version():
+    cdef int err, driver_ver = 0
+    # Returns the value written by cuDriverGetVersion; raises if the driver DLL or the symbol is missing.
+    # Load driver to check version
+    handle = LoadLibraryExW("nvcuda.dll", NULL, LOAD_LIBRARY_SEARCH_SYSTEM32)
+    if handle == 0:
+        raise NotSupportedError('CUDA driver is not found')
+    cuDriverGetVersion = GetProcAddress(handle, 'cuDriverGetVersion')
+    if cuDriverGetVersion == NULL:
+        raise RuntimeError('Did not find cuDriverGetVersion symbol in nvcuda.dll')
+    err = (<int (*)(int*) noexcept nogil>cuDriverGetVersion)(&driver_ver)
+    if err != 0:
+        raise RuntimeError(f'cuDriverGetVersion returned error code {err}')
+
+    return driver_ver
+
+
+
+###############################################################################
+# Wrapper init
+###############################################################################
+# One-time initialisation state: a lock, a done-flag, and one resolved pointer per nvFatbin entry point.
+cdef object __symbol_lock = threading.Lock()
+cdef bint __py_nvfatbin_init = False
+
+cdef void* __nvFatbinCreate = NULL
+cdef void* __nvFatbinDestroy = NULL
+cdef void* __nvFatbinAddPTX = NULL
+cdef void* __nvFatbinSize = NULL
+cdef void* __nvFatbinGet = NULL
+cdef void* __nvFatbinVersion = NULL
+
+# Load "nvfatbin" through cuda.pathfinder and resolve every entry point from it, once, under the lock.
+cdef int _init_nvfatbin() except -1 nogil:
+    global __py_nvfatbin_init
+
+    with gil, __symbol_lock:
+        # Recheck the flag after obtaining the locks
+        if __py_nvfatbin_init:
+            return 0
+
+        # Load library
+        handle = load_nvidia_dynamic_lib("nvfatbin")._handle_uint
+
+        # Load function
+        global __nvFatbinCreate
+        __nvFatbinCreate = GetProcAddress(handle, 'nvFatbinCreate')
+
+        global __nvFatbinDestroy
+        __nvFatbinDestroy = GetProcAddress(handle, 'nvFatbinDestroy')
+
+        global __nvFatbinAddPTX
+        __nvFatbinAddPTX = GetProcAddress(handle, 'nvFatbinAddPTX')
+
+        global __nvFatbinSize
+        __nvFatbinSize = GetProcAddress(handle, 'nvFatbinSize')
+
+        global __nvFatbinGet
+        __nvFatbinGet = GetProcAddress(handle, 'nvFatbinGet')
+
+        global __nvFatbinVersion
+        __nvFatbinVersion = GetProcAddress(handle, 'nvFatbinVersion')
+
+        __py_nvfatbin_init = True  # set even if a pointer is still NULL; each wrapper checks its own pointer
+        return 0
+
+# Fast path: return 0 at once when the init flag is already set, otherwise run _init_nvfatbin().
+cdef inline int _check_or_init_nvfatbin() except -1 nogil:
+    if __py_nvfatbin_init:
+        return 0
+
+    return _init_nvfatbin()
+
+
+cdef dict func_ptrs = None  # cache filled by the first _inspect_function_pointers() call
+
+# Return (and cache in func_ptrs) a dict of "__nvFatbinX" -> resolved address as an int; 0 means unresolved.
+cpdef dict _inspect_function_pointers():
+    global func_ptrs
+    if func_ptrs is not None:
+        return func_ptrs
+
+    _check_or_init_nvfatbin()
+    cdef dict data = {}
+
+    global __nvFatbinCreate
+    data["__nvFatbinCreate"] = <intptr_t>__nvFatbinCreate
+
+    global __nvFatbinDestroy
+    data["__nvFatbinDestroy"] = <intptr_t>__nvFatbinDestroy
+
+    global __nvFatbinAddPTX
+    data["__nvFatbinAddPTX"] = <intptr_t>__nvFatbinAddPTX
+
+    global __nvFatbinSize
+    data["__nvFatbinSize"] = <intptr_t>__nvFatbinSize
+
+    global __nvFatbinGet
+    data["__nvFatbinGet"] = <intptr_t>__nvFatbinGet
+
+    global __nvFatbinVersion
+    data["__nvFatbinVersion"] = <intptr_t>__nvFatbinVersion
+
+    func_ptrs = data
+    return data
+
+# Look up one entry (keys look like "__nvFatbinCreate") in the cached table; unknown names raise KeyError.
+cpdef _inspect_function_pointer(str name):
+    global func_ptrs
+    if func_ptrs is None:
+        func_ptrs = _inspect_function_pointers()
+    return func_ptrs[name]
+
+
+###############################################################################
+# Wrapper functions
+###############################################################################
+# Call through the resolved nvFatbinCreate pointer; raises FunctionNotFoundError if it is still NULL.
+cdef nvFatbinResult _nvFatbinCreate(nvFatbinHandle* handle_indirect, const char** options, size_t optionsCount) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil:
+    global __nvFatbinCreate
+    _check_or_init_nvfatbin()
+    if __nvFatbinCreate == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvFatbinCreate is not found")
+    return (<nvFatbinResult (*)(nvFatbinHandle*, const char**, size_t) noexcept nogil>__nvFatbinCreate)(
+        handle_indirect, options, optionsCount)
+
+# Call through the resolved nvFatbinDestroy pointer; raises FunctionNotFoundError if it is still NULL.
+cdef nvFatbinResult _nvFatbinDestroy(nvFatbinHandle* handle_indirect) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil:
+    global __nvFatbinDestroy
+    _check_or_init_nvfatbin()
+    if __nvFatbinDestroy == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvFatbinDestroy is not found")
+    return (<nvFatbinResult (*)(nvFatbinHandle*) noexcept nogil>__nvFatbinDestroy)(
+        handle_indirect)
+
+# Call through the resolved nvFatbinAddPTX pointer; raises FunctionNotFoundError if it is still NULL.
+cdef nvFatbinResult _nvFatbinAddPTX(nvFatbinHandle handle, const char* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil:
+    global __nvFatbinAddPTX
+    _check_or_init_nvfatbin()
+    if __nvFatbinAddPTX == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvFatbinAddPTX is not found")
+    return (<nvFatbinResult (*)(nvFatbinHandle, const char*, size_t, const char*, const char*, const char*) noexcept nogil>__nvFatbinAddPTX)(
+        handle, code, size, arch, identifier, optionsCmdLine)
+
+# Call through the resolved nvFatbinSize pointer; raises FunctionNotFoundError if it is still NULL.
+cdef nvFatbinResult _nvFatbinSize(nvFatbinHandle handle, size_t* size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil:
+    global __nvFatbinSize
+    _check_or_init_nvfatbin()
+    if __nvFatbinSize == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvFatbinSize is not found")
+    return (<nvFatbinResult (*)(nvFatbinHandle, size_t*) noexcept nogil>__nvFatbinSize)(
+        handle, size)
+
+# Call through the resolved nvFatbinGet pointer; raises FunctionNotFoundError if it is still NULL.
+cdef nvFatbinResult _nvFatbinGet(nvFatbinHandle handle, void* buffer) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil:
+    global __nvFatbinGet
+    _check_or_init_nvfatbin()
+    if __nvFatbinGet == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvFatbinGet is not found")
+    return (<nvFatbinResult (*)(nvFatbinHandle, void*) noexcept nogil>__nvFatbinGet)(
+        handle, buffer)
+
+# Call through the resolved nvFatbinVersion pointer; raises FunctionNotFoundError if it is still NULL.
+cdef nvFatbinResult _nvFatbinVersion(unsigned int* major, unsigned int* minor) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil:
+    global __nvFatbinVersion
+    _check_or_init_nvfatbin()
+    if __nvFatbinVersion == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvFatbinVersion is not found")
+    return (<nvFatbinResult (*)(unsigned int*, unsigned int*) noexcept nogil>__nvFatbinVersion)(
+        major, minor)
+
+
+
diff --git a/cuda_bindings/cuda/bindings/cynvfatbin.pxd b/cuda_bindings/cuda/bindings/cynvfatbin.pxd
new file mode 100644
index 0000000000..651aa27152
--- /dev/null
+++ b/cuda_bindings/cuda/bindings/cynvfatbin.pxd
@@ -0,0 +1,53 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
+#
+# This code was automatically generated with version 13.0.0. Do not modify it directly.
+
+from libc.stdint cimport intptr_t, uint32_t
+
+
+###############################################################################
+# Types (structs, enums, ...)
+###############################################################################
+
+# enums (only NVFATBIN_SUCCESS and the trailing sentinel carry explicit values here)
+ctypedef enum nvFatbinResult "nvFatbinResult":
+    NVFATBIN_SUCCESS "NVFATBIN_SUCCESS" = 0
+    NVFATBIN_ERROR_INTERNAL "NVFATBIN_ERROR_INTERNAL"
+    NVFATBIN_ERROR_ELF_ARCH_MISMATCH "NVFATBIN_ERROR_ELF_ARCH_MISMATCH"
+    NVFATBIN_ERROR_ELF_SIZE_MISMATCH "NVFATBIN_ERROR_ELF_SIZE_MISMATCH"
+    NVFATBIN_ERROR_MISSING_PTX_VERSION "NVFATBIN_ERROR_MISSING_PTX_VERSION"
+    NVFATBIN_ERROR_NULL_POINTER "NVFATBIN_ERROR_NULL_POINTER"
+    NVFATBIN_ERROR_COMPRESSION_FAILED "NVFATBIN_ERROR_COMPRESSION_FAILED"
+    NVFATBIN_ERROR_COMPRESSED_SIZE_EXCEEDED "NVFATBIN_ERROR_COMPRESSED_SIZE_EXCEEDED"
+    NVFATBIN_ERROR_UNRECOGNIZED_OPTION "NVFATBIN_ERROR_UNRECOGNIZED_OPTION"
+    NVFATBIN_ERROR_INVALID_ARCH "NVFATBIN_ERROR_INVALID_ARCH"
+    NVFATBIN_ERROR_INVALID_NVVM "NVFATBIN_ERROR_INVALID_NVVM"
+    NVFATBIN_ERROR_EMPTY_INPUT "NVFATBIN_ERROR_EMPTY_INPUT"
+    NVFATBIN_ERROR_MISSING_PTX_ARCH "NVFATBIN_ERROR_MISSING_PTX_ARCH"
+    NVFATBIN_ERROR_PTX_ARCH_MISMATCH "NVFATBIN_ERROR_PTX_ARCH_MISMATCH"
+    NVFATBIN_ERROR_MISSING_FATBIN "NVFATBIN_ERROR_MISSING_FATBIN"
+    NVFATBIN_ERROR_INVALID_INDEX "NVFATBIN_ERROR_INVALID_INDEX"
+    NVFATBIN_ERROR_IDENTIFIER_REUSE "NVFATBIN_ERROR_IDENTIFIER_REUSE"
+    NVFATBIN_ERROR_INTERNAL_PTX_OPTION "NVFATBIN_ERROR_INTERNAL_PTX_OPTION"
+    _NVFATBINRESULT_INTERNAL_LOADING_ERROR "_NVFATBINRESULT_INTERNAL_LOADING_ERROR" = -42  # 'except?' sentinel of the wrappers
+
+
+# types (the handle is declared as a plain void*, i.e. opaque to these bindings)
+ctypedef void* nvFatbinHandle 'nvFatbinHandle'
+
+
+###############################################################################
+# Functions
+###############################################################################
+# C-level API for cimport users; the bodies in cynvfatbin.pyx forward to the _internal.nvfatbin wrappers.
+cdef nvFatbinResult nvFatbinCreate(nvFatbinHandle* handle_indirect, const char** options, size_t optionsCount) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil
+cdef nvFatbinResult nvFatbinDestroy(nvFatbinHandle* handle_indirect) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil
+cdef nvFatbinResult nvFatbinAddPTX(nvFatbinHandle handle, const char* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil
+cdef nvFatbinResult nvFatbinSize(nvFatbinHandle handle, size_t* size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil
+cdef nvFatbinResult nvFatbinGet(nvFatbinHandle handle, void* buffer) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil
+cdef nvFatbinResult nvFatbinVersion(unsigned int* major, unsigned int* minor) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil
+
+
+
diff --git a/cuda_bindings/cuda/bindings/cynvfatbin.pyx b/cuda_bindings/cuda/bindings/cynvfatbin.pyx
new file mode 100644
index 0000000000..13c9ac2cc1
--- /dev/null
+++ b/cuda_bindings/cuda/bindings/cynvfatbin.pyx
@@ -0,0 +1,38 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
+#
+# This code was automatically generated with version 13.0.0. Do not modify it directly.
+
+from ._internal cimport nvfatbin as _nvfatbin
+
+
+###############################################################################
+# Wrapper functions
+###############################################################################
+# Thin forwarder: all arguments go unchanged to _internal.nvfatbin._nvFatbinCreate.
+cdef nvFatbinResult nvFatbinCreate(nvFatbinHandle* handle_indirect, const char** options, size_t optionsCount) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil:
+    return _nvfatbin._nvFatbinCreate(handle_indirect, options, optionsCount)
+
+# Thin forwarder: all arguments go unchanged to _internal.nvfatbin._nvFatbinDestroy.
+cdef nvFatbinResult nvFatbinDestroy(nvFatbinHandle* handle_indirect) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil:
+    return _nvfatbin._nvFatbinDestroy(handle_indirect)
+
+# Thin forwarder: all arguments go unchanged to _internal.nvfatbin._nvFatbinAddPTX.
+cdef nvFatbinResult nvFatbinAddPTX(nvFatbinHandle handle, const char* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil:
+    return _nvfatbin._nvFatbinAddPTX(handle, code, size, arch, identifier, optionsCmdLine)
+
+# Thin forwarder: all arguments go unchanged to _internal.nvfatbin._nvFatbinSize.
+cdef nvFatbinResult nvFatbinSize(nvFatbinHandle handle, size_t* size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil:
+    return _nvfatbin._nvFatbinSize(handle, size)
+
+# Thin forwarder: all arguments go unchanged to _internal.nvfatbin._nvFatbinGet.
+cdef nvFatbinResult nvFatbinGet(nvFatbinHandle handle, void* buffer) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil:
+    return _nvfatbin._nvFatbinGet(handle, buffer)
+
+# Thin forwarder: all arguments go unchanged to _internal.nvfatbin._nvFatbinVersion.
+cdef nvFatbinResult nvFatbinVersion(unsigned int* major, unsigned int* minor) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil:
+    return _nvfatbin._nvFatbinVersion(major, minor)
+
+
+
diff --git a/cuda_bindings/cuda/bindings/nvfatbin.pxd b/cuda_bindings/cuda/bindings/nvfatbin.pxd
new file mode 100644
index 0000000000..1350d0ed52
--- /dev/null
+++ b/cuda_bindings/cuda/bindings/nvfatbin.pxd
@@ -0,0 +1,37 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
+#
+# This code was automatically generated with version 13.0.0. Do not modify it directly.
+
+from libc.stdint cimport intptr_t, uint32_t
+
+from .cynvfatbin cimport *
+
+
+###############################################################################
+# Types
+###############################################################################
+# Short alias used by nvfatbin.pyx when casting Python-side intptr_t values back to a handle.
+ctypedef nvFatbinHandle Handle
+
+
+###############################################################################
+# Enum
+###############################################################################
+# C-level alias of the status enum; the Python-visible IntEnum is ``Result`` in nvfatbin.pyx.
+ctypedef nvFatbinResult _Result
+
+
+###############################################################################
+# Functions
+###############################################################################
+# NOTE(review): nvfatbin.pyx also defines cpdef destroy(), which is not declared here — confirm intent.
+cpdef intptr_t create(options, size_t options_count) except -1
+cpdef add_ptx(intptr_t handle, code, size_t size, arch, identifier, options_cmd_line)
+cpdef size_t size(intptr_t handle) except? 0
+cpdef get(intptr_t handle, buffer)
+cpdef tuple version()
+
+
+
diff --git a/cuda_bindings/cuda/bindings/nvfatbin.pyx b/cuda_bindings/cuda/bindings/nvfatbin.pyx
new file mode 100644
index 0000000000..dcc669797e
--- /dev/null
+++ b/cuda_bindings/cuda/bindings/nvfatbin.pyx
@@ -0,0 +1,194 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
+#
+# This code was automatically generated with version 13.0.0. Do not modify it directly.
+
+cimport cython  # NOQA
+
+from ._internal.utils cimport (get_resource_ptr, get_nested_resource_ptr, nested_resource, nullable_unique_ptr,
+                               get_buffer_pointer, get_resource_ptrs)
+
+from enum import IntEnum as _IntEnum
+from libcpp.vector cimport vector
+
+
+###############################################################################
+# Enum
+###############################################################################
+
+class Result(_IntEnum):
+    """Python ``IntEnum`` mirror of the C status codes. See `nvFatbinResult`."""
+    SUCCESS = NVFATBIN_SUCCESS
+    ERROR_INTERNAL = NVFATBIN_ERROR_INTERNAL
+    ERROR_ELF_ARCH_MISMATCH = NVFATBIN_ERROR_ELF_ARCH_MISMATCH
+    ERROR_ELF_SIZE_MISMATCH = NVFATBIN_ERROR_ELF_SIZE_MISMATCH
+    ERROR_MISSING_PTX_VERSION = NVFATBIN_ERROR_MISSING_PTX_VERSION
+    ERROR_NULL_POINTER = NVFATBIN_ERROR_NULL_POINTER
+    ERROR_COMPRESSION_FAILED = NVFATBIN_ERROR_COMPRESSION_FAILED
+    ERROR_COMPRESSED_SIZE_EXCEEDED = NVFATBIN_ERROR_COMPRESSED_SIZE_EXCEEDED
+    ERROR_UNRECOGNIZED_OPTION = NVFATBIN_ERROR_UNRECOGNIZED_OPTION
+    ERROR_INVALID_ARCH = NVFATBIN_ERROR_INVALID_ARCH
+    ERROR_INVALID_NVVM = NVFATBIN_ERROR_INVALID_NVVM
+    ERROR_EMPTY_INPUT = NVFATBIN_ERROR_EMPTY_INPUT
+    ERROR_MISSING_PTX_ARCH = NVFATBIN_ERROR_MISSING_PTX_ARCH
+    ERROR_PTX_ARCH_MISMATCH = NVFATBIN_ERROR_PTX_ARCH_MISMATCH
+    ERROR_MISSING_FATBIN = NVFATBIN_ERROR_MISSING_FATBIN
+    ERROR_INVALID_INDEX = NVFATBIN_ERROR_INVALID_INDEX
+    ERROR_IDENTIFIER_REUSE = NVFATBIN_ERROR_IDENTIFIER_REUSE
+    ERROR_INTERNAL_PTX_OPTION = NVFATBIN_ERROR_INTERNAL_PTX_OPTION  # the -42 loading sentinel is left out
+
+
+###############################################################################
+# Error handling
+###############################################################################
+
+class nvfatbinError(Exception):
+    """Raised by ``check_status`` for a non-zero ``nvFatbinResult``; ``status`` keeps the raw code."""
+    def __init__(self, status):
+        self.status = status
+        # A code without a ``Result`` member (e.g. from a newer library) must not raise ValueError here.
+        name = Result(status).name if status in tuple(Result) else "UNKNOWN_ERROR"
+        super(nvfatbinError, self).__init__(f"{name} ({int(status)})")
+
+    def __reduce__(self):
+        return (type(self), (self.status,))  # pickles as nvfatbinError(status)
+
+# Raise nvfatbinError (taking the GIL first) for any non-zero status; otherwise return the status.
+@cython.profile(False)
+cdef int check_status(int status) except 1 nogil:
+    if status != 0:
+        with gil:
+            raise nvfatbinError(status)
+    return status
+
+
+###############################################################################
+# Wrapper functions
+###############################################################################
+
+cpdef destroy(intptr_t handle):
+    """nvFatbinDestroy frees the memory associated with the given handle.
+
+    Args:
+        handle (intptr_t): nvFatbin handle, as returned by :func:`create`.
+
+    .. seealso:: `nvFatbinDestroy`
+    """
+    cdef Handle h = <Handle>handle  # local copy, because nvFatbinDestroy takes the handle by address
+    with nogil:
+        status = nvFatbinDestroy(&h)
+    check_status(status)  # raises nvfatbinError on a non-zero status
+
+
+cpdef intptr_t create(options, size_t options_count) except -1:
+    """nvFatbinCreate creates a new handle.
+
+    Args:
+        options (object): An array of strings, each containing a single option. It can be:
+
+            - an :class:`int` as the pointer address to the nested sequence, or
+            - a Python sequence of :class:`int`\\s, each of which is a pointer address
+              to a valid sequence of 'char', or
+            - a nested Python sequence of ``str``.
+
+        options_count (size_t): Number of options. It is forwarded as given, not checked against ``options``.
+
+    Returns:
+        intptr_t: Address of nvFatbin handle; pass it to :func:`destroy` when done.
+
+    .. seealso:: `nvFatbinCreate`
+    """
+    cdef nested_resource[ char ] _options_
+    get_nested_resource_ptr[char](_options_, options, <char*>NULL)
+    cdef Handle handle_indirect
+    with nogil:
+        __status__ = nvFatbinCreate(&handle_indirect, <const char**>(_options_.ptrs.data()), options_count)
+    check_status(__status__)  # raises nvfatbinError on a non-zero status
+    return <intptr_t>handle_indirect
+
+
+cpdef add_ptx(intptr_t handle, code, size_t size, arch, identifier, options_cmd_line):
+    """nvFatbinAddPTX adds PTX to the fatbinary.
+
+    Args:
+        handle (intptr_t): nvFatbin handle.
+        code (bytes): The PTX code.
+        size (size_t): The size of the PTX code.
+        arch (str): The numerical architecture that this PTX is for (the XX of any sm_XX, lto_XX, or compute_XX).
+        identifier (str): Name of the PTX, useful when extracting the fatbin with tools like cuobjdump.
+        options_cmd_line (str): Options used during JIT compilation.
+
+    .. seealso:: `nvFatbinAddPTX`
+    """
+    cdef void* _code_ = get_buffer_pointer(code, size, readonly=True)
+    if not isinstance(arch, str):
+        raise TypeError("arch must be a Python str")
+    cdef bytes _temp_arch_ = (<str>arch).encode()  # keep the bytes object referenced while _arch_ points into it
+    cdef char* _arch_ = _temp_arch_
+    if not isinstance(identifier, str):
+        raise TypeError("identifier must be a Python str")
+    cdef bytes _temp_identifier_ = (<str>identifier).encode()  # same lifetime pattern as _temp_arch_
+    cdef char* _identifier_ = _temp_identifier_
+    if not isinstance(options_cmd_line, str):
+        raise TypeError("options_cmd_line must be a Python str")
+    cdef bytes _temp_options_cmd_line_ = (<str>options_cmd_line).encode()  # same lifetime pattern as _temp_arch_
+    cdef char* _options_cmd_line_ = _temp_options_cmd_line_
+    with nogil:
+        __status__ = nvFatbinAddPTX(<Handle>handle, <const char*>_code_, size, <const char*>_arch_, <const char*>_identifier_, <const char*>_options_cmd_line_)
+    check_status(__status__)  # raises nvfatbinError on a non-zero status
+
+
+cpdef size_t size(intptr_t handle) except? 0:
+    """nvFatbinSize returns the fatbinary's size.
+
+    Args:
+        handle (intptr_t): nvFatbin handle.
+
+    Returns:
+        size_t: The fatbinary's size.
+
+    .. seealso:: `nvFatbinSize`
+    """
+    cdef size_t size  # written by nvFatbinSize through the pointer passed below
+    with nogil:
+        __status__ = nvFatbinSize(<Handle>handle, &size)
+    check_status(__status__)  # raises nvfatbinError on a non-zero status
+    return size
+
+
+cpdef get(intptr_t handle, buffer):
+    """nvFatbinGet returns the completed fatbinary.
+
+    Args:
+        handle (intptr_t): nvFatbin handle.
+        buffer (object): writable buffer (e.g. ``bytearray``) that receives the fatbinary; see :func:`size`.
+
+    .. seealso:: `nvFatbinGet`
+    """
+    cdef void* _buffer_ = get_buffer_pointer(buffer, -1, readonly=False)  # readonly=False: it is written into
+    with nogil:
+        __status__ = nvFatbinGet(<Handle>handle, <void*>_buffer_)
+    check_status(__status__)  # raises nvfatbinError on a non-zero status
+
+
+cpdef tuple version():
+    """nvFatbinVersion returns the current version of nvFatbin.
+
+    Returns:
+        A 2-tuple containing:
+
+        - unsigned int: The major version.
+        - unsigned int: The minor version.
+
+    .. seealso:: `nvFatbinVersion`
+    """
+    cdef unsigned int major  # both outputs are filled in by nvFatbinVersion through the pointers below
+    cdef unsigned int minor
+    with nogil:
+        __status__ = nvFatbinVersion(&major, &minor)
+    check_status(__status__)  # raises nvfatbinError on a non-zero status
+    return (major, minor)
+
+
+
diff --git a/cuda_bindings/tests/test_nvfatbin.py b/cuda_bindings/tests/test_nvfatbin.py
new file mode 100644
index 0000000000..627bd300e4
--- /dev/null
+++ b/cuda_bindings/tests/test_nvfatbin.py
@@ -0,0 +1,89 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
+
+from cuda.bindings import nvfatbin
+
+import pytest
+
+ARCHITECTURES = ["sm_75", "sm_80", "sm_90", "sm_100"]
+PTX_VERSIONS = ["6.4", "7.0", "8.5", "8.8"]
+
+PTX_TEMPLATE = """
+.version {PTX_VERSION}
+.target {ARCH}
+.address_size 64
+
+        // .globl       _Z6kernelPi
+
+.visible .entry _Z6kernelPi(
+        .param .u64 _Z6kernelPi_param_0
+)
+{{
+        .reg .b32       %r<7>;
+        .reg .b64       %rd<5>;
+
+
+        ld.param.u64    %rd1, [_Z6kernelPi_param_0];
+        cvta.to.global.u64      %rd2, %rd1;
+        mov.u32         %r1, %tid.x;
+        mov.u32         %r2, %ctaid.x;
+        mov.u32         %r3, %ntid.x;
+        mad.lo.s32      %r4, %r2, %r3, %r1;
+        mul.wide.s32    %rd3, %r4, 4;
+        add.s64         %rd4, %rd2, %rd3;
+        ld.global.u32   %r5, [%rd4];
+        add.s32         %r6, %r5, 1;
+        st.global.u32   [%rd4], %r6;
+        ret;
+
+}}
+"""
+
+@pytest.fixture(params=ARCHITECTURES)
+def arch(request):
+    return request.param
+
+@pytest.fixture(params=PTX_VERSIONS)
+def ptx_version(request):
+    return request.param
+
+@pytest.fixture
+def PTX(arch, ptx_version):
+    return PTX_TEMPLATE.format(PTX_VERSION=ptx_version, ARCH=arch)
+
+def test_nvfatbin_get_version():
+    major, minor = nvfatbin.version()
+    assert major is not None
+    assert minor is not None
+
+def test_nvfatbin_empty_create_and_destroy():
+    handle = nvfatbin.create([], 0)
+    assert handle is not None
+    nvfatbin.destroy(handle)
+
+def test_nvfatbin_invalid_input_create():
+    with pytest.raises(nvfatbin.nvfatbinError, match="ERROR_UNRECOGNIZED_OPTION"):
+        nvfatbin.create(["--unsupported_option"], 1)
+
+
+def test_nvfatbin_get_empty():
+    handle = nvfatbin.create([], 0)
+    try:  # an empty fatbinary must still report a size and be retrievable
+        size = nvfatbin.size(handle)
+        buffer = bytearray(size)
+        nvfatbin.get(handle, buffer)
+    finally:  # release the handle even if size()/get() raises
+        nvfatbin.destroy(handle)
+
+
+def test_nvfatbin_add_ptx(PTX, arch):
+    arch_numeric = arch.split("_")[1]
+
+    handle = nvfatbin.create([], 0)
+    nvfatbin.add_ptx(handle, PTX.encode(), len(PTX), arch_numeric, "add", f"-arch={arch}")
+
+    buffer = bytearray(nvfatbin.size(handle))
+
+    nvfatbin.get(handle, buffer)
+    nvfatbin.destroy(handle)
+

From 9b1a5590a1b4f34e91cbd78991154707615a7171 Mon Sep 17 00:00:00 2001
From: Michael Wang <isVoid@users.noreply.github.com>
Date: Mon, 12 Jan 2026 20:08:30 -0800
Subject: [PATCH 2/2] add rest of APIs

---
 .../cuda/bindings/_internal/nvfatbin.pxd      |   5 +
 .../bindings/_internal/nvfatbin_linux.pyx     |  65 +++++++++
 .../bindings/_internal/nvfatbin_windows.pyx   |  53 ++++++++
 cuda_bindings/cuda/bindings/cynvfatbin.pxd    |   5 +
 cuda_bindings/cuda/bindings/cynvfatbin.pyx    |  14 ++
 cuda_bindings/cuda/bindings/nvfatbin.pxd      |   5 +
 cuda_bindings/cuda/bindings/nvfatbin.pyx      |  75 ++++++++++
 cuda_bindings/tests/test_nvfatbin.py          | 128 +++++++++++++++++-
 8 files changed, 349 insertions(+), 1 deletion(-)

diff --git a/cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd b/cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd
index 14a8a6d608..d421e8c21e 100644
--- a/cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd
+++ b/cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd
@@ -14,9 +14,14 @@ from ..cynvfatbin cimport *
 cdef nvFatbinResult _nvFatbinCreate(nvFatbinHandle* handle_indirect, const char** options, size_t optionsCount) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil
 cdef nvFatbinResult _nvFatbinDestroy(nvFatbinHandle* handle_indirect) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil
 cdef nvFatbinResult _nvFatbinAddPTX(nvFatbinHandle handle, const char* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil
+cdef nvFatbinResult _nvFatbinAddCubin(nvFatbinHandle handle, const void* code, size_t size, const char* arch, const char* identifier) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil
+cdef nvFatbinResult _nvFatbinAddLTOIR(nvFatbinHandle handle, const void* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil
+cdef nvFatbinResult _nvFatbinAddReloc(nvFatbinHandle handle, const void* code, size_t size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil
 cdef nvFatbinResult _nvFatbinSize(nvFatbinHandle handle, size_t* size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil
 cdef nvFatbinResult _nvFatbinGet(nvFatbinHandle handle, void* buffer) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil
 cdef nvFatbinResult _nvFatbinVersion(unsigned int* major, unsigned int* minor) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil
 
 
 
+
+
diff --git a/cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx
index 06143d9031..097043f69a 100644
--- a/cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx
+++ b/cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx
@@ -62,6 +62,9 @@ cdef bint __py_nvfatbin_init = False
 cdef void* __nvFatbinCreate = NULL
 cdef void* __nvFatbinDestroy = NULL
 cdef void* __nvFatbinAddPTX = NULL
+cdef void* __nvFatbinAddCubin = NULL
+cdef void* __nvFatbinAddLTOIR = NULL
+cdef void* __nvFatbinAddReloc = NULL
 cdef void* __nvFatbinSize = NULL
 cdef void* __nvFatbinGet = NULL
 cdef void* __nvFatbinVersion = NULL
@@ -104,6 +107,27 @@ cdef int _init_nvfatbin() except -1 nogil:
                 handle = load_library()
             __nvFatbinAddPTX = dlsym(handle, 'nvFatbinAddPTX')
 
+        global __nvFatbinAddCubin
+        __nvFatbinAddCubin = dlsym(RTLD_DEFAULT, 'nvFatbinAddCubin')
+        if __nvFatbinAddCubin == NULL:
+            if handle == NULL:
+                handle = load_library()
+            __nvFatbinAddCubin = dlsym(handle, 'nvFatbinAddCubin')
+
+        global __nvFatbinAddLTOIR
+        __nvFatbinAddLTOIR = dlsym(RTLD_DEFAULT, 'nvFatbinAddLTOIR')
+        if __nvFatbinAddLTOIR == NULL:
+            if handle == NULL:
+                handle = load_library()
+            __nvFatbinAddLTOIR = dlsym(handle, 'nvFatbinAddLTOIR')
+
+        global __nvFatbinAddReloc
+        __nvFatbinAddReloc = dlsym(RTLD_DEFAULT, 'nvFatbinAddReloc')
+        if __nvFatbinAddReloc == NULL:
+            if handle == NULL:
+                handle = load_library()
+            __nvFatbinAddReloc = dlsym(handle, 'nvFatbinAddReloc')
+
         global __nvFatbinSize
         __nvFatbinSize = dlsym(RTLD_DEFAULT, 'nvFatbinSize')
         if __nvFatbinSize == NULL:
@@ -155,6 +179,15 @@ cpdef dict _inspect_function_pointers():
     global __nvFatbinAddPTX
     data["__nvFatbinAddPTX"] = <intptr_t>__nvFatbinAddPTX
 
+    global __nvFatbinAddCubin
+    data["__nvFatbinAddCubin"] = <intptr_t>__nvFatbinAddCubin
+
+    global __nvFatbinAddLTOIR
+    data["__nvFatbinAddLTOIR"] = <intptr_t>__nvFatbinAddLTOIR
+
+    global __nvFatbinAddReloc
+    data["__nvFatbinAddReloc"] = <intptr_t>__nvFatbinAddReloc
+
     global __nvFatbinSize
     data["__nvFatbinSize"] = <intptr_t>__nvFatbinSize
 
@@ -209,6 +242,36 @@ cdef nvFatbinResult _nvFatbinAddPTX(nvFatbinHandle handle, const char* code, siz
         handle, code, size, arch, identifier, optionsCmdLine)
 
 
+cdef nvFatbinResult _nvFatbinAddCubin(nvFatbinHandle handle, const void* code, size_t size, const char* arch, const char* identifier) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil:
+    global __nvFatbinAddCubin
+    _check_or_init_nvfatbin()
+    if __nvFatbinAddCubin == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvFatbinAddCubin is not found")
+    return (<nvFatbinResult (*)(nvFatbinHandle, const void*, size_t, const char*, const char*) noexcept nogil>__nvFatbinAddCubin)(
+        handle, code, size, arch, identifier)
+
+
+cdef nvFatbinResult _nvFatbinAddLTOIR(nvFatbinHandle handle, const void* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil:
+    global __nvFatbinAddLTOIR
+    _check_or_init_nvfatbin()
+    if __nvFatbinAddLTOIR == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvFatbinAddLTOIR is not found")
+    return (<nvFatbinResult (*)(nvFatbinHandle, const void*, size_t, const char*, const char*, const char*) noexcept nogil>__nvFatbinAddLTOIR)(
+        handle, code, size, arch, identifier, optionsCmdLine)
+
+
+cdef nvFatbinResult _nvFatbinAddReloc(nvFatbinHandle handle, const void* code, size_t size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil:
+    global __nvFatbinAddReloc
+    _check_or_init_nvfatbin()
+    if __nvFatbinAddReloc == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvFatbinAddReloc is not found")
+    return (<nvFatbinResult (*)(nvFatbinHandle, const void*, size_t) noexcept nogil>__nvFatbinAddReloc)(
+        handle, code, size)
+
+
 cdef nvFatbinResult _nvFatbinSize(nvFatbinHandle handle, size_t* size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil:
     global __nvFatbinSize
     _check_or_init_nvfatbin()
@@ -240,3 +303,5 @@ cdef nvFatbinResult _nvFatbinVersion(unsigned int* major, unsigned int* minor) e
 
 
 
+
+
diff --git a/cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx
index cc1824bf43..a499637f0d 100644
--- a/cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx
+++ b/cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx
@@ -80,6 +80,9 @@ cdef bint __py_nvfatbin_init = False
 cdef void* __nvFatbinCreate = NULL
 cdef void* __nvFatbinDestroy = NULL
 cdef void* __nvFatbinAddPTX = NULL
+cdef void* __nvFatbinAddCubin = NULL
+cdef void* __nvFatbinAddLTOIR = NULL
+cdef void* __nvFatbinAddReloc = NULL
 cdef void* __nvFatbinSize = NULL
 cdef void* __nvFatbinGet = NULL
 cdef void* __nvFatbinVersion = NULL
@@ -106,6 +109,15 @@ cdef int _init_nvfatbin() except -1 nogil:
         global __nvFatbinAddPTX
         __nvFatbinAddPTX = GetProcAddress(handle, 'nvFatbinAddPTX')
 
+        global __nvFatbinAddCubin
+        __nvFatbinAddCubin = GetProcAddress(handle, 'nvFatbinAddCubin')
+
+        global __nvFatbinAddLTOIR
+        __nvFatbinAddLTOIR = GetProcAddress(handle, 'nvFatbinAddLTOIR')
+
+        global __nvFatbinAddReloc
+        __nvFatbinAddReloc = GetProcAddress(handle, 'nvFatbinAddReloc')
+
         global __nvFatbinSize
         __nvFatbinSize = GetProcAddress(handle, 'nvFatbinSize')
 
@@ -146,6 +158,15 @@ cpdef dict _inspect_function_pointers():
     global __nvFatbinAddPTX
     data["__nvFatbinAddPTX"] = <intptr_t>__nvFatbinAddPTX
 
+    global __nvFatbinAddCubin
+    data["__nvFatbinAddCubin"] = <intptr_t>__nvFatbinAddCubin
+
+    global __nvFatbinAddLTOIR
+    data["__nvFatbinAddLTOIR"] = <intptr_t>__nvFatbinAddLTOIR
+
+    global __nvFatbinAddReloc
+    data["__nvFatbinAddReloc"] = <intptr_t>__nvFatbinAddReloc
+
     global __nvFatbinSize
     data["__nvFatbinSize"] = <intptr_t>__nvFatbinSize
 
@@ -200,6 +221,36 @@ cdef nvFatbinResult _nvFatbinAddPTX(nvFatbinHandle handle, const char* code, siz
         handle, code, size, arch, identifier, optionsCmdLine)
 
 
+cdef nvFatbinResult _nvFatbinAddCubin(nvFatbinHandle handle, const void* code, size_t size, const char* arch, const char* identifier) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil:
+    global __nvFatbinAddCubin
+    _check_or_init_nvfatbin()
+    if __nvFatbinAddCubin == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvFatbinAddCubin is not found")
+    return (<nvFatbinResult (*)(nvFatbinHandle, const void*, size_t, const char*, const char*) noexcept nogil>__nvFatbinAddCubin)(
+        handle, code, size, arch, identifier)
+
+
+cdef nvFatbinResult _nvFatbinAddLTOIR(nvFatbinHandle handle, const void* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil:
+    global __nvFatbinAddLTOIR
+    _check_or_init_nvfatbin()
+    if __nvFatbinAddLTOIR == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvFatbinAddLTOIR is not found")
+    return (<nvFatbinResult (*)(nvFatbinHandle, const void*, size_t, const char*, const char*, const char*) noexcept nogil>__nvFatbinAddLTOIR)(
+        handle, code, size, arch, identifier, optionsCmdLine)
+
+
+cdef nvFatbinResult _nvFatbinAddReloc(nvFatbinHandle handle, const void* code, size_t size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil:
+    global __nvFatbinAddReloc
+    _check_or_init_nvfatbin()
+    if __nvFatbinAddReloc == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvFatbinAddReloc is not found")
+    return (<nvFatbinResult (*)(nvFatbinHandle, const void*, size_t) noexcept nogil>__nvFatbinAddReloc)(
+        handle, code, size)
+
+
 cdef nvFatbinResult _nvFatbinSize(nvFatbinHandle handle, size_t* size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil:
     global __nvFatbinSize
     _check_or_init_nvfatbin()
@@ -231,3 +282,5 @@ cdef nvFatbinResult _nvFatbinVersion(unsigned int* major, unsigned int* minor) e
 
 
 
+
+
diff --git a/cuda_bindings/cuda/bindings/cynvfatbin.pxd b/cuda_bindings/cuda/bindings/cynvfatbin.pxd
index 651aa27152..55d8c83c1a 100644
--- a/cuda_bindings/cuda/bindings/cynvfatbin.pxd
+++ b/cuda_bindings/cuda/bindings/cynvfatbin.pxd
@@ -45,9 +45,14 @@ ctypedef void* nvFatbinHandle 'nvFatbinHandle'
 cdef nvFatbinResult nvFatbinCreate(nvFatbinHandle* handle_indirect, const char** options, size_t optionsCount) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil
 cdef nvFatbinResult nvFatbinDestroy(nvFatbinHandle* handle_indirect) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil
 cdef nvFatbinResult nvFatbinAddPTX(nvFatbinHandle handle, const char* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil
+cdef nvFatbinResult nvFatbinAddCubin(nvFatbinHandle handle, const void* code, size_t size, const char* arch, const char* identifier) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil
+cdef nvFatbinResult nvFatbinAddLTOIR(nvFatbinHandle handle, const void* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil
+cdef nvFatbinResult nvFatbinAddReloc(nvFatbinHandle handle, const void* code, size_t size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil
 cdef nvFatbinResult nvFatbinSize(nvFatbinHandle handle, size_t* size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil
 cdef nvFatbinResult nvFatbinGet(nvFatbinHandle handle, void* buffer) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil
 cdef nvFatbinResult nvFatbinVersion(unsigned int* major, unsigned int* minor) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil
 
 
 
+
+
diff --git a/cuda_bindings/cuda/bindings/cynvfatbin.pyx b/cuda_bindings/cuda/bindings/cynvfatbin.pyx
index 13c9ac2cc1..142f374c1b 100644
--- a/cuda_bindings/cuda/bindings/cynvfatbin.pyx
+++ b/cuda_bindings/cuda/bindings/cynvfatbin.pyx
@@ -23,6 +23,18 @@ cdef nvFatbinResult nvFatbinAddPTX(nvFatbinHandle handle, const char* code, size
     return _nvfatbin._nvFatbinAddPTX(handle, code, size, arch, identifier, optionsCmdLine)
 
 
+cdef nvFatbinResult nvFatbinAddCubin(nvFatbinHandle handle, const void* code, size_t size, const char* arch, const char* identifier) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil:
+    return _nvfatbin._nvFatbinAddCubin(handle, code, size, arch, identifier)
+
+
+cdef nvFatbinResult nvFatbinAddLTOIR(nvFatbinHandle handle, const void* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil:
+    return _nvfatbin._nvFatbinAddLTOIR(handle, code, size, arch, identifier, optionsCmdLine)
+
+
+cdef nvFatbinResult nvFatbinAddReloc(nvFatbinHandle handle, const void* code, size_t size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil:
+    return _nvfatbin._nvFatbinAddReloc(handle, code, size)
+
+
 cdef nvFatbinResult nvFatbinSize(nvFatbinHandle handle, size_t* size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil:
     return _nvfatbin._nvFatbinSize(handle, size)
 
@@ -36,3 +48,5 @@ cdef nvFatbinResult nvFatbinVersion(unsigned int* major, unsigned int* minor) ex
 
 
 
+
+
diff --git a/cuda_bindings/cuda/bindings/nvfatbin.pxd b/cuda_bindings/cuda/bindings/nvfatbin.pxd
index 1350d0ed52..54c793962b 100644
--- a/cuda_bindings/cuda/bindings/nvfatbin.pxd
+++ b/cuda_bindings/cuda/bindings/nvfatbin.pxd
@@ -29,9 +29,14 @@ ctypedef nvFatbinResult _Result
 
 cpdef intptr_t create(options, size_t options_count) except -1
 cpdef add_ptx(intptr_t handle, code, size_t size, arch, identifier, options_cmd_line)
+cpdef add_cubin(intptr_t handle, code, size_t size, arch, identifier)
+cpdef add_ltoir(intptr_t handle, code, size_t size, arch, identifier, options_cmd_line)
+cpdef add_reloc(intptr_t handle, code, size_t size)
 cpdef size_t size(intptr_t handle) except? 0
 cpdef get(intptr_t handle, buffer)
 cpdef tuple version()
 
 
 
+
+
diff --git a/cuda_bindings/cuda/bindings/nvfatbin.pyx b/cuda_bindings/cuda/bindings/nvfatbin.pyx
index dcc669797e..92db285f8a 100644
--- a/cuda_bindings/cuda/bindings/nvfatbin.pyx
+++ b/cuda_bindings/cuda/bindings/nvfatbin.pyx
@@ -139,6 +139,79 @@ cpdef add_ptx(intptr_t handle, code, size_t size, arch, identifier, options_cmd_
     check_status(__status__)
 
 
+cpdef add_cubin(intptr_t handle, code, size_t size, arch, identifier):
+    """nvFatbinAddCubin adds a CUDA binary to the fatbinary.
+
+    Args:
+        handle (intptr_t): nvFatbin handle.
+        code (bytes): The cubin.
+        size (size_t): The size of the cubin.
+        arch (str): The numerical architecture that this cubin is for (the XX of any sm_XX, lto_XX, or compute_XX).
+        identifier (str): Name of the cubin, useful when extracting the fatbin with tools like cuobjdump.
+
+    .. seealso:: `nvFatbinAddCubin`
+    """
+    cdef void* _code_ = get_buffer_pointer(code, size, readonly=True)  # passed on as const void*, so read-only access suffices
+    if not isinstance(arch, str):
+        raise TypeError("arch must be a Python str")
+    cdef bytes _temp_arch_ = (<str>arch).encode()  # named local keeps the encoded bytes alive for the call
+    cdef char* _arch_ = _temp_arch_  # borrowed pointer into _temp_arch_
+    if not isinstance(identifier, str):
+        raise TypeError("identifier must be a Python str")
+    cdef bytes _temp_identifier_ = (<str>identifier).encode()  # same lifetime pattern as _temp_arch_
+    cdef char* _identifier_ = _temp_identifier_
+    with nogil:  # only C pointers and integers are used here, so the GIL is released
+        __status__ = nvFatbinAddCubin(<Handle>handle, <const void*>_code_, size, <const char*>_arch_, <const char*>_identifier_)
+    check_status(__status__)
+
+
+cpdef add_ltoir(intptr_t handle, code, size_t size, arch, identifier, options_cmd_line):
+    """nvFatbinAddLTOIR adds LTOIR to the fatbinary.
+
+    Args:
+        handle (intptr_t): nvFatbin handle.
+        code (bytes): The LTOIR code.
+        size (size_t): The size of the LTOIR code.
+        arch (str): The numerical architecture that this LTOIR is for (the XX of any sm_XX, lto_XX, or compute_XX).
+        identifier (str): Name of the LTOIR, useful when extracting the fatbin with tools like cuobjdump.
+        options_cmd_line (str): Options used during JIT compilation.
+
+    .. seealso:: `nvFatbinAddLTOIR`
+    """
+    cdef void* _code_ = get_buffer_pointer(code, size, readonly=True)  # passed on as const void*, so read-only access suffices
+    if not isinstance(arch, str):
+        raise TypeError("arch must be a Python str")
+    cdef bytes _temp_arch_ = (<str>arch).encode()  # named local keeps the encoded bytes alive for the call
+    cdef char* _arch_ = _temp_arch_  # borrowed pointer into _temp_arch_
+    if not isinstance(identifier, str):
+        raise TypeError("identifier must be a Python str")
+    cdef bytes _temp_identifier_ = (<str>identifier).encode()  # same lifetime pattern as _temp_arch_
+    cdef char* _identifier_ = _temp_identifier_
+    if not isinstance(options_cmd_line, str):
+        raise TypeError("options_cmd_line must be a Python str")
+    cdef bytes _temp_options_cmd_line_ = (<str>options_cmd_line).encode()  # same lifetime pattern as _temp_arch_
+    cdef char* _options_cmd_line_ = _temp_options_cmd_line_
+    with nogil:  # only C pointers and integers are used here, so the GIL is released
+        __status__ = nvFatbinAddLTOIR(<Handle>handle, <const void*>_code_, size, <const char*>_arch_, <const char*>_identifier_, <const char*>_options_cmd_line_)
+    check_status(__status__)
+
+
+cpdef add_reloc(intptr_t handle, code, size_t size):
+    """nvFatbinAddReloc adds relocatable PTX entries from a host object to the fatbinary.
+
+    Args:
+        handle (intptr_t): nvFatbin handle.
+        code (bytes): The host object image.
+        size (size_t): The size of the host object image code.
+
+    .. seealso:: `nvFatbinAddReloc`
+    """
+    cdef void* _code_ = get_buffer_pointer(code, size, readonly=True)  # passed on as const void*, so read-only access suffices
+    with nogil:  # only C pointers and integers are used here, so the GIL is released
+        __status__ = nvFatbinAddReloc(<Handle>handle, <const void*>_code_, size)
+    check_status(__status__)
+
+
 cpdef size_t size(intptr_t handle) except? 0:
     """nvFatbinSize returns the fatbinary's size.
 
@@ -192,3 +265,5 @@ cpdef tuple version():
 
 
 
+
+
diff --git a/cuda_bindings/tests/test_nvfatbin.py b/cuda_bindings/tests/test_nvfatbin.py
index 627bd300e4..3e893852b5 100644
--- a/cuda_bindings/tests/test_nvfatbin.py
+++ b/cuda_bindings/tests/test_nvfatbin.py
@@ -1,7 +1,9 @@
 # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 
-from cuda.bindings import nvfatbin
+import subprocess
+
+from cuda.bindings import nvfatbin, nvrtc
 
 import pytest
 
@@ -39,6 +41,12 @@
 }}
 """
 
+CODE = """
+int __device__ inc(int x) {
+    return x + 1;
+}
+"""
+
 @pytest.fixture(params=ARCHITECTURES)
 def arch(request):
     return request.param
@@ -51,6 +59,63 @@ def ptx_version(request):
 def PTX(arch, ptx_version):
     return PTX_TEMPLATE.format(PTX_VERSION=ptx_version, ARCH=arch)
 
+@pytest.fixture
+def CUBIN(arch):  # fixture: compile CODE with NVRTC for ``arch`` and return the buffer given to nvrtcGetCUBIN
+    def CHECK_NVRTC(err):  # raise RuntimeError unless err is NVRTC_SUCCESS
+        if err != nvrtc.nvrtcResult.NVRTC_SUCCESS:
+            raise RuntimeError(repr(err))
+
+    err, program_handle = nvrtc.nvrtcCreateProgram(CODE.encode(), b"", 0, [], [])
+    CHECK_NVRTC(err)
+    err = nvrtc.nvrtcCompileProgram(program_handle, 1, [f"-arch={arch}".encode()])[0]  # one option: the target arch
+    CHECK_NVRTC(err)
+    err, size = nvrtc.nvrtcGetCUBINSize(program_handle)
+    CHECK_NVRTC(err)
+    cubin = b" " * size  # placeholder of the reported size, handed to nvrtcGetCUBIN below
+    (err,) = nvrtc.nvrtcGetCUBIN(program_handle, cubin)
+    CHECK_NVRTC(err)
+    (err,) = nvrtc.nvrtcDestroyProgram(program_handle)  # the program is no longer needed once the cubin is copied out
+    CHECK_NVRTC(err)
+    return cubin
+
+# Fixture: compile an empty kernel with NVRTC using -dlto for the requested arch and return its LTOIR.
+@pytest.fixture
+def LTOIR(arch):
+    arch = arch.replace("sm", "compute")  # e.g. "sm_80" -> "compute_80"
+    def CHECK_NVRTC(err):  # raise RuntimeError unless err is NVRTC_SUCCESS
+        if err != nvrtc.nvrtcResult.NVRTC_SUCCESS:
+            raise RuntimeError(repr(err))
+
+    empty_cplusplus_kernel = "__global__ void A() {}"
+    err, program_handle = nvrtc.nvrtcCreateProgram(empty_cplusplus_kernel.encode(), b"", 0, [], [])
+    CHECK_NVRTC(err)
+    # numOptions must cover both entries; with 1 the "-arch=" option was never handed to NVRTC.
+    err = nvrtc.nvrtcCompileProgram(program_handle, 2, [b"-dlto", f"-arch={arch}".encode()])[0]
+    CHECK_NVRTC(err)
+    err, size = nvrtc.nvrtcGetLTOIRSize(program_handle)
+    CHECK_NVRTC(err)
+    empty_kernel_ltoir = b" " * size  # placeholder of the reported size, handed to nvrtcGetLTOIR below
+    (err,) = nvrtc.nvrtcGetLTOIR(program_handle, empty_kernel_ltoir)
+    (err,) = (CHECK_NVRTC(err), nvrtc.nvrtcDestroyProgram(program_handle)[0])[1:]
+    CHECK_NVRTC(err)
+    return empty_kernel_ltoir
+
+@pytest.fixture
+def OBJECT(arch, tmpdir):
+    """Build a trivial CUDA program with nvcc and return the bytes of the file nvcc writes."""
+    if arch == "sm_100":
+        pytest.skip("sm_100 is not supported on local system.")
+
+    source_path = tmpdir / "object.cu"
+    output_path = tmpdir / "object.o"
+    with open(source_path, "w") as f:
+        f.write("__global__ void A() {} int main() { return 0; }")
+
+    subprocess.check_output(["nvcc", "-arch", arch, "-o", str(output_path), str(source_path)])
+    with open(output_path, "rb") as f:
+        return f.read()  # returned directly; the old local named `object` shadowed the builtin
+
+
 def test_nvfatbin_get_version():
     major, minor = nvfatbin.version()
     assert major is not None
@@ -87,3 +152,64 @@ def test_nvfatbin_add_ptx(PTX, arch):
     nvfatbin.get(handle, buffer)
     nvfatbin.destroy(handle)
 
+
+@pytest.mark.parametrize("arch", ["sm_80"], indirect=True)
+def test_nvfatbin_add_cubin_ELF_SIZE_MISMATCH(CUBIN, arch):
+    # NOTE(review): "-32" asks for 32-bit entries, so the sm_80 cubin should be rejected on size - confirm.
+    handle = nvfatbin.create(["-32"], 1)
+    with pytest.raises(nvfatbin.nvfatbinError, match="ERROR_ELF_SIZE_MISMATCH"):
+        nvfatbin.add_cubin(handle, CUBIN, len(CUBIN), "80", "inc")  # arch matches, so only the size can differ
+    nvfatbin.destroy(handle)
+
+
+def test_nvfatbin_add_cubin(CUBIN, arch):
+    """Add the CUBIN fixture under its own architecture number and fetch the fatbinary."""
+    arch_numeric = arch.split("_")[1]
+    handle = nvfatbin.create([], 0)
+    try:
+        nvfatbin.add_cubin(handle, CUBIN, len(CUBIN), arch_numeric, "inc")
+        buffer = bytearray(nvfatbin.size(handle))
+        nvfatbin.get(handle, buffer)
+    finally:  # release the handle even if one of the calls above raises
+        nvfatbin.destroy(handle)
+
+
+@pytest.mark.parametrize("arch", ["sm_80"], indirect=True)
+def test_nvfatbin_add_cubin_ELF_ARCH_MISMATCH(CUBIN, arch):
+    handle = nvfatbin.create([], 0)
+    with pytest.raises(nvfatbin.nvfatbinError, match="ERROR_ELF_ARCH_MISMATCH"):
+        nvfatbin.add_cubin(handle, CUBIN, len(CUBIN), "75", "inc")  # fixture is compiled with -arch=sm_80
+    # Reached only when the expected error was raised above.
+    nvfatbin.destroy(handle)
+
+
+def test_nvfatbin_add_ltoir(LTOIR, arch):
+    """Add the LTOIR fixture under its architecture number and fetch the fatbinary."""
+    arch_numeric = arch.split("_")[1]
+    handle = nvfatbin.create([], 0)
+    try:
+        nvfatbin.add_ltoir(handle, LTOIR, len(LTOIR), arch_numeric, "inc", "")
+        buffer = bytearray(nvfatbin.size(handle))
+        nvfatbin.get(handle, buffer)
+    finally:  # release the handle even if one of the calls above raises
+        nvfatbin.destroy(handle)
+
+
+@pytest.mark.parametrize("arch", ["sm_80"], indirect=True)
+def test_nvfatbin_add_ltoir_ELF_ARCH_MISMATCH(LTOIR, arch):
+    # NOTE(review): this test was skipped unconditionally with no stated reason; confirm and record why.
+    pytest.skip("expected error for an LTOIR arch mismatch is unconfirmed")
+    handle = nvfatbin.create([], 0)
+    with pytest.raises(nvfatbin.nvfatbinError, match="ERROR_ELF_ARCH_MISMATCH"):
+        nvfatbin.add_ltoir(handle, LTOIR, len(LTOIR), "75", "inc", "")
+    nvfatbin.destroy(handle)
+
+
+def test_nvfatbin_add_reloc(OBJECT):
+    handle = nvfatbin.create([], 0)
+    try:  # add the nvcc-built image, then size and fetch the fatbinary
+        nvfatbin.add_reloc(handle, OBJECT, len(OBJECT))
+        buffer = bytearray(nvfatbin.size(handle))
+        nvfatbin.get(handle, buffer)
+    finally:  # release the handle even if one of the calls above raises
+        nvfatbin.destroy(handle)
\ No newline at end of file