From 79f894fe918f631047baaed73006fc0565673183 Mon Sep 17 00:00:00 2001
From: Rui Luo <ruluo@nvidia.com>
Date: Thu, 16 Apr 2026 14:25:00 +0800
Subject: [PATCH 1/2] tests: add coverage tests for cuda core

---
 cuda_core/tests/test_event.py    |  50 +++++++-
 cuda_core/tests/test_launcher.py |  49 ++++++++
 cuda_core/tests/test_linker.py   |  21 ++++
 cuda_core/tests/test_program.py  | 104 ++++++++++++++++
 cuda_core/tests/test_utils.py    | 199 +++++++++++++++++++++++++++++++
 5 files changed, 422 insertions(+), 1 deletion(-)

diff --git a/cuda_core/tests/test_event.py b/cuda_core/tests/test_event.py
index ddceacd77c7..07b160e4206 100644
--- a/cuda_core/tests/test_event.py
+++ b/cuda_core/tests/test_event.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 
 
@@ -193,6 +193,54 @@ def test_event_type_safety(init_cuda):
     assert (event is None) is False
 
 
+def test_event_isub_not_implemented(init_cuda):
+    """In-place subtraction with a non-Event operand yields NotImplemented."""
+    dev = Device()
+    work_stream = dev.create_stream()
+    recorded = work_stream.record()
+    # Invoke the dunder directly so its raw return value can be inspected.
+    assert recorded.__isub__(42) is NotImplemented
+
+
+def test_event_rsub_not_implemented(init_cuda):
+    """Reflected subtraction with a non-Event operand yields NotImplemented."""
+    dev = Device()
+    work_stream = dev.create_stream()
+    recorded = work_stream.record()
+    # Invoke the dunder directly so its raw return value can be inspected.
+    assert recorded.__rsub__(42) is NotImplemented
+
+
+def test_event_get_ipc_descriptor_non_ipc(init_cuda):
+    """get_ipc_descriptor raises RuntimeError on a non-IPC event."""
+    device = Device()
+    stream = device.create_stream()
+    event = stream.record()
+    with pytest.raises(RuntimeError, match="not IPC-enabled"):
+        event.get_ipc_descriptor()
+
+
+def test_event_is_done_false(init_cuda):
+    """Event.is_done is False while the work recorded before it is still held by a latch."""
+    device = Device()
+    latch = LatchKernel(device)
+    stream = device.create_stream()
+    latch.launch(stream)
+    event = stream.record()
+    was_done = event.is_done  # Sampled while the latch still holds the kernel.
+    latch.release()
+    event.sync()
+    assert was_done is False  # Asserted after release so a failure cannot strand the kernel.
+
+
+def test_ipc_event_descriptor_direct_init():
+    """IPCEventDescriptor cannot be instantiated directly."""
+    import cuda.core._event as _event_module
+
+    with pytest.raises(RuntimeError, match="cannot be instantiated directly"):
+        _event_module.IPCEventDescriptor()
+
+
 # ============================================================================
 # Event Hash Tests
 # ============================================================================
diff --git a/cuda_core/tests/test_launcher.py b/cuda_core/tests/test_launcher.py
index 899fdee4f54..06e941ac63a 100644
--- a/cuda_core/tests/test_launcher.py
+++ b/cuda_core/tests/test_launcher.py
@@ -387,3 +387,52 @@ def test_kernel_arg_unsupported_type():
 
     with pytest.raises(TypeError, match="unsupported type"):
         ParamHolder(["not_a_valid_kernel_arg"])
+
+
+def test_kernel_arg_ctypes_subclass_isinstance_fallback():
+    """Subclassed ctypes types hit the isinstance fallback in prepare_ctypes_arg."""
+    from cuda.core._kernel_arg_handler import ParamHolder
+
+    class MyInt32(ctypes.c_int32):
+        pass
+
+    class MyFloat(ctypes.c_float):
+        pass
+
+    class MyBool(ctypes.c_bool):
+        pass
+
+    # These should NOT raise — they should be handled via isinstance fallback
+    holder = ParamHolder([MyInt32(42), MyFloat(3.14), MyBool(True)])
+    assert holder.ptr != 0
+
+
+def test_kernel_arg_numpy_subclass_isinstance_fallback():
+    """Subclassed numpy scalars hit the isinstance fallback in prepare_numpy_arg."""
+    from cuda.core._kernel_arg_handler import ParamHolder
+
+    class MyInt32(np.int32):
+        pass
+
+    class MyFloat32(np.float32):
+        pass
+
+    holder = ParamHolder([MyInt32(7), MyFloat32(2.5)])
+    assert holder.ptr != 0
+
+
+def test_kernel_arg_python_isinstance_fallbacks():
+    """Subclassed Python builtins hit the isinstance fallback in ParamHolder."""
+    from cuda.core._kernel_arg_handler import ParamHolder
+
+    class MyInt(int):
+        """type(x) is not int, so fast path skips; isinstance(x, int) catches it."""
+
+    class MyFloat(float):
+        pass
+
+    class MyComplex(complex):
+        pass
+
+    holder = ParamHolder([MyInt(1), MyFloat(1.5), MyComplex(1 + 2j)])
+    assert holder.ptr != 0
diff --git a/cuda_core/tests/test_linker.py b/cuda_core/tests/test_linker.py
index 30a6a033495..0d4ff91dcd9 100644
--- a/cuda_core/tests/test_linker.py
+++ b/cuda_core/tests/test_linker.py
@@ -221,3 +221,24 @@ def test_linker_logs_cached_after_link(compile_ptx_functions):
     # Calling again should return the same observable values.
     assert linker.get_error_log() == err_log
     assert linker.get_info_log() == info_log
+
+
+def test_linker_handle(compile_ptx_functions):
+    """A newly constructed Linker exposes a handle that is neither None nor zero."""
+    linker = Linker(*compile_ptx_functions, options=LinkerOptions(arch=ARCH))
+    raw_handle = linker.handle
+    # Check for None first so int() is only applied to an actual handle object.
+    assert raw_handle is not None
+    assert int(raw_handle) != 0
+
+
+@pytest.mark.skipif(is_culink_backend, reason="nvjitlink options only tested with nvjitlink backend")
+def test_linker_options_nvjitlink_options_as_str():
+    """_prepare_nvjitlink_options(as_bytes=False) returns plain strings."""
+    opts = LinkerOptions(arch=ARCH, debug=True, lineinfo=True)
+    options = opts._prepare_nvjitlink_options(as_bytes=False)
+    assert isinstance(options, list)
+    assert all(isinstance(o, str) for o in options)
+    assert f"-arch={ARCH}" in options
+    assert "-g" in options
+    assert "-lineinfo" in options
diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py
index a062f3714e1..52107073a6c 100644
--- a/cuda_core/tests/test_program.py
+++ b/cuda_core/tests/test_program.py
@@ -773,3 +773,107 @@ def test_program_options_as_bytes_nvvm_unsupported_option():
     options = ProgramOptions(arch="sm_80", lineinfo=True)
     with pytest.raises(CUDAError, match="not supported by NVVM backend"):
         options.as_bytes("nvvm")
+
+
+def test_program_options_repr():
+    """ProgramOptions.__repr__ returns a human-readable string."""
+    opts = ProgramOptions(name="mykernel", arch="sm_80")
+    r = repr(opts)
+    assert "ProgramOptions" in r
+    assert "mykernel" in r
+    assert "sm_80" in r
+
+
+def test_program_options_bad_define_macro_short_tuple():
+    """define_macro with a 1-element tuple raises RuntimeError."""
+    opts = ProgramOptions(name="test", arch="sm_80", define_macro=("ONLY_NAME",))
+    with pytest.raises(RuntimeError, match="Expected define_macro tuple"):
+        opts.as_bytes("nvrtc")
+
+
+def test_program_options_bad_define_macro_non_str_value():
+    """define_macro tuple with a non-string value raises RuntimeError."""
+    opts = ProgramOptions(name="test", arch="sm_80", define_macro=("MY_MACRO", 99))
+    with pytest.raises(RuntimeError, match="Expected define_macro tuple"):
+        opts.as_bytes("nvrtc")
+
+
+def test_program_options_bad_define_macro_list_non_str():
+    """define_macro list containing a non-str/non-tuple item raises RuntimeError."""
+    opts = ProgramOptions(name="test", arch="sm_80", define_macro=[42])
+    with pytest.raises(RuntimeError, match="Expected define_macro"):
+        opts.as_bytes("nvrtc")
+
+
+def test_program_options_bad_define_macro_list_bad_tuple():
+    """define_macro list with a malformed tuple inside raises RuntimeError."""
+    opts = ProgramOptions(name="test", arch="sm_80", define_macro=[("ONLY_NAME",)])
+    with pytest.raises(RuntimeError, match="Expected define_macro"):
+        opts.as_bytes("nvrtc")
+
+
+def test_ptx_program_extra_sources_unsupported(ptx_code_object):
+    """PTX backend raises ValueError when extra_sources is specified."""
+    options = ProgramOptions(extra_sources=[("module1", b"data")])
+    with pytest.raises(ValueError, match="extra_sources is not supported by the PTX backend"):
+        Program(ptx_code_object.code.decode(), "ptx", options)
+
+
+def test_ptx_program_handle_is_linker_handle(init_cuda, ptx_code_object):
+    """Program.handle for the PTX backend delegates to the linker handle."""
+    program = Program(ptx_code_object.code.decode(), "ptx")
+    handle = program.handle
+    assert handle is not None
+    assert int(handle) != 0
+    program.close()
+
+
+@nvvm_available
+def test_nvvm_program_wrong_code_type():
+    """NVVM backend raises TypeError when code is not str/bytes/bytearray."""
+    with pytest.raises(TypeError, match="NVVM IR code must be provided as str, bytes, or bytearray"):
+        Program(42, "nvvm")
+
+
+def test_extra_sources_not_sequence():
+    """extra_sources must be a sequence; non-sequence raises TypeError."""
+    with pytest.raises(TypeError, match="extra_sources must be a sequence of 2-tuples"):
+        ProgramOptions(name="test", arch="sm_80", extra_sources=42)
+
+
+def test_extra_sources_bad_module_not_tuple():
+    """extra_sources items must be 2-tuples; non-tuple item raises TypeError."""
+    with pytest.raises(TypeError, match="Each extra module must be a 2-tuple"):
+        ProgramOptions(name="test", arch="sm_80", extra_sources=["not_a_tuple"])
+
+
+def test_extra_sources_bad_module_name_not_str():
+    """extra_sources module name must be a string; non-str raises TypeError."""
+    with pytest.raises(TypeError, match="Module name at index 0 must be a string"):
+        ProgramOptions(name="test", arch="sm_80", extra_sources=[(42, b"source")])
+
+
+def test_extra_sources_bad_module_source_wrong_type():
+    """extra_sources module source must be str/bytes/bytearray."""
+    with pytest.raises(TypeError, match="Module source at index 0 must be str"):
+        ProgramOptions(name="test", arch="sm_80", extra_sources=[("mod", 42)])
+
+
+def test_extra_sources_empty_source():
+    """extra_sources module source cannot be empty bytes."""
+    with pytest.raises(ValueError, match="Module source for 'mod'.*cannot be empty"):
+        ProgramOptions(name="test", arch="sm_80", extra_sources=[("mod", b"")])
+
+
+def test_nvrtc_compile_with_logs_capture(init_cuda):
+    """Program.compile with logs= exercises the NVRTC program-log reading path."""
+    import io
+
+    # #warning generates a non-empty NVRTC program log, ensuring logsize > 1.
+    code = '#warning "test log capture"\nextern "C" __global__ void my_kernel() {}'
+    program = Program(code, "c++")
+    logs = io.StringIO()
+    result = program.compile("ptx", logs=logs)
+    assert isinstance(result, ObjectCode)
+    assert logs.getvalue(), "Expected non-empty compilation log from #warning directive"
+    program.close()
diff --git a/cuda_core/tests/test_utils.py b/cuda_core/tests/test_utils.py
index 4bdebcbde36..6228c2b2222 100644
--- a/cuda_core/tests/test_utils.py
+++ b/cuda_core/tests/test_utils.py
@@ -712,3 +712,202 @@ def test_ml_dtypes_bfloat16_dlpack_requires_ml_dtypes(init_cuda, no_ml_dtypes, a
     smv = api(a, stream_ptr=0)
     with pytest.raises(NotImplementedError, match=r"requires `ml_dtypes`"):
         smv.dtype  # noqa: B018
+
+
+def test_strided_memory_view_repr():
+    """__repr__ returns a descriptive string."""
+    src = np.arange(6, dtype=np.int32).reshape(2, 3)
+    view = StridedMemoryView.from_any_interface(src, stream_ptr=-1)
+    r = repr(view)
+    assert r.startswith("StridedMemoryView(ptr=")
+
+
+def test_strided_memory_view_copy_to_raises():
+    """copy_to raises NotImplementedError."""
+    src = np.zeros(5, dtype=np.float32)
+    view = StridedMemoryView.from_any_interface(src, stream_ptr=-1)
+    with pytest.raises(NotImplementedError, match="copy_to"):
+        view.copy_to(view, stream=None)
+
+
+def test_strided_memory_view_get_layout_error():
+    """get_layout raises ValueError for an empty (uninitialized) StridedMemoryView."""
+    with pytest.warns(DeprecationWarning, match="deprecated"):
+        view = StridedMemoryView()
+    with pytest.raises(ValueError, match="Cannot infer layout"):
+        _ = view._layout
+
+
+@pytest.mark.skipif(cp is None, reason="CuPy is not installed")
+def test_strided_memory_view_deprecated_cai_init(init_cuda):
+    """Deprecated StridedMemoryView(cai_obj) init path for CAI-only objects."""
+    src = cp.zeros(5, dtype=cp.float32)
+    dev = Device()
+    stream = dev.create_stream()
+    cai_only = _EnforceCAIView(src)
+    with pytest.deprecated_call():
+        view = StridedMemoryView(cai_only, stream_ptr=stream.handle)
+    assert view.is_device_accessible is True
+    assert view.ptr == src.data.ptr
+
+
+@pytest.mark.skipif(cp is None, reason="CuPy is not installed")
+def test_from_any_interface_cai_fallback(init_cuda):
+    """from_any_interface falls back to CAI when an object has no __dlpack__."""
+    src = cp.zeros(5, dtype=cp.float32)
+    dev = Device()
+    stream = dev.create_stream()
+    cai_only = _EnforceCAIView(src)
+    view = StridedMemoryView.from_any_interface(cai_only, stream_ptr=stream.handle)
+    assert view.is_device_accessible is True
+    assert view.ptr == src.data.ptr
+
+
+def test_strided_memory_view_copy_from_raises():
+    """copy_from raises NotImplementedError."""
+    src = np.zeros(5, dtype=np.float32)
+    view = StridedMemoryView.from_any_interface(src, stream_ptr=-1)
+    with pytest.raises(NotImplementedError, match="copy_from"):
+        view.copy_from(view, stream=None)
+
+
+def test_strided_memory_view_view_no_args_returns_self():
+    """view() with layout=None and dtype=None returns self."""
+    src = np.arange(6, dtype=np.int32).reshape(2, 3)
+    view = StridedMemoryView.from_any_interface(src, stream_ptr=-1)
+    same = view.view(layout=None, dtype=None)
+    assert same is view
+
+
+def test_strided_memory_view_view_with_dtype_only():
+    """view() with only dtype re-interprets using current layout."""
+    src = np.arange(4, dtype=np.float32)
+    view = StridedMemoryView.from_any_interface(src, stream_ptr=-1)
+    viewed = view.view(dtype=np.dtype("int32"))
+    assert viewed.dtype == np.dtype("int32")
+    assert viewed._layout == view._layout
+
+
+def test_dlpack_export_structured_dtype_raises():
+    """Structured dtypes are rejected for DLPack export."""
+    dt = np.dtype([("x", np.float32), ("y", np.int32)])  # itemsize=8
+    # Create a valid view first, then re-view with the structured dtype to
+    # bypass numpy's own __dlpack__ rejection during import.
+    src = np.zeros(3, dtype=np.float64)  # itemsize=8 to match
+    view = StridedMemoryView.from_any_interface(src, stream_ptr=-1)
+    bad_view = view.view(dtype=dt)
+    with pytest.raises(BufferError, match="Structured dtypes"):
+        bad_view.__dlpack__()
+
+
+def test_dlpack_export_unsupported_dtype_raises():
+    """Unsupported dtype kind is rejected for DLPack export."""
+    # numpy void dtype (kind='V', typestr='|V4') hits the else branch in
+    # _smv_dtype_numpy_to_dlpack (_memoryview.pyx); no line number, it would go stale.
+    src = np.zeros(3, dtype=np.float32)  # itemsize=4 to match V4
+    view = StridedMemoryView.from_any_interface(src, stream_ptr=-1)
+    bad_view = view.view(dtype=np.dtype("V4"))
+    with pytest.raises(BufferError, match="Unsupported dtype for DLPack export"):
+        bad_view.__dlpack__()
+
+
+class _FakeCAIv2:
+    """Object with CUDA Array Interface v2 (unsupported)."""
+
+    def __init__(self):
+        self.__cuda_array_interface__ = {
+            "version": 2,
+            "shape": (5,),
+            "typestr": "<f4",
+            "data": (0, False),
+        }
+
+
+class _FakeCAIWithMask:
+    """Object with CUDA Array Interface that has a mask."""
+
+    def __init__(self):
+        self.__cuda_array_interface__ = {
+            "version": 3,
+            "shape": (5,),
+            "typestr": "<f4",
+            "data": (0, False),
+            "mask": np.ones(5, dtype=bool),
+        }
+
+
+class _FakeArrayInterfacev2:
+    """Object with NumPy Array Interface v2 (unsupported)."""
+
+    def __init__(self, arr):
+        iface = dict(arr.__array_interface__)
+        iface["version"] = 2
+        self.__array_interface__ = iface
+
+
+class _FakeArrayInterfaceWithMask:
+    """Object with NumPy Array Interface that has a mask."""
+
+    def __init__(self, arr):
+        iface = dict(arr.__array_interface__)
+        iface["mask"] = np.ones(arr.shape, dtype=bool)
+        self.__array_interface__ = iface
+
+
+def test_cai_v2_rejected():
+    """CUDA Array Interface v2 raises BufferError."""
+    from cuda.core._memoryview import view_as_cai
+
+    obj = _FakeCAIv2()
+    with pytest.raises(BufferError, match="v3 or above"):
+        view_as_cai(obj, stream_ptr=-1)
+
+
+def test_cai_mask_rejected():
+    """CUDA Array Interface with mask raises BufferError."""
+    from cuda.core._memoryview import view_as_cai
+
+    obj = _FakeCAIWithMask()
+    with pytest.raises(BufferError, match="mask is not supported"):
+        view_as_cai(obj, stream_ptr=-1)
+
+
+class _FakeCAIv3:
+    """Valid CUDA Array Interface v3 object (for stream=None test)."""
+
+    def __init__(self):
+        self.__cuda_array_interface__ = {
+            "version": 3,
+            "shape": (5,),
+            "typestr": "<f4",
+            "data": (0, False),
+        }
+
+
+def test_cai_stream_none_rejected():
+    """CUDA Array Interface with stream=None raises BufferError."""
+    from cuda.core._memoryview import view_as_cai
+
+    obj = _FakeCAIv3()
+    with pytest.raises(BufferError, match="stream=None is ambiguous"):
+        view_as_cai(obj, stream_ptr=None)
+
+
+def test_array_interface_v2_rejected():
+    """NumPy Array Interface v2 raises BufferError."""
+    from cuda.core._memoryview import view_as_array_interface
+
+    arr = np.zeros(5, dtype=np.float32)
+    obj = _FakeArrayInterfacev2(arr)
+    with pytest.raises(BufferError, match="v3 or above"):
+        view_as_array_interface(obj)
+
+
+def test_array_interface_mask_rejected():
+    """NumPy Array Interface with mask raises BufferError."""
+    from cuda.core._memoryview import view_as_array_interface
+
+    arr = np.zeros(5, dtype=np.float32)
+    obj = _FakeArrayInterfaceWithMask(arr)
+    with pytest.raises(BufferError, match="mask is not supported"):
+        view_as_array_interface(obj)

From 8e796a4dc628148206f22990d5a22ffc25514426 Mon Sep 17 00:00:00 2001
From: Rui Luo <ruluo@nvidia.com>
Date: Fri, 17 Apr 2026 13:22:16 +0800
Subject: [PATCH 2/2] tests: add launch-level coverage for ctypes/numpy
 subclass fallback

---
 cuda_core/tests/test_launcher.py | 80 ++++++++++++++++++++++++++++++++
 1 file changed, 80 insertions(+)

diff --git a/cuda_core/tests/test_launcher.py b/cuda_core/tests/test_launcher.py
index 06e941ac63a..fb43e068dbb 100644
--- a/cuda_core/tests/test_launcher.py
+++ b/cuda_core/tests/test_launcher.py
@@ -407,6 +407,46 @@ class MyBool(ctypes.c_bool):
     assert holder.ptr != 0
 
 
+@requires_module(np, "2.1")
+def test_launch_scalar_argument_ctypes_subclass_fallback():
+    """Subclassed ctypes scalars survive the launch path and reach the kernel correctly."""
+
+    class MyInt32(ctypes.c_int32):
+        pass
+
+    dev = Device()
+    dev.set_current()
+
+    mr = LegacyPinnedMemoryResource()
+    b = mr.allocate(np.dtype(np.int32).itemsize)
+    arr = np.from_dlpack(b).view(np.int32)
+    arr[:] = 0
+
+    scalar = MyInt32(-123456)
+
+    code = r"""
+    template <typename T>
+    __global__ void write_scalar(T* arr, T val) {
+        arr[0] = val;
+    }
+    """
+
+    arch = "".join(f"{i}" for i in dev.compute_capability)
+    pro_opts = ProgramOptions(std="c++17", arch=f"sm_{arch}")
+    prog = Program(code, code_type="c++", options=pro_opts)
+    ker_name = "write_scalar<signed int>"
+    mod = prog.compile("cubin", name_expressions=(ker_name,))
+    ker = mod.get_kernel(ker_name)
+
+    # This exercises the prepare_ctypes_arg isinstance fallback through a real launch.
+    stream = dev.default_stream
+    config = LaunchConfig(grid=1, block=1)
+    launch(stream, config, ker, arr.ctypes.data, scalar)
+    stream.sync()
+
+    assert arr[0] == scalar.value
+
+
 def test_kernel_arg_numpy_subclass_isinstance_fallback():
     """Subclassed numpy scalars hit the isinstance fallback in prepare_numpy_arg."""
     from cuda.core._kernel_arg_handler import ParamHolder
@@ -421,6 +461,46 @@ class MyFloat32(np.float32):
     assert holder.ptr != 0
 
 
+@requires_module(np, "2.1")
+def test_launch_scalar_argument_numpy_subclass_fallback():
+    """Subclassed numpy scalars survive the launch path and reach the kernel correctly."""
+
+    class MyFloat32(np.float32):
+        pass
+
+    dev = Device()
+    dev.set_current()
+
+    mr = LegacyPinnedMemoryResource()
+    b = mr.allocate(np.dtype(np.float32).itemsize)
+    arr = np.from_dlpack(b).view(np.float32)
+    arr[:] = 0.0
+
+    scalar = MyFloat32(3.14)
+
+    code = r"""
+    template <typename T>
+    __global__ void write_scalar(T* arr, T val) {
+        arr[0] = val;
+    }
+    """
+
+    arch = "".join(f"{i}" for i in dev.compute_capability)
+    pro_opts = ProgramOptions(std="c++17", arch=f"sm_{arch}")
+    prog = Program(code, code_type="c++", options=pro_opts)
+    ker_name = "write_scalar<float>"
+    mod = prog.compile("cubin", name_expressions=(ker_name,))
+    ker = mod.get_kernel(ker_name)
+
+    # This exercises the prepare_numpy_arg isinstance fallback through a real launch.
+    stream = dev.default_stream
+    config = LaunchConfig(grid=1, block=1)
+    launch(stream, config, ker, arr.ctypes.data, scalar)
+    stream.sync()
+
+    assert arr[0] == scalar
+
+
 def test_kernel_arg_python_isinstance_fallbacks():
     """Subclassed Python builtins hit the isinstance fallback in ParamHolder."""
     from cuda.core._kernel_arg_handler import ParamHolder