Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 49 additions & 1 deletion cuda_core/tests/test_event.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0


Expand Down Expand Up @@ -193,6 +193,54 @@ def test_event_type_safety(init_cuda):
assert (event is None) is False


def test_event_isub_not_implemented(init_cuda):
    """Event.__isub__ returns NotImplemented for non-Event types."""
    dev = Device()
    evt = dev.create_stream().record()
    # For a foreign right-hand type the dunder must signal NotImplemented,
    # not raise, so Python can try the reflected operation.
    assert evt.__isub__(42) is NotImplemented


def test_event_rsub_not_implemented(init_cuda):
    """Event.__rsub__ returns NotImplemented for non-Event types."""
    dev = Device()
    evt = dev.create_stream().record()
    # Reflected subtraction with a non-Event operand must yield NotImplemented.
    assert evt.__rsub__(42) is NotImplemented


def test_event_get_ipc_descriptor_non_ipc(init_cuda):
    """get_ipc_descriptor raises RuntimeError on a non-IPC event."""
    recorded = Device().create_stream().record()
    # A plain recorded event was not created with IPC support, so asking
    # for its IPC descriptor must be refused.
    with pytest.raises(RuntimeError, match="not IPC-enabled"):
        recorded.get_ipc_descriptor()


def test_event_is_done_false(init_cuda):
    """Event.is_done returns False when captured work has not yet completed."""
    dev = Device()
    latch = LatchKernel(dev)
    work_stream = dev.create_stream()
    latch.launch(work_stream)
    evt = work_stream.record()
    # The latch keeps the kernel resident on the stream, so the event
    # recorded behind it cannot have completed yet.
    assert evt.is_done is False
    latch.release()
    evt.sync()


def test_ipc_event_descriptor_direct_init():
    """IPCEventDescriptor cannot be instantiated directly."""
    import cuda.core._event as _event_module

    # Direct construction is reserved for internal factory paths.
    descriptor_cls = _event_module.IPCEventDescriptor
    with pytest.raises(RuntimeError, match="cannot be instantiated directly"):
        descriptor_cls()


# ============================================================================
# Event Hash Tests
# ============================================================================
Expand Down
129 changes: 129 additions & 0 deletions cuda_core/tests/test_launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -387,3 +387,132 @@ def test_kernel_arg_unsupported_type():

with pytest.raises(TypeError, match="unsupported type"):
ParamHolder(["not_a_valid_kernel_arg"])


def test_kernel_arg_ctypes_subclass_isinstance_fallback():
    """Subclassed ctypes types hit the isinstance fallback in prepare_ctypes_arg."""
    from cuda.core._kernel_arg_handler import ParamHolder

    class SubInt32(ctypes.c_int32):
        pass

    class SubFloat(ctypes.c_float):
        pass

    class SubBool(ctypes.c_bool):
        pass

    # Exact-type dispatch misses subclasses; the isinstance fallback must
    # still accept them without raising.
    args = [SubInt32(42), SubFloat(3.14), SubBool(True)]
    holder = ParamHolder(args)
    assert holder.ptr != 0


@requires_module(np, "2.1")
def test_launch_scalar_argument_ctypes_subclass_fallback():
    """Subclassed ctypes scalars survive the launch path and reach the kernel correctly."""

    class SubInt32(ctypes.c_int32):
        pass

    device = Device()
    device.set_current()

    # Pinned host buffer the kernel writes into, viewed as one int32 element.
    pinned = LegacyPinnedMemoryResource().allocate(np.dtype(np.int32).itemsize)
    out = np.from_dlpack(pinned).view(np.int32)
    out[:] = 0

    value = SubInt32(-123456)

    code = r"""
template <typename T>
__global__ void write_scalar(T* arr, T val) {
    arr[0] = val;
}
"""

    cc = "".join(str(part) for part in device.compute_capability)
    program = Program(
        code,
        code_type="c++",
        options=ProgramOptions(std="c++17", arch=f"sm_{cc}"),
    )
    name = "write_scalar<signed int>"
    kernel = program.compile("cubin", name_expressions=(name,)).get_kernel(name)

    # Drive the prepare_ctypes_arg isinstance fallback through a real launch.
    stream = device.default_stream
    launch(stream, LaunchConfig(grid=1, block=1), kernel, out.ctypes.data, value)
    stream.sync()

    assert out[0] == value.value


def test_kernel_arg_numpy_subclass_isinstance_fallback():
    """Subclassed numpy scalars hit the isinstance fallback in prepare_numpy_arg."""
    from cuda.core._kernel_arg_handler import ParamHolder

    class SubInt32(np.int32):
        pass

    class SubFloat32(np.float32):
        pass

    # Exact-type dispatch skips subclasses; isinstance handling must accept them.
    assert ParamHolder([SubInt32(7), SubFloat32(2.5)]).ptr != 0


@requires_module(np, "2.1")
def test_launch_scalar_argument_numpy_subclass_fallback():
    """Subclassed numpy scalars survive the launch path and reach the kernel correctly."""

    class SubFloat32(np.float32):
        pass

    device = Device()
    device.set_current()

    # Pinned host buffer the kernel writes into, viewed as one float32 element.
    pinned = LegacyPinnedMemoryResource().allocate(np.dtype(np.float32).itemsize)
    out = np.from_dlpack(pinned).view(np.float32)
    out[:] = 0.0

    value = SubFloat32(3.14)

    code = r"""
template <typename T>
__global__ void write_scalar(T* arr, T val) {
    arr[0] = val;
}
"""

    cc = "".join(str(part) for part in device.compute_capability)
    program = Program(
        code,
        code_type="c++",
        options=ProgramOptions(std="c++17", arch=f"sm_{cc}"),
    )
    name = "write_scalar<float>"
    kernel = program.compile("cubin", name_expressions=(name,)).get_kernel(name)

    # Drive the prepare_numpy_arg isinstance fallback through a real launch.
    stream = device.default_stream
    launch(stream, LaunchConfig(grid=1, block=1), kernel, out.ctypes.data, value)
    stream.sync()

    assert out[0] == value


def test_kernel_arg_python_isinstance_fallbacks():
    """Subclassed Python builtins hit the isinstance fallback in ParamHolder."""
    from cuda.core._kernel_arg_handler import ParamHolder

    class IntLike(int):
        """type(x) is not int, so the fast path skips; isinstance(x, int) catches it."""

    class FloatLike(float):
        pass

    class ComplexLike(complex):
        pass

    args = [IntLike(1), FloatLike(1.5), ComplexLike(1 + 2j)]
    assert ParamHolder(args).ptr != 0
21 changes: 21 additions & 0 deletions cuda_core/tests/test_linker.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,3 +221,24 @@ def test_linker_logs_cached_after_link(compile_ptx_functions):
# Calling again should return the same observable values.
assert linker.get_error_log() == err_log
assert linker.get_info_log() == info_log


def test_linker_handle(compile_ptx_functions):
    """Linker.handle returns a non-null handle object."""
    lnk = Linker(*compile_ptx_functions, options=LinkerOptions(arch=ARCH))
    h = lnk.handle
    # The handle must exist and convert to a non-zero native address.
    assert h is not None
    assert int(h) != 0


@pytest.mark.skipif(is_culink_backend, reason="nvjitlink options only tested with nvjitlink backend")
def test_linker_options_nvjitlink_options_as_str():
    """_prepare_nvjitlink_options(as_bytes=False) returns plain strings."""
    prepared = LinkerOptions(arch=ARCH, debug=True, lineinfo=True)._prepare_nvjitlink_options(as_bytes=False)
    assert isinstance(prepared, list)
    assert all(isinstance(opt, str) for opt in prepared)
    # Each enabled option must appear in its textual nvjitlink form.
    for expected in (f"-arch={ARCH}", "-g", "-lineinfo"):
        assert expected in prepared
104 changes: 104 additions & 0 deletions cuda_core/tests/test_program.py
Original file line number Diff line number Diff line change
Expand Up @@ -773,3 +773,107 @@ def test_program_options_as_bytes_nvvm_unsupported_option():
options = ProgramOptions(arch="sm_80", lineinfo=True)
with pytest.raises(CUDAError, match="not supported by NVVM backend"):
options.as_bytes("nvvm")


def test_program_options_repr():
    """ProgramOptions.__repr__ returns a human-readable string."""
    text = repr(ProgramOptions(name="mykernel", arch="sm_80"))
    # The repr should identify the class and echo the supplied options.
    for fragment in ("ProgramOptions", "mykernel", "sm_80"):
        assert fragment in text


def test_program_options_bad_define_macro_short_tuple():
    """define_macro with a 1-element tuple raises RuntimeError."""
    bad = ProgramOptions(define_macro=("ONLY_NAME",), name="test", arch="sm_80")
    # Validation is deferred until the options are serialized for the backend.
    with pytest.raises(RuntimeError, match="Expected define_macro tuple"):
        bad.as_bytes("nvrtc")


def test_program_options_bad_define_macro_non_str_value():
    """define_macro tuple with a non-string value raises RuntimeError."""
    bad = ProgramOptions(define_macro=("MY_MACRO", 99), name="test", arch="sm_80")
    # The macro value must be a string; serialization rejects anything else.
    with pytest.raises(RuntimeError, match="Expected define_macro tuple"):
        bad.as_bytes("nvrtc")


def test_program_options_bad_define_macro_list_non_str():
    """define_macro list containing a non-str/non-tuple item raises RuntimeError."""
    bad = ProgramOptions(define_macro=[42], name="test", arch="sm_80")
    # Each list entry must be a macro name or (name, value) tuple.
    with pytest.raises(RuntimeError, match="Expected define_macro"):
        bad.as_bytes("nvrtc")


def test_program_options_bad_define_macro_list_bad_tuple():
    """define_macro list with a malformed tuple inside raises RuntimeError."""
    bad = ProgramOptions(define_macro=[("ONLY_NAME",)], name="test", arch="sm_80")
    # A tuple entry must have exactly (name, value); a 1-tuple is malformed.
    with pytest.raises(RuntimeError, match="Expected define_macro"):
        bad.as_bytes("nvrtc")


def test_ptx_program_extra_sources_unsupported(ptx_code_object):
    """PTX backend raises ValueError when extra_sources is specified."""
    opts = ProgramOptions(extra_sources=[("module1", b"data")])
    ptx_text = ptx_code_object.code.decode()
    # The PTX backend has no multi-module compile, so extra_sources is rejected.
    with pytest.raises(ValueError, match="extra_sources is not supported by the PTX backend"):
        Program(ptx_text, "ptx", opts)


def test_ptx_program_handle_is_linker_handle(init_cuda, ptx_code_object):
    """Program.handle for the PTX backend delegates to the linker handle.

    Fix: close the program in a ``finally`` block so the underlying linker
    resources are released even when one of the assertions fails; the
    original leaked the program on assertion failure.
    """
    program = Program(ptx_code_object.code.decode(), "ptx")
    try:
        handle = program.handle
        assert handle is not None
        assert int(handle) != 0
    finally:
        program.close()


@nvvm_available
def test_nvvm_program_wrong_code_type():
    """NVVM backend raises TypeError when code is not str/bytes/bytearray."""
    expected = "NVVM IR code must be provided as str, bytes, or bytearray"
    # An int is none of the accepted code containers.
    with pytest.raises(TypeError, match=expected):
        Program(42, "nvvm")


def test_extra_sources_not_sequence():
    """A non-sequence extra_sources value is rejected with TypeError."""
    expected = "extra_sources must be a sequence of 2-tuples"
    with pytest.raises(TypeError, match=expected):
        ProgramOptions(extra_sources=42, name="test", arch="sm_80")


def test_extra_sources_bad_module_not_tuple():
    """A non-tuple item inside extra_sources is rejected with TypeError."""
    expected = "Each extra module must be a 2-tuple"
    with pytest.raises(TypeError, match=expected):
        ProgramOptions(extra_sources=["not_a_tuple"], name="test", arch="sm_80")


def test_extra_sources_bad_module_name_not_str():
    """A non-string module name inside extra_sources is rejected with TypeError."""
    expected = "Module name at index 0 must be a string"
    with pytest.raises(TypeError, match=expected):
        ProgramOptions(extra_sources=[(42, b"source")], name="test", arch="sm_80")


def test_extra_sources_bad_module_source_wrong_type():
    """A module source that is not str/bytes/bytearray is rejected with TypeError."""
    expected = "Module source at index 0 must be str"
    with pytest.raises(TypeError, match=expected):
        ProgramOptions(extra_sources=[("mod", 42)], name="test", arch="sm_80")


def test_extra_sources_empty_source():
    """An empty-bytes module source inside extra_sources is rejected with ValueError."""
    expected = "Module source for 'mod'.*cannot be empty"
    with pytest.raises(ValueError, match=expected):
        ProgramOptions(extra_sources=[("mod", b"")], name="test", arch="sm_80")


def test_nvrtc_compile_with_logs_capture(init_cuda):
    """Program.compile with logs= exercises the NVRTC program-log reading path.

    Fix: close the program in a ``finally`` block so compiler resources are
    released even when one of the assertions fails; the original leaked the
    program on assertion failure.
    """
    import io

    # #warning generates a non-empty NVRTC program log, ensuring logsize > 1.
    code = '#warning "test log capture"\nextern "C" __global__ void my_kernel() {}'
    program = Program(code, "c++")
    try:
        logs = io.StringIO()
        result = program.compile("ptx", logs=logs)
        assert isinstance(result, ObjectCode)
        assert logs.getvalue(), "Expected non-empty compilation log from #warning directive"
    finally:
        program.close()
Loading
Loading