From f16dc1b674620f059af6ebc64290f754f86a7fed Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Mon, 1 Dec 2025 16:24:00 -0800 Subject: [PATCH 01/26] Move all files from cuda/core/experimental/ to cuda/core/ [UNTESTED] This commit moves all implementation files from the experimental namespace to the core namespace. The experimental/ directory is kept for backward compatibility stubs (to be added in a later commit). Files moved: - All .py, .pyx, .pxd files from experimental/ to core/ - _memory/, _utils/, include/ subdirectories - __init__.pxd renamed to __init__experimental.pxd This is a file move only - imports will be updated in the next commit. --- .../core/{experimental/__init__.pxd => __init__experimental.pxd} | 0 cuda_core/cuda/core/{experimental => }/_context.pyx | 0 cuda_core/cuda/core/{experimental => }/_device.pyx | 0 cuda_core/cuda/core/{experimental => }/_dlpack.pxd | 0 cuda_core/cuda/core/{experimental => }/_dlpack.pyx | 0 cuda_core/cuda/core/{experimental => }/_event.pxd | 0 cuda_core/cuda/core/{experimental => }/_event.pyx | 0 cuda_core/cuda/core/{experimental => }/_graph.py | 0 cuda_core/cuda/core/{experimental => }/_kernel_arg_handler.pyx | 0 cuda_core/cuda/core/{experimental => }/_launch_config.pxd | 0 cuda_core/cuda/core/{experimental => }/_launch_config.pyx | 0 cuda_core/cuda/core/{experimental => }/_launcher.pyx | 0 cuda_core/cuda/core/{experimental => }/_linker.py | 0 cuda_core/cuda/core/{experimental => }/_memory/__init__.py | 0 cuda_core/cuda/core/{experimental => }/_memory/_buffer.pxd | 0 cuda_core/cuda/core/{experimental => }/_memory/_buffer.pyx | 0 .../core/{experimental => }/_memory/_device_memory_resource.pxd | 0 .../core/{experimental => }/_memory/_device_memory_resource.pyx | 0 .../core/{experimental => }/_memory/_graph_memory_resource.pxd | 0 .../core/{experimental => }/_memory/_graph_memory_resource.pyx | 0 cuda_core/cuda/core/{experimental => }/_memory/_ipc.pxd | 0 cuda_core/cuda/core/{experimental => 
}/_memory/_ipc.pyx | 0 cuda_core/cuda/core/{experimental => }/_memory/_legacy.py | 0 .../core/{experimental => }/_memory/_virtual_memory_resource.py | 0 cuda_core/cuda/core/{experimental => }/_memoryview.pyx | 0 cuda_core/cuda/core/{experimental => }/_module.py | 0 cuda_core/cuda/core/{experimental => }/_program.py | 0 cuda_core/cuda/core/{experimental => }/_stream.pxd | 0 cuda_core/cuda/core/{experimental => }/_stream.pyx | 0 cuda_core/cuda/core/{experimental => }/_system.py | 0 cuda_core/cuda/core/{experimental => }/_utils/__init__.pxd | 0 cuda_core/cuda/core/{experimental => }/_utils/__init__.py | 0 .../cuda/core/{experimental => }/_utils/clear_error_support.py | 0 cuda_core/cuda/core/{experimental => }/_utils/cuda_utils.pxd | 0 cuda_core/cuda/core/{experimental => }/_utils/cuda_utils.pyx | 0 .../{experimental => }/_utils/driver_cu_result_explanations.py | 0 .../{experimental => }/_utils/runtime_cuda_error_explanations.py | 0 cuda_core/cuda/core/{experimental => }/include/dlpack.h | 0 cuda_core/cuda/core/{experimental => }/include/utility.hpp | 0 cuda_core/cuda/core/{experimental => }/utils.py | 0 40 files changed, 0 insertions(+), 0 deletions(-) rename cuda_core/cuda/core/{experimental/__init__.pxd => __init__experimental.pxd} (100%) rename cuda_core/cuda/core/{experimental => }/_context.pyx (100%) rename cuda_core/cuda/core/{experimental => }/_device.pyx (100%) rename cuda_core/cuda/core/{experimental => }/_dlpack.pxd (100%) rename cuda_core/cuda/core/{experimental => }/_dlpack.pyx (100%) rename cuda_core/cuda/core/{experimental => }/_event.pxd (100%) rename cuda_core/cuda/core/{experimental => }/_event.pyx (100%) rename cuda_core/cuda/core/{experimental => }/_graph.py (100%) rename cuda_core/cuda/core/{experimental => }/_kernel_arg_handler.pyx (100%) rename cuda_core/cuda/core/{experimental => }/_launch_config.pxd (100%) rename cuda_core/cuda/core/{experimental => }/_launch_config.pyx (100%) rename cuda_core/cuda/core/{experimental => }/_launcher.pyx (100%) 
rename cuda_core/cuda/core/{experimental => }/_linker.py (100%) rename cuda_core/cuda/core/{experimental => }/_memory/__init__.py (100%) rename cuda_core/cuda/core/{experimental => }/_memory/_buffer.pxd (100%) rename cuda_core/cuda/core/{experimental => }/_memory/_buffer.pyx (100%) rename cuda_core/cuda/core/{experimental => }/_memory/_device_memory_resource.pxd (100%) rename cuda_core/cuda/core/{experimental => }/_memory/_device_memory_resource.pyx (100%) rename cuda_core/cuda/core/{experimental => }/_memory/_graph_memory_resource.pxd (100%) rename cuda_core/cuda/core/{experimental => }/_memory/_graph_memory_resource.pyx (100%) rename cuda_core/cuda/core/{experimental => }/_memory/_ipc.pxd (100%) rename cuda_core/cuda/core/{experimental => }/_memory/_ipc.pyx (100%) rename cuda_core/cuda/core/{experimental => }/_memory/_legacy.py (100%) rename cuda_core/cuda/core/{experimental => }/_memory/_virtual_memory_resource.py (100%) rename cuda_core/cuda/core/{experimental => }/_memoryview.pyx (100%) rename cuda_core/cuda/core/{experimental => }/_module.py (100%) rename cuda_core/cuda/core/{experimental => }/_program.py (100%) rename cuda_core/cuda/core/{experimental => }/_stream.pxd (100%) rename cuda_core/cuda/core/{experimental => }/_stream.pyx (100%) rename cuda_core/cuda/core/{experimental => }/_system.py (100%) rename cuda_core/cuda/core/{experimental => }/_utils/__init__.pxd (100%) rename cuda_core/cuda/core/{experimental => }/_utils/__init__.py (100%) rename cuda_core/cuda/core/{experimental => }/_utils/clear_error_support.py (100%) rename cuda_core/cuda/core/{experimental => }/_utils/cuda_utils.pxd (100%) rename cuda_core/cuda/core/{experimental => }/_utils/cuda_utils.pyx (100%) rename cuda_core/cuda/core/{experimental => }/_utils/driver_cu_result_explanations.py (100%) rename cuda_core/cuda/core/{experimental => }/_utils/runtime_cuda_error_explanations.py (100%) rename cuda_core/cuda/core/{experimental => }/include/dlpack.h (100%) rename 
cuda_core/cuda/core/{experimental => }/include/utility.hpp (100%) rename cuda_core/cuda/core/{experimental => }/utils.py (100%) diff --git a/cuda_core/cuda/core/experimental/__init__.pxd b/cuda_core/cuda/core/__init__experimental.pxd similarity index 100% rename from cuda_core/cuda/core/experimental/__init__.pxd rename to cuda_core/cuda/core/__init__experimental.pxd diff --git a/cuda_core/cuda/core/experimental/_context.pyx b/cuda_core/cuda/core/_context.pyx similarity index 100% rename from cuda_core/cuda/core/experimental/_context.pyx rename to cuda_core/cuda/core/_context.pyx diff --git a/cuda_core/cuda/core/experimental/_device.pyx b/cuda_core/cuda/core/_device.pyx similarity index 100% rename from cuda_core/cuda/core/experimental/_device.pyx rename to cuda_core/cuda/core/_device.pyx diff --git a/cuda_core/cuda/core/experimental/_dlpack.pxd b/cuda_core/cuda/core/_dlpack.pxd similarity index 100% rename from cuda_core/cuda/core/experimental/_dlpack.pxd rename to cuda_core/cuda/core/_dlpack.pxd diff --git a/cuda_core/cuda/core/experimental/_dlpack.pyx b/cuda_core/cuda/core/_dlpack.pyx similarity index 100% rename from cuda_core/cuda/core/experimental/_dlpack.pyx rename to cuda_core/cuda/core/_dlpack.pyx diff --git a/cuda_core/cuda/core/experimental/_event.pxd b/cuda_core/cuda/core/_event.pxd similarity index 100% rename from cuda_core/cuda/core/experimental/_event.pxd rename to cuda_core/cuda/core/_event.pxd diff --git a/cuda_core/cuda/core/experimental/_event.pyx b/cuda_core/cuda/core/_event.pyx similarity index 100% rename from cuda_core/cuda/core/experimental/_event.pyx rename to cuda_core/cuda/core/_event.pyx diff --git a/cuda_core/cuda/core/experimental/_graph.py b/cuda_core/cuda/core/_graph.py similarity index 100% rename from cuda_core/cuda/core/experimental/_graph.py rename to cuda_core/cuda/core/_graph.py diff --git a/cuda_core/cuda/core/experimental/_kernel_arg_handler.pyx b/cuda_core/cuda/core/_kernel_arg_handler.pyx similarity index 100% rename from 
cuda_core/cuda/core/experimental/_kernel_arg_handler.pyx rename to cuda_core/cuda/core/_kernel_arg_handler.pyx diff --git a/cuda_core/cuda/core/experimental/_launch_config.pxd b/cuda_core/cuda/core/_launch_config.pxd similarity index 100% rename from cuda_core/cuda/core/experimental/_launch_config.pxd rename to cuda_core/cuda/core/_launch_config.pxd diff --git a/cuda_core/cuda/core/experimental/_launch_config.pyx b/cuda_core/cuda/core/_launch_config.pyx similarity index 100% rename from cuda_core/cuda/core/experimental/_launch_config.pyx rename to cuda_core/cuda/core/_launch_config.pyx diff --git a/cuda_core/cuda/core/experimental/_launcher.pyx b/cuda_core/cuda/core/_launcher.pyx similarity index 100% rename from cuda_core/cuda/core/experimental/_launcher.pyx rename to cuda_core/cuda/core/_launcher.pyx diff --git a/cuda_core/cuda/core/experimental/_linker.py b/cuda_core/cuda/core/_linker.py similarity index 100% rename from cuda_core/cuda/core/experimental/_linker.py rename to cuda_core/cuda/core/_linker.py diff --git a/cuda_core/cuda/core/experimental/_memory/__init__.py b/cuda_core/cuda/core/_memory/__init__.py similarity index 100% rename from cuda_core/cuda/core/experimental/_memory/__init__.py rename to cuda_core/cuda/core/_memory/__init__.py diff --git a/cuda_core/cuda/core/experimental/_memory/_buffer.pxd b/cuda_core/cuda/core/_memory/_buffer.pxd similarity index 100% rename from cuda_core/cuda/core/experimental/_memory/_buffer.pxd rename to cuda_core/cuda/core/_memory/_buffer.pxd diff --git a/cuda_core/cuda/core/experimental/_memory/_buffer.pyx b/cuda_core/cuda/core/_memory/_buffer.pyx similarity index 100% rename from cuda_core/cuda/core/experimental/_memory/_buffer.pyx rename to cuda_core/cuda/core/_memory/_buffer.pyx diff --git a/cuda_core/cuda/core/experimental/_memory/_device_memory_resource.pxd b/cuda_core/cuda/core/_memory/_device_memory_resource.pxd similarity index 100% rename from 
cuda_core/cuda/core/experimental/_memory/_device_memory_resource.pxd rename to cuda_core/cuda/core/_memory/_device_memory_resource.pxd diff --git a/cuda_core/cuda/core/experimental/_memory/_device_memory_resource.pyx b/cuda_core/cuda/core/_memory/_device_memory_resource.pyx similarity index 100% rename from cuda_core/cuda/core/experimental/_memory/_device_memory_resource.pyx rename to cuda_core/cuda/core/_memory/_device_memory_resource.pyx diff --git a/cuda_core/cuda/core/experimental/_memory/_graph_memory_resource.pxd b/cuda_core/cuda/core/_memory/_graph_memory_resource.pxd similarity index 100% rename from cuda_core/cuda/core/experimental/_memory/_graph_memory_resource.pxd rename to cuda_core/cuda/core/_memory/_graph_memory_resource.pxd diff --git a/cuda_core/cuda/core/experimental/_memory/_graph_memory_resource.pyx b/cuda_core/cuda/core/_memory/_graph_memory_resource.pyx similarity index 100% rename from cuda_core/cuda/core/experimental/_memory/_graph_memory_resource.pyx rename to cuda_core/cuda/core/_memory/_graph_memory_resource.pyx diff --git a/cuda_core/cuda/core/experimental/_memory/_ipc.pxd b/cuda_core/cuda/core/_memory/_ipc.pxd similarity index 100% rename from cuda_core/cuda/core/experimental/_memory/_ipc.pxd rename to cuda_core/cuda/core/_memory/_ipc.pxd diff --git a/cuda_core/cuda/core/experimental/_memory/_ipc.pyx b/cuda_core/cuda/core/_memory/_ipc.pyx similarity index 100% rename from cuda_core/cuda/core/experimental/_memory/_ipc.pyx rename to cuda_core/cuda/core/_memory/_ipc.pyx diff --git a/cuda_core/cuda/core/experimental/_memory/_legacy.py b/cuda_core/cuda/core/_memory/_legacy.py similarity index 100% rename from cuda_core/cuda/core/experimental/_memory/_legacy.py rename to cuda_core/cuda/core/_memory/_legacy.py diff --git a/cuda_core/cuda/core/experimental/_memory/_virtual_memory_resource.py b/cuda_core/cuda/core/_memory/_virtual_memory_resource.py similarity index 100% rename from 
cuda_core/cuda/core/experimental/_memory/_virtual_memory_resource.py rename to cuda_core/cuda/core/_memory/_virtual_memory_resource.py diff --git a/cuda_core/cuda/core/experimental/_memoryview.pyx b/cuda_core/cuda/core/_memoryview.pyx similarity index 100% rename from cuda_core/cuda/core/experimental/_memoryview.pyx rename to cuda_core/cuda/core/_memoryview.pyx diff --git a/cuda_core/cuda/core/experimental/_module.py b/cuda_core/cuda/core/_module.py similarity index 100% rename from cuda_core/cuda/core/experimental/_module.py rename to cuda_core/cuda/core/_module.py diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/_program.py similarity index 100% rename from cuda_core/cuda/core/experimental/_program.py rename to cuda_core/cuda/core/_program.py diff --git a/cuda_core/cuda/core/experimental/_stream.pxd b/cuda_core/cuda/core/_stream.pxd similarity index 100% rename from cuda_core/cuda/core/experimental/_stream.pxd rename to cuda_core/cuda/core/_stream.pxd diff --git a/cuda_core/cuda/core/experimental/_stream.pyx b/cuda_core/cuda/core/_stream.pyx similarity index 100% rename from cuda_core/cuda/core/experimental/_stream.pyx rename to cuda_core/cuda/core/_stream.pyx diff --git a/cuda_core/cuda/core/experimental/_system.py b/cuda_core/cuda/core/_system.py similarity index 100% rename from cuda_core/cuda/core/experimental/_system.py rename to cuda_core/cuda/core/_system.py diff --git a/cuda_core/cuda/core/experimental/_utils/__init__.pxd b/cuda_core/cuda/core/_utils/__init__.pxd similarity index 100% rename from cuda_core/cuda/core/experimental/_utils/__init__.pxd rename to cuda_core/cuda/core/_utils/__init__.pxd diff --git a/cuda_core/cuda/core/experimental/_utils/__init__.py b/cuda_core/cuda/core/_utils/__init__.py similarity index 100% rename from cuda_core/cuda/core/experimental/_utils/__init__.py rename to cuda_core/cuda/core/_utils/__init__.py diff --git a/cuda_core/cuda/core/experimental/_utils/clear_error_support.py 
b/cuda_core/cuda/core/_utils/clear_error_support.py similarity index 100% rename from cuda_core/cuda/core/experimental/_utils/clear_error_support.py rename to cuda_core/cuda/core/_utils/clear_error_support.py diff --git a/cuda_core/cuda/core/experimental/_utils/cuda_utils.pxd b/cuda_core/cuda/core/_utils/cuda_utils.pxd similarity index 100% rename from cuda_core/cuda/core/experimental/_utils/cuda_utils.pxd rename to cuda_core/cuda/core/_utils/cuda_utils.pxd diff --git a/cuda_core/cuda/core/experimental/_utils/cuda_utils.pyx b/cuda_core/cuda/core/_utils/cuda_utils.pyx similarity index 100% rename from cuda_core/cuda/core/experimental/_utils/cuda_utils.pyx rename to cuda_core/cuda/core/_utils/cuda_utils.pyx diff --git a/cuda_core/cuda/core/experimental/_utils/driver_cu_result_explanations.py b/cuda_core/cuda/core/_utils/driver_cu_result_explanations.py similarity index 100% rename from cuda_core/cuda/core/experimental/_utils/driver_cu_result_explanations.py rename to cuda_core/cuda/core/_utils/driver_cu_result_explanations.py diff --git a/cuda_core/cuda/core/experimental/_utils/runtime_cuda_error_explanations.py b/cuda_core/cuda/core/_utils/runtime_cuda_error_explanations.py similarity index 100% rename from cuda_core/cuda/core/experimental/_utils/runtime_cuda_error_explanations.py rename to cuda_core/cuda/core/_utils/runtime_cuda_error_explanations.py diff --git a/cuda_core/cuda/core/experimental/include/dlpack.h b/cuda_core/cuda/core/include/dlpack.h similarity index 100% rename from cuda_core/cuda/core/experimental/include/dlpack.h rename to cuda_core/cuda/core/include/dlpack.h diff --git a/cuda_core/cuda/core/experimental/include/utility.hpp b/cuda_core/cuda/core/include/utility.hpp similarity index 100% rename from cuda_core/cuda/core/experimental/include/utility.hpp rename to cuda_core/cuda/core/include/utility.hpp diff --git a/cuda_core/cuda/core/experimental/utils.py b/cuda_core/cuda/core/utils.py similarity index 100% rename from 
cuda_core/cuda/core/experimental/utils.py rename to cuda_core/cuda/core/utils.py From 77445b3484c1d141488d0ee7d037e326a8b53eba Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Mon, 1 Dec 2025 16:25:07 -0800 Subject: [PATCH 02/26] Update all internal imports from cuda.core.experimental.* to cuda.core.* [UNTESTED] This commit updates all import statements in the moved files to reference the new cuda.core.* paths instead of cuda.core.experimental.*. Updated: - Python files (.py): _module.py, _graph.py, _linker.py, _program.py, _system.py, utils.py - Cython files (.pyx): _device.pyx, _stream.pyx, _launcher.pyx, _launch_config.pyx, _event.pyx - Memory subdirectory: all .py, .pyx, and .pxd files - Utils subdirectory: cuda_utils.pyx - Other files: _kernel_arg_handler.pyx, _memoryview.pyx All imports now use the new non-experimental paths. --- cuda_core/cuda/core/_device.pyx | 26 +++++++++---------- cuda_core/cuda/core/_event.pyx | 6 ++--- cuda_core/cuda/core/_graph.py | 4 +-- cuda_core/cuda/core/_launch_config.pyx | 4 +-- cuda_core/cuda/core/_launcher.pyx | 14 +++++----- cuda_core/cuda/core/_linker.py | 8 +++--- cuda_core/cuda/core/_memory/_buffer.pxd | 2 +- cuda_core/cuda/core/_memory/_buffer.pyx | 14 +++++----- .../core/_memory/_device_memory_resource.pxd | 4 +-- .../core/_memory/_device_memory_resource.pyx | 12 ++++----- .../core/_memory/_graph_memory_resource.pxd | 2 +- .../core/_memory/_graph_memory_resource.pyx | 8 +++--- cuda_core/cuda/core/_memory/_ipc.pxd | 4 +-- cuda_core/cuda/core/_memory/_ipc.pyx | 4 +-- cuda_core/cuda/core/_memory/_legacy.py | 10 +++---- .../core/_memory/_virtual_memory_resource.py | 8 +++--- cuda_core/cuda/core/_module.py | 10 +++---- cuda_core/cuda/core/_program.py | 10 +++---- cuda_core/cuda/core/_stream.pyx | 16 ++++++------ cuda_core/cuda/core/_system.py | 4 +-- cuda_core/cuda/core/_utils/cuda_utils.pyx | 4 +-- cuda_core/cuda/core/utils.py | 2 +- 22 files changed, 88 insertions(+), 88 deletions(-) diff --git 
a/cuda_core/cuda/core/_device.pyx b/cuda_core/cuda/core/_device.pyx index d9204487f0..e805430558 100644 --- a/cuda_core/cuda/core/_device.pyx +++ b/cuda_core/cuda/core/_device.pyx @@ -6,27 +6,27 @@ cimport cpython from libc.stdint cimport uintptr_t from cuda.bindings cimport cydriver -from cuda.core.experimental._utils.cuda_utils cimport HANDLE_RETURN +from cuda.core._utils.cuda_utils cimport HANDLE_RETURN import threading from typing import Optional, TYPE_CHECKING, Union -from cuda.core.experimental._context import Context, ContextOptions -from cuda.core.experimental._event import Event, EventOptions -from cuda.core.experimental._graph import GraphBuilder -from cuda.core.experimental._stream import IsStreamT, Stream, StreamOptions -from cuda.core.experimental._utils.clear_error_support import assert_type -from cuda.core.experimental._utils.cuda_utils import ( +from cuda.core._context import Context, ContextOptions +from cuda.core._event import Event, EventOptions +from cuda.core._graph import GraphBuilder +from cuda.core._stream import IsStreamT, Stream, StreamOptions +from cuda.core._utils.clear_error_support import assert_type +from cuda.core._utils.cuda_utils import ( ComputeCapability, CUDAError, driver, handle_return, runtime, ) -from cuda.core.experimental._stream cimport default_stream +from cuda.core._stream cimport default_stream if TYPE_CHECKING: - from cuda.core.experimental._memory import Buffer, MemoryResource + from cuda.core._memory import Buffer, MemoryResource # TODO: I prefer to type these as "cdef object" and avoid accessing them from within Python, # but it seems it is very convenient to expose them for testing purposes... 
@@ -1133,17 +1133,17 @@ class Device: ) ) if attr == 1: - from cuda.core.experimental._memory import DeviceMemoryResource + from cuda.core._memory import DeviceMemoryResource self._memory_resource = DeviceMemoryResource(self._id) else: - from cuda.core.experimental._memory import _SynchronousMemoryResource + from cuda.core._memory import _SynchronousMemoryResource self._memory_resource = _SynchronousMemoryResource(self._id) return self._memory_resource @memory_resource.setter def memory_resource(self, mr): - from cuda.core.experimental._memory import MemoryResource + from cuda.core._memory import MemoryResource assert_type(mr, MemoryResource) self._memory_resource = mr @@ -1202,7 +1202,7 @@ class Device: Acts as an entry point of this object. Users always start a code by calling this method, e.g. - >>> from cuda.core.experimental import Device + >>> from cuda.core import Device >>> dev0 = Device(0) >>> dev0.set_current() >>> # ... do work on device 0 ... diff --git a/cuda_core/cuda/core/_event.pyx b/cuda_core/cuda/core/_event.pyx index 98a45d0043..0e253f3471 100644 --- a/cuda_core/cuda/core/_event.pyx +++ b/cuda_core/cuda/core/_event.pyx @@ -8,7 +8,7 @@ cimport cpython from libc.stdint cimport uintptr_t from libc.string cimport memcpy from cuda.bindings cimport cydriver -from cuda.core.experimental._utils.cuda_utils cimport ( +from cuda.core._utils.cuda_utils cimport ( check_or_create_options, HANDLE_RETURN ) @@ -18,8 +18,8 @@ from dataclasses import dataclass import multiprocessing from typing import TYPE_CHECKING, Optional -from cuda.core.experimental._context import Context -from cuda.core.experimental._utils.cuda_utils import ( +from cuda.core._context import Context +from cuda.core._utils.cuda_utils import ( CUDAError, driver, ) diff --git a/cuda_core/cuda/core/_graph.py b/cuda_core/cuda/core/_graph.py index a82bd70f55..df51126bb0 100644 --- a/cuda_core/cuda/core/_graph.py +++ b/cuda_core/cuda/core/_graph.py @@ -9,8 +9,8 @@ from typing import TYPE_CHECKING if 
TYPE_CHECKING: - from cuda.core.experimental._stream import Stream -from cuda.core.experimental._utils.cuda_utils import ( + from cuda.core._stream import Stream +from cuda.core._utils.cuda_utils import ( driver, get_binding_version, handle_return, diff --git a/cuda_core/cuda/core/_launch_config.pyx b/cuda_core/cuda/core/_launch_config.pyx index 7d6a1ab2b9..00c71ad903 100644 --- a/cuda_core/cuda/core/_launch_config.pyx +++ b/cuda_core/cuda/core/_launch_config.pyx @@ -2,8 +2,8 @@ # # SPDX-License-Identifier: Apache-2.0 -from cuda.core.experimental._device import Device -from cuda.core.experimental._utils.cuda_utils import ( +from cuda.core._device import Device +from cuda.core._utils.cuda_utils import ( CUDAError, cast_to_3_tuple, driver, diff --git a/cuda_core/cuda/core/_launcher.pyx b/cuda_core/cuda/core/_launcher.pyx index 2cba15cbf4..09900a668c 100644 --- a/cuda_core/cuda/core/_launcher.pyx +++ b/cuda_core/cuda/core/_launcher.pyx @@ -1,15 +1,15 @@ # SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# # SPDX-License-Identifier: Apache-2.0 -from cuda.core.experimental._launch_config cimport LaunchConfig, _to_native_launch_config -from cuda.core.experimental._stream cimport Stream_accept +from cuda.core._launch_config cimport LaunchConfig, _to_native_launch_config +from cuda.core._stream cimport Stream_accept -from cuda.core.experimental._kernel_arg_handler import ParamHolder -from cuda.core.experimental._module import Kernel -from cuda.core.experimental._stream import Stream -from cuda.core.experimental._utils.clear_error_support import assert_type -from cuda.core.experimental._utils.cuda_utils import ( +from cuda.core._kernel_arg_handler import ParamHolder +from cuda.core._module import Kernel +from cuda.core._stream import Stream +from cuda.core._utils.clear_error_support import assert_type +from cuda.core._utils.cuda_utils import ( _reduce_3_tuple, check_or_create_options, driver, diff --git a/cuda_core/cuda/core/_linker.py b/cuda_core/cuda/core/_linker.py index 5c54a88c8c..fcb3416f33 100644 --- a/cuda_core/cuda/core/_linker.py +++ b/cuda_core/cuda/core/_linker.py @@ -15,10 +15,10 @@ if TYPE_CHECKING: import cuda.bindings -from cuda.core.experimental._device import Device -from cuda.core.experimental._module import ObjectCode -from cuda.core.experimental._utils.clear_error_support import assert_type -from cuda.core.experimental._utils.cuda_utils import check_or_create_options, driver, handle_return, is_sequence +from cuda.core._device import Device +from cuda.core._module import ObjectCode +from cuda.core._utils.clear_error_support import assert_type +from cuda.core._utils.cuda_utils import check_or_create_options, driver, handle_return, is_sequence # TODO: revisit this treatment for py313t builds _driver = None # populated if nvJitLink cannot be used diff --git a/cuda_core/cuda/core/_memory/_buffer.pxd b/cuda_core/cuda/core/_memory/_buffer.pxd index 12da84b2bd..24571d1161 100644 --- a/cuda_core/cuda/core/_memory/_buffer.pxd +++ 
b/cuda_core/cuda/core/_memory/_buffer.pxd @@ -4,7 +4,7 @@ from libc.stdint cimport uintptr_t -from cuda.core.experimental._stream cimport Stream +from cuda.core._stream cimport Stream cdef class Buffer: diff --git a/cuda_core/cuda/core/_memory/_buffer.pyx b/cuda_core/cuda/core/_memory/_buffer.pyx index 1ad79538ac..01eb8bc8dc 100644 --- a/cuda_core/cuda/core/_memory/_buffer.pyx +++ b/cuda_core/cuda/core/_memory/_buffer.pyx @@ -6,19 +6,19 @@ from __future__ import annotations from libc.stdint cimport uintptr_t -from cuda.core.experimental._memory._device_memory_resource cimport DeviceMemoryResource -from cuda.core.experimental._memory._ipc cimport IPCBufferDescriptor -from cuda.core.experimental._memory cimport _ipc -from cuda.core.experimental._stream cimport Stream_accept, Stream -from cuda.core.experimental._utils.cuda_utils cimport ( +from cuda.core._memory._device_memory_resource cimport DeviceMemoryResource +from cuda.core._memory._ipc cimport IPCBufferDescriptor +from cuda.core._memory cimport _ipc +from cuda.core._stream cimport Stream_accept, Stream +from cuda.core._utils.cuda_utils cimport ( _check_driver_error as raise_if_driver_error, ) import abc from typing import TypeVar, Union -from cuda.core.experimental._dlpack import DLDeviceType, make_py_capsule -from cuda.core.experimental._utils.cuda_utils import driver +from cuda.core._dlpack import DLDeviceType, make_py_capsule +from cuda.core._utils.cuda_utils import driver __all__ = ['Buffer', 'MemoryResource'] diff --git a/cuda_core/cuda/core/_memory/_device_memory_resource.pxd b/cuda_core/cuda/core/_memory/_device_memory_resource.pxd index cdd00de067..c3a11eaf61 100644 --- a/cuda_core/cuda/core/_memory/_device_memory_resource.pxd +++ b/cuda_core/cuda/core/_memory/_device_memory_resource.pxd @@ -3,8 +3,8 @@ # SPDX-License-Identifier: Apache-2.0 from cuda.bindings cimport cydriver -from cuda.core.experimental._memory._buffer cimport MemoryResource -from cuda.core.experimental._memory._ipc cimport IPCData 
+from cuda.core._memory._buffer cimport MemoryResource +from cuda.core._memory._ipc cimport IPCData cdef class DeviceMemoryResource(MemoryResource): diff --git a/cuda_core/cuda/core/_memory/_device_memory_resource.pyx b/cuda_core/cuda/core/_memory/_device_memory_resource.pyx index 74ffbabd46..24766748f7 100644 --- a/cuda_core/cuda/core/_memory/_device_memory_resource.pyx +++ b/cuda_core/cuda/core/_memory/_device_memory_resource.pyx @@ -9,11 +9,11 @@ from libc.stdint cimport uintptr_t from libc.string cimport memset from cuda.bindings cimport cydriver -from cuda.core.experimental._memory._buffer cimport Buffer, MemoryResource -from cuda.core.experimental._memory cimport _ipc -from cuda.core.experimental._memory._ipc cimport IPCAllocationHandle, IPCData -from cuda.core.experimental._stream cimport default_stream, Stream_accept, Stream -from cuda.core.experimental._utils.cuda_utils cimport ( +from cuda.core._memory._buffer cimport Buffer, MemoryResource +from cuda.core._memory cimport _ipc +from cuda.core._memory._ipc cimport IPCAllocationHandle, IPCData +from cuda.core._stream cimport default_stream, Stream_accept, Stream +from cuda.core._utils.cuda_utils cimport ( check_or_create_options, HANDLE_RETURN, ) @@ -24,7 +24,7 @@ import platform # no-cython-lint import uuid import weakref -from cuda.core.experimental._utils.cuda_utils import driver +from cuda.core._utils.cuda_utils import driver if TYPE_CHECKING: from cuda.core.experimental._memory.buffer import DevicePointerT diff --git a/cuda_core/cuda/core/_memory/_graph_memory_resource.pxd b/cuda_core/cuda/core/_memory/_graph_memory_resource.pxd index f9c7798e76..2f6c35d72e 100644 --- a/cuda_core/cuda/core/_memory/_graph_memory_resource.pxd +++ b/cuda_core/cuda/core/_memory/_graph_memory_resource.pxd @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: Apache-2.0 -from cuda.core.experimental._memory._buffer cimport MemoryResource +from cuda.core._memory._buffer cimport MemoryResource cdef class 
cyGraphMemoryResource(MemoryResource): diff --git a/cuda_core/cuda/core/_memory/_graph_memory_resource.pyx b/cuda_core/cuda/core/_memory/_graph_memory_resource.pyx index c65354b612..bda075c201 100644 --- a/cuda_core/cuda/core/_memory/_graph_memory_resource.pyx +++ b/cuda_core/cuda/core/_memory/_graph_memory_resource.pyx @@ -7,15 +7,15 @@ from __future__ import annotations from libc.stdint cimport intptr_t from cuda.bindings cimport cydriver -from cuda.core.experimental._memory._buffer cimport Buffer, MemoryResource -from cuda.core.experimental._stream cimport default_stream, Stream_accept, Stream -from cuda.core.experimental._utils.cuda_utils cimport HANDLE_RETURN +from cuda.core._memory._buffer cimport Buffer, MemoryResource +from cuda.core._stream cimport default_stream, Stream_accept, Stream +from cuda.core._utils.cuda_utils cimport HANDLE_RETURN from functools import cache from typing import TYPE_CHECKING if TYPE_CHECKING: - from cuda.core.experimental._memory.buffer import DevicePointerT + from cuda.core._memory.buffer import DevicePointerT __all__ = ['GraphMemoryResource'] diff --git a/cuda_core/cuda/core/_memory/_ipc.pxd b/cuda_core/cuda/core/_memory/_ipc.pxd index 2b9c80290d..5eaaff3231 100644 --- a/cuda_core/cuda/core/_memory/_ipc.pxd +++ b/cuda_core/cuda/core/_memory/_ipc.pxd @@ -3,8 +3,8 @@ # SPDX-License-Identifier: Apache-2.0 from cuda.bindings cimport cydriver -from cuda.core.experimental._memory._buffer cimport Buffer -from cuda.core.experimental._memory._device_memory_resource cimport DeviceMemoryResource +from cuda.core._memory._buffer cimport Buffer +from cuda.core._memory._device_memory_resource cimport DeviceMemoryResource # Holds DeviceMemoryResource objects imported by this process. 
This enables diff --git a/cuda_core/cuda/core/_memory/_ipc.pyx b/cuda_core/cuda/core/_memory/_ipc.pyx index d9384bf2b8..e55b957850 100644 --- a/cuda_core/cuda/core/_memory/_ipc.pyx +++ b/cuda_core/cuda/core/_memory/_ipc.pyx @@ -7,8 +7,8 @@ from libc.stdint cimport uintptr_t from libc.string cimport memcpy from cuda.bindings cimport cydriver -from cuda.core.experimental._stream cimport default_stream -from cuda.core.experimental._utils.cuda_utils cimport ( +from cuda.core._stream cimport default_stream +from cuda.core._utils.cuda_utils cimport ( HANDLE_RETURN, ) diff --git a/cuda_core/cuda/core/_memory/_legacy.py b/cuda_core/cuda/core/_memory/_legacy.py index 09ea0e15d2..74ba1eb5aa 100644 --- a/cuda_core/cuda/core/_memory/_legacy.py +++ b/cuda_core/cuda/core/_memory/_legacy.py @@ -6,8 +6,8 @@ from typing import TYPE_CHECKING -from cuda.core.experimental._memory._buffer import Buffer, MemoryResource -from cuda.core.experimental._utils.cuda_utils import ( +from cuda.core._memory._buffer import Buffer, MemoryResource +from cuda.core._utils.cuda_utils import ( _check_driver_error as raise_if_driver_error, ) from cuda.core.experimental._utils.cuda_utils import ( @@ -15,7 +15,7 @@ ) if TYPE_CHECKING: - from cuda.core.experimental._memory.buffer import DevicePointerT + from cuda.core._memory.buffer import DevicePointerT __all__ = ["LegacyPinnedMemoryResource", "_SynchronousMemoryResource"] @@ -43,7 +43,7 @@ def allocate(self, size, stream=None) -> Buffer: The allocated buffer object, which is accessible on both host and device. 
""" if stream is None: - from cuda.core.experimental._stream import default_stream + from cuda.core._stream import default_stream stream = default_stream() err, ptr = driver.cuMemAllocHost(size) @@ -93,7 +93,7 @@ def __init__(self, device_id): def allocate(self, size, stream=None) -> Buffer: if stream is None: - from cuda.core.experimental._stream import default_stream + from cuda.core._stream import default_stream stream = default_stream() err, ptr = driver.cuMemAlloc(size) diff --git a/cuda_core/cuda/core/_memory/_virtual_memory_resource.py b/cuda_core/cuda/core/_memory/_virtual_memory_resource.py index 04f0d33a0b..bd7322c4d3 100644 --- a/cuda_core/cuda/core/_memory/_virtual_memory_resource.py +++ b/cuda_core/cuda/core/_memory/_virtual_memory_resource.py @@ -7,9 +7,9 @@ from dataclasses import dataclass, field from typing import TYPE_CHECKING, Iterable, Literal, Union -from cuda.core.experimental._device import Device -from cuda.core.experimental._memory._buffer import Buffer, MemoryResource -from cuda.core.experimental._utils.cuda_utils import ( +from cuda.core._device import Device +from cuda.core._memory._buffer import Buffer, MemoryResource +from cuda.core._utils.cuda_utils import ( Transaction, check_or_create_options, driver, @@ -20,7 +20,7 @@ ) if TYPE_CHECKING: - from cuda.core.experimental._stream import Stream + from cuda.core._stream import Stream __all__ = ["VirtualMemoryResourceOptions", "VirtualMemoryResource"] diff --git a/cuda_core/cuda/core/_module.py b/cuda_core/cuda/core/_module.py index 9af722465b..b0f6ff387d 100644 --- a/cuda_core/cuda/core/_module.py +++ b/cuda_core/cuda/core/_module.py @@ -7,15 +7,15 @@ from typing import Union from warnings import warn -from cuda.core.experimental._device import Device -from cuda.core.experimental._launch_config import LaunchConfig, _to_native_launch_config -from cuda.core.experimental._stream import Stream -from cuda.core.experimental._utils.clear_error_support import ( +from cuda.core._device import 
Device +from cuda.core._launch_config import LaunchConfig, _to_native_launch_config +from cuda.core._stream import Stream +from cuda.core._utils.clear_error_support import ( assert_type, assert_type_str_or_bytes_like, raise_code_path_meant_to_be_unreachable, ) -from cuda.core.experimental._utils.cuda_utils import driver, get_binding_version, handle_return, precondition +from cuda.core._utils.cuda_utils import driver, get_binding_version, handle_return, precondition _backend = { "old": { diff --git a/cuda_core/cuda/core/_program.py b/cuda_core/cuda/core/_program.py index cdef7c3be6..6344991e30 100644 --- a/cuda_core/cuda/core/_program.py +++ b/cuda_core/cuda/core/_program.py @@ -13,11 +13,11 @@ if TYPE_CHECKING: import cuda.bindings -from cuda.core.experimental._device import Device -from cuda.core.experimental._linker import Linker, LinkerHandleT, LinkerOptions -from cuda.core.experimental._module import ObjectCode -from cuda.core.experimental._utils.clear_error_support import assert_type -from cuda.core.experimental._utils.cuda_utils import ( +from cuda.core._device import Device +from cuda.core._linker import Linker, LinkerHandleT, LinkerOptions +from cuda.core._module import ObjectCode +from cuda.core._utils.clear_error_support import assert_type +from cuda.core._utils.cuda_utils import ( _handle_boolean_option, check_or_create_options, driver, diff --git a/cuda_core/cuda/core/_stream.pyx b/cuda_core/cuda/core/_stream.pyx index 87ec4a691a..b724f9aee3 100644 --- a/cuda_core/cuda/core/_stream.pyx +++ b/cuda_core/cuda/core/_stream.pyx @@ -9,8 +9,8 @@ from libc.stdlib cimport strtol, getenv from cuda.bindings cimport cydriver -from cuda.core.experimental._event cimport Event as cyEvent -from cuda.core.experimental._utils.cuda_utils cimport ( +from cuda.core._event cimport Event as cyEvent +from cuda.core._utils.cuda_utils cimport ( check_or_create_options, CU_CONTEXT_INVALID, get_device_from_ctx, @@ -24,11 +24,11 @@ from typing import TYPE_CHECKING, Optional, 
Protocol, Union if TYPE_CHECKING: import cuda.bindings - from cuda.core.experimental._device import Device -from cuda.core.experimental._context import Context -from cuda.core.experimental._event import Event, EventOptions -from cuda.core.experimental._graph import GraphBuilder -from cuda.core.experimental._utils.cuda_utils import ( + from cuda.core._device import Device +from cuda.core._context import Context +from cuda.core._event import Event, EventOptions +from cuda.core._graph import GraphBuilder +from cuda.core._utils.cuda_utils import ( driver, ) @@ -311,7 +311,7 @@ cdef class Stream: context is set current after a stream is created. """ - from cuda.core.experimental._device import Device # avoid circular import + from cuda.core._device import Device # avoid circular import self._get_device_and_context() return Device((self._device_id)) diff --git a/cuda_core/cuda/core/_system.py b/cuda_core/cuda/core/_system.py index cbbc1a83cb..a8338114b2 100644 --- a/cuda_core/cuda/core/_system.py +++ b/cuda_core/cuda/core/_system.py @@ -2,8 +2,8 @@ # # SPDX-License-Identifier: Apache-2.0 -from cuda.core.experimental._device import Device -from cuda.core.experimental._utils.cuda_utils import driver, handle_return, runtime +from cuda.core._device import Device +from cuda.core._utils.cuda_utils import driver, handle_return, runtime class System: diff --git a/cuda_core/cuda/core/_utils/cuda_utils.pyx b/cuda_core/cuda/core/_utils/cuda_utils.pyx index d57a777537..e296186f25 100644 --- a/cuda_core/cuda/core/_utils/cuda_utils.pyx +++ b/cuda_core/cuda/core/_utils/cuda_utils.pyx @@ -17,8 +17,8 @@ except ImportError: from cuda import cudart as runtime from cuda import nvrtc -from cuda.core.experimental._utils.driver_cu_result_explanations import DRIVER_CU_RESULT_EXPLANATIONS -from cuda.core.experimental._utils.runtime_cuda_error_explanations import RUNTIME_CUDA_ERROR_EXPLANATIONS +from cuda.core._utils.driver_cu_result_explanations import DRIVER_CU_RESULT_EXPLANATIONS +from 
cuda.core._utils.runtime_cuda_error_explanations import RUNTIME_CUDA_ERROR_EXPLANATIONS class CUDAError(Exception): diff --git a/cuda_core/cuda/core/utils.py b/cuda_core/cuda/core/utils.py index 32f62918f6..f15d924277 100644 --- a/cuda_core/cuda/core/utils.py +++ b/cuda_core/cuda/core/utils.py @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: Apache-2.0 -from cuda.core.experimental._memoryview import ( +from cuda.core._memoryview import ( StridedMemoryView, # noqa: F401 args_viewable_as_strided_memory, # noqa: F401 ) From 3c79ca62f54cb86f2738beb347d262ed9a32b8da Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Mon, 1 Dec 2025 16:25:26 -0800 Subject: [PATCH 03/26] Update cuda/core/__init__.py to export all moved symbols [UNTESTED] This commit updates cuda/core/__init__.py to export all the symbols that were moved from the experimental namespace. The imports now reference the new cuda.core.* paths instead of cuda.core.experimental.*. Exported symbols: - Device, Event, EventOptions - Graph, GraphBuilder, GraphCompleteOptions, GraphDebugPrintOptions - LaunchConfig, launch - Linker, LinkerOptions - Memory classes: Buffer, DeviceMemoryResource, etc.
- Kernel, ObjectCode - Program, ProgramOptions - Stream, StreamOptions - System, system - utils module --- cuda_core/cuda/core/__init__.py | 55 +++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/cuda_core/cuda/core/__init__.py b/cuda_core/cuda/core/__init__.py index 96a80d1f3e..d84b9e072d 100644 --- a/cuda_core/cuda/core/__init__.py +++ b/cuda_core/cuda/core/__init__.py @@ -3,3 +3,58 @@ # SPDX-License-Identifier: Apache-2.0 from cuda.core._version import __version__ + +try: + from cuda import bindings +except ImportError: + raise ImportError("cuda.bindings 12.x or 13.x must be installed") from None +else: + cuda_major, cuda_minor = bindings.__version__.split(".")[:2] + if cuda_major not in ("12", "13"): + raise ImportError("cuda.bindings 12.x or 13.x must be installed") + +import importlib + +subdir = f"cu{cuda_major}" +try: + versioned_mod = importlib.import_module(f".{subdir}", __package__) + # Import all symbols from the module + globals().update(versioned_mod.__dict__) +except ImportError: + # This is not a wheel build, but a conda or local build, do nothing + pass +else: + del versioned_mod +finally: + del bindings, importlib, subdir, cuda_major, cuda_minor + +from cuda.core import utils # noqa: E402 +from cuda.core._device import Device # noqa: E402 +from cuda.core._event import Event, EventOptions # noqa: E402 +from cuda.core._graph import ( # noqa: E402 + Graph, + GraphBuilder, + GraphCompleteOptions, + GraphDebugPrintOptions, +) +from cuda.core._launch_config import LaunchConfig # noqa: E402 +from cuda.core._launcher import launch # noqa: E402 +from cuda.core._linker import Linker, LinkerOptions # noqa: E402 +from cuda.core._memory import ( # noqa: E402 + Buffer, + DeviceMemoryResource, + DeviceMemoryResourceOptions, + GraphMemoryResource, + LegacyPinnedMemoryResource, + MemoryResource, + VirtualMemoryResource, + VirtualMemoryResourceOptions, +) +from cuda.core._module import Kernel, ObjectCode # noqa: E402 +from 
cuda.core._program import Program, ProgramOptions # noqa: E402 +from cuda.core._stream import Stream, StreamOptions # noqa: E402 +from cuda.core._system import System # noqa: E402 + +system = System() +__import__("sys").modules[__spec__.name + ".system"] = system +del System From b54bf6a58dbc87c990c55e71bbfc1c80d81ff88f Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Mon, 1 Dec 2025 16:25:33 -0800 Subject: [PATCH 04/26] Create forwarding stubs in experimental/__init__.py with deprecation warnings [UNTESTED] This commit creates backward compatibility stubs in the experimental namespace that forward imports to the new cuda.core.* locations and emit deprecation warnings. Features: - All public symbols are re-exported from new locations - DeprecationWarning is emitted when the experimental module is imported - __getattr__ provides lazy forwarding for submodules - Clear migration message in docstring The stubs ensure backward compatibility while encouraging users to migrate to the new import paths. 
--- cuda_core/cuda/core/experimental/__init__.py | 128 +++++++++++++------ 1 file changed, 92 insertions(+), 36 deletions(-) diff --git a/cuda_core/cuda/core/experimental/__init__.py b/cuda_core/cuda/core/experimental/__init__.py index 826ea70b97..7a29119a48 100644 --- a/cuda_core/cuda/core/experimental/__init__.py +++ b/cuda_core/cuda/core/experimental/__init__.py @@ -2,43 +2,80 @@ # # SPDX-License-Identifier: Apache-2.0 -try: - from cuda import bindings -except ImportError: - raise ImportError("cuda.bindings 12.x or 13.x must be installed") from None -else: - cuda_major, cuda_minor = bindings.__version__.split(".")[:2] - if cuda_major not in ("12", "13"): - raise ImportError("cuda.bindings 12.x or 13.x must be installed") - -import importlib - -subdir = f"cu{cuda_major}" -try: - versioned_mod = importlib.import_module(f".{subdir}", __package__) - # Import all symbols from the module - globals().update(versioned_mod.__dict__) -except ImportError: - # This is not a wheel build, but a conda or local build, do nothing - pass -else: - del versioned_mod -finally: - del bindings, importlib, subdir, cuda_major, cuda_minor - -from cuda.core.experimental import utils # noqa: E402 -from cuda.core.experimental._device import Device # noqa: E402 -from cuda.core.experimental._event import Event, EventOptions # noqa: E402 -from cuda.core.experimental._graph import ( # noqa: E402 +""" +Backward compatibility stubs for cuda.core.experimental namespace. + +This module provides forwarding stubs that import from the new cuda.core.* +locations and emit deprecation warnings. Users should migrate to importing +directly from cuda.core instead of cuda.core.experimental. + +The experimental namespace will be removed in a future release. 
+""" + +import warnings +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + # For type checkers, import from the new location + from cuda.core import ( + Buffer, + Device, + DeviceMemoryResource, + DeviceMemoryResourceOptions, + Event, + EventOptions, + Graph, + GraphBuilder, + GraphCompleteOptions, + GraphDebugPrintOptions, + Kernel, + LaunchConfig, + LegacyPinnedMemoryResource, + Linker, + LinkerOptions, + MemoryResource, + ObjectCode, + Program, + ProgramOptions, + Stream, + StreamOptions, + VirtualMemoryResource, + VirtualMemoryResourceOptions, + launch, + system, + utils, + ) + + +def _warn_deprecated(): + """Emit a deprecation warning for using the experimental namespace.""" + warnings.warn( + "The cuda.core.experimental namespace is deprecated. " + "Please import directly from cuda.core instead. " + "For example, use 'from cuda.core import Device' instead of " + "'from cuda.core.experimental import Device'. " + "The experimental namespace will be removed in a future release.", + DeprecationWarning, + stacklevel=3, + ) + + +# Import from new locations and re-export +_warn_deprecated() + +from cuda.core import utils # noqa: E402 +from cuda.core._device import Device # noqa: E402 +from cuda.core._event import Event, EventOptions # noqa: E402 +from cuda.core._graph import ( # noqa: E402 Graph, GraphBuilder, GraphCompleteOptions, GraphDebugPrintOptions, ) -from cuda.core.experimental._launch_config import LaunchConfig # noqa: E402 -from cuda.core.experimental._launcher import launch # noqa: E402 -from cuda.core.experimental._linker import Linker, LinkerOptions # noqa: E402 -from cuda.core.experimental._memory import ( # noqa: E402 +from cuda.core._launch_config import LaunchConfig # noqa: E402 +from cuda.core._launcher import launch # noqa: E402 +from cuda.core._linker import Linker, LinkerOptions # noqa: E402 +from cuda.core._memory import ( # noqa: E402 Buffer, DeviceMemoryResource, DeviceMemoryResourceOptions, @@ -48,11 +85,30 @@ VirtualMemoryResource, 
VirtualMemoryResourceOptions, ) -from cuda.core.experimental._module import Kernel, ObjectCode # noqa: E402 -from cuda.core.experimental._program import Program, ProgramOptions # noqa: E402 -from cuda.core.experimental._stream import Stream, StreamOptions # noqa: E402 -from cuda.core.experimental._system import System # noqa: E402 +from cuda.core._module import Kernel, ObjectCode # noqa: E402 +from cuda.core._program import Program, ProgramOptions # noqa: E402 +from cuda.core._stream import Stream, StreamOptions # noqa: E402 +from cuda.core._system import System # noqa: E402 system = System() __import__("sys").modules[__spec__.name + ".system"] = system del System + +# Also create forwarding stubs for submodules +# These will be imported lazily when accessed +def __getattr__(name): + """Forward attribute access to the new location with deprecation warning.""" + if name in ("_device", "_event", "_graph", "_launch_config", "_launcher", + "_linker", "_memory", "_module", "_program", "_stream", "_system", + "_utils", "_context", "_dlpack", "_kernel_arg_handler", + "_launch_config", "_memoryview"): + _warn_deprecated() + # Import the submodule from the new location + import importlib + new_name = name.lstrip("_") + try: + return importlib.import_module(f"cuda.core.{new_name}") + except ImportError: + # Fallback to underscore-prefixed name + return importlib.import_module(f"cuda.core.{name}") + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") From 411bbd79a5dd5d12fc79ce03bf891598f6ae1688 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Mon, 1 Dec 2025 16:25:43 -0800 Subject: [PATCH 05/26] Update test imports to use new cuda.core.* paths [UNTESTED] This commit updates all test files to import from the new cuda.core.* paths instead of cuda.core.experimental.*. 
Updated 28 test files: - All test_*.py files - Helper files in tests/helpers/ - Memory IPC test files - Example test files Tests now exercise the new API paths directly, ensuring the migration is complete. Backward compatibility will be tested separately. --- cuda_core/tests/conftest.py | 4 ++-- .../tests/example_tests/test_basic_examples.py | 2 +- cuda_core/tests/helpers/buffers.py | 4 ++-- cuda_core/tests/helpers/latch.py | 2 +- cuda_core/tests/helpers/nanosleep_kernel.py | 2 +- cuda_core/tests/memory_ipc/test_errors.py | 4 ++-- cuda_core/tests/memory_ipc/test_event_ipc.py | 2 +- cuda_core/tests/memory_ipc/test_memory_ipc.py | 2 +- .../tests/memory_ipc/test_send_buffers.py | 2 +- cuda_core/tests/memory_ipc/test_serialize.py | 2 +- cuda_core/tests/memory_ipc/test_workerpool.py | 2 +- cuda_core/tests/test_comparable.py | 8 ++++---- cuda_core/tests/test_context.py | 4 ++-- cuda_core/tests/test_cuda_utils.py | 2 +- cuda_core/tests/test_device.py | 6 +++--- cuda_core/tests/test_event.py | 4 ++-- cuda_core/tests/test_graph.py | 4 ++-- cuda_core/tests/test_graph_mem.py | 2 +- cuda_core/tests/test_hashable.py | 8 ++++---- cuda_core/tests/test_helpers.py | 2 +- cuda_core/tests/test_launcher.py | 10 +++++----- cuda_core/tests/test_linker.py | 6 +++--- cuda_core/tests/test_memory.py | 12 ++++++------ cuda_core/tests/test_module.py | 6 +++--- cuda_core/tests/test_program.py | 18 +++++++++--------- cuda_core/tests/test_stream.py | 8 ++++---- cuda_core/tests/test_system.py | 4 ++-- cuda_core/tests/test_utils.py | 8 ++++---- 28 files changed, 70 insertions(+), 70 deletions(-) diff --git a/cuda_core/tests/conftest.py b/cuda_core/tests/conftest.py index 3804124438..490ca1f559 100644 --- a/cuda_core/tests/conftest.py +++ b/cuda_core/tests/conftest.py @@ -12,8 +12,8 @@ except ImportError: from cuda import cuda as driver -from cuda.core.experimental import Device, DeviceMemoryResource, DeviceMemoryResourceOptions, _device -from cuda.core.experimental._utils.cuda_utils import 
handle_return +from cuda.core import Device, DeviceMemoryResource, DeviceMemoryResourceOptions, _device +from cuda.core._utils.cuda_utils import handle_return @pytest.fixture(scope="session", autouse=True) diff --git a/cuda_core/tests/example_tests/test_basic_examples.py b/cuda_core/tests/example_tests/test_basic_examples.py index 450c60bf06..640b53c2fc 100644 --- a/cuda_core/tests/example_tests/test_basic_examples.py +++ b/cuda_core/tests/example_tests/test_basic_examples.py @@ -7,7 +7,7 @@ import os import pytest -from cuda.core.experimental import Device +from cuda.core import Device from .utils import run_example diff --git a/cuda_core/tests/helpers/buffers.py b/cuda_core/tests/helpers/buffers.py index b4d769eab3..3004cd0d00 100644 --- a/cuda_core/tests/helpers/buffers.py +++ b/cuda_core/tests/helpers/buffers.py @@ -3,8 +3,8 @@ import ctypes -from cuda.core.experimental import Buffer, Device, MemoryResource -from cuda.core.experimental._utils.cuda_utils import driver, handle_return +from cuda.core import Buffer, Device, MemoryResource +from cuda.core._utils.cuda_utils import driver, handle_return from . import libc diff --git a/cuda_core/tests/helpers/latch.py b/cuda_core/tests/helpers/latch.py index 46516c1b06..e35ee3325b 100644 --- a/cuda_core/tests/helpers/latch.py +++ b/cuda_core/tests/helpers/latch.py @@ -4,7 +4,7 @@ import ctypes import pytest -from cuda.core.experimental import ( +from cuda.core import ( LaunchConfig, LegacyPinnedMemoryResource, Program, diff --git a/cuda_core/tests/helpers/nanosleep_kernel.py b/cuda_core/tests/helpers/nanosleep_kernel.py index ea6ae34dcf..99d32c9aa4 100644 --- a/cuda_core/tests/helpers/nanosleep_kernel.py +++ b/cuda_core/tests/helpers/nanosleep_kernel.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 -from cuda.core.experimental import ( +from cuda.core import ( LaunchConfig, Program, ProgramOptions, diff --git a/cuda_core/tests/memory_ipc/test_errors.py b/cuda_core/tests/memory_ipc/test_errors.py index 3e8265b39c..331c2a4fbe 100644 --- a/cuda_core/tests/memory_ipc/test_errors.py +++ b/cuda_core/tests/memory_ipc/test_errors.py @@ -5,8 +5,8 @@ import pickle import re -from cuda.core.experimental import Buffer, Device, DeviceMemoryResource, DeviceMemoryResourceOptions -from cuda.core.experimental._utils.cuda_utils import CUDAError +from cuda.core import Buffer, Device, DeviceMemoryResource, DeviceMemoryResourceOptions +from cuda.core._utils.cuda_utils import CUDAError CHILD_TIMEOUT_SEC = 20 NBYTES = 64 diff --git a/cuda_core/tests/memory_ipc/test_event_ipc.py b/cuda_core/tests/memory_ipc/test_event_ipc.py index ce756cba21..e4b486e6e1 100644 --- a/cuda_core/tests/memory_ipc/test_event_ipc.py +++ b/cuda_core/tests/memory_ipc/test_event_ipc.py @@ -4,7 +4,7 @@ import multiprocessing as mp import pytest -from cuda.core.experimental import Device, EventOptions +from cuda.core import Device, EventOptions from helpers.buffers import compare_equal_buffers, make_scratch_buffer from helpers.latch import LatchKernel from helpers.logging import TimestampedLogger diff --git a/cuda_core/tests/memory_ipc/test_memory_ipc.py b/cuda_core/tests/memory_ipc/test_memory_ipc.py index 23a3e91b7f..fbb444cede 100644 --- a/cuda_core/tests/memory_ipc/test_memory_ipc.py +++ b/cuda_core/tests/memory_ipc/test_memory_ipc.py @@ -3,7 +3,7 @@ import multiprocessing as mp -from cuda.core.experimental import Buffer, DeviceMemoryResource +from cuda.core import Buffer, DeviceMemoryResource from helpers.buffers import PatternGen CHILD_TIMEOUT_SEC = 20 diff --git a/cuda_core/tests/memory_ipc/test_send_buffers.py b/cuda_core/tests/memory_ipc/test_send_buffers.py index a26b4422fe..46cf297138 100644 --- a/cuda_core/tests/memory_ipc/test_send_buffers.py +++ 
b/cuda_core/tests/memory_ipc/test_send_buffers.py @@ -5,7 +5,7 @@ from itertools import cycle import pytest -from cuda.core.experimental import DeviceMemoryResource, DeviceMemoryResourceOptions +from cuda.core import DeviceMemoryResource, DeviceMemoryResourceOptions from helpers.buffers import PatternGen CHILD_TIMEOUT_SEC = 20 diff --git a/cuda_core/tests/memory_ipc/test_serialize.py b/cuda_core/tests/memory_ipc/test_serialize.py index ceac50e502..75d1567b71 100644 --- a/cuda_core/tests/memory_ipc/test_serialize.py +++ b/cuda_core/tests/memory_ipc/test_serialize.py @@ -5,7 +5,7 @@ import multiprocessing.reduction import os -from cuda.core.experimental import Buffer, Device, DeviceMemoryResource +from cuda.core import Buffer, Device, DeviceMemoryResource from helpers.buffers import PatternGen CHILD_TIMEOUT_SEC = 20 diff --git a/cuda_core/tests/memory_ipc/test_workerpool.py b/cuda_core/tests/memory_ipc/test_workerpool.py index 3f3f46cd27..b13b9896a1 100644 --- a/cuda_core/tests/memory_ipc/test_workerpool.py +++ b/cuda_core/tests/memory_ipc/test_workerpool.py @@ -6,7 +6,7 @@ from itertools import cycle import pytest -from cuda.core.experimental import Buffer, Device, DeviceMemoryResource, DeviceMemoryResourceOptions +from cuda.core import Buffer, Device, DeviceMemoryResource, DeviceMemoryResourceOptions from helpers.buffers import PatternGen CHILD_TIMEOUT_SEC = 20 diff --git a/cuda_core/tests/test_comparable.py b/cuda_core/tests/test_comparable.py index c99963cd23..a93e49e4e8 100644 --- a/cuda_core/tests/test_comparable.py +++ b/cuda_core/tests/test_comparable.py @@ -8,10 +8,10 @@ across Device, Stream, Event, and Context objects. 
""" -from cuda.core.experimental import Device, Stream -from cuda.core.experimental._context import Context -from cuda.core.experimental._event import Event, EventOptions -from cuda.core.experimental._stream import StreamOptions +from cuda.core import Device, Stream +from cuda.core._context import Context +from cuda.core._event import Event, EventOptions +from cuda.core._stream import StreamOptions # ============================================================================ # Equality Contract Tests diff --git a/cuda_core/tests/test_context.py b/cuda_core/tests/test_context.py index 4fe35dc18d..f2204720da 100644 --- a/cuda_core/tests/test_context.py +++ b/cuda_core/tests/test_context.py @@ -1,9 +1,9 @@ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 -import cuda.core.experimental +import cuda.core import pytest -from cuda.core.experimental import Device +from cuda.core import Device def test_context_init_disabled(): diff --git a/cuda_core/tests/test_cuda_utils.py b/cuda_core/tests/test_cuda_utils.py index b0a0518652..c68f8fb841 100644 --- a/cuda_core/tests/test_cuda_utils.py +++ b/cuda_core/tests/test_cuda_utils.py @@ -4,7 +4,7 @@ import pytest from cuda.bindings import driver, runtime -from cuda.core.experimental._utils import cuda_utils +from cuda.core._utils import cuda_utils def test_driver_cu_result_explanations_health(): diff --git a/cuda_core/tests/test_device.py b/cuda_core/tests/test_device.py index fa484fa65b..3b277a8811 100644 --- a/cuda_core/tests/test_device.py +++ b/cuda_core/tests/test_device.py @@ -6,10 +6,10 @@ except ImportError: from cuda import cuda as driver from cuda import cudart as runtime -import cuda.core.experimental +import cuda.core import pytest -from cuda.core.experimental import Device -from cuda.core.experimental._utils.cuda_utils import ComputeCapability, get_binding_version, handle_return +from cuda.core import Device +from 
cuda.core._utils.cuda_utils import ComputeCapability, get_binding_version, handle_return def test_device_init_disabled(): diff --git a/cuda_core/tests/test_event.py b/cuda_core/tests/test_event.py index ec35448619..eba46c9327 100644 --- a/cuda_core/tests/test_event.py +++ b/cuda_core/tests/test_event.py @@ -4,9 +4,9 @@ import math -import cuda.core.experimental +import cuda.core import pytest -from cuda.core.experimental import ( +from cuda.core import ( Device, Event, EventOptions, diff --git a/cuda_core/tests/test_graph.py b/cuda_core/tests/test_graph.py index 615f7242c4..178339078d 100644 --- a/cuda_core/tests/test_graph.py +++ b/cuda_core/tests/test_graph.py @@ -9,7 +9,7 @@ from cuda.bindings import nvrtc except ImportError: from cuda import nvrtc -from cuda.core.experimental import ( +from cuda.core import ( Device, GraphBuilder, GraphCompleteOptions, @@ -20,7 +20,7 @@ ProgramOptions, launch, ) -from cuda.core.experimental._utils.cuda_utils import NVRTCError, handle_return +from cuda.core._utils.cuda_utils import NVRTCError, handle_return def _common_kernels(): diff --git a/cuda_core/tests/test_graph_mem.py b/cuda_core/tests/test_graph_mem.py index 9444f2ee93..8148d65282 100644 --- a/cuda_core/tests/test_graph_mem.py +++ b/cuda_core/tests/test_graph_mem.py @@ -3,7 +3,7 @@ # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE import pytest -from cuda.core.experimental import ( +from cuda.core import ( Device, DeviceMemoryResource, GraphCompleteOptions, diff --git a/cuda_core/tests/test_hashable.py b/cuda_core/tests/test_hashable.py index 4aa801866f..9bc89969a2 100644 --- a/cuda_core/tests/test_hashable.py +++ b/cuda_core/tests/test_hashable.py @@ -12,10 +12,10 @@ 5. 
Hash/equality contract compliance (if a == b, then hash(a) must equal hash(b)) """ -from cuda.core.experimental import Device -from cuda.core.experimental._context import Context -from cuda.core.experimental._event import Event, EventOptions -from cuda.core.experimental._stream import Stream, StreamOptions +from cuda.core import Device +from cuda.core._context import Context +from cuda.core._event import Event, EventOptions +from cuda.core._stream import Stream, StreamOptions # ============================================================================ # Integration Tests diff --git a/cuda_core/tests/test_helpers.py b/cuda_core/tests/test_helpers.py index 65df23980c..8230f08088 100644 --- a/cuda_core/tests/test_helpers.py +++ b/cuda_core/tests/test_helpers.py @@ -5,7 +5,7 @@ import time import pytest -from cuda.core.experimental import Device +from cuda.core import Device from helpers import IS_WINDOWS, IS_WSL from helpers.buffers import PatternGen, compare_equal_buffers, make_scratch_buffer from helpers.latch import LatchKernel diff --git a/cuda_core/tests/test_launcher.py b/cuda_core/tests/test_launcher.py index d2e0a89a28..ae3e5531c1 100644 --- a/cuda_core/tests/test_launcher.py +++ b/cuda_core/tests/test_launcher.py @@ -12,7 +12,7 @@ cp = None import numpy as np import pytest -from cuda.core.experimental import ( +from cuda.core import ( Device, DeviceMemoryResource, LaunchConfig, @@ -21,8 +21,8 @@ ProgramOptions, launch, ) -from cuda.core.experimental._memory import _SynchronousMemoryResource -from cuda.core.experimental._utils.cuda_utils import CUDAError +from cuda.core._memory import _SynchronousMemoryResource +from cuda.core._utils.cuda_utils import CUDAError from conftest import skipif_need_cuda_headers @@ -95,7 +95,7 @@ def test_launch_config_cluster_grid_conversion(init_cuda): def test_launch_config_native_conversion(init_cuda): """Test that _to_native_launch_config correctly converts grid from cluster units to block units.""" - from 
cuda.core.experimental._launch_config import _to_native_launch_config + from cuda.core._launch_config import _to_native_launch_config try: # Test case 1: 1D - Issue #867 example @@ -264,7 +264,7 @@ def test_cooperative_launch(): # # Commented out as this seems to be a sticky error... # config = LaunchConfig(grid=1, block=1) # launch(s, config, ker) - # from cuda.core.experimental._utils.cuda_utils import CUDAError + # from cuda.core._utils.cuda_utils import CUDAError # with pytest.raises(CUDAError) as e: # s.sync() # assert "CUDA_ERROR_LAUNCH_FAILED" in str(e) diff --git a/cuda_core/tests/test_linker.py b/cuda_core/tests/test_linker.py index e0c8d37b65..ad68201456 100644 --- a/cuda_core/tests/test_linker.py +++ b/cuda_core/tests/test_linker.py @@ -3,9 +3,9 @@ # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE import pytest -from cuda.core.experimental import Device, Linker, LinkerOptions, Program, ProgramOptions, _linker -from cuda.core.experimental._module import ObjectCode -from cuda.core.experimental._utils.cuda_utils import CUDAError +from cuda.core import Device, Linker, LinkerOptions, Program, ProgramOptions, _linker +from cuda.core._module import ObjectCode +from cuda.core._utils.cuda_utils import CUDAError ARCH = "sm_" + "".join(f"{i}" for i in Device().compute_capability) diff --git a/cuda_core/tests/test_memory.py b/cuda_core/tests/test_memory.py index 796c12ea7d..626c95361f 100644 --- a/cuda_core/tests/test_memory.py +++ b/cuda_core/tests/test_memory.py @@ -17,7 +17,7 @@ import re import pytest -from cuda.core.experimental import ( +from cuda.core import ( Buffer, Device, DeviceMemoryResource, @@ -27,10 +27,10 @@ VirtualMemoryResource, VirtualMemoryResourceOptions, ) -from cuda.core.experimental._dlpack import DLDeviceType -from cuda.core.experimental._memory import IPCBufferDescriptor -from cuda.core.experimental._utils.cuda_utils import handle_return -from cuda.core.experimental.utils import StridedMemoryView +from cuda.core._dlpack import 
DLDeviceType +from cuda.core._memory import IPCBufferDescriptor +from cuda.core._utils.cuda_utils import handle_return +from cuda.core.utils import StridedMemoryView from helpers import IS_WINDOWS from helpers.buffers import DummyUnifiedMemoryResource @@ -133,7 +133,7 @@ def test_package_contents(): "VirtualMemoryResource", ] d = {} - exec("from cuda.core.experimental._memory import *", d) # noqa: S102 + exec("from cuda.core._memory import *", d) # noqa: S102 d = {k: v for k, v in d.items() if not k.startswith("__")} assert sorted(expected) == sorted(d.keys()) diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py index 901a57f7a4..a24f42a08c 100644 --- a/cuda_core/tests/test_module.py +++ b/cuda_core/tests/test_module.py @@ -5,10 +5,10 @@ import pickle import warnings -import cuda.core.experimental +import cuda.core import pytest -from cuda.core.experimental import Device, ObjectCode, Program, ProgramOptions, system -from cuda.core.experimental._utils.cuda_utils import CUDAError, driver, get_binding_version, handle_return +from cuda.core import Device, ObjectCode, Program, ProgramOptions, system +from cuda.core._utils.cuda_utils import CUDAError, driver, get_binding_version, handle_return try: import numba diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py index 3add6b6b4c..01a4f10fa4 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -6,10 +6,10 @@ import warnings import pytest -from cuda.core.experimental import _linker -from cuda.core.experimental._module import Kernel, ObjectCode -from cuda.core.experimental._program import Program, ProgramOptions -from cuda.core.experimental._utils.cuda_utils import CUDAError, driver, handle_return +from cuda.core import _linker +from cuda.core._module import Kernel, ObjectCode +from cuda.core._program import Program, ProgramOptions +from cuda.core._utils.cuda_utils import CUDAError, driver, handle_return cuda_driver_version = 
handle_return(driver.cuDriverGetVersion()) is_culink_backend = _linker._decide_nvjitlink_or_driver() @@ -18,7 +18,7 @@ def _is_nvvm_available(): """Check if NVVM is available.""" try: - from cuda.core.experimental._program import _get_nvvm_module + from cuda.core._program import _get_nvvm_module _get_nvvm_module() return True @@ -31,7 +31,7 @@ def _is_nvvm_available(): ) try: - from cuda.core.experimental._utils.cuda_utils import driver, handle_return, nvrtc + from cuda.core._utils.cuda_utils import driver, handle_return, nvrtc _cuda_driver_version = handle_return(driver.cuDriverGetVersion()) except Exception: @@ -91,7 +91,7 @@ def _get_libnvvm_version_for_tests(): _libnvvm_version_attempted = True try: - from cuda.core.experimental._program import _get_nvvm_module + from cuda.core._program import _get_nvvm_module nvvm = _get_nvvm_module() @@ -139,7 +139,7 @@ def nvvm_ir(): fallback assumes no version metadata will be present in the input nvvm ir """ - from cuda.core.experimental._program import _get_nvvm_module + from cuda.core._program import _get_nvvm_module nvvm = _get_nvvm_module() major, minor, debug_major, debug_minor = nvvm.ir_version() @@ -329,7 +329,7 @@ def test_program_close(): @nvvm_available def test_nvvm_deferred_import(): """Test that our deferred NVVM import works correctly""" - from cuda.core.experimental._program import _get_nvvm_module + from cuda.core._program import _get_nvvm_module nvvm = _get_nvvm_module() assert nvvm is not None diff --git a/cuda_core/tests/test_stream.py b/cuda_core/tests/test_stream.py index 695a70e931..01b0b861af 100644 --- a/cuda_core/tests/test_stream.py +++ b/cuda_core/tests/test_stream.py @@ -2,10 +2,10 @@ # SPDX-License-Identifier: Apache-2.0 import pytest -from cuda.core.experimental import Device, Stream, StreamOptions -from cuda.core.experimental._event import Event -from cuda.core.experimental._stream import LEGACY_DEFAULT_STREAM, PER_THREAD_DEFAULT_STREAM -from cuda.core.experimental._utils.cuda_utils import 
driver +from cuda.core import Device, Stream, StreamOptions +from cuda.core._event import Event +from cuda.core._stream import LEGACY_DEFAULT_STREAM, PER_THREAD_DEFAULT_STREAM +from cuda.core._utils.cuda_utils import driver from helpers.misc import StreamWrapper diff --git a/cuda_core/tests/test_system.py b/cuda_core/tests/test_system.py index d5195ed872..fb39d018c3 100644 --- a/cuda_core/tests/test_system.py +++ b/cuda_core/tests/test_system.py @@ -7,8 +7,8 @@ from cuda import cuda as driver from cuda import cudart as runtime -from cuda.core.experimental import Device, system -from cuda.core.experimental._utils.cuda_utils import handle_return +from cuda.core import Device, system +from cuda.core._utils.cuda_utils import handle_return def test_system_singleton(): diff --git a/cuda_core/tests/test_utils.py b/cuda_core/tests/test_utils.py index 3580507250..96b05fe774 100644 --- a/cuda_core/tests/test_utils.py +++ b/cuda_core/tests/test_utils.py @@ -10,12 +10,12 @@ from numba import cuda as numba_cuda except ImportError: numba_cuda = None -import cuda.core.experimental +import cuda.core import numpy as np import pytest -from cuda.core.experimental import Device -from cuda.core.experimental._memoryview import view_as_cai -from cuda.core.experimental.utils import StridedMemoryView, args_viewable_as_strided_memory +from cuda.core import Device +from cuda.core._memoryview import view_as_cai +from cuda.core.utils import StridedMemoryView, args_viewable_as_strided_memory def test_cast_to_3_tuple_success(): From 29e31df800f67a2fcedc042e4dc9741a1bb548b0 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Mon, 1 Dec 2025 16:25:46 -0800 Subject: [PATCH 06/26] Update Cython test file to use new import paths [UNTESTED] Update the remaining .pyx test file to use cuda.core.* imports. 
--- cuda_core/tests/cython/test_get_cuda_native_handle.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda_core/tests/cython/test_get_cuda_native_handle.pyx b/cuda_core/tests/cython/test_get_cuda_native_handle.pyx index 0c3921e925..2b105e13ae 100644 --- a/cuda_core/tests/cython/test_get_cuda_native_handle.pyx +++ b/cuda_core/tests/cython/test_get_cuda_native_handle.pyx @@ -13,7 +13,7 @@ from cuda.bindings.nvrtc cimport nvrtcProgram as pynvrtcProgram from cuda.bindings.cydriver cimport CUstream, CUevent from cuda.bindings.cynvrtc cimport nvrtcProgram -from cuda.core.experimental import Device, Program +from cuda.core import Device, Program cdef extern from "utility.hpp": From b7cb72bffc8aa8784343680852d65317ed8b5d8f Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Mon, 1 Dec 2025 16:25:54 -0800 Subject: [PATCH 07/26] Add backward compatibility tests for experimental stubs [UNTESTED] This commit adds comprehensive tests for the experimental namespace backward compatibility stubs. Tests verify: - Experimental imports still work and emit deprecation warnings - Symbols from experimental are the same objects as from core - Direct imports from experimental work correctly - Utils module and options classes are accessible - Memory classes and other public APIs work - Objects can be instantiated through experimental namespace These tests ensure backward compatibility while the migration happens. --- .../test_experimental_backward_compat.py | 157 ++++++++++++++++++ 1 file changed, 157 insertions(+) create mode 100644 cuda_core/tests/test_experimental_backward_compat.py diff --git a/cuda_core/tests/test_experimental_backward_compat.py b/cuda_core/tests/test_experimental_backward_compat.py new file mode 100644 index 0000000000..321cf79435 --- /dev/null +++ b/cuda_core/tests/test_experimental_backward_compat.py @@ -0,0 +1,157 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# +# SPDX-License-Identifier: Apache-2.0 + +""" +Tests for backward compatibility of cuda.core.experimental namespace. + +These tests verify that the experimental namespace forwarding stubs work +correctly and emit appropriate deprecation warnings. +""" + +import warnings + +import pytest + +# Test that experimental imports still work +def test_experimental_imports_work(): + """Test that imports from experimental namespace still work.""" + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + + # Test main module import + import cuda.core.experimental + + # Should emit deprecation warning + assert len(w) >= 1 + assert issubclass(w[0].category, DeprecationWarning) + assert "deprecated" in str(w[0].message).lower() + + # Test that symbols are accessible + assert hasattr(cuda.core.experimental, "Device") + assert hasattr(cuda.core.experimental, "Stream") + assert hasattr(cuda.core.experimental, "Buffer") + assert hasattr(cuda.core.experimental, "system") + + +def test_experimental_symbols_are_same_objects(): + """Test that experimental namespace symbols are the same objects as core.""" + import cuda.core + import cuda.core.experimental + + # Compare classes/types + assert cuda.core.experimental.Device is cuda.core.Device + assert cuda.core.experimental.Stream is cuda.core.Stream + assert cuda.core.experimental.Buffer is cuda.core.Buffer + assert cuda.core.experimental.MemoryResource is cuda.core.MemoryResource + assert cuda.core.experimental.Program is cuda.core.Program + assert cuda.core.experimental.Kernel is cuda.core.Kernel + assert cuda.core.experimental.ObjectCode is cuda.core.ObjectCode + assert cuda.core.experimental.Graph is cuda.core.Graph + assert cuda.core.experimental.GraphBuilder is cuda.core.GraphBuilder + assert cuda.core.experimental.Event is cuda.core.Event + assert cuda.core.experimental.Linker is cuda.core.Linker + + # Compare singletons + assert cuda.core.experimental.system is cuda.core.system + + +def 
test_experimental_direct_imports(): + """Test that direct imports from experimental submodules work.""" + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + + # Test various import patterns + from cuda.core.experimental import Device, Stream, Buffer + from cuda.core.experimental import Program, Kernel, ObjectCode + from cuda.core.experimental import Graph, GraphBuilder, Event + from cuda.core.experimental import Linker, launch + from cuda.core.experimental import system + + # Should have warnings + assert len(w) >= 1 + + # Verify objects are usable + assert Device is not None + assert Stream is not None + assert Buffer is not None + + +def test_experimental_submodule_access(): + """Test that accessing experimental submodules works.""" + import cuda.core.experimental + + # Test that submodules can be accessed (via __getattr__) + # Note: These may not exist as actual modules, but the forwarding should work + try: + # This should trigger __getattr__ and forward to the new location + _ = cuda.core.experimental._device + _ = cuda.core.experimental._stream + _ = cuda.core.experimental._memory + except AttributeError: + # It's okay if submodules aren't directly accessible + # The important thing is that public symbols work + pass + + +def test_experimental_utils_module(): + """Test that experimental.utils module works.""" + import cuda.core.experimental + + # Should be able to access utils + assert hasattr(cuda.core.experimental, "utils") + assert cuda.core.experimental.utils is not None + + # Should have expected utilities + from cuda.core.experimental.utils import StridedMemoryView, args_viewable_as_strided_memory + assert StridedMemoryView is not None + assert args_viewable_as_strided_memory is not None + + +def test_experimental_options_classes(): + """Test that options classes are accessible.""" + import cuda.core.experimental + + assert hasattr(cuda.core.experimental, "EventOptions") + assert hasattr(cuda.core.experimental, 
"StreamOptions") + assert hasattr(cuda.core.experimental, "LaunchConfig") + assert hasattr(cuda.core.experimental, "ProgramOptions") + assert hasattr(cuda.core.experimental, "LinkerOptions") + assert hasattr(cuda.core.experimental, "GraphCompleteOptions") + assert hasattr(cuda.core.experimental, "GraphDebugPrintOptions") + assert hasattr(cuda.core.experimental, "DeviceMemoryResourceOptions") + assert hasattr(cuda.core.experimental, "VirtualMemoryResourceOptions") + + # Verify they're the same objects + assert cuda.core.experimental.EventOptions is cuda.core.EventOptions + assert cuda.core.experimental.StreamOptions is cuda.core.StreamOptions + assert cuda.core.experimental.LaunchConfig is cuda.core.LaunchConfig + + +def test_experimental_memory_classes(): + """Test that memory-related classes are accessible.""" + import cuda.core.experimental + + assert hasattr(cuda.core.experimental, "MemoryResource") + assert hasattr(cuda.core.experimental, "DeviceMemoryResource") + assert hasattr(cuda.core.experimental, "LegacyPinnedMemoryResource") + assert hasattr(cuda.core.experimental, "VirtualMemoryResource") + assert hasattr(cuda.core.experimental, "GraphMemoryResource") + + # Verify they're the same objects + assert cuda.core.experimental.MemoryResource is cuda.core.MemoryResource + assert cuda.core.experimental.DeviceMemoryResource is cuda.core.DeviceMemoryResource + + +@pytest.mark.filterwarnings("ignore::DeprecationWarning") +def test_experimental_instantiations(): + """Test that objects can be instantiated through experimental namespace.""" + from cuda.core.experimental import Device + + # Should be able to create objects + device = Device() + assert device is not None + + # Verify it's the same type + from cuda.core import Device as CoreDevice + assert isinstance(device, CoreDevice) From 55a50e6b926be038ddf7f17943f3f2c167af0946 Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Mon, 1 Dec 2025 16:26:00 -0800 Subject: [PATCH 08/26] Update example files to use new cuda.core.* imports [UNTESTED] Update all example files to import from cuda.core instead of cuda.core.experimental. Examples should demonstrate the new recommended import paths. --- cuda_core/cuda/core/_kernel_arg_handler.pyx | 4 ++-- cuda_core/cuda/core/_memory/_device_memory_resource.pyx | 2 +- cuda_core/cuda/core/_memoryview.pyx | 4 ++-- cuda_core/examples/cuda_graphs.py | 2 +- cuda_core/examples/jit_lto_fractal.py | 2 +- cuda_core/examples/memory_ops.py | 2 +- cuda_core/examples/pytorch_example.py | 2 +- cuda_core/examples/saxpy.py | 2 +- cuda_core/examples/show_device_properties.py | 2 +- cuda_core/examples/simple_multi_gpu_example.py | 2 +- cuda_core/examples/strided_memory_view_cpu.py | 2 +- cuda_core/examples/strided_memory_view_gpu.py | 4 ++-- cuda_core/examples/thread_block_cluster.py | 2 +- cuda_core/examples/vector_add.py | 2 +- 14 files changed, 17 insertions(+), 17 deletions(-) diff --git a/cuda_core/cuda/core/_kernel_arg_handler.pyx b/cuda_core/cuda/core/_kernel_arg_handler.pyx index 0bb40bf404..b17e3fd3d2 100644 --- a/cuda_core/cuda/core/_kernel_arg_handler.pyx +++ b/cuda_core/cuda/core/_kernel_arg_handler.pyx @@ -15,8 +15,8 @@ import ctypes import numpy -from cuda.core.experimental._memory import Buffer -from cuda.core.experimental._utils.cuda_utils import driver +from cuda.core._memory import Buffer +from cuda.core._utils.cuda_utils import driver ctypedef cpp_complex.complex[float] cpp_single_complex diff --git a/cuda_core/cuda/core/_memory/_device_memory_resource.pyx b/cuda_core/cuda/core/_memory/_device_memory_resource.pyx index 24766748f7..5c7dda41e0 100644 --- a/cuda_core/cuda/core/_memory/_device_memory_resource.pyx +++ b/cuda_core/cuda/core/_memory/_device_memory_resource.pyx @@ -27,7 +27,7 @@ import weakref from cuda.core._utils.cuda_utils import driver if TYPE_CHECKING: - from cuda.core.experimental._memory.buffer import 
DevicePointerT + from cuda.core._memory.buffer import DevicePointerT from .._device import Device __all__ = ['DeviceMemoryResource', 'DeviceMemoryResourceOptions'] diff --git a/cuda_core/cuda/core/_memoryview.pyx b/cuda_core/cuda/core/_memoryview.pyx index 40d70ad995..6bdb1591da 100644 --- a/cuda_core/cuda/core/_memoryview.pyx +++ b/cuda_core/cuda/core/_memoryview.pyx @@ -9,8 +9,8 @@ from typing import Optional import numpy -from cuda.core.experimental._utils.cuda_utils import handle_return, driver -from cuda.core.experimental._utils cimport cuda_utils +from cuda.core._utils.cuda_utils import handle_return, driver +from cuda.core._utils cimport cuda_utils # TODO(leofang): support NumPy structured dtypes diff --git a/cuda_core/examples/cuda_graphs.py b/cuda_core/examples/cuda_graphs.py index 2d2d9833fb..9cc759b500 100644 --- a/cuda_core/examples/cuda_graphs.py +++ b/cuda_core/examples/cuda_graphs.py @@ -13,7 +13,7 @@ import time import cupy as cp -from cuda.core.experimental import Device, LaunchConfig, Program, ProgramOptions, launch +from cuda.core import Device, LaunchConfig, Program, ProgramOptions, launch def main(): diff --git a/cuda_core/examples/jit_lto_fractal.py b/cuda_core/examples/jit_lto_fractal.py index d1553f6b67..b0040708b6 100644 --- a/cuda_core/examples/jit_lto_fractal.py +++ b/cuda_core/examples/jit_lto_fractal.py @@ -25,7 +25,7 @@ import sys import cupy as cp -from cuda.core.experimental import Device, LaunchConfig, Linker, LinkerOptions, Program, ProgramOptions, launch +from cuda.core import Device, LaunchConfig, Linker, LinkerOptions, Program, ProgramOptions, launch # ################################################################################ diff --git a/cuda_core/examples/memory_ops.py b/cuda_core/examples/memory_ops.py index c4abd06e2c..123b1f6a11 100644 --- a/cuda_core/examples/memory_ops.py +++ b/cuda_core/examples/memory_ops.py @@ -16,7 +16,7 @@ import cupy as cp import numpy as np -from cuda.core.experimental import ( +from 
cuda.core import ( Device, LaunchConfig, LegacyPinnedMemoryResource, diff --git a/cuda_core/examples/pytorch_example.py b/cuda_core/examples/pytorch_example.py index ea067302b9..433d63c9eb 100644 --- a/cuda_core/examples/pytorch_example.py +++ b/cuda_core/examples/pytorch_example.py @@ -15,7 +15,7 @@ import sys import torch -from cuda.core.experimental import Device, LaunchConfig, Program, ProgramOptions, launch +from cuda.core import Device, LaunchConfig, Program, ProgramOptions, launch # SAXPY kernel - passing a as a pointer to avoid any type issues code = """ diff --git a/cuda_core/examples/saxpy.py b/cuda_core/examples/saxpy.py index f38caef392..aa0d77eff9 100644 --- a/cuda_core/examples/saxpy.py +++ b/cuda_core/examples/saxpy.py @@ -14,7 +14,7 @@ import sys import cupy as cp -from cuda.core.experimental import Device, LaunchConfig, Program, ProgramOptions, launch +from cuda.core import Device, LaunchConfig, Program, ProgramOptions, launch # compute out = a * x + y code = """ diff --git a/cuda_core/examples/show_device_properties.py b/cuda_core/examples/show_device_properties.py index 8fcecd2d4c..41609de8e5 100644 --- a/cuda_core/examples/show_device_properties.py +++ b/cuda_core/examples/show_device_properties.py @@ -11,7 +11,7 @@ import sys -from cuda.core.experimental import Device, system +from cuda.core import Device, system # Convert boolean to YES or NO string diff --git a/cuda_core/examples/simple_multi_gpu_example.py b/cuda_core/examples/simple_multi_gpu_example.py index d91ab2c856..c53c1b518a 100644 --- a/cuda_core/examples/simple_multi_gpu_example.py +++ b/cuda_core/examples/simple_multi_gpu_example.py @@ -12,7 +12,7 @@ import sys import cupy as cp -from cuda.core.experimental import Device, LaunchConfig, Program, launch, system +from cuda.core import Device, LaunchConfig, Program, launch, system if system.num_devices < 2: print("this example requires at least 2 GPUs", file=sys.stderr) diff --git a/cuda_core/examples/strided_memory_view_cpu.py 
b/cuda_core/examples/strided_memory_view_cpu.py index de6007fd26..a20377cc76 100644 --- a/cuda_core/examples/strided_memory_view_cpu.py +++ b/cuda_core/examples/strided_memory_view_cpu.py @@ -26,7 +26,7 @@ print("cffi is not installed, the CPU example will be skipped", file=sys.stderr) FFI = None import numpy as np -from cuda.core.experimental.utils import StridedMemoryView, args_viewable_as_strided_memory +from cuda.core.utils import StridedMemoryView, args_viewable_as_strided_memory # ################################################################################ # diff --git a/cuda_core/examples/strided_memory_view_gpu.py b/cuda_core/examples/strided_memory_view_gpu.py index 3e456776a8..e91ddc25cc 100644 --- a/cuda_core/examples/strided_memory_view_gpu.py +++ b/cuda_core/examples/strided_memory_view_gpu.py @@ -23,8 +23,8 @@ print("cupy is not installed, the GPU example will be skipped", file=sys.stderr) cp = None import numpy as np -from cuda.core.experimental import Device, LaunchConfig, Program, ProgramOptions, launch -from cuda.core.experimental.utils import StridedMemoryView, args_viewable_as_strided_memory +from cuda.core import Device, LaunchConfig, Program, ProgramOptions, launch +from cuda.core.utils import StridedMemoryView, args_viewable_as_strided_memory # ################################################################################ # diff --git a/cuda_core/examples/thread_block_cluster.py b/cuda_core/examples/thread_block_cluster.py index e14158f8bd..f1ea8b8579 100644 --- a/cuda_core/examples/thread_block_cluster.py +++ b/cuda_core/examples/thread_block_cluster.py @@ -13,7 +13,7 @@ import sys import numpy as np -from cuda.core.experimental import ( +from cuda.core import ( Device, LaunchConfig, LegacyPinnedMemoryResource, diff --git a/cuda_core/examples/vector_add.py b/cuda_core/examples/vector_add.py index 2851303c7e..d31ab77208 100644 --- a/cuda_core/examples/vector_add.py +++ b/cuda_core/examples/vector_add.py @@ -10,7 +10,7 @@ # 
################################################################################ import cupy as cp -from cuda.core.experimental import Device, LaunchConfig, Program, ProgramOptions, launch +from cuda.core import Device, LaunchConfig, Program, ProgramOptions, launch # compute c = a + b code = """ From b2f28e98996b990f62b220c2b4967fabc9a34e81 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Mon, 1 Dec 2025 16:48:30 -0800 Subject: [PATCH 09/26] Fix build_hooks.py to use new cuda.core.* paths [UNTESTED] Update build_hooks.py to look for Cython extensions in cuda/core/ instead of cuda/core/experimental/. This fixes the build system to work with the moved files. --- cuda_core/build_hooks.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cuda_core/build_hooks.py b/cuda_core/build_hooks.py index e38f5676df..98f8da0f48 100644 --- a/cuda_core/build_hooks.py +++ b/cuda_core/build_hooks.py @@ -66,7 +66,7 @@ def _build_cuda_core(): # It seems setuptools' wildcard support has problems for namespace packages, # so we explicitly spell out all Extension instances. - root_module = "cuda.core.experimental" + root_module = "cuda.core" root_path = f"{os.path.sep}".join(root_module.split(".")) + os.path.sep ext_files = glob.glob(f"{root_path}/**/*.pyx", recursive=True) @@ -86,8 +86,8 @@ def get_cuda_paths(): ext_modules = tuple( Extension( - f"cuda.core.experimental.{mod.replace(os.path.sep, '.')}", - sources=[f"cuda/core/experimental/{mod}.pyx"], + f"cuda.core.{mod.replace(os.path.sep, '.')}", + sources=[f"cuda/core/{mod}.pyx"], include_dirs=list(os.path.join(root, "include") for root in get_cuda_paths()), language="c++", ) From 45c92f524e8f7a0b53c3a2fb94ce0d22a8b7bc02 Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Mon, 1 Dec 2025 16:48:58 -0800 Subject: [PATCH 10/26] Fix _context.pyx to use new import path [UNTESTED] Update _context.pyx to import from cuda.core._utils.cuda_utils instead of cuda.core.experimental._utils.cuda_utils. This fixes a circular import issue during module initialization. --- cuda_core/cuda/core/_context.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda_core/cuda/core/_context.pyx b/cuda_core/cuda/core/_context.pyx index f9858c1710..c1c28b3389 100644 --- a/cuda_core/cuda/core/_context.pyx +++ b/cuda_core/cuda/core/_context.pyx @@ -4,7 +4,7 @@ from dataclasses import dataclass -from cuda.core.experimental._utils.cuda_utils import driver +from cuda.core._utils.cuda_utils import driver @dataclass From b7c0cc03ed330f3329bee4adfe1bdf019a937a20 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Mon, 1 Dec 2025 16:49:19 -0800 Subject: [PATCH 11/26] Fix _memory/_legacy.py to use new import path [UNTESTED] Update _memory/_legacy.py to import from cuda.core._utils.cuda_utils instead of cuda.core.experimental._utils.cuda_utils. This fixes a circular import issue. --- cuda_core/cuda/core/_memory/_legacy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda_core/cuda/core/_memory/_legacy.py b/cuda_core/cuda/core/_memory/_legacy.py index 74ba1eb5aa..317494ea9e 100644 --- a/cuda_core/cuda/core/_memory/_legacy.py +++ b/cuda_core/cuda/core/_memory/_legacy.py @@ -10,7 +10,7 @@ from cuda.core._utils.cuda_utils import ( _check_driver_error as raise_if_driver_error, ) -from cuda.core.experimental._utils.cuda_utils import ( +from cuda.core._utils.cuda_utils import ( driver, ) From d47b29dc8b4a3211e45c1eb01b12c454979a8bdd Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Mon, 1 Dec 2025 16:49:23 -0800 Subject: [PATCH 12/26] Fix _memory/_virtual_memory_resource.py to use new import path [UNTESTED] Update _memory/_virtual_memory_resource.py to import from cuda.core._utils.cuda_utils instead of cuda.core.experimental._utils.cuda_utils. This fixes a circular import issue. --- cuda_core/cuda/core/_memory/_virtual_memory_resource.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda_core/cuda/core/_memory/_virtual_memory_resource.py b/cuda_core/cuda/core/_memory/_virtual_memory_resource.py index bd7322c4d3..70ecfb712c 100644 --- a/cuda_core/cuda/core/_memory/_virtual_memory_resource.py +++ b/cuda_core/cuda/core/_memory/_virtual_memory_resource.py @@ -15,7 +15,7 @@ driver, get_binding_version, ) -from cuda.core.experimental._utils.cuda_utils import ( +from cuda.core._utils.cuda_utils import ( _check_driver_error as raise_if_driver_error, ) From 2e0da66fda38531a74feeb5339ede0feb036294b Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Mon, 1 Dec 2025 16:52:47 -0800 Subject: [PATCH 13/26] Fix test helpers to use new cuda.core.* import paths [UNTESTED] Update cuda_python_test_helpers to import from cuda.core._utils instead of cuda.core.experimental._utils. --- cuda_python_test_helpers/cuda_python_test_helpers/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda_python_test_helpers/cuda_python_test_helpers/__init__.py b/cuda_python_test_helpers/cuda_python_test_helpers/__init__.py index a661b4f1aa..e7829df406 100644 --- a/cuda_python_test_helpers/cuda_python_test_helpers/__init__.py +++ b/cuda_python_test_helpers/cuda_python_test_helpers/__init__.py @@ -9,7 +9,7 @@ from contextlib import suppress from typing import Union -from cuda.core.experimental._utils.cuda_utils import handle_return +from cuda.core._utils.cuda_utils import handle_return __all__ = [ "IS_WINDOWS", From c414b7dc2c8906f2d729b0cd27b4e397bb923c97 Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Mon, 1 Dec 2025 16:54:04 -0800 Subject: [PATCH 14/26] Fix test files to use new cuda.core._* import paths [UNTESTED] Update test files to access internal modules via cuda.core._* instead of cuda.core.experimental._*. This fixes test failures after the namespace migration. --- cuda_core/tests/test_context.py | 2 +- cuda_core/tests/test_device.py | 2 +- cuda_core/tests/test_event.py | 2 +- cuda_core/tests/test_module.py | 6 +++--- cuda_core/tests/test_utils.py | 4 ++-- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/cuda_core/tests/test_context.py b/cuda_core/tests/test_context.py index f2204720da..5183aa1a85 100644 --- a/cuda_core/tests/test_context.py +++ b/cuda_core/tests/test_context.py @@ -8,7 +8,7 @@ def test_context_init_disabled(): with pytest.raises(RuntimeError, match=r"^Context objects cannot be instantiated directly\."): - cuda.core.experimental._context.Context() # Ensure back door is locked. + cuda.core._context.Context() # Ensure back door is locked. # ============================================================================ diff --git a/cuda_core/tests/test_device.py b/cuda_core/tests/test_device.py index 3b277a8811..5f52f96565 100644 --- a/cuda_core/tests/test_device.py +++ b/cuda_core/tests/test_device.py @@ -14,7 +14,7 @@ def test_device_init_disabled(): with pytest.raises(RuntimeError, match=r"^DeviceProperties cannot be instantiated directly\."): - cuda.core.experimental._device.DeviceProperties() # Ensure back door is locked. + cuda.core._device.DeviceProperties() # Ensure back door is locked. 
@pytest.fixture(scope="module") diff --git a/cuda_core/tests/test_event.py b/cuda_core/tests/test_event.py index eba46c9327..0d8f3a3c2d 100644 --- a/cuda_core/tests/test_event.py +++ b/cuda_core/tests/test_event.py @@ -17,7 +17,7 @@ def test_event_init_disabled(): with pytest.raises(RuntimeError, match=r"^Event objects cannot be instantiated directly\."): - cuda.core.experimental._event.Event() # Ensure back door is locked. + cuda.core._event.Event() # Ensure back door is locked. def test_timing_success(init_cuda): diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py index a24f42a08c..639bc7fe8d 100644 --- a/cuda_core/tests/test_module.py +++ b/cuda_core/tests/test_module.py @@ -41,17 +41,17 @@ def cuda12_4_prerequisite_check(): def test_kernel_attributes_init_disabled(): with pytest.raises(RuntimeError, match=r"^KernelAttributes cannot be instantiated directly\."): - cuda.core.experimental._module.KernelAttributes() # Ensure back door is locked. + cuda.core._module.KernelAttributes() # Ensure back door is locked. def test_kernel_occupancy_init_disabled(): with pytest.raises(RuntimeError, match=r"^KernelOccupancy cannot be instantiated directly\."): - cuda.core.experimental._module.KernelOccupancy() # Ensure back door is locked. + cuda.core._module.KernelOccupancy() # Ensure back door is locked. def test_kernel_init_disabled(): with pytest.raises(RuntimeError, match=r"^Kernel objects cannot be instantiated directly\."): - cuda.core.experimental._module.Kernel() # Ensure back door is locked. + cuda.core._module.Kernel() # Ensure back door is locked. 
def test_object_code_init_disabled(): diff --git a/cuda_core/tests/test_utils.py b/cuda_core/tests/test_utils.py index 96b05fe774..622fdff3a3 100644 --- a/cuda_core/tests/test_utils.py +++ b/cuda_core/tests/test_utils.py @@ -19,7 +19,7 @@ def test_cast_to_3_tuple_success(): - c3t = cuda.core.experimental._utils.cuda_utils.cast_to_3_tuple + c3t = cuda.core._utils.cuda_utils.cast_to_3_tuple assert c3t("", ()) == (1, 1, 1) assert c3t("", 2) == (2, 1, 1) assert c3t("", (2,)) == (2, 1, 1) @@ -43,7 +43,7 @@ def test_cast_to_3_tuple_success(): ) def test_cast_to_3_tuple_value_error(cfg, expected): with pytest.raises(ValueError, match=expected): - cuda.core.experimental._utils.cuda_utils.cast_to_3_tuple("Lbl", cfg) + cuda.core._utils.cuda_utils.cast_to_3_tuple("Lbl", cfg) def convert_strides_to_counts(strides, itemsize): From d58244807cce604412e84111cf1f270b0c236451 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Mon, 1 Dec 2025 16:54:09 -0800 Subject: [PATCH 15/26] Fix experimental.utils submodule access and test_device_id [UNTESTED] Make utils accessible as cuda.core.experimental.utils submodule for backward compatibility. Also fix test_device_id to use cuda.core.system instead of cuda.core.experimental.system. 
--- cuda_core/cuda/core/experimental/__init__.py | 2 ++ cuda_core/tests/test_device.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/cuda_core/cuda/core/experimental/__init__.py b/cuda_core/cuda/core/experimental/__init__.py index 7a29119a48..afc47a4a39 100644 --- a/cuda_core/cuda/core/experimental/__init__.py +++ b/cuda_core/cuda/core/experimental/__init__.py @@ -64,6 +64,8 @@ def _warn_deprecated(): _warn_deprecated() from cuda.core import utils # noqa: E402 +# Make utils accessible as a submodule for backward compatibility +__import__("sys").modules[__spec__.name + ".utils"] = utils from cuda.core._device import Device # noqa: E402 from cuda.core._event import Event, EventOptions # noqa: E402 from cuda.core._graph import ( # noqa: E402 diff --git a/cuda_core/tests/test_device.py b/cuda_core/tests/test_device.py index 5f52f96565..69849b1a2e 100644 --- a/cuda_core/tests/test_device.py +++ b/cuda_core/tests/test_device.py @@ -48,7 +48,7 @@ def test_device_alloc(deinit_cuda): def test_device_id(deinit_cuda): - for device in cuda.core.experimental.system.devices: + for device in cuda.core.system.devices: device.set_current() assert device.device_id == handle_return(runtime.cudaGetDevice()) From 3cc287c6bc4e9c85dca6f4901301dd23c7f29843 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Mon, 1 Dec 2025 16:55:19 -0800 Subject: [PATCH 16/26] Fix test_experimental_direct_imports to handle module caching [UNTESTED] Clear cached experimental module before testing to ensure warnings are emitted. Also improve assertion message. 
--- cuda_core/tests/test_experimental_backward_compat.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/cuda_core/tests/test_experimental_backward_compat.py b/cuda_core/tests/test_experimental_backward_compat.py index 321cf79435..c5d8c21e0f 100644 --- a/cuda_core/tests/test_experimental_backward_compat.py +++ b/cuda_core/tests/test_experimental_backward_compat.py @@ -58,6 +58,11 @@ def test_experimental_symbols_are_same_objects(): def test_experimental_direct_imports(): """Test that direct imports from experimental submodules work.""" + # Clear any cached imports to ensure warnings are emitted + import sys + if 'cuda.core.experimental' in sys.modules: + del sys.modules['cuda.core.experimental'] + with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") @@ -68,8 +73,8 @@ def test_experimental_direct_imports(): from cuda.core.experimental import Linker, launch from cuda.core.experimental import system - # Should have warnings - assert len(w) >= 1 + # Should have warnings (at least one from the initial import) + assert len(w) >= 1, f"Expected at least 1 deprecation warning, got {len(w)}" # Verify objects are usable assert Device is not None From 4f34fdd452e1bebe69765daa13957f94f4f0c2b7 Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Mon, 1 Dec 2025 21:00:32 -0800 Subject: [PATCH 17/26] Address review comments: use pytest.deprecated_call and clarify warning behavior - Replace manual warning catching with pytest.deprecated_call() for cleaner tests - Add documentation clarifying that deprecation warnings are only emitted at module import time, not on each attribute access - Update test_experimental_utils_module docstring to explain warning behavior --- cuda_core/cuda/core/experimental/__init__.py | 9 ++- .../test_experimental_backward_compat.py | 59 +++++++++---------- 2 files changed, 37 insertions(+), 31 deletions(-) diff --git a/cuda_core/cuda/core/experimental/__init__.py b/cuda_core/cuda/core/experimental/__init__.py index afc47a4a39..060fdac925 100644 --- a/cuda_core/cuda/core/experimental/__init__.py +++ b/cuda_core/cuda/core/experimental/__init__.py @@ -48,7 +48,14 @@ def _warn_deprecated(): - """Emit a deprecation warning for using the experimental namespace.""" + """Emit a deprecation warning for using the experimental namespace. + + Note: This warning is only when the experimental module is first imported. + Subsequent accesses to attributes (like utils, Device, etc.) do not trigger + additional warnings since they are already set in the module namespace. + Only accessing submodules via __getattr__ (e.g., _device, _utils) will trigger + additional warnings. + """ warnings.warn( "The cuda.core.experimental namespace is deprecated. " "Please import directly from cuda.core instead. 
" diff --git a/cuda_core/tests/test_experimental_backward_compat.py b/cuda_core/tests/test_experimental_backward_compat.py index c5d8c21e0f..16f8e00387 100644 --- a/cuda_core/tests/test_experimental_backward_compat.py +++ b/cuda_core/tests/test_experimental_backward_compat.py @@ -16,22 +16,20 @@ # Test that experimental imports still work def test_experimental_imports_work(): """Test that imports from experimental namespace still work.""" - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter("always") - - # Test main module import + # Clear cached module to ensure warning is emitted + import sys + if 'cuda.core.experimental' in sys.modules: + del sys.modules['cuda.core.experimental'] + + # Test main module import - should emit deprecation warning + with pytest.deprecated_call(): import cuda.core.experimental - - # Should emit deprecation warning - assert len(w) >= 1 - assert issubclass(w[0].category, DeprecationWarning) - assert "deprecated" in str(w[0].message).lower() - - # Test that symbols are accessible - assert hasattr(cuda.core.experimental, "Device") - assert hasattr(cuda.core.experimental, "Stream") - assert hasattr(cuda.core.experimental, "Buffer") - assert hasattr(cuda.core.experimental, "system") + + # Test that symbols are accessible + assert hasattr(cuda.core.experimental, "Device") + assert hasattr(cuda.core.experimental, "Stream") + assert hasattr(cuda.core.experimental, "Buffer") + assert hasattr(cuda.core.experimental, "system") def test_experimental_symbols_are_same_objects(): @@ -63,23 +61,18 @@ def test_experimental_direct_imports(): if 'cuda.core.experimental' in sys.modules: del sys.modules['cuda.core.experimental'] - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter("always") - - # Test various import patterns + # Test various import patterns - warning is emitted once at module import time + with pytest.deprecated_call(): from cuda.core.experimental import Device, Stream, Buffer from 
cuda.core.experimental import Program, Kernel, ObjectCode from cuda.core.experimental import Graph, GraphBuilder, Event from cuda.core.experimental import Linker, launch from cuda.core.experimental import system - - # Should have warnings (at least one from the initial import) - assert len(w) >= 1, f"Expected at least 1 deprecation warning, got {len(w)}" - - # Verify objects are usable - assert Device is not None - assert Stream is not None - assert Buffer is not None + + # Verify objects are usable + assert Device is not None + assert Stream is not None + assert Buffer is not None def test_experimental_submodule_access(): @@ -100,14 +93,20 @@ def test_experimental_submodule_access(): def test_experimental_utils_module(): - """Test that experimental.utils module works.""" + """Test that experimental.utils module works. + + Note: The deprecation warning is only emitted once at import time when + cuda.core.experimental is first imported. Accessing utils or importing + from utils does not trigger additional warnings since utils is already + set as an attribute in the module namespace. + """ import cuda.core.experimental - # Should be able to access utils + # Should be able to access utils (no warning on access, only on initial import) assert hasattr(cuda.core.experimental, "utils") assert cuda.core.experimental.utils is not None - # Should have expected utilities + # Should have expected utilities (no warning on import from utils submodule) from cuda.core.experimental.utils import StridedMemoryView, args_viewable_as_strided_memory assert StridedMemoryView is not None assert args_viewable_as_strided_memory is not None From 6718a70d22c518d52db2aab4c1df90394d78ef12 Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Mon, 1 Dec 2025 21:07:24 -0800 Subject: [PATCH 18/26] pre-commit fixes (automatic, trivial) --- .spdx-ignore | 2 +- cuda_core/cuda/core/experimental/__init__.py | 28 +++++++-- .../test_experimental_backward_compat.py | 57 ++++++++++--------- 3 files changed, 54 insertions(+), 33 deletions(-) diff --git a/.spdx-ignore b/.spdx-ignore index 84f051fafc..d644861959 100644 --- a/.spdx-ignore +++ b/.spdx-ignore @@ -10,4 +10,4 @@ requirements*.txt cuda_bindings/examples/* # Vendored -cuda_core/cuda/core/experimental/include/dlpack.h +cuda_core/cuda/core/include/dlpack.h diff --git a/cuda_core/cuda/core/experimental/__init__.py b/cuda_core/cuda/core/experimental/__init__.py index 060fdac925..f140844197 100644 --- a/cuda_core/cuda/core/experimental/__init__.py +++ b/cuda_core/cuda/core/experimental/__init__.py @@ -49,7 +49,7 @@ def _warn_deprecated(): """Emit a deprecation warning for using the experimental namespace. - + Note: This warning is only when the experimental module is first imported. Subsequent accesses to attributes (like utils, Device, etc.) do not trigger additional warnings since they are already set in the module namespace. 
@@ -71,6 +71,7 @@ def _warn_deprecated(): _warn_deprecated() from cuda.core import utils # noqa: E402 + # Make utils accessible as a submodule for backward compatibility __import__("sys").modules[__spec__.name + ".utils"] = utils from cuda.core._device import Device # noqa: E402 @@ -103,17 +104,34 @@ def _warn_deprecated(): __import__("sys").modules[__spec__.name + ".system"] = system del System + # Also create forwarding stubs for submodules # These will be imported lazily when accessed def __getattr__(name): """Forward attribute access to the new location with deprecation warning.""" - if name in ("_device", "_event", "_graph", "_launch_config", "_launcher", - "_linker", "_memory", "_module", "_program", "_stream", "_system", - "_utils", "_context", "_dlpack", "_kernel_arg_handler", - "_launch_config", "_memoryview"): + if name in ( + "_device", + "_event", + "_graph", + "_launch_config", + "_launcher", + "_linker", + "_memory", + "_module", + "_program", + "_stream", + "_system", + "_utils", + "_context", + "_dlpack", + "_kernel_arg_handler", + "_launch_config", + "_memoryview", + ): _warn_deprecated() # Import the submodule from the new location import importlib + new_name = name.lstrip("_") try: return importlib.import_module(f"cuda.core.{new_name}") diff --git a/cuda_core/tests/test_experimental_backward_compat.py b/cuda_core/tests/test_experimental_backward_compat.py index 16f8e00387..4ebeb8025d 100644 --- a/cuda_core/tests/test_experimental_backward_compat.py +++ b/cuda_core/tests/test_experimental_backward_compat.py @@ -9,22 +9,22 @@ correctly and emit appropriate deprecation warnings. 
""" -import warnings - import pytest + # Test that experimental imports still work def test_experimental_imports_work(): """Test that imports from experimental namespace still work.""" # Clear cached module to ensure warning is emitted import sys - if 'cuda.core.experimental' in sys.modules: - del sys.modules['cuda.core.experimental'] - + + if "cuda.core.experimental" in sys.modules: + del sys.modules["cuda.core.experimental"] + # Test main module import - should emit deprecation warning with pytest.deprecated_call(): import cuda.core.experimental - + # Test that symbols are accessible assert hasattr(cuda.core.experimental, "Device") assert hasattr(cuda.core.experimental, "Stream") @@ -36,7 +36,7 @@ def test_experimental_symbols_are_same_objects(): """Test that experimental namespace symbols are the same objects as core.""" import cuda.core import cuda.core.experimental - + # Compare classes/types assert cuda.core.experimental.Device is cuda.core.Device assert cuda.core.experimental.Stream is cuda.core.Stream @@ -49,7 +49,7 @@ def test_experimental_symbols_are_same_objects(): assert cuda.core.experimental.GraphBuilder is cuda.core.GraphBuilder assert cuda.core.experimental.Event is cuda.core.Event assert cuda.core.experimental.Linker is cuda.core.Linker - + # Compare singletons assert cuda.core.experimental.system is cuda.core.system @@ -58,17 +58,18 @@ def test_experimental_direct_imports(): """Test that direct imports from experimental submodules work.""" # Clear any cached imports to ensure warnings are emitted import sys - if 'cuda.core.experimental' in sys.modules: - del sys.modules['cuda.core.experimental'] - + + if "cuda.core.experimental" in sys.modules: + del sys.modules["cuda.core.experimental"] + # Test various import patterns - warning is emitted once at module import time with pytest.deprecated_call(): - from cuda.core.experimental import Device, Stream, Buffer - from cuda.core.experimental import Program, Kernel, ObjectCode - from 
cuda.core.experimental import Graph, GraphBuilder, Event - from cuda.core.experimental import Linker, launch - from cuda.core.experimental import system - + from cuda.core.experimental import ( + Buffer, + Device, + Stream, + ) + # Verify objects are usable assert Device is not None assert Stream is not None @@ -78,7 +79,7 @@ def test_experimental_direct_imports(): def test_experimental_submodule_access(): """Test that accessing experimental submodules works.""" import cuda.core.experimental - + # Test that submodules can be accessed (via __getattr__) # Note: These may not exist as actual modules, but the forwarding should work try: @@ -94,20 +95,21 @@ def test_experimental_submodule_access(): def test_experimental_utils_module(): """Test that experimental.utils module works. - + Note: The deprecation warning is only emitted once at import time when cuda.core.experimental is first imported. Accessing utils or importing from utils does not trigger additional warnings since utils is already set as an attribute in the module namespace. 
""" import cuda.core.experimental - + # Should be able to access utils (no warning on access, only on initial import) assert hasattr(cuda.core.experimental, "utils") assert cuda.core.experimental.utils is not None - + # Should have expected utilities (no warning on import from utils submodule) from cuda.core.experimental.utils import StridedMemoryView, args_viewable_as_strided_memory + assert StridedMemoryView is not None assert args_viewable_as_strided_memory is not None @@ -115,7 +117,7 @@ def test_experimental_utils_module(): def test_experimental_options_classes(): """Test that options classes are accessible.""" import cuda.core.experimental - + assert hasattr(cuda.core.experimental, "EventOptions") assert hasattr(cuda.core.experimental, "StreamOptions") assert hasattr(cuda.core.experimental, "LaunchConfig") @@ -125,7 +127,7 @@ def test_experimental_options_classes(): assert hasattr(cuda.core.experimental, "GraphDebugPrintOptions") assert hasattr(cuda.core.experimental, "DeviceMemoryResourceOptions") assert hasattr(cuda.core.experimental, "VirtualMemoryResourceOptions") - + # Verify they're the same objects assert cuda.core.experimental.EventOptions is cuda.core.EventOptions assert cuda.core.experimental.StreamOptions is cuda.core.StreamOptions @@ -135,13 +137,13 @@ def test_experimental_options_classes(): def test_experimental_memory_classes(): """Test that memory-related classes are accessible.""" import cuda.core.experimental - + assert hasattr(cuda.core.experimental, "MemoryResource") assert hasattr(cuda.core.experimental, "DeviceMemoryResource") assert hasattr(cuda.core.experimental, "LegacyPinnedMemoryResource") assert hasattr(cuda.core.experimental, "VirtualMemoryResource") assert hasattr(cuda.core.experimental, "GraphMemoryResource") - + # Verify they're the same objects assert cuda.core.experimental.MemoryResource is cuda.core.MemoryResource assert cuda.core.experimental.DeviceMemoryResource is cuda.core.DeviceMemoryResource @@ -151,11 +153,12 @@ def 
test_experimental_memory_classes(): def test_experimental_instantiations(): """Test that objects can be instantiated through experimental namespace.""" from cuda.core.experimental import Device - + # Should be able to create objects device = Device() assert device is not None - + # Verify it's the same type from cuda.core import Device as CoreDevice + assert isinstance(device, CoreDevice) From 506b9129a5ce48dd9b3ce3b2f39ac43a940ddb38 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Mon, 1 Dec 2025 21:08:47 -0800 Subject: [PATCH 19/26] Fix import path in test_memory_peer_access.py Update import from cuda.core.experimental._utils.cuda_utils to cuda.core._utils.cuda_utils to match the new module structure. --- cuda_core/tests/test_memory_peer_access.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda_core/tests/test_memory_peer_access.py b/cuda_core/tests/test_memory_peer_access.py index 4bb9fe561d..c2e51bed11 100644 --- a/cuda_core/tests/test_memory_peer_access.py +++ b/cuda_core/tests/test_memory_peer_access.py @@ -3,8 +3,8 @@ import cuda.core.experimental import pytest +from cuda.core._utils.cuda_utils import CUDAError from cuda.core.experimental import Device, DeviceMemoryResource -from cuda.core.experimental._utils.cuda_utils import CUDAError from helpers.buffers import PatternGen, compare_buffer_to_constant, make_scratch_buffer NBYTES = 1024 From dfa0f0e0dec26f532918763f7a96f84469dea6a9 Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Mon, 1 Dec 2025 21:57:16 -0800 Subject: [PATCH 20/26] Remove experimental namespace from remaining test files - Update cython/build_tests.sh to use cuda/core/include instead of cuda/core/experimental/include - Update test_memory_peer_access.py to import from cuda.core instead of cuda.core.experimental - Update test_module.py to use cuda.core.LaunchConfig instead of cuda.core.experimental.LaunchConfig --- cuda_core/tests/cython/build_tests.sh | 4 ++-- cuda_core/tests/test_memory_peer_access.py | 8 ++++---- cuda_core/tests/test_module.py | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/cuda_core/tests/cython/build_tests.sh b/cuda_core/tests/cython/build_tests.sh index eb3303840d..98851edefa 100755 --- a/cuda_core/tests/cython/build_tests.sh +++ b/cuda_core/tests/cython/build_tests.sh @@ -6,10 +6,10 @@ UNAME=$(uname) if [ "$UNAME" == "Linux" ] ; then SCRIPTPATH=$(dirname $(realpath "$0")) - export CPLUS_INCLUDE_PATH=${SCRIPTPATH}/../../cuda/core/experimental/include:$CUDA_HOME/include:$CPLUS_INCLUDE_PATH + export CPLUS_INCLUDE_PATH=${SCRIPTPATH}/../../cuda/core/include:$CUDA_HOME/include:$CPLUS_INCLUDE_PATH elif [[ "$UNAME" == CYGWIN* || "$UNAME" == MINGW* || "$UNAME" == MSYS* ]] ; then SCRIPTPATH="$(dirname $(cygpath -w $(realpath "$0")))" - CUDA_CORE_INCLUDE_PATH=$(echo "${SCRIPTPATH}\..\..\cuda\core\experimental\include" | sed 's/\\/\\\\/g') + CUDA_CORE_INCLUDE_PATH=$(echo "${SCRIPTPATH}\..\..\cuda\core\include" | sed 's/\\/\\\\/g') export CL="/I\"${CUDA_CORE_INCLUDE_PATH}\" /I\"${CUDA_HOME}\\include\" ${CL}" else exit 1 diff --git a/cuda_core/tests/test_memory_peer_access.py b/cuda_core/tests/test_memory_peer_access.py index c2e51bed11..c71a805ec2 100644 --- a/cuda_core/tests/test_memory_peer_access.py +++ b/cuda_core/tests/test_memory_peer_access.py @@ -1,17 +1,17 @@ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 -import cuda.core.experimental +import cuda.core import pytest +from cuda.core import Device, DeviceMemoryResource from cuda.core._utils.cuda_utils import CUDAError -from cuda.core.experimental import Device, DeviceMemoryResource from helpers.buffers import PatternGen, compare_buffer_to_constant, make_scratch_buffer NBYTES = 1024 def _mempool_device_impl(num): - num_devices = len(cuda.core.experimental.system.devices) + num_devices = len(cuda.core.system.devices) if num_devices < num: pytest.skip("Test requires at least {num} GPUs") @@ -102,7 +102,7 @@ def check(expected): with pytest.raises(ValueError, match=r"device_id must be \>\= 0"): dmr.peer_accessible_by = [-1] # device ID out of bounds - num_devices = len(cuda.core.experimental.system.devices) + num_devices = len(cuda.core.system.devices) with pytest.raises(ValueError, match=r"device_id must be within \[0, \d+\)"): dmr.peer_accessible_by = [num_devices] # device ID out of bounds diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py index 639bc7fe8d..a1934834fc 100644 --- a/cuda_core/tests/test_module.py +++ b/cuda_core/tests/test_module.py @@ -387,7 +387,7 @@ def test_occupancy_max_active_clusters(get_saxpy_kernel_cubin, cluster): dev = Device() if dev.compute_capability < (9, 0): pytest.skip("Device with compute capability 90 or higher is required for cluster support") - launch_config = cuda.core.experimental.LaunchConfig(grid=128, block=64, cluster=cluster) + launch_config = cuda.core.LaunchConfig(grid=128, block=64, cluster=cluster) query_fn = kernel.occupancy.max_active_clusters max_active_clusters = query_fn(launch_config) assert isinstance(max_active_clusters, int) @@ -402,7 +402,7 @@ def test_occupancy_max_potential_cluster_size(get_saxpy_kernel_cubin): dev = Device() if dev.compute_capability < (9, 0): pytest.skip("Device with compute capability 90 or higher is required for cluster support") - launch_config = 
cuda.core.experimental.LaunchConfig(grid=128, block=64) + launch_config = cuda.core.LaunchConfig(grid=128, block=64) query_fn = kernel.occupancy.max_potential_cluster_size max_potential_cluster_size = query_fn(launch_config) assert isinstance(max_potential_cluster_size, int) From 4307b5d1303ac368c372f59257bad5128af76f9e Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Mon, 15 Dec 2025 09:09:02 -0800 Subject: [PATCH 21/26] Migrate StridedLayout from experimental to main namespace - Moved _layout.pxd and _layout.pyx from experimental/ to cuda/core/ - Updated all imports from cuda.core.experimental._layout to cuda.core._layout - Added StridedLayout to cuda.core.__init__.py exports - Added backward compatibility stub in experimental/__init__.py - Fixed docstring references in _linker.py and _module.py - Updated pyproject.toml package-data path - Updated docs conf.py excluded_dirs All code has been migrated from cuda.core.experimental except backward compatibility stubs. --- cuda_core/cuda/core/__init__.py | 1 + cuda_core/cuda/core/{experimental => }/_layout.pxd | 0 cuda_core/cuda/core/{experimental => }/_layout.pyx | 0 cuda_core/cuda/core/_linker.py | 2 +- cuda_core/cuda/core/_memoryview.pyx | 2 +- cuda_core/cuda/core/_module.py | 2 +- cuda_core/cuda/core/experimental/__init__.py | 3 +++ cuda_core/cuda/core/utils.py | 2 +- cuda_core/docs/source/conf.py | 4 ++-- cuda_core/pyproject.toml | 2 +- cuda_core/tests/test_strided_layout.py | 2 +- cuda_core/tests/test_utils.py | 2 +- 12 files changed, 13 insertions(+), 9 deletions(-) rename cuda_core/cuda/core/{experimental => }/_layout.pxd (100%) rename cuda_core/cuda/core/{experimental => }/_layout.pyx (100%) diff --git a/cuda_core/cuda/core/__init__.py b/cuda_core/cuda/core/__init__.py index d84b9e072d..d074be02d8 100644 --- a/cuda_core/cuda/core/__init__.py +++ b/cuda_core/cuda/core/__init__.py @@ -39,6 +39,7 @@ ) from cuda.core._launch_config import LaunchConfig # noqa: E402 from cuda.core._launcher import 
launch # noqa: E402 +from cuda.core._layout import StridedLayout # noqa: E402 from cuda.core._linker import Linker, LinkerOptions # noqa: E402 from cuda.core._memory import ( # noqa: E402 Buffer, diff --git a/cuda_core/cuda/core/experimental/_layout.pxd b/cuda_core/cuda/core/_layout.pxd similarity index 100% rename from cuda_core/cuda/core/experimental/_layout.pxd rename to cuda_core/cuda/core/_layout.pxd diff --git a/cuda_core/cuda/core/experimental/_layout.pyx b/cuda_core/cuda/core/_layout.pyx similarity index 100% rename from cuda_core/cuda/core/experimental/_layout.pyx rename to cuda_core/cuda/core/_layout.pyx diff --git a/cuda_core/cuda/core/_linker.py b/cuda_core/cuda/core/_linker.py index fcb3416f33..2d2e47bb3e 100644 --- a/cuda_core/cuda/core/_linker.py +++ b/cuda_core/cuda/core/_linker.py @@ -355,7 +355,7 @@ def _exception_manager(self): class Linker: """Represent a linking machinery to link one or multiple object codes into - :obj:`~cuda.core.experimental._module.ObjectCode` with the specified options. + :obj:`~cuda.core._module.ObjectCode` with the specified options. This object provides a unified interface to multiple underlying linker libraries (such as nvJitLink or cuLink* from CUDA driver). diff --git a/cuda_core/cuda/core/_memoryview.pyx b/cuda_core/cuda/core/_memoryview.pyx index e5ce9a2e97..6c329dc4ff 100644 --- a/cuda_core/cuda/core/_memoryview.pyx +++ b/cuda_core/cuda/core/_memoryview.pyx @@ -4,7 +4,7 @@ from ._dlpack cimport * from libc.stdint cimport intptr_t -from cuda.core.experimental._layout cimport StridedLayout +from cuda.core._layout cimport StridedLayout from cuda.core._stream import Stream import functools diff --git a/cuda_core/cuda/core/_module.py b/cuda_core/cuda/core/_module.py index b0f6ff387d..fbea314406 100644 --- a/cuda_core/cuda/core/_module.py +++ b/cuda_core/cuda/core/_module.py @@ -453,7 +453,7 @@ class ObjectCode: This class has no default constructor. 
If you already have a cubin that you would like to load, use the :meth:`from_cubin` alternative constructor. Constructing directly from all other possible code types should be avoided in favor of compilation through - :class:`~cuda.core.experimental.Program` + :class:`~cuda.core.Program` Note ---- diff --git a/cuda_core/cuda/core/experimental/__init__.py b/cuda_core/cuda/core/experimental/__init__.py index f140844197..a8e28ce09e 100644 --- a/cuda_core/cuda/core/experimental/__init__.py +++ b/cuda_core/cuda/core/experimental/__init__.py @@ -39,6 +39,7 @@ ProgramOptions, Stream, StreamOptions, + StridedLayout, VirtualMemoryResource, VirtualMemoryResourceOptions, launch, @@ -84,6 +85,7 @@ def _warn_deprecated(): ) from cuda.core._launch_config import LaunchConfig # noqa: E402 from cuda.core._launcher import launch # noqa: E402 +from cuda.core._layout import StridedLayout # noqa: E402 from cuda.core._linker import Linker, LinkerOptions # noqa: E402 from cuda.core._memory import ( # noqa: E402 Buffer, @@ -115,6 +117,7 @@ def __getattr__(name): "_graph", "_launch_config", "_launcher", + "_layout", "_linker", "_memory", "_module", diff --git a/cuda_core/cuda/core/utils.py b/cuda_core/cuda/core/utils.py index ab9d63dfe0..177adaef2f 100644 --- a/cuda_core/cuda/core/utils.py +++ b/cuda_core/cuda/core/utils.py @@ -2,8 +2,8 @@ # # SPDX-License-Identifier: Apache-2.0 +from cuda.core._layout import StridedLayout # noqa: F401 from cuda.core._memoryview import ( StridedMemoryView, # noqa: F401 args_viewable_as_strided_memory, # noqa: F401 ) -from cuda.core.experimental._layout import StridedLayout # noqa: F401 diff --git a/cuda_core/docs/source/conf.py b/cuda_core/docs/source/conf.py index bab2a2b942..47147965ff 100644 --- a/cuda_core/docs/source/conf.py +++ b/cuda_core/docs/source/conf.py @@ -129,8 +129,8 @@ def skip_member(app, what, name, obj, skip, options): # are assumed to be properties (because cythonized # properties are not recognized as such by autodoc) excluded_dirs = [ 
- "cuda.core.experimental._layout", - "cuda.core.experimental._memoryview", + "cuda.core._layout", + "cuda.core._memoryview", ] if what == "attribute" and getattr(obj, "__doc__", None) is None: obj_module = getattr(getattr(obj, "__objclass__", None), "__module__", None) diff --git a/cuda_core/pyproject.toml b/cuda_core/pyproject.toml index af99ddd361..d25442258d 100644 --- a/cuda_core/pyproject.toml +++ b/cuda_core/pyproject.toml @@ -69,7 +69,7 @@ issues = "https://github.com/NVIDIA/cuda-python/issues/" include = ["cuda.core*"] [tool.setuptools.package-data] -"cuda.core.experimental.include" = ["*.h", "*.hpp", "*.cuh"] +"cuda.core.include" = ["*.h", "*.hpp", "*.cuh"] [tool.setuptools.dynamic] version = { attr = "cuda.core._version.__version__" } diff --git a/cuda_core/tests/test_strided_layout.py b/cuda_core/tests/test_strided_layout.py index a0f63f7aaa..f2baaa03d3 100644 --- a/cuda_core/tests/test_strided_layout.py +++ b/cuda_core/tests/test_strided_layout.py @@ -9,7 +9,7 @@ import numpy as np import pytest -from cuda.core.experimental._layout import StridedLayout +from cuda.core._layout import StridedLayout from helpers.layout import ( DenseOrder, LayoutSpec, diff --git a/cuda_core/tests/test_utils.py b/cuda_core/tests/test_utils.py index 924af311df..502adacc89 100644 --- a/cuda_core/tests/test_utils.py +++ b/cuda_core/tests/test_utils.py @@ -16,7 +16,7 @@ import numpy as np import pytest from cuda.core import Device -from cuda.core.experimental._layout import StridedLayout +from cuda.core._layout import StridedLayout from cuda.core.utils import StridedMemoryView, args_viewable_as_strided_memory From 4b9796b8f7a1a51682eb43adeb167ea150f17e3e Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Mon, 15 Dec 2025 09:16:55 -0800 Subject: [PATCH 22/26] Sort __getattr__ name list alphabetically and remove one duplicate (_launch_config). 
--- cuda_core/cuda/core/experimental/__init__.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/cuda_core/cuda/core/experimental/__init__.py b/cuda_core/cuda/core/experimental/__init__.py index a8e28ce09e..832dce11b3 100644 --- a/cuda_core/cuda/core/experimental/__init__.py +++ b/cuda_core/cuda/core/experimental/__init__.py @@ -112,24 +112,23 @@ def _warn_deprecated(): def __getattr__(name): """Forward attribute access to the new location with deprecation warning.""" if name in ( + "_context", "_device", + "_dlpack", "_event", "_graph", + "_kernel_arg_handler", "_launch_config", "_launcher", "_layout", "_linker", "_memory", + "_memoryview", "_module", "_program", "_stream", "_system", "_utils", - "_context", - "_dlpack", - "_kernel_arg_handler", - "_launch_config", - "_memoryview", ): _warn_deprecated() # Import the submodule from the new location From 798ce3d4259dd42cde334076e373921c61067df7 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Mon, 15 Dec 2025 09:20:46 -0800 Subject: [PATCH 23/26] Remove redundant TYPE_CHECKING block from experimental/__init__.py The TYPE_CHECKING block was unnecessary since all symbols are already imported at runtime below. Type checkers will see the types from the runtime imports, making the TYPE_CHECKING block redundant duplication. 
--- cuda_core/cuda/core/experimental/__init__.py | 33 -------------------- 1 file changed, 33 deletions(-) diff --git a/cuda_core/cuda/core/experimental/__init__.py b/cuda_core/cuda/core/experimental/__init__.py index 832dce11b3..f937a3795a 100644 --- a/cuda_core/cuda/core/experimental/__init__.py +++ b/cuda_core/cuda/core/experimental/__init__.py @@ -13,39 +13,6 @@ """ import warnings -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - # For type checkers, import from the new location - from cuda.core import ( - Buffer, - Device, - DeviceMemoryResource, - DeviceMemoryResourceOptions, - Event, - EventOptions, - Graph, - GraphBuilder, - GraphCompleteOptions, - GraphDebugPrintOptions, - Kernel, - LaunchConfig, - LegacyPinnedMemoryResource, - Linker, - LinkerOptions, - MemoryResource, - ObjectCode, - Program, - ProgramOptions, - Stream, - StreamOptions, - StridedLayout, - VirtualMemoryResource, - VirtualMemoryResourceOptions, - launch, - system, - utils, - ) def _warn_deprecated(): From 09a78ccd0d33b5234d266f8e3f075bcc2a0dcdad Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Mon, 15 Dec 2025 09:27:14 -0800 Subject: [PATCH 24/26] Update references from cuda.core.experimental to cuda.core - Update issue templates to use cuda.core.Program instead of cuda.core.experimental.Program - Update merge script to place versioned directories (cu12, cu13) under cuda/core/ instead of cuda/core/experimental/ - Fix duplicate line in merge script --- .github/ISSUE_TEMPLATE/bug_report.yml | 6 +++--- .github/ISSUE_TEMPLATE/feature_request.yml | 6 +++--- ci/tools/merge_cuda_core_wheels.py | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index 4574e04bf3..1e0b4d2c20 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -52,7 +52,7 @@ body: attributes: label: Describe the bug description: A clear and concise description of what problem you are running into. - placeholder: "Attempting to compile a program via `cuda.core.experimental.Program.compile` throws a `ValueError`." + placeholder: "Attempting to compile a program via `cuda.core.Program.compile` throws a `ValueError`." validations: required: true @@ -62,7 +62,7 @@ body: label: How to Reproduce description: Steps used to reproduce the bug. placeholder: | - 0. Construct a `cuda.core.experimental.Program` instance + 0. Construct a `cuda.core.Program` instance 1. Call the `.compile(...)` method of the instance 2. The call throws a `ValueError` with the following: ``` @@ -76,7 +76,7 @@ body: attributes: label: Expected behavior description: A clear and concise description of what you expected to happen. 
- placeholder: "Using `cuda.core.experimental.Program.compile(...)` should run successfully and not throw a `ValueError`" + placeholder: "Using `cuda.core.Program.compile(...)` should run successfully and not throw a `ValueError`" validations: required: true diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml index cbbc03c492..6d1504c4c4 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.yml +++ b/.github/ISSUE_TEMPLATE/feature_request.yml @@ -36,7 +36,7 @@ body: attributes: label: Is your feature request related to a problem? Please describe. description: A clear and concise description of what the problem is, e.g., "I would like to be able to..." - placeholder: I would like to be able to use the equivalent of `cuda.core.experimental.Program.compile(...)` to compile my code to PTX. + placeholder: I would like to be able to use the equivalent of `cuda.core.Program.compile(...)` to compile my code to PTX. validations: required: true @@ -46,7 +46,7 @@ body: label: Describe the solution you'd like description: A clear and concise description of what you want to happen. placeholder: | - Support a `ptx` target_type in the `cuda.core.experimental.Program.compile(...)` function. + Support a `ptx` target_type in the `cuda.core.Program.compile(...)` function. validations: required: true @@ -57,7 +57,7 @@ body: description: If applicable, please add a clear and concise description of any alternative solutions or features you've considered. - placeholder: The alternatives to using `cuda.core.experimental.Program.compile(...)` are unappealing. They usually involve using lower level bindings to something like nvRTC or invoking the nvcc executable. + placeholder: The alternatives to using `cuda.core.Program.compile(...)` are unappealing. They usually involve using lower level bindings to something like nvRTC or invoking the nvcc executable. 
validations: required: false diff --git a/ci/tools/merge_cuda_core_wheels.py b/ci/tools/merge_cuda_core_wheels.py index 359b98d6a7..8ae9a266ad 100644 --- a/ci/tools/merge_cuda_core_wheels.py +++ b/ci/tools/merge_cuda_core_wheels.py @@ -12,8 +12,8 @@ In particular, each wheel contains a CUDA-specific build of the `cuda.core` library and the associated bindings. This script merges these directories into a single wheel -that supports both CUDA versions, i.e., containing both `cuda/core/experimental/cu12` -and `cuda/core/experimental/cu13`. At runtime, the code in `cuda/core/experimental/__init__.py` +that supports both CUDA versions, i.e., containing both `cuda/core/cu12` +and `cuda/core/cu13`. At runtime, the code in `cuda/core/__init__.py` is used to import the appropriate CUDA-specific bindings. This script is based on the one in NVIDIA/CCCL. @@ -98,7 +98,7 @@ def merge_wheels(wheels: List[Path], output_dir: Path) -> Path: # into the appropriate place in the base wheel for i, wheel_dir in enumerate(extracted_wheels): cuda_version = wheels[i].name.split(".cu")[1].split(".")[0] - base_dir = Path("cuda") / "core" / "experimental" + base_dir = Path("cuda") / "core" # Copy from other wheels print(f" Copying {wheel_dir} to {base_wheel}", file=sys.stderr) shutil.copytree(wheel_dir / base_dir, base_wheel / base_dir / f"cu{cuda_version}") From 516e088e60cee6cceeb96944c67b53dc2ec0887d Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Mon, 15 Dec 2025 19:22:33 -0800 Subject: [PATCH 25/26] Fix merge script to preserve Python modules in cuda/core The merge script was copying the entire cuda/core directory into versioned subdirectories and then removing all Python modules from cuda/core, causing ModuleNotFoundError when importing cuda.core. Fix: Only copy version-specific binaries (.so, .pyd, .dll) into versioned directories (cu12/, cu13/), and keep all Python modules in cuda/core/ where they belong. 
Since wheels no longer have an experimental/ directory, we can simplify the merge logic compared to the original approach, which assumed experimental/ contained only binaries. --- ci/tools/merge_cuda_core_wheels.py | 45 +++++++++++++++++------------- 1 file changed, 26 insertions(+), 19 deletions(-) diff --git a/ci/tools/merge_cuda_core_wheels.py b/ci/tools/merge_cuda_core_wheels.py index 8ae9a266ad..eae72bc27b 100644 --- a/ci/tools/merge_cuda_core_wheels.py +++ b/ci/tools/merge_cuda_core_wheels.py @@ -94,27 +94,34 @@ def merge_wheels(wheels: List[Path], output_dir: Path) -> Path: # Use the first wheel as the base and merge binaries from others base_wheel = extracted_wheels[0] - # now copy the version-specific directory from other wheels - # into the appropriate place in the base wheel + # Copy version-specific binaries from each wheel into versioned subdirectories + # Note: Python modules stay in cuda/core/, only binaries go into cu12/cu13/ + base_dir = Path("cuda") / "core" + for i, wheel_dir in enumerate(extracted_wheels): cuda_version = wheels[i].name.split(".cu")[1].split(".")[0] - base_dir = Path("cuda") / "core" - # Copy from other wheels - print(f" Copying {wheel_dir} to {base_wheel}", file=sys.stderr) - shutil.copytree(wheel_dir / base_dir, base_wheel / base_dir / f"cu{cuda_version}") - - # Overwrite the __init__.py in versioned dirs - os.truncate(base_wheel / base_dir / f"cu{cuda_version}" / "__init__.py", 0) - - # The base dir should only contain __init__.py, the include dir, and the versioned dirs - files_to_remove = os.scandir(base_wheel / base_dir) - for f in files_to_remove: - f_abspath = f.path - if f.name not in ("__init__.py", "cu12", "cu13", "include"): - if f.is_dir(): - shutil.rmtree(f_abspath) - else: - os.remove(f_abspath) + versioned_dir = base_wheel / base_dir / f"cu{cuda_version}" + + # Create versioned directory + versioned_dir.mkdir(parents=True, exist_ok=True) + + # Copy only version-specific binaries (.so, .pyd, .dll files) from the source 
wheel + # Python modules (.py, .pyx, .pxd) remain in cuda/core/ + source_dir = wheel_dir / base_dir + for item in source_dir.rglob("*"): + if item.is_dir(): + continue + + # Only copy binary files, not Python source files + if item.suffix in (".so", ".pyd", ".dll"): + # Preserve directory structure relative to base_dir + rel_path = item.relative_to(source_dir) + dest_item = versioned_dir / rel_path + dest_item.parent.mkdir(parents=True, exist_ok=True) + shutil.copy2(item, dest_item) + + # Create empty __init__.py in versioned dirs + (versioned_dir / "__init__.py").touch() # Repack the merged wheel output_dir.mkdir(parents=True, exist_ok=True) From 475a1df240a54f8ef3e12e6f4f298990290db3a4 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Mon, 15 Dec 2025 20:12:33 -0800 Subject: [PATCH 26/26] Fix recursion in merge script by excluding versioned directories The rglob("*") was recursively searching through all subdirectories, including the cu12/ and cu13/ directories we just created. This caused files from cuda/core/cu13/ to be copied to cuda/core/cu13/cu13/, creating infinite recursion. Fix: Skip any files that are in versioned directories (cu12/, cu13/) when searching for binaries to copy. 
--- ci/tools/merge_cuda_core_wheels.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/ci/tools/merge_cuda_core_wheels.py b/ci/tools/merge_cuda_core_wheels.py index eae72bc27b..14ed53c308 100644 --- a/ci/tools/merge_cuda_core_wheels.py +++ b/ci/tools/merge_cuda_core_wheels.py @@ -107,15 +107,19 @@ def merge_wheels(wheels: List[Path], output_dir: Path) -> Path: # Copy only version-specific binaries (.so, .pyd, .dll files) from the source wheel # Python modules (.py, .pyx, .pxd) remain in cuda/core/ + # Exclude versioned directories (cu12/, cu13/) to avoid recursion source_dir = wheel_dir / base_dir for item in source_dir.rglob("*"): if item.is_dir(): continue + # Skip files in versioned directories to avoid recursion + rel_path = item.relative_to(source_dir) + if any(part in ("cu12", "cu13") for part in rel_path.parts): + continue + # Only copy binary files, not Python source files if item.suffix in (".so", ".pyd", ".dll"): - # Preserve directory structure relative to base_dir - rel_path = item.relative_to(source_dir) dest_item = versioned_dir / rel_path dest_item.parent.mkdir(parents=True, exist_ok=True) shutil.copy2(item, dest_item)