diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index 4574e04bf3..1e0b4d2c20 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -52,7 +52,7 @@ body: attributes: label: Describe the bug description: A clear and concise description of what problem you are running into. - placeholder: "Attempting to compile a program via `cuda.core.experimental.Program.compile` throws a `ValueError`." + placeholder: "Attempting to compile a program via `cuda.core.Program.compile` throws a `ValueError`." validations: required: true @@ -62,7 +62,7 @@ body: label: How to Reproduce description: Steps used to reproduce the bug. placeholder: | - 0. Construct a `cuda.core.experimental.Program` instance + 0. Construct a `cuda.core.Program` instance 1. Call the `.compile(...)` method of the instance 2. The call throws a `ValueError` with the following: ``` @@ -76,7 +76,7 @@ body: attributes: label: Expected behavior description: A clear and concise description of what you expected to happen. - placeholder: "Using `cuda.core.experimental.Program.compile(...)` should run successfully and not throw a `ValueError`" + placeholder: "Using `cuda.core.Program.compile(...)` should run successfully and not throw a `ValueError`" validations: required: true diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml index cbbc03c492..6d1504c4c4 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.yml +++ b/.github/ISSUE_TEMPLATE/feature_request.yml @@ -36,7 +36,7 @@ body: attributes: label: Is your feature request related to a problem? Please describe. description: A clear and concise description of what the problem is, e.g., "I would like to be able to..." - placeholder: I would like to be able to use the equivalent of `cuda.core.experimental.Program.compile(...)` to compile my code to PTX. 
+ placeholder: I would like to be able to use the equivalent of `cuda.core.Program.compile(...)` to compile my code to PTX. validations: required: true @@ -46,7 +46,7 @@ body: label: Describe the solution you'd like description: A clear and concise description of what you want to happen. placeholder: | - Support a `ptx` target_type in the `cuda.core.experimental.Program.compile(...)` function. + Support a `ptx` target_type in the `cuda.core.Program.compile(...)` function. validations: required: true @@ -57,7 +57,7 @@ body: description: If applicable, please add a clear and concise description of any alternative solutions or features you've considered. - placeholder: The alternatives to using `cuda.core.experimental.Program.compile(...)` are unappealing. They usually involve using lower level bindings to something like nvRTC or invoking the nvcc executable. + placeholder: The alternatives to using `cuda.core.Program.compile(...)` are unappealing. They usually involve using lower level bindings to something like nvRTC or invoking the nvcc executable. validations: required: false diff --git a/.spdx-ignore b/.spdx-ignore index c7177752e1..a9d54c6c15 100644 --- a/.spdx-ignore +++ b/.spdx-ignore @@ -10,6 +10,6 @@ requirements*.txt cuda_bindings/examples/* # Vendored -cuda_core/cuda/core/experimental/include/dlpack.h +cuda_core/cuda/core/include/dlpack.h qa/ctk-next.drawio.svg diff --git a/ci/tools/merge_cuda_core_wheels.py b/ci/tools/merge_cuda_core_wheels.py index 359b98d6a7..14ed53c308 100644 --- a/ci/tools/merge_cuda_core_wheels.py +++ b/ci/tools/merge_cuda_core_wheels.py @@ -12,8 +12,8 @@ In particular, each wheel contains a CUDA-specific build of the `cuda.core` library and the associated bindings. This script merges these directories into a single wheel -that supports both CUDA versions, i.e., containing both `cuda/core/experimental/cu12` -and `cuda/core/experimental/cu13`. 
At runtime, the code in `cuda/core/experimental/__init__.py` +that supports both CUDA versions, i.e., containing both `cuda/core/cu12` +and `cuda/core/cu13`. At runtime, the code in `cuda/core/__init__.py` is used to import the appropriate CUDA-specific bindings. This script is based on the one in NVIDIA/CCCL. @@ -94,27 +94,38 @@ def merge_wheels(wheels: List[Path], output_dir: Path) -> Path: # Use the first wheel as the base and merge binaries from others base_wheel = extracted_wheels[0] - # now copy the version-specific directory from other wheels - # into the appropriate place in the base wheel + # Copy version-specific binaries from each wheel into versioned subdirectories + # Note: Python modules stay in cuda/core/, only binaries go into cu12/cu13/ + base_dir = Path("cuda") / "core" + for i, wheel_dir in enumerate(extracted_wheels): cuda_version = wheels[i].name.split(".cu")[1].split(".")[0] - base_dir = Path("cuda") / "core" / "experimental" - # Copy from other wheels - print(f" Copying {wheel_dir} to {base_wheel}", file=sys.stderr) - shutil.copytree(wheel_dir / base_dir, base_wheel / base_dir / f"cu{cuda_version}") - - # Overwrite the __init__.py in versioned dirs - os.truncate(base_wheel / base_dir / f"cu{cuda_version}" / "__init__.py", 0) - - # The base dir should only contain __init__.py, the include dir, and the versioned dirs - files_to_remove = os.scandir(base_wheel / base_dir) - for f in files_to_remove: - f_abspath = f.path - if f.name not in ("__init__.py", "cu12", "cu13", "include"): - if f.is_dir(): - shutil.rmtree(f_abspath) - else: - os.remove(f_abspath) + versioned_dir = base_wheel / base_dir / f"cu{cuda_version}" + + # Create versioned directory + versioned_dir.mkdir(parents=True, exist_ok=True) + + # Copy only version-specific binaries (.so, .pyd, .dll files) from the source wheel + # Python modules (.py, .pyx, .pxd) remain in cuda/core/ + # Exclude versioned directories (cu12/, cu13/) to avoid recursion + source_dir = wheel_dir / base_dir + 
for item in source_dir.rglob("*"): + if item.is_dir(): + continue + + # Skip files in versioned directories to avoid recursion + rel_path = item.relative_to(source_dir) + if any(part in ("cu12", "cu13") for part in rel_path.parts): + continue + + # Only copy binary files, not Python source files + if item.suffix in (".so", ".pyd", ".dll"): + dest_item = versioned_dir / rel_path + dest_item.parent.mkdir(parents=True, exist_ok=True) + shutil.copy2(item, dest_item) + + # Create empty __init__.py in versioned dirs + (versioned_dir / "__init__.py").touch() # Repack the merged wheel output_dir.mkdir(parents=True, exist_ok=True) diff --git a/cuda_core/build_hooks.py b/cuda_core/build_hooks.py index e38f5676df..aef506762f 100644 --- a/cuda_core/build_hooks.py +++ b/cuda_core/build_hooks.py @@ -66,7 +66,7 @@ def _build_cuda_core(): # It seems setuptools' wildcard support has problems for namespace packages, # so we explicitly spell out all Extension instances. - root_module = "cuda.core.experimental" + root_module = "cuda.core" root_path = f"{os.path.sep}".join(root_module.split(".")) + os.path.sep ext_files = glob.glob(f"{root_path}/**/*.pyx", recursive=True) @@ -84,11 +84,16 @@ def get_cuda_paths(): print("CUDA paths:", CUDA_PATH) return CUDA_PATH + # Add local include directory for cuda/core/include + local_include_dirs = ["cuda/core"] + cuda_include_dirs = list(os.path.join(root, "include") for root in get_cuda_paths()) + all_include_dirs = local_include_dirs + cuda_include_dirs + ext_modules = tuple( Extension( - f"cuda.core.experimental.{mod.replace(os.path.sep, '.')}", - sources=[f"cuda/core/experimental/{mod}.pyx"], - include_dirs=list(os.path.join(root, "include") for root in get_cuda_paths()), + f"cuda.core.{mod.replace(os.path.sep, '.')}", + sources=[f"cuda/core/{mod}.pyx"], + include_dirs=all_include_dirs, language="c++", ) for mod in module_names diff --git a/cuda_core/cuda/core/__init__.py b/cuda_core/cuda/core/__init__.py index 96a80d1f3e..d074be02d8 100644 
--- a/cuda_core/cuda/core/__init__.py +++ b/cuda_core/cuda/core/__init__.py @@ -3,3 +3,59 @@ # SPDX-License-Identifier: Apache-2.0 from cuda.core._version import __version__ + +try: + from cuda import bindings +except ImportError: + raise ImportError("cuda.bindings 12.x or 13.x must be installed") from None +else: + cuda_major, cuda_minor = bindings.__version__.split(".")[:2] + if cuda_major not in ("12", "13"): + raise ImportError("cuda.bindings 12.x or 13.x must be installed") + +import importlib + +subdir = f"cu{cuda_major}" +try: + versioned_mod = importlib.import_module(f".{subdir}", __package__) + # Import all symbols from the module + globals().update(versioned_mod.__dict__) +except ImportError: + # This is not a wheel build, but a conda or local build, do nothing + pass +else: + del versioned_mod +finally: + del bindings, importlib, subdir, cuda_major, cuda_minor + +from cuda.core import utils # noqa: E402 +from cuda.core._device import Device # noqa: E402 +from cuda.core._event import Event, EventOptions # noqa: E402 +from cuda.core._graph import ( # noqa: E402 + Graph, + GraphBuilder, + GraphCompleteOptions, + GraphDebugPrintOptions, +) +from cuda.core._launch_config import LaunchConfig # noqa: E402 +from cuda.core._launcher import launch # noqa: E402 +from cuda.core._layout import StridedLayout # noqa: E402 +from cuda.core._linker import Linker, LinkerOptions # noqa: E402 +from cuda.core._memory import ( # noqa: E402 + Buffer, + DeviceMemoryResource, + DeviceMemoryResourceOptions, + GraphMemoryResource, + LegacyPinnedMemoryResource, + MemoryResource, + VirtualMemoryResource, + VirtualMemoryResourceOptions, +) +from cuda.core._module import Kernel, ObjectCode # noqa: E402 +from cuda.core._program import Program, ProgramOptions # noqa: E402 +from cuda.core._stream import Stream, StreamOptions # noqa: E402 +from cuda.core._system import System # noqa: E402 + +system = System() +__import__("sys").modules[__spec__.name + ".system"] = system +del System diff 
--git a/cuda_core/cuda/core/experimental/__init__.pxd b/cuda_core/cuda/core/__init__.pxd similarity index 100% rename from cuda_core/cuda/core/experimental/__init__.pxd rename to cuda_core/cuda/core/__init__.pxd diff --git a/cuda_core/cuda/core/experimental/_context.pyx b/cuda_core/cuda/core/_context.pyx similarity index 94% rename from cuda_core/cuda/core/experimental/_context.pyx rename to cuda_core/cuda/core/_context.pyx index f9858c1710..c1c28b3389 100644 --- a/cuda_core/cuda/core/experimental/_context.pyx +++ b/cuda_core/cuda/core/_context.pyx @@ -4,7 +4,7 @@ from dataclasses import dataclass -from cuda.core.experimental._utils.cuda_utils import driver +from cuda.core._utils.cuda_utils import driver @dataclass diff --git a/cuda_core/cuda/core/experimental/_device.pyx b/cuda_core/cuda/core/_device.pyx similarity index 98% rename from cuda_core/cuda/core/experimental/_device.pyx rename to cuda_core/cuda/core/_device.pyx index cd802943a5..7f1582e179 100644 --- a/cuda_core/cuda/core/experimental/_device.pyx +++ b/cuda_core/cuda/core/_device.pyx @@ -6,27 +6,27 @@ cimport cpython from libc.stdint cimport uintptr_t from cuda.bindings cimport cydriver -from cuda.core.experimental._utils.cuda_utils cimport HANDLE_RETURN +from cuda.core._utils.cuda_utils cimport HANDLE_RETURN import threading from typing import Optional, TYPE_CHECKING, Union -from cuda.core.experimental._context import Context, ContextOptions -from cuda.core.experimental._event import Event, EventOptions -from cuda.core.experimental._graph import GraphBuilder -from cuda.core.experimental._stream import IsStreamT, Stream, StreamOptions -from cuda.core.experimental._utils.clear_error_support import assert_type -from cuda.core.experimental._utils.cuda_utils import ( +from cuda.core._context import Context, ContextOptions +from cuda.core._event import Event, EventOptions +from cuda.core._graph import GraphBuilder +from cuda.core._stream import IsStreamT, Stream, StreamOptions +from 
cuda.core._utils.clear_error_support import assert_type +from cuda.core._utils.cuda_utils import ( ComputeCapability, CUDAError, driver, handle_return, runtime, ) -from cuda.core.experimental._stream cimport default_stream +from cuda.core._stream cimport default_stream if TYPE_CHECKING: - from cuda.core.experimental._memory import Buffer, MemoryResource + from cuda.core._memory import Buffer, MemoryResource # TODO: I prefer to type these as "cdef object" and avoid accessing them from within Python, # but it seems it is very convenient to expose them for testing purposes... @@ -1154,17 +1154,17 @@ class Device: ) ) if attr == 1: - from cuda.core.experimental._memory import DeviceMemoryResource + from cuda.core._memory import DeviceMemoryResource self._memory_resource = DeviceMemoryResource(self._id) else: - from cuda.core.experimental._memory import _SynchronousMemoryResource + from cuda.core._memory import _SynchronousMemoryResource self._memory_resource = _SynchronousMemoryResource(self._id) return self._memory_resource @memory_resource.setter def memory_resource(self, mr): - from cuda.core.experimental._memory import MemoryResource + from cuda.core._memory import MemoryResource assert_type(mr, MemoryResource) self._memory_resource = mr @@ -1223,7 +1223,7 @@ class Device: Acts as an entry point of this object. Users always start a code by calling this method, e.g. - >>> from cuda.core.experimental import Device + >>> from cuda.core import Device >>> dev0 = Device(0) >>> dev0.set_current() >>> # ... do work on device 0 ... 
diff --git a/cuda_core/cuda/core/experimental/_dlpack.pxd b/cuda_core/cuda/core/_dlpack.pxd similarity index 100% rename from cuda_core/cuda/core/experimental/_dlpack.pxd rename to cuda_core/cuda/core/_dlpack.pxd diff --git a/cuda_core/cuda/core/experimental/_dlpack.pyx b/cuda_core/cuda/core/_dlpack.pyx similarity index 100% rename from cuda_core/cuda/core/experimental/_dlpack.pyx rename to cuda_core/cuda/core/_dlpack.pyx diff --git a/cuda_core/cuda/core/experimental/_event.pxd b/cuda_core/cuda/core/_event.pxd similarity index 100% rename from cuda_core/cuda/core/experimental/_event.pxd rename to cuda_core/cuda/core/_event.pxd diff --git a/cuda_core/cuda/core/experimental/_event.pyx b/cuda_core/cuda/core/_event.pyx similarity index 98% rename from cuda_core/cuda/core/experimental/_event.pyx rename to cuda_core/cuda/core/_event.pyx index 149c92b8e1..e97fdfbab4 100644 --- a/cuda_core/cuda/core/experimental/_event.pyx +++ b/cuda_core/cuda/core/_event.pyx @@ -8,7 +8,7 @@ cimport cpython from libc.stdint cimport uintptr_t from libc.string cimport memcpy from cuda.bindings cimport cydriver -from cuda.core.experimental._utils.cuda_utils cimport ( +from cuda.core._utils.cuda_utils cimport ( check_or_create_options, HANDLE_RETURN ) @@ -18,8 +18,8 @@ from dataclasses import dataclass import multiprocessing from typing import TYPE_CHECKING, Optional -from cuda.core.experimental._context import Context -from cuda.core.experimental._utils.cuda_utils import ( +from cuda.core._context import Context +from cuda.core._utils.cuda_utils import ( CUDAError, check_multiprocessing_start_method, driver, diff --git a/cuda_core/cuda/core/experimental/_graph.py b/cuda_core/cuda/core/_graph.py similarity index 99% rename from cuda_core/cuda/core/experimental/_graph.py rename to cuda_core/cuda/core/_graph.py index a82bd70f55..df51126bb0 100644 --- a/cuda_core/cuda/core/experimental/_graph.py +++ b/cuda_core/cuda/core/_graph.py @@ -9,8 +9,8 @@ from typing import TYPE_CHECKING if TYPE_CHECKING: 
- from cuda.core.experimental._stream import Stream -from cuda.core.experimental._utils.cuda_utils import ( + from cuda.core._stream import Stream +from cuda.core._utils.cuda_utils import ( driver, get_binding_version, handle_return, diff --git a/cuda_core/cuda/core/experimental/_kernel_arg_handler.pyx b/cuda_core/cuda/core/_kernel_arg_handler.pyx similarity index 99% rename from cuda_core/cuda/core/experimental/_kernel_arg_handler.pyx rename to cuda_core/cuda/core/_kernel_arg_handler.pyx index 4cac74a25f..6374605262 100644 --- a/cuda_core/cuda/core/experimental/_kernel_arg_handler.pyx +++ b/cuda_core/cuda/core/_kernel_arg_handler.pyx @@ -15,8 +15,8 @@ import ctypes import numpy -from cuda.core.experimental._memory import Buffer -from cuda.core.experimental._utils.cuda_utils import driver +from cuda.core._memory import Buffer +from cuda.core._utils.cuda_utils import driver from cuda.bindings cimport cydriver diff --git a/cuda_core/cuda/core/experimental/_launch_config.pxd b/cuda_core/cuda/core/_launch_config.pxd similarity index 100% rename from cuda_core/cuda/core/experimental/_launch_config.pxd rename to cuda_core/cuda/core/_launch_config.pxd diff --git a/cuda_core/cuda/core/experimental/_launch_config.pyx b/cuda_core/cuda/core/_launch_config.pyx similarity index 98% rename from cuda_core/cuda/core/experimental/_launch_config.pyx rename to cuda_core/cuda/core/_launch_config.pyx index 7d6a1ab2b9..00c71ad903 100644 --- a/cuda_core/cuda/core/experimental/_launch_config.pyx +++ b/cuda_core/cuda/core/_launch_config.pyx @@ -2,8 +2,8 @@ # # SPDX-License-Identifier: Apache-2.0 -from cuda.core.experimental._device import Device -from cuda.core.experimental._utils.cuda_utils import ( +from cuda.core._device import Device +from cuda.core._utils.cuda_utils import ( CUDAError, cast_to_3_tuple, driver, diff --git a/cuda_core/cuda/core/experimental/_launcher.pyx b/cuda_core/cuda/core/_launcher.pyx similarity index 88% rename from cuda_core/cuda/core/experimental/_launcher.pyx 
rename to cuda_core/cuda/core/_launcher.pyx index 2cba15cbf4..09900a668c 100644 --- a/cuda_core/cuda/core/experimental/_launcher.pyx +++ b/cuda_core/cuda/core/_launcher.pyx @@ -1,15 +1,15 @@ # SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # SPDX-License-Identifier: Apache-2.0 -from cuda.core.experimental._launch_config cimport LaunchConfig, _to_native_launch_config -from cuda.core.experimental._stream cimport Stream_accept +from cuda.core._launch_config cimport LaunchConfig, _to_native_launch_config +from cuda.core._stream cimport Stream_accept -from cuda.core.experimental._kernel_arg_handler import ParamHolder -from cuda.core.experimental._module import Kernel -from cuda.core.experimental._stream import Stream -from cuda.core.experimental._utils.clear_error_support import assert_type -from cuda.core.experimental._utils.cuda_utils import ( +from cuda.core._kernel_arg_handler import ParamHolder +from cuda.core._module import Kernel +from cuda.core._stream import Stream +from cuda.core._utils.clear_error_support import assert_type +from cuda.core._utils.cuda_utils import ( _reduce_3_tuple, check_or_create_options, driver, diff --git a/cuda_core/cuda/core/experimental/_layout.pxd b/cuda_core/cuda/core/_layout.pxd similarity index 99% rename from cuda_core/cuda/core/experimental/_layout.pxd rename to cuda_core/cuda/core/_layout.pxd index 301cdaaa65..d2306117e0 100644 --- a/cuda_core/cuda/core/experimental/_layout.pxd +++ b/cuda_core/cuda/core/_layout.pxd @@ -18,7 +18,7 @@ ctypedef uint32_t property_mask_t ctypedef vector.vector[stride_t] extents_strides_t ctypedef vector.vector[axis_t] axis_vec_t -from cuda.core.experimental._utils cimport cuda_utils +from cuda.core._utils cimport cuda_utils ctypedef fused integer_t: diff --git a/cuda_core/cuda/core/experimental/_layout.pyx b/cuda_core/cuda/core/_layout.pyx similarity index 100% rename from cuda_core/cuda/core/experimental/_layout.pyx rename to 
cuda_core/cuda/core/_layout.pyx diff --git a/cuda_core/cuda/core/experimental/_linker.py b/cuda_core/cuda/core/_linker.py similarity index 98% rename from cuda_core/cuda/core/experimental/_linker.py rename to cuda_core/cuda/core/_linker.py index 5c54a88c8c..2d2e47bb3e 100644 --- a/cuda_core/cuda/core/experimental/_linker.py +++ b/cuda_core/cuda/core/_linker.py @@ -15,10 +15,10 @@ if TYPE_CHECKING: import cuda.bindings -from cuda.core.experimental._device import Device -from cuda.core.experimental._module import ObjectCode -from cuda.core.experimental._utils.clear_error_support import assert_type -from cuda.core.experimental._utils.cuda_utils import check_or_create_options, driver, handle_return, is_sequence +from cuda.core._device import Device +from cuda.core._module import ObjectCode +from cuda.core._utils.clear_error_support import assert_type +from cuda.core._utils.cuda_utils import check_or_create_options, driver, handle_return, is_sequence # TODO: revisit this treatment for py313t builds _driver = None # populated if nvJitLink cannot be used @@ -355,7 +355,7 @@ def _exception_manager(self): class Linker: """Represent a linking machinery to link one or multiple object codes into - :obj:`~cuda.core.experimental._module.ObjectCode` with the specified options. + :obj:`~cuda.core._module.ObjectCode` with the specified options. This object provides a unified interface to multiple underlying linker libraries (such as nvJitLink or cuLink* from CUDA driver). 
diff --git a/cuda_core/cuda/core/experimental/_memory/__init__.py b/cuda_core/cuda/core/_memory/__init__.py similarity index 100% rename from cuda_core/cuda/core/experimental/_memory/__init__.py rename to cuda_core/cuda/core/_memory/__init__.py diff --git a/cuda_core/cuda/core/experimental/_memory/_buffer.pxd b/cuda_core/cuda/core/_memory/_buffer.pxd similarity index 92% rename from cuda_core/cuda/core/experimental/_memory/_buffer.pxd rename to cuda_core/cuda/core/_memory/_buffer.pxd index b581dcd293..730e448f63 100644 --- a/cuda_core/cuda/core/experimental/_memory/_buffer.pxd +++ b/cuda_core/cuda/core/_memory/_buffer.pxd @@ -4,7 +4,7 @@ from libc.stdint cimport uintptr_t -from cuda.core.experimental._stream cimport Stream +from cuda.core._stream cimport Stream cdef struct _MemAttrs: diff --git a/cuda_core/cuda/core/experimental/_memory/_buffer.pyx b/cuda_core/cuda/core/_memory/_buffer.pyx similarity index 97% rename from cuda_core/cuda/core/experimental/_memory/_buffer.pyx rename to cuda_core/cuda/core/_memory/_buffer.pyx index b26471ed0e..45c30aa933 100644 --- a/cuda_core/cuda/core/experimental/_memory/_buffer.pyx +++ b/cuda_core/cuda/core/_memory/_buffer.pyx @@ -8,18 +8,18 @@ cimport cython from libc.stdint cimport uintptr_t, int64_t, uint64_t from cuda.bindings cimport cydriver -from cuda.core.experimental._memory._device_memory_resource cimport DeviceMemoryResource -from cuda.core.experimental._memory._ipc cimport IPCBufferDescriptor, IPCDataForBuffer -from cuda.core.experimental._memory cimport _ipc -from cuda.core.experimental._stream cimport Stream_accept, Stream -from cuda.core.experimental._utils.cuda_utils cimport HANDLE_RETURN +from cuda.core._memory._device_memory_resource cimport DeviceMemoryResource +from cuda.core._memory._ipc cimport IPCBufferDescriptor, IPCDataForBuffer +from cuda.core._memory cimport _ipc +from cuda.core._stream cimport Stream_accept, Stream +from cuda.core._utils.cuda_utils cimport HANDLE_RETURN import abc from typing import 
TypeVar, Union -from cuda.core.experimental._dlpack import DLDeviceType, make_py_capsule -from cuda.core.experimental._utils.cuda_utils import driver -from cuda.core.experimental._device import Device +from cuda.core._dlpack import DLDeviceType, make_py_capsule +from cuda.core._utils.cuda_utils import driver +from cuda.core._device import Device __all__ = ['Buffer', 'MemoryResource'] diff --git a/cuda_core/cuda/core/experimental/_memory/_device_memory_resource.pxd b/cuda_core/cuda/core/_memory/_device_memory_resource.pxd similarity index 82% rename from cuda_core/cuda/core/experimental/_memory/_device_memory_resource.pxd rename to cuda_core/cuda/core/_memory/_device_memory_resource.pxd index 823a270b27..4c7482a5fe 100644 --- a/cuda_core/cuda/core/experimental/_memory/_device_memory_resource.pxd +++ b/cuda_core/cuda/core/_memory/_device_memory_resource.pxd @@ -3,8 +3,8 @@ # SPDX-License-Identifier: Apache-2.0 from cuda.bindings cimport cydriver -from cuda.core.experimental._memory._buffer cimport MemoryResource -from cuda.core.experimental._memory._ipc cimport IPCDataForMR +from cuda.core._memory._buffer cimport MemoryResource +from cuda.core._memory._ipc cimport IPCDataForMR cdef class DeviceMemoryResource(MemoryResource): diff --git a/cuda_core/cuda/core/experimental/_memory/_device_memory_resource.pyx b/cuda_core/cuda/core/_memory/_device_memory_resource.pyx similarity index 98% rename from cuda_core/cuda/core/experimental/_memory/_device_memory_resource.pyx rename to cuda_core/cuda/core/_memory/_device_memory_resource.pyx index ac18079a62..bc286dd62e 100644 --- a/cuda_core/cuda/core/experimental/_memory/_device_memory_resource.pyx +++ b/cuda_core/cuda/core/_memory/_device_memory_resource.pyx @@ -10,11 +10,11 @@ from libc.stdlib cimport malloc, free from libc.string cimport memset from cuda.bindings cimport cydriver -from cuda.core.experimental._memory._buffer cimport Buffer, MemoryResource -from cuda.core.experimental._memory cimport _ipc -from 
cuda.core.experimental._memory._ipc cimport IPCAllocationHandle, IPCDataForMR -from cuda.core.experimental._stream cimport default_stream, Stream_accept, Stream -from cuda.core.experimental._utils.cuda_utils cimport ( +from cuda.core._memory._buffer cimport Buffer, MemoryResource +from cuda.core._memory cimport _ipc +from cuda.core._memory._ipc cimport IPCAllocationHandle, IPCDataForMR +from cuda.core._stream cimport default_stream, Stream_accept, Stream +from cuda.core._utils.cuda_utils cimport ( check_or_create_options, HANDLE_RETURN, ) @@ -25,10 +25,10 @@ import platform # no-cython-lint import uuid import weakref -from cuda.core.experimental._utils.cuda_utils import driver +from cuda.core._utils.cuda_utils import driver if TYPE_CHECKING: - from cuda.core.experimental._memory.buffer import DevicePointerT + from cuda.core._memory.buffer import DevicePointerT from .._device import Device __all__ = ['DeviceMemoryResource', 'DeviceMemoryResourceOptions'] diff --git a/cuda_core/cuda/core/experimental/_memory/_graph_memory_resource.pxd b/cuda_core/cuda/core/_memory/_graph_memory_resource.pxd similarity index 77% rename from cuda_core/cuda/core/experimental/_memory/_graph_memory_resource.pxd rename to cuda_core/cuda/core/_memory/_graph_memory_resource.pxd index f9c7798e76..2f6c35d72e 100644 --- a/cuda_core/cuda/core/experimental/_memory/_graph_memory_resource.pxd +++ b/cuda_core/cuda/core/_memory/_graph_memory_resource.pxd @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: Apache-2.0 -from cuda.core.experimental._memory._buffer cimport MemoryResource +from cuda.core._memory._buffer cimport MemoryResource cdef class cyGraphMemoryResource(MemoryResource): diff --git a/cuda_core/cuda/core/experimental/_memory/_graph_memory_resource.pyx b/cuda_core/cuda/core/_memory/_graph_memory_resource.pyx similarity index 96% rename from cuda_core/cuda/core/experimental/_memory/_graph_memory_resource.pyx rename to cuda_core/cuda/core/_memory/_graph_memory_resource.pyx index 
c65354b612..bda075c201 100644 --- a/cuda_core/cuda/core/experimental/_memory/_graph_memory_resource.pyx +++ b/cuda_core/cuda/core/_memory/_graph_memory_resource.pyx @@ -7,15 +7,15 @@ from __future__ import annotations from libc.stdint cimport intptr_t from cuda.bindings cimport cydriver -from cuda.core.experimental._memory._buffer cimport Buffer, MemoryResource -from cuda.core.experimental._stream cimport default_stream, Stream_accept, Stream -from cuda.core.experimental._utils.cuda_utils cimport HANDLE_RETURN +from cuda.core._memory._buffer cimport Buffer, MemoryResource +from cuda.core._stream cimport default_stream, Stream_accept, Stream +from cuda.core._utils.cuda_utils cimport HANDLE_RETURN from functools import cache from typing import TYPE_CHECKING if TYPE_CHECKING: - from cuda.core.experimental._memory.buffer import DevicePointerT + from cuda.core._memory.buffer import DevicePointerT __all__ = ['GraphMemoryResource'] diff --git a/cuda_core/cuda/core/experimental/_memory/_ipc.pxd b/cuda_core/cuda/core/_memory/_ipc.pxd similarity index 92% rename from cuda_core/cuda/core/experimental/_memory/_ipc.pxd rename to cuda_core/cuda/core/_memory/_ipc.pxd index 60d96a3b33..c8d4a8a9ae 100644 --- a/cuda_core/cuda/core/experimental/_memory/_ipc.pxd +++ b/cuda_core/cuda/core/_memory/_ipc.pxd @@ -3,8 +3,8 @@ # SPDX-License-Identifier: Apache-2.0 from cuda.bindings cimport cydriver -from cuda.core.experimental._memory._buffer cimport Buffer -from cuda.core.experimental._memory._device_memory_resource cimport DeviceMemoryResource +from cuda.core._memory._buffer cimport Buffer +from cuda.core._memory._device_memory_resource cimport DeviceMemoryResource # Holds DeviceMemoryResource objects imported by this process. 
This enables diff --git a/cuda_core/cuda/core/experimental/_memory/_ipc.pyx b/cuda_core/cuda/core/_memory/_ipc.pyx similarity index 96% rename from cuda_core/cuda/core/experimental/_memory/_ipc.pyx rename to cuda_core/cuda/core/_memory/_ipc.pyx index c9931855cf..81ae52f6b0 100644 --- a/cuda_core/cuda/core/experimental/_memory/_ipc.pyx +++ b/cuda_core/cuda/core/_memory/_ipc.pyx @@ -7,10 +7,10 @@ from libc.stdint cimport uintptr_t from libc.string cimport memcpy from cuda.bindings cimport cydriver -from cuda.core.experimental._memory._buffer cimport Buffer -from cuda.core.experimental._stream cimport default_stream -from cuda.core.experimental._utils.cuda_utils cimport HANDLE_RETURN -from cuda.core.experimental._utils.cuda_utils import check_multiprocessing_start_method +from cuda.core._memory._buffer cimport Buffer +from cuda.core._stream cimport default_stream +from cuda.core._utils.cuda_utils cimport HANDLE_RETURN +from cuda.core._utils.cuda_utils import check_multiprocessing_start_method import multiprocessing import os diff --git a/cuda_core/cuda/core/experimental/_memory/_legacy.py b/cuda_core/cuda/core/_memory/_legacy.py similarity index 89% rename from cuda_core/cuda/core/experimental/_memory/_legacy.py rename to cuda_core/cuda/core/_memory/_legacy.py index 09ea0e15d2..317494ea9e 100644 --- a/cuda_core/cuda/core/experimental/_memory/_legacy.py +++ b/cuda_core/cuda/core/_memory/_legacy.py @@ -6,16 +6,16 @@ from typing import TYPE_CHECKING -from cuda.core.experimental._memory._buffer import Buffer, MemoryResource -from cuda.core.experimental._utils.cuda_utils import ( +from cuda.core._memory._buffer import Buffer, MemoryResource +from cuda.core._utils.cuda_utils import ( _check_driver_error as raise_if_driver_error, ) -from cuda.core.experimental._utils.cuda_utils import ( +from cuda.core._utils.cuda_utils import ( driver, ) if TYPE_CHECKING: - from cuda.core.experimental._memory.buffer import DevicePointerT + from cuda.core._memory.buffer import DevicePointerT 
__all__ = ["LegacyPinnedMemoryResource", "_SynchronousMemoryResource"] @@ -43,7 +43,7 @@ def allocate(self, size, stream=None) -> Buffer: The allocated buffer object, which is accessible on both host and device. """ if stream is None: - from cuda.core.experimental._stream import default_stream + from cuda.core._stream import default_stream stream = default_stream() err, ptr = driver.cuMemAllocHost(size) @@ -93,7 +93,7 @@ def __init__(self, device_id): def allocate(self, size, stream=None) -> Buffer: if stream is None: - from cuda.core.experimental._stream import default_stream + from cuda.core._stream import default_stream stream = default_stream() err, ptr = driver.cuMemAlloc(size) diff --git a/cuda_core/cuda/core/experimental/_memory/_virtual_memory_resource.py b/cuda_core/cuda/core/_memory/_virtual_memory_resource.py similarity index 98% rename from cuda_core/cuda/core/experimental/_memory/_virtual_memory_resource.py rename to cuda_core/cuda/core/_memory/_virtual_memory_resource.py index 2806e2d0d5..43da00744a 100644 --- a/cuda_core/cuda/core/experimental/_memory/_virtual_memory_resource.py +++ b/cuda_core/cuda/core/_memory/_virtual_memory_resource.py @@ -7,20 +7,20 @@ from dataclasses import dataclass, field from typing import TYPE_CHECKING, Iterable, Literal, Union -from cuda.core.experimental._device import Device -from cuda.core.experimental._memory._buffer import Buffer, MemoryResource -from cuda.core.experimental._utils.cuda_utils import ( +from cuda.core._device import Device +from cuda.core._memory._buffer import Buffer, MemoryResource +from cuda.core._utils.cuda_utils import ( Transaction, check_or_create_options, driver, get_binding_version, ) -from cuda.core.experimental._utils.cuda_utils import ( +from cuda.core._utils.cuda_utils import ( _check_driver_error as raise_if_driver_error, ) if TYPE_CHECKING: - from cuda.core.experimental._stream import Stream + from cuda.core._stream import Stream __all__ = ["VirtualMemoryResourceOptions", 
"VirtualMemoryResource"] diff --git a/cuda_core/cuda/core/experimental/_memoryview.pyx b/cuda_core/cuda/core/_memoryview.pyx similarity index 99% rename from cuda_core/cuda/core/experimental/_memoryview.pyx rename to cuda_core/cuda/core/_memoryview.pyx index dc972d912a..6c329dc4ff 100644 --- a/cuda_core/cuda/core/experimental/_memoryview.pyx +++ b/cuda_core/cuda/core/_memoryview.pyx @@ -4,8 +4,8 @@ from ._dlpack cimport * from libc.stdint cimport intptr_t -from cuda.core.experimental._layout cimport StridedLayout -from cuda.core.experimental._stream import Stream +from cuda.core._layout cimport StridedLayout +from cuda.core._stream import Stream import functools import warnings @@ -13,10 +13,10 @@ from typing import Optional import numpy -from cuda.core.experimental._utils.cuda_utils import handle_return, driver +from cuda.core._utils.cuda_utils import handle_return, driver -from cuda.core.experimental._memory import Buffer +from cuda.core._memory import Buffer # TODO(leofang): support NumPy structured dtypes diff --git a/cuda_core/cuda/core/experimental/_module.py b/cuda_core/cuda/core/_module.py similarity index 98% rename from cuda_core/cuda/core/experimental/_module.py rename to cuda_core/cuda/core/_module.py index 9af722465b..fbea314406 100644 --- a/cuda_core/cuda/core/experimental/_module.py +++ b/cuda_core/cuda/core/_module.py @@ -7,15 +7,15 @@ from typing import Union from warnings import warn -from cuda.core.experimental._device import Device -from cuda.core.experimental._launch_config import LaunchConfig, _to_native_launch_config -from cuda.core.experimental._stream import Stream -from cuda.core.experimental._utils.clear_error_support import ( +from cuda.core._device import Device +from cuda.core._launch_config import LaunchConfig, _to_native_launch_config +from cuda.core._stream import Stream +from cuda.core._utils.clear_error_support import ( assert_type, assert_type_str_or_bytes_like, raise_code_path_meant_to_be_unreachable, ) -from 
cuda.core.experimental._utils.cuda_utils import driver, get_binding_version, handle_return, precondition +from cuda.core._utils.cuda_utils import driver, get_binding_version, handle_return, precondition _backend = { "old": { @@ -453,7 +453,7 @@ class ObjectCode: This class has no default constructor. If you already have a cubin that you would like to load, use the :meth:`from_cubin` alternative constructor. Constructing directly from all other possible code types should be avoided in favor of compilation through - :class:`~cuda.core.experimental.Program` + :class:`~cuda.core.Program` Note ---- diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/_program.py similarity index 98% rename from cuda_core/cuda/core/experimental/_program.py rename to cuda_core/cuda/core/_program.py index cdef7c3be6..6344991e30 100644 --- a/cuda_core/cuda/core/experimental/_program.py +++ b/cuda_core/cuda/core/_program.py @@ -13,11 +13,11 @@ if TYPE_CHECKING: import cuda.bindings -from cuda.core.experimental._device import Device -from cuda.core.experimental._linker import Linker, LinkerHandleT, LinkerOptions -from cuda.core.experimental._module import ObjectCode -from cuda.core.experimental._utils.clear_error_support import assert_type -from cuda.core.experimental._utils.cuda_utils import ( +from cuda.core._device import Device +from cuda.core._linker import Linker, LinkerHandleT, LinkerOptions +from cuda.core._module import ObjectCode +from cuda.core._utils.clear_error_support import assert_type +from cuda.core._utils.cuda_utils import ( _handle_boolean_option, check_or_create_options, driver, diff --git a/cuda_core/cuda/core/experimental/_stream.pxd b/cuda_core/cuda/core/_stream.pxd similarity index 100% rename from cuda_core/cuda/core/experimental/_stream.pxd rename to cuda_core/cuda/core/_stream.pxd diff --git a/cuda_core/cuda/core/experimental/_stream.pyx b/cuda_core/cuda/core/_stream.pyx similarity index 97% rename from 
cuda_core/cuda/core/experimental/_stream.pyx rename to cuda_core/cuda/core/_stream.pyx index 87ec4a691a..b724f9aee3 100644 --- a/cuda_core/cuda/core/experimental/_stream.pyx +++ b/cuda_core/cuda/core/_stream.pyx @@ -9,8 +9,8 @@ from libc.stdlib cimport strtol, getenv from cuda.bindings cimport cydriver -from cuda.core.experimental._event cimport Event as cyEvent -from cuda.core.experimental._utils.cuda_utils cimport ( +from cuda.core._event cimport Event as cyEvent +from cuda.core._utils.cuda_utils cimport ( check_or_create_options, CU_CONTEXT_INVALID, get_device_from_ctx, @@ -24,11 +24,11 @@ from typing import TYPE_CHECKING, Optional, Protocol, Union if TYPE_CHECKING: import cuda.bindings - from cuda.core.experimental._device import Device -from cuda.core.experimental._context import Context -from cuda.core.experimental._event import Event, EventOptions -from cuda.core.experimental._graph import GraphBuilder -from cuda.core.experimental._utils.cuda_utils import ( + from cuda.core._device import Device +from cuda.core._context import Context +from cuda.core._event import Event, EventOptions +from cuda.core._graph import GraphBuilder +from cuda.core._utils.cuda_utils import ( driver, ) @@ -311,7 +311,7 @@ cdef class Stream: context is set current after a stream is created. 
""" - from cuda.core.experimental._device import Device # avoid circular import + from cuda.core._device import Device # avoid circular import self._get_device_and_context() return Device((self._device_id)) diff --git a/cuda_core/cuda/core/experimental/_system.py b/cuda_core/cuda/core/_system.py similarity index 92% rename from cuda_core/cuda/core/experimental/_system.py rename to cuda_core/cuda/core/_system.py index cbbc1a83cb..a8338114b2 100644 --- a/cuda_core/cuda/core/experimental/_system.py +++ b/cuda_core/cuda/core/_system.py @@ -2,8 +2,8 @@ # # SPDX-License-Identifier: Apache-2.0 -from cuda.core.experimental._device import Device -from cuda.core.experimental._utils.cuda_utils import driver, handle_return, runtime +from cuda.core._device import Device +from cuda.core._utils.cuda_utils import driver, handle_return, runtime class System: diff --git a/cuda_core/cuda/core/experimental/_utils/__init__.pxd b/cuda_core/cuda/core/_utils/__init__.pxd similarity index 100% rename from cuda_core/cuda/core/experimental/_utils/__init__.pxd rename to cuda_core/cuda/core/_utils/__init__.pxd diff --git a/cuda_core/cuda/core/experimental/_utils/__init__.py b/cuda_core/cuda/core/_utils/__init__.py similarity index 100% rename from cuda_core/cuda/core/experimental/_utils/__init__.py rename to cuda_core/cuda/core/_utils/__init__.py diff --git a/cuda_core/cuda/core/experimental/_utils/clear_error_support.py b/cuda_core/cuda/core/_utils/clear_error_support.py similarity index 100% rename from cuda_core/cuda/core/experimental/_utils/clear_error_support.py rename to cuda_core/cuda/core/_utils/clear_error_support.py diff --git a/cuda_core/cuda/core/experimental/_utils/cuda_utils.pxd b/cuda_core/cuda/core/_utils/cuda_utils.pxd similarity index 100% rename from cuda_core/cuda/core/experimental/_utils/cuda_utils.pxd rename to cuda_core/cuda/core/_utils/cuda_utils.pxd diff --git a/cuda_core/cuda/core/experimental/_utils/cuda_utils.pyx b/cuda_core/cuda/core/_utils/cuda_utils.pyx 
similarity index 98% rename from cuda_core/cuda/core/experimental/_utils/cuda_utils.pyx rename to cuda_core/cuda/core/_utils/cuda_utils.pyx index 4489871747..0c3f6521a4 100644 --- a/cuda_core/cuda/core/experimental/_utils/cuda_utils.pyx +++ b/cuda_core/cuda/core/_utils/cuda_utils.pyx @@ -20,8 +20,8 @@ except ImportError: from cuda import cudart as runtime from cuda import nvrtc -from cuda.core.experimental._utils.driver_cu_result_explanations import DRIVER_CU_RESULT_EXPLANATIONS -from cuda.core.experimental._utils.runtime_cuda_error_explanations import RUNTIME_CUDA_ERROR_EXPLANATIONS +from cuda.core._utils.driver_cu_result_explanations import DRIVER_CU_RESULT_EXPLANATIONS +from cuda.core._utils.runtime_cuda_error_explanations import RUNTIME_CUDA_ERROR_EXPLANATIONS class CUDAError(Exception): diff --git a/cuda_core/cuda/core/experimental/_utils/driver_cu_result_explanations.py b/cuda_core/cuda/core/_utils/driver_cu_result_explanations.py similarity index 100% rename from cuda_core/cuda/core/experimental/_utils/driver_cu_result_explanations.py rename to cuda_core/cuda/core/_utils/driver_cu_result_explanations.py diff --git a/cuda_core/cuda/core/experimental/_utils/runtime_cuda_error_explanations.py b/cuda_core/cuda/core/_utils/runtime_cuda_error_explanations.py similarity index 100% rename from cuda_core/cuda/core/experimental/_utils/runtime_cuda_error_explanations.py rename to cuda_core/cuda/core/_utils/runtime_cuda_error_explanations.py diff --git a/cuda_core/cuda/core/experimental/__init__.py b/cuda_core/cuda/core/experimental/__init__.py index 826ea70b97..f937a3795a 100644 --- a/cuda_core/cuda/core/experimental/__init__.py +++ b/cuda_core/cuda/core/experimental/__init__.py @@ -2,43 +2,59 @@ # # SPDX-License-Identifier: Apache-2.0 -try: - from cuda import bindings -except ImportError: - raise ImportError("cuda.bindings 12.x or 13.x must be installed") from None -else: - cuda_major, cuda_minor = bindings.__version__.split(".")[:2] - if cuda_major not in ("12", 
"13"): - raise ImportError("cuda.bindings 12.x or 13.x must be installed") - -import importlib - -subdir = f"cu{cuda_major}" -try: - versioned_mod = importlib.import_module(f".{subdir}", __package__) - # Import all symbols from the module - globals().update(versioned_mod.__dict__) -except ImportError: - # This is not a wheel build, but a conda or local build, do nothing - pass -else: - del versioned_mod -finally: - del bindings, importlib, subdir, cuda_major, cuda_minor - -from cuda.core.experimental import utils # noqa: E402 -from cuda.core.experimental._device import Device # noqa: E402 -from cuda.core.experimental._event import Event, EventOptions # noqa: E402 -from cuda.core.experimental._graph import ( # noqa: E402 +""" +Backward compatibility stubs for cuda.core.experimental namespace. + +This module provides forwarding stubs that import from the new cuda.core.* +locations and emit deprecation warnings. Users should migrate to importing +directly from cuda.core instead of cuda.core.experimental. + +The experimental namespace will be removed in a future release. +""" + +import warnings + + +def _warn_deprecated(): + """Emit a deprecation warning for using the experimental namespace. + + Note: This warning is only when the experimental module is first imported. + Subsequent accesses to attributes (like utils, Device, etc.) do not trigger + additional warnings since they are already set in the module namespace. + Only accessing submodules via __getattr__ (e.g., _device, _utils) will trigger + additional warnings. + """ + warnings.warn( + "The cuda.core.experimental namespace is deprecated. " + "Please import directly from cuda.core instead. " + "For example, use 'from cuda.core import Device' instead of " + "'from cuda.core.experimental import Device'. 
" + "The experimental namespace will be removed in a future release.", + DeprecationWarning, + stacklevel=3, + ) + + +# Import from new locations and re-export +_warn_deprecated() + +from cuda.core import utils # noqa: E402 + +# Make utils accessible as a submodule for backward compatibility +__import__("sys").modules[__spec__.name + ".utils"] = utils +from cuda.core._device import Device # noqa: E402 +from cuda.core._event import Event, EventOptions # noqa: E402 +from cuda.core._graph import ( # noqa: E402 Graph, GraphBuilder, GraphCompleteOptions, GraphDebugPrintOptions, ) -from cuda.core.experimental._launch_config import LaunchConfig # noqa: E402 -from cuda.core.experimental._launcher import launch # noqa: E402 -from cuda.core.experimental._linker import Linker, LinkerOptions # noqa: E402 -from cuda.core.experimental._memory import ( # noqa: E402 +from cuda.core._launch_config import LaunchConfig # noqa: E402 +from cuda.core._launcher import launch # noqa: E402 +from cuda.core._layout import StridedLayout # noqa: E402 +from cuda.core._linker import Linker, LinkerOptions # noqa: E402 +from cuda.core._memory import ( # noqa: E402 Buffer, DeviceMemoryResource, DeviceMemoryResourceOptions, @@ -48,11 +64,47 @@ VirtualMemoryResource, VirtualMemoryResourceOptions, ) -from cuda.core.experimental._module import Kernel, ObjectCode # noqa: E402 -from cuda.core.experimental._program import Program, ProgramOptions # noqa: E402 -from cuda.core.experimental._stream import Stream, StreamOptions # noqa: E402 -from cuda.core.experimental._system import System # noqa: E402 +from cuda.core._module import Kernel, ObjectCode # noqa: E402 +from cuda.core._program import Program, ProgramOptions # noqa: E402 +from cuda.core._stream import Stream, StreamOptions # noqa: E402 +from cuda.core._system import System # noqa: E402 system = System() __import__("sys").modules[__spec__.name + ".system"] = system del System + + +# Also create forwarding stubs for submodules +# These will be 
imported lazily when accessed +def __getattr__(name): + """Forward attribute access to the new location with deprecation warning.""" + if name in ( + "_context", + "_device", + "_dlpack", + "_event", + "_graph", + "_kernel_arg_handler", + "_launch_config", + "_launcher", + "_layout", + "_linker", + "_memory", + "_memoryview", + "_module", + "_program", + "_stream", + "_system", + "_utils", + ): + _warn_deprecated() + # Import the submodule from the new location + import importlib + + new_name = name.lstrip("_") + try: + return importlib.import_module(f"cuda.core.{new_name}") + except ImportError: + # Fallback to underscore-prefixed name + return importlib.import_module(f"cuda.core.{name}") + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") diff --git a/cuda_core/cuda/core/experimental/include/dlpack.h b/cuda_core/cuda/core/include/dlpack.h similarity index 100% rename from cuda_core/cuda/core/experimental/include/dlpack.h rename to cuda_core/cuda/core/include/dlpack.h diff --git a/cuda_core/cuda/core/experimental/include/layout.hpp b/cuda_core/cuda/core/include/layout.hpp similarity index 100% rename from cuda_core/cuda/core/experimental/include/layout.hpp rename to cuda_core/cuda/core/include/layout.hpp diff --git a/cuda_core/cuda/core/experimental/include/utility.hpp b/cuda_core/cuda/core/include/utility.hpp similarity index 100% rename from cuda_core/cuda/core/experimental/include/utility.hpp rename to cuda_core/cuda/core/include/utility.hpp diff --git a/cuda_core/cuda/core/experimental/utils.py b/cuda_core/cuda/core/utils.py similarity index 66% rename from cuda_core/cuda/core/experimental/utils.py rename to cuda_core/cuda/core/utils.py index 3227f1eae1..177adaef2f 100644 --- a/cuda_core/cuda/core/experimental/utils.py +++ b/cuda_core/cuda/core/utils.py @@ -2,8 +2,8 @@ # # SPDX-License-Identifier: Apache-2.0 -from cuda.core.experimental._layout import StridedLayout # noqa: F401 -from cuda.core.experimental._memoryview import ( +from 
cuda.core._layout import StridedLayout # noqa: F401 +from cuda.core._memoryview import ( StridedMemoryView, # noqa: F401 args_viewable_as_strided_memory, # noqa: F401 ) diff --git a/cuda_core/docs/source/conf.py b/cuda_core/docs/source/conf.py index bab2a2b942..47147965ff 100644 --- a/cuda_core/docs/source/conf.py +++ b/cuda_core/docs/source/conf.py @@ -129,8 +129,8 @@ def skip_member(app, what, name, obj, skip, options): # are assumed to be properties (because cythonized # properties are not recognized as such by autodoc) excluded_dirs = [ - "cuda.core.experimental._layout", - "cuda.core.experimental._memoryview", + "cuda.core._layout", + "cuda.core._memoryview", ] if what == "attribute" and getattr(obj, "__doc__", None) is None: obj_module = getattr(getattr(obj, "__objclass__", None), "__module__", None) diff --git a/cuda_core/examples/cuda_graphs.py b/cuda_core/examples/cuda_graphs.py index 2d2d9833fb..9cc759b500 100644 --- a/cuda_core/examples/cuda_graphs.py +++ b/cuda_core/examples/cuda_graphs.py @@ -13,7 +13,7 @@ import time import cupy as cp -from cuda.core.experimental import Device, LaunchConfig, Program, ProgramOptions, launch +from cuda.core import Device, LaunchConfig, Program, ProgramOptions, launch def main(): diff --git a/cuda_core/examples/jit_lto_fractal.py b/cuda_core/examples/jit_lto_fractal.py index d1553f6b67..b0040708b6 100644 --- a/cuda_core/examples/jit_lto_fractal.py +++ b/cuda_core/examples/jit_lto_fractal.py @@ -25,7 +25,7 @@ import sys import cupy as cp -from cuda.core.experimental import Device, LaunchConfig, Linker, LinkerOptions, Program, ProgramOptions, launch +from cuda.core import Device, LaunchConfig, Linker, LinkerOptions, Program, ProgramOptions, launch # ################################################################################ diff --git a/cuda_core/examples/memory_ops.py b/cuda_core/examples/memory_ops.py index c4abd06e2c..123b1f6a11 100644 --- a/cuda_core/examples/memory_ops.py +++ b/cuda_core/examples/memory_ops.py @@ 
-16,7 +16,7 @@ import cupy as cp import numpy as np -from cuda.core.experimental import ( +from cuda.core import ( Device, LaunchConfig, LegacyPinnedMemoryResource, diff --git a/cuda_core/examples/pytorch_example.py b/cuda_core/examples/pytorch_example.py index ea067302b9..433d63c9eb 100644 --- a/cuda_core/examples/pytorch_example.py +++ b/cuda_core/examples/pytorch_example.py @@ -15,7 +15,7 @@ import sys import torch -from cuda.core.experimental import Device, LaunchConfig, Program, ProgramOptions, launch +from cuda.core import Device, LaunchConfig, Program, ProgramOptions, launch # SAXPY kernel - passing a as a pointer to avoid any type issues code = """ diff --git a/cuda_core/examples/saxpy.py b/cuda_core/examples/saxpy.py index f38caef392..aa0d77eff9 100644 --- a/cuda_core/examples/saxpy.py +++ b/cuda_core/examples/saxpy.py @@ -14,7 +14,7 @@ import sys import cupy as cp -from cuda.core.experimental import Device, LaunchConfig, Program, ProgramOptions, launch +from cuda.core import Device, LaunchConfig, Program, ProgramOptions, launch # compute out = a * x + y code = """ diff --git a/cuda_core/examples/show_device_properties.py b/cuda_core/examples/show_device_properties.py index 8fcecd2d4c..41609de8e5 100644 --- a/cuda_core/examples/show_device_properties.py +++ b/cuda_core/examples/show_device_properties.py @@ -11,7 +11,7 @@ import sys -from cuda.core.experimental import Device, system +from cuda.core import Device, system # Convert boolean to YES or NO string diff --git a/cuda_core/examples/simple_multi_gpu_example.py b/cuda_core/examples/simple_multi_gpu_example.py index d91ab2c856..c53c1b518a 100644 --- a/cuda_core/examples/simple_multi_gpu_example.py +++ b/cuda_core/examples/simple_multi_gpu_example.py @@ -12,7 +12,7 @@ import sys import cupy as cp -from cuda.core.experimental import Device, LaunchConfig, Program, launch, system +from cuda.core import Device, LaunchConfig, Program, launch, system if system.num_devices < 2: print("this example requires at 
least 2 GPUs", file=sys.stderr) diff --git a/cuda_core/examples/strided_memory_view_cpu.py b/cuda_core/examples/strided_memory_view_cpu.py index de6007fd26..a20377cc76 100644 --- a/cuda_core/examples/strided_memory_view_cpu.py +++ b/cuda_core/examples/strided_memory_view_cpu.py @@ -26,7 +26,7 @@ print("cffi is not installed, the CPU example will be skipped", file=sys.stderr) FFI = None import numpy as np -from cuda.core.experimental.utils import StridedMemoryView, args_viewable_as_strided_memory +from cuda.core.utils import StridedMemoryView, args_viewable_as_strided_memory # ################################################################################ # diff --git a/cuda_core/examples/strided_memory_view_gpu.py b/cuda_core/examples/strided_memory_view_gpu.py index 3e456776a8..e91ddc25cc 100644 --- a/cuda_core/examples/strided_memory_view_gpu.py +++ b/cuda_core/examples/strided_memory_view_gpu.py @@ -23,8 +23,8 @@ print("cupy is not installed, the GPU example will be skipped", file=sys.stderr) cp = None import numpy as np -from cuda.core.experimental import Device, LaunchConfig, Program, ProgramOptions, launch -from cuda.core.experimental.utils import StridedMemoryView, args_viewable_as_strided_memory +from cuda.core import Device, LaunchConfig, Program, ProgramOptions, launch +from cuda.core.utils import StridedMemoryView, args_viewable_as_strided_memory # ################################################################################ # diff --git a/cuda_core/examples/thread_block_cluster.py b/cuda_core/examples/thread_block_cluster.py index e14158f8bd..f1ea8b8579 100644 --- a/cuda_core/examples/thread_block_cluster.py +++ b/cuda_core/examples/thread_block_cluster.py @@ -13,7 +13,7 @@ import sys import numpy as np -from cuda.core.experimental import ( +from cuda.core import ( Device, LaunchConfig, LegacyPinnedMemoryResource, diff --git a/cuda_core/examples/vector_add.py b/cuda_core/examples/vector_add.py index 2851303c7e..d31ab77208 100644 --- 
a/cuda_core/examples/vector_add.py +++ b/cuda_core/examples/vector_add.py @@ -10,7 +10,7 @@ # ################################################################################ import cupy as cp -from cuda.core.experimental import Device, LaunchConfig, Program, ProgramOptions, launch +from cuda.core import Device, LaunchConfig, Program, ProgramOptions, launch # compute c = a + b code = """ diff --git a/cuda_core/pyproject.toml b/cuda_core/pyproject.toml index af99ddd361..d25442258d 100644 --- a/cuda_core/pyproject.toml +++ b/cuda_core/pyproject.toml @@ -69,7 +69,7 @@ issues = "https://github.com/NVIDIA/cuda-python/issues/" include = ["cuda.core*"] [tool.setuptools.package-data] -"cuda.core.experimental.include" = ["*.h", "*.hpp", "*.cuh"] +"cuda.core.include" = ["*.h", "*.hpp", "*.cuh"] [tool.setuptools.dynamic] version = { attr = "cuda.core._version.__version__" } diff --git a/cuda_core/tests/conftest.py b/cuda_core/tests/conftest.py index c0ea03930e..d39cba75ab 100644 --- a/cuda_core/tests/conftest.py +++ b/cuda_core/tests/conftest.py @@ -4,6 +4,7 @@ import multiprocessing import os +import cuda.core import helpers import pytest @@ -12,9 +13,8 @@ except ImportError: from cuda import cuda as driver -import cuda.core.experimental -from cuda.core.experimental import Device, DeviceMemoryResource, DeviceMemoryResourceOptions, _device -from cuda.core.experimental._utils.cuda_utils import handle_return +from cuda.core import Device, DeviceMemoryResource, DeviceMemoryResourceOptions, _device +from cuda.core._utils.cuda_utils import handle_return @pytest.fixture(scope="session", autouse=True) @@ -124,7 +124,7 @@ def mempool_device(): def _mempool_device_impl(num): - num_devices = len(cuda.core.experimental.system.devices) + num_devices = len(cuda.core.system.devices) if num_devices < num: pytest.skip(f"Test requires at least {num} GPUs") diff --git a/cuda_core/tests/cython/build_tests.sh b/cuda_core/tests/cython/build_tests.sh index eb3303840d..98851edefa 100755 --- 
a/cuda_core/tests/cython/build_tests.sh +++ b/cuda_core/tests/cython/build_tests.sh @@ -6,10 +6,10 @@ UNAME=$(uname) if [ "$UNAME" == "Linux" ] ; then SCRIPTPATH=$(dirname $(realpath "$0")) - export CPLUS_INCLUDE_PATH=${SCRIPTPATH}/../../cuda/core/experimental/include:$CUDA_HOME/include:$CPLUS_INCLUDE_PATH + export CPLUS_INCLUDE_PATH=${SCRIPTPATH}/../../cuda/core/include:$CUDA_HOME/include:$CPLUS_INCLUDE_PATH elif [[ "$UNAME" == CYGWIN* || "$UNAME" == MINGW* || "$UNAME" == MSYS* ]] ; then SCRIPTPATH="$(dirname $(cygpath -w $(realpath "$0")))" - CUDA_CORE_INCLUDE_PATH=$(echo "${SCRIPTPATH}\..\..\cuda\core\experimental\include" | sed 's/\\/\\\\/g') + CUDA_CORE_INCLUDE_PATH=$(echo "${SCRIPTPATH}\..\..\cuda\core\include" | sed 's/\\/\\\\/g') export CL="/I\"${CUDA_CORE_INCLUDE_PATH}\" /I\"${CUDA_HOME}\\include\" ${CL}" else exit 1 diff --git a/cuda_core/tests/cython/test_get_cuda_native_handle.pyx b/cuda_core/tests/cython/test_get_cuda_native_handle.pyx index 0c3921e925..2b105e13ae 100644 --- a/cuda_core/tests/cython/test_get_cuda_native_handle.pyx +++ b/cuda_core/tests/cython/test_get_cuda_native_handle.pyx @@ -13,7 +13,7 @@ from cuda.bindings.nvrtc cimport nvrtcProgram as pynvrtcProgram from cuda.bindings.cydriver cimport CUstream, CUevent from cuda.bindings.cynvrtc cimport nvrtcProgram -from cuda.core.experimental import Device, Program +from cuda.core import Device, Program cdef extern from "utility.hpp": diff --git a/cuda_core/tests/example_tests/test_basic_examples.py b/cuda_core/tests/example_tests/test_basic_examples.py index 450c60bf06..640b53c2fc 100644 --- a/cuda_core/tests/example_tests/test_basic_examples.py +++ b/cuda_core/tests/example_tests/test_basic_examples.py @@ -7,7 +7,7 @@ import os import pytest -from cuda.core.experimental import Device +from cuda.core import Device from .utils import run_example diff --git a/cuda_core/tests/helpers/buffers.py b/cuda_core/tests/helpers/buffers.py index b4d769eab3..3004cd0d00 100644 --- 
a/cuda_core/tests/helpers/buffers.py +++ b/cuda_core/tests/helpers/buffers.py @@ -3,8 +3,8 @@ import ctypes -from cuda.core.experimental import Buffer, Device, MemoryResource -from cuda.core.experimental._utils.cuda_utils import driver, handle_return +from cuda.core import Buffer, Device, MemoryResource +from cuda.core._utils.cuda_utils import driver, handle_return from . import libc diff --git a/cuda_core/tests/helpers/latch.py b/cuda_core/tests/helpers/latch.py index 46516c1b06..e35ee3325b 100644 --- a/cuda_core/tests/helpers/latch.py +++ b/cuda_core/tests/helpers/latch.py @@ -4,7 +4,7 @@ import ctypes import pytest -from cuda.core.experimental import ( +from cuda.core import ( LaunchConfig, LegacyPinnedMemoryResource, Program, diff --git a/cuda_core/tests/helpers/nanosleep_kernel.py b/cuda_core/tests/helpers/nanosleep_kernel.py index ea6ae34dcf..99d32c9aa4 100644 --- a/cuda_core/tests/helpers/nanosleep_kernel.py +++ b/cuda_core/tests/helpers/nanosleep_kernel.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 -from cuda.core.experimental import ( +from cuda.core import ( LaunchConfig, Program, ProgramOptions, diff --git a/cuda_core/tests/memory_ipc/test_errors.py b/cuda_core/tests/memory_ipc/test_errors.py index d6280ae0ec..ccb3d3b7cc 100644 --- a/cuda_core/tests/memory_ipc/test_errors.py +++ b/cuda_core/tests/memory_ipc/test_errors.py @@ -5,8 +5,8 @@ import pickle import re -from cuda.core.experimental import Buffer, Device, DeviceMemoryResource, DeviceMemoryResourceOptions -from cuda.core.experimental._utils.cuda_utils import CUDAError +from cuda.core import Buffer, Device, DeviceMemoryResource, DeviceMemoryResourceOptions +from cuda.core._utils.cuda_utils import CUDAError CHILD_TIMEOUT_SEC = 20 NBYTES = 64 diff --git a/cuda_core/tests/memory_ipc/test_event_ipc.py b/cuda_core/tests/memory_ipc/test_event_ipc.py index ce756cba21..e4b486e6e1 100644 --- a/cuda_core/tests/memory_ipc/test_event_ipc.py +++ b/cuda_core/tests/memory_ipc/test_event_ipc.py @@ -4,7 +4,7 @@ import multiprocessing as mp import pytest -from cuda.core.experimental import Device, EventOptions +from cuda.core import Device, EventOptions from helpers.buffers import compare_equal_buffers, make_scratch_buffer from helpers.latch import LatchKernel from helpers.logging import TimestampedLogger diff --git a/cuda_core/tests/memory_ipc/test_memory_ipc.py b/cuda_core/tests/memory_ipc/test_memory_ipc.py index 54d8056865..d92a28ab5a 100644 --- a/cuda_core/tests/memory_ipc/test_memory_ipc.py +++ b/cuda_core/tests/memory_ipc/test_memory_ipc.py @@ -3,7 +3,7 @@ import multiprocessing as mp -from cuda.core.experimental import Buffer, DeviceMemoryResource +from cuda.core import Buffer, DeviceMemoryResource from helpers.buffers import PatternGen CHILD_TIMEOUT_SEC = 20 diff --git a/cuda_core/tests/memory_ipc/test_peer_access.py b/cuda_core/tests/memory_ipc/test_peer_access.py index 87dc459ffc..5a06133c9b 100644 --- a/cuda_core/tests/memory_ipc/test_peer_access.py +++ 
b/cuda_core/tests/memory_ipc/test_peer_access.py @@ -4,8 +4,8 @@ import multiprocessing as mp import pytest -from cuda.core.experimental import Device, DeviceMemoryResource, DeviceMemoryResourceOptions -from cuda.core.experimental._utils.cuda_utils import CUDAError +from cuda.core import Device, DeviceMemoryResource, DeviceMemoryResourceOptions +from cuda.core._utils.cuda_utils import CUDAError from helpers.buffers import PatternGen CHILD_TIMEOUT_SEC = 20 diff --git a/cuda_core/tests/memory_ipc/test_send_buffers.py b/cuda_core/tests/memory_ipc/test_send_buffers.py index 3493828c7e..2df3fe1bbc 100644 --- a/cuda_core/tests/memory_ipc/test_send_buffers.py +++ b/cuda_core/tests/memory_ipc/test_send_buffers.py @@ -5,7 +5,7 @@ from itertools import cycle import pytest -from cuda.core.experimental import Device, DeviceMemoryResource, DeviceMemoryResourceOptions +from cuda.core import Device, DeviceMemoryResource, DeviceMemoryResourceOptions from helpers.buffers import PatternGen CHILD_TIMEOUT_SEC = 20 diff --git a/cuda_core/tests/memory_ipc/test_serialize.py b/cuda_core/tests/memory_ipc/test_serialize.py index 7fe65b2b4a..66c5f4da06 100644 --- a/cuda_core/tests/memory_ipc/test_serialize.py +++ b/cuda_core/tests/memory_ipc/test_serialize.py @@ -5,7 +5,7 @@ import multiprocessing.reduction import os -from cuda.core.experimental import Buffer, Device, DeviceMemoryResource +from cuda.core import Buffer, Device, DeviceMemoryResource from helpers.buffers import PatternGen CHILD_TIMEOUT_SEC = 20 diff --git a/cuda_core/tests/memory_ipc/test_workerpool.py b/cuda_core/tests/memory_ipc/test_workerpool.py index 3f3f46cd27..b13b9896a1 100644 --- a/cuda_core/tests/memory_ipc/test_workerpool.py +++ b/cuda_core/tests/memory_ipc/test_workerpool.py @@ -6,7 +6,7 @@ from itertools import cycle import pytest -from cuda.core.experimental import Buffer, Device, DeviceMemoryResource, DeviceMemoryResourceOptions +from cuda.core import Buffer, Device, DeviceMemoryResource, 
DeviceMemoryResourceOptions from helpers.buffers import PatternGen CHILD_TIMEOUT_SEC = 20 diff --git a/cuda_core/tests/test_comparable.py b/cuda_core/tests/test_comparable.py index c99963cd23..a93e49e4e8 100644 --- a/cuda_core/tests/test_comparable.py +++ b/cuda_core/tests/test_comparable.py @@ -8,10 +8,10 @@ across Device, Stream, Event, and Context objects. """ -from cuda.core.experimental import Device, Stream -from cuda.core.experimental._context import Context -from cuda.core.experimental._event import Event, EventOptions -from cuda.core.experimental._stream import StreamOptions +from cuda.core import Device, Stream +from cuda.core._context import Context +from cuda.core._event import Event, EventOptions +from cuda.core._stream import StreamOptions # ============================================================================ # Equality Contract Tests diff --git a/cuda_core/tests/test_context.py b/cuda_core/tests/test_context.py index 4fe35dc18d..5183aa1a85 100644 --- a/cuda_core/tests/test_context.py +++ b/cuda_core/tests/test_context.py @@ -1,14 +1,14 @@ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 -import cuda.core.experimental +import cuda.core import pytest -from cuda.core.experimental import Device +from cuda.core import Device def test_context_init_disabled(): with pytest.raises(RuntimeError, match=r"^Context objects cannot be instantiated directly\."): - cuda.core.experimental._context.Context() # Ensure back door is locked. + cuda.core._context.Context() # Ensure back door is locked. 
# ============================================================================ diff --git a/cuda_core/tests/test_cuda_utils.py b/cuda_core/tests/test_cuda_utils.py index b0a0518652..c68f8fb841 100644 --- a/cuda_core/tests/test_cuda_utils.py +++ b/cuda_core/tests/test_cuda_utils.py @@ -4,7 +4,7 @@ import pytest from cuda.bindings import driver, runtime -from cuda.core.experimental._utils import cuda_utils +from cuda.core._utils import cuda_utils def test_driver_cu_result_explanations_health(): diff --git a/cuda_core/tests/test_device.py b/cuda_core/tests/test_device.py index fa484fa65b..69849b1a2e 100644 --- a/cuda_core/tests/test_device.py +++ b/cuda_core/tests/test_device.py @@ -6,15 +6,15 @@ except ImportError: from cuda import cuda as driver from cuda import cudart as runtime -import cuda.core.experimental +import cuda.core import pytest -from cuda.core.experimental import Device -from cuda.core.experimental._utils.cuda_utils import ComputeCapability, get_binding_version, handle_return +from cuda.core import Device +from cuda.core._utils.cuda_utils import ComputeCapability, get_binding_version, handle_return def test_device_init_disabled(): with pytest.raises(RuntimeError, match=r"^DeviceProperties cannot be instantiated directly\."): - cuda.core.experimental._device.DeviceProperties() # Ensure back door is locked. + cuda.core._device.DeviceProperties() # Ensure back door is locked. 
@pytest.fixture(scope="module") @@ -48,7 +48,7 @@ def test_device_alloc(deinit_cuda): def test_device_id(deinit_cuda): - for device in cuda.core.experimental.system.devices: + for device in cuda.core.system.devices: device.set_current() assert device.device_id == handle_return(runtime.cudaGetDevice()) diff --git a/cuda_core/tests/test_event.py b/cuda_core/tests/test_event.py index ec35448619..0d8f3a3c2d 100644 --- a/cuda_core/tests/test_event.py +++ b/cuda_core/tests/test_event.py @@ -4,9 +4,9 @@ import math -import cuda.core.experimental +import cuda.core import pytest -from cuda.core.experimental import ( +from cuda.core import ( Device, Event, EventOptions, @@ -17,7 +17,7 @@ def test_event_init_disabled(): with pytest.raises(RuntimeError, match=r"^Event objects cannot be instantiated directly\."): - cuda.core.experimental._event.Event() # Ensure back door is locked. + cuda.core._event.Event() # Ensure back door is locked. def test_timing_success(init_cuda): diff --git a/cuda_core/tests/test_experimental_backward_compat.py b/cuda_core/tests/test_experimental_backward_compat.py new file mode 100644 index 0000000000..4ebeb8025d --- /dev/null +++ b/cuda_core/tests/test_experimental_backward_compat.py @@ -0,0 +1,164 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +""" +Tests for backward compatibility of cuda.core.experimental namespace. + +These tests verify that the experimental namespace forwarding stubs work +correctly and emit appropriate deprecation warnings. 
+""" + +import pytest + + +# Test that experimental imports still work +def test_experimental_imports_work(): + """Test that imports from experimental namespace still work.""" + # Clear cached module to ensure warning is emitted + import sys + + if "cuda.core.experimental" in sys.modules: + del sys.modules["cuda.core.experimental"] + + # Test main module import - should emit deprecation warning + with pytest.deprecated_call(): + import cuda.core.experimental + + # Test that symbols are accessible + assert hasattr(cuda.core.experimental, "Device") + assert hasattr(cuda.core.experimental, "Stream") + assert hasattr(cuda.core.experimental, "Buffer") + assert hasattr(cuda.core.experimental, "system") + + +def test_experimental_symbols_are_same_objects(): + """Test that experimental namespace symbols are the same objects as core.""" + import cuda.core + import cuda.core.experimental + + # Compare classes/types + assert cuda.core.experimental.Device is cuda.core.Device + assert cuda.core.experimental.Stream is cuda.core.Stream + assert cuda.core.experimental.Buffer is cuda.core.Buffer + assert cuda.core.experimental.MemoryResource is cuda.core.MemoryResource + assert cuda.core.experimental.Program is cuda.core.Program + assert cuda.core.experimental.Kernel is cuda.core.Kernel + assert cuda.core.experimental.ObjectCode is cuda.core.ObjectCode + assert cuda.core.experimental.Graph is cuda.core.Graph + assert cuda.core.experimental.GraphBuilder is cuda.core.GraphBuilder + assert cuda.core.experimental.Event is cuda.core.Event + assert cuda.core.experimental.Linker is cuda.core.Linker + + # Compare singletons + assert cuda.core.experimental.system is cuda.core.system + + +def test_experimental_direct_imports(): + """Test that direct imports from experimental submodules work.""" + # Clear any cached imports to ensure warnings are emitted + import sys + + if "cuda.core.experimental" in sys.modules: + del sys.modules["cuda.core.experimental"] + + # Test various import patterns 
- warning is emitted once at module import time + with pytest.deprecated_call(): + from cuda.core.experimental import ( + Buffer, + Device, + Stream, + ) + + # Verify objects are usable + assert Device is not None + assert Stream is not None + assert Buffer is not None + + +def test_experimental_submodule_access(): + """Test that accessing experimental submodules works.""" + import cuda.core.experimental + + # Test that submodules can be accessed (via __getattr__) + # Note: These may not exist as actual modules, but the forwarding should work + try: + # This should trigger __getattr__ and forward to the new location + _ = cuda.core.experimental._device + _ = cuda.core.experimental._stream + _ = cuda.core.experimental._memory + except AttributeError: + # It's okay if submodules aren't directly accessible + # The important thing is that public symbols work + pass + + +def test_experimental_utils_module(): + """Test that experimental.utils module works. + + Note: The deprecation warning is only emitted once at import time when + cuda.core.experimental is first imported. Accessing utils or importing + from utils does not trigger additional warnings since utils is already + set as an attribute in the module namespace. 
+ """ + import cuda.core.experimental + + # Should be able to access utils (no warning on access, only on initial import) + assert hasattr(cuda.core.experimental, "utils") + assert cuda.core.experimental.utils is not None + + # Should have expected utilities (no warning on import from utils submodule) + from cuda.core.experimental.utils import StridedMemoryView, args_viewable_as_strided_memory + + assert StridedMemoryView is not None + assert args_viewable_as_strided_memory is not None + + +def test_experimental_options_classes(): + """Test that options classes are accessible.""" + import cuda.core.experimental + + assert hasattr(cuda.core.experimental, "EventOptions") + assert hasattr(cuda.core.experimental, "StreamOptions") + assert hasattr(cuda.core.experimental, "LaunchConfig") + assert hasattr(cuda.core.experimental, "ProgramOptions") + assert hasattr(cuda.core.experimental, "LinkerOptions") + assert hasattr(cuda.core.experimental, "GraphCompleteOptions") + assert hasattr(cuda.core.experimental, "GraphDebugPrintOptions") + assert hasattr(cuda.core.experimental, "DeviceMemoryResourceOptions") + assert hasattr(cuda.core.experimental, "VirtualMemoryResourceOptions") + + # Verify they're the same objects + assert cuda.core.experimental.EventOptions is cuda.core.EventOptions + assert cuda.core.experimental.StreamOptions is cuda.core.StreamOptions + assert cuda.core.experimental.LaunchConfig is cuda.core.LaunchConfig + + +def test_experimental_memory_classes(): + """Test that memory-related classes are accessible.""" + import cuda.core.experimental + + assert hasattr(cuda.core.experimental, "MemoryResource") + assert hasattr(cuda.core.experimental, "DeviceMemoryResource") + assert hasattr(cuda.core.experimental, "LegacyPinnedMemoryResource") + assert hasattr(cuda.core.experimental, "VirtualMemoryResource") + assert hasattr(cuda.core.experimental, "GraphMemoryResource") + + # Verify they're the same objects + assert cuda.core.experimental.MemoryResource is 
cuda.core.MemoryResource + assert cuda.core.experimental.DeviceMemoryResource is cuda.core.DeviceMemoryResource + + +@pytest.mark.filterwarnings("ignore::DeprecationWarning") +def test_experimental_instantiations(): + """Test that objects can be instantiated through experimental namespace.""" + from cuda.core.experimental import Device + + # Should be able to create objects + device = Device() + assert device is not None + + # Verify it's the same type + from cuda.core import Device as CoreDevice + + assert isinstance(device, CoreDevice) diff --git a/cuda_core/tests/test_graph.py b/cuda_core/tests/test_graph.py index e988eeebf6..aaad9304f4 100644 --- a/cuda_core/tests/test_graph.py +++ b/cuda_core/tests/test_graph.py @@ -11,7 +11,7 @@ from cuda.bindings import nvrtc except ImportError: from cuda import nvrtc -from cuda.core.experimental import ( +from cuda.core import ( Device, GraphBuilder, GraphCompleteOptions, @@ -22,7 +22,7 @@ ProgramOptions, launch, ) -from cuda.core.experimental._utils.cuda_utils import NVRTCError, handle_return +from cuda.core._utils.cuda_utils import NVRTCError, handle_return def _common_kernels(): diff --git a/cuda_core/tests/test_graph_mem.py b/cuda_core/tests/test_graph_mem.py index 964ce03b93..c68108ba51 100644 --- a/cuda_core/tests/test_graph_mem.py +++ b/cuda_core/tests/test_graph_mem.py @@ -3,7 +3,7 @@ # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE import pytest -from cuda.core.experimental import ( +from cuda.core import ( Device, DeviceMemoryResource, GraphCompleteOptions, diff --git a/cuda_core/tests/test_hashable.py b/cuda_core/tests/test_hashable.py index 4aa801866f..9bc89969a2 100644 --- a/cuda_core/tests/test_hashable.py +++ b/cuda_core/tests/test_hashable.py @@ -12,10 +12,10 @@ 5. 
Hash/equality contract compliance (if a == b, then hash(a) must equal hash(b)) """ -from cuda.core.experimental import Device -from cuda.core.experimental._context import Context -from cuda.core.experimental._event import Event, EventOptions -from cuda.core.experimental._stream import Stream, StreamOptions +from cuda.core import Device +from cuda.core._context import Context +from cuda.core._event import Event, EventOptions +from cuda.core._stream import Stream, StreamOptions # ============================================================================ # Integration Tests diff --git a/cuda_core/tests/test_helpers.py b/cuda_core/tests/test_helpers.py index 65df23980c..8230f08088 100644 --- a/cuda_core/tests/test_helpers.py +++ b/cuda_core/tests/test_helpers.py @@ -5,7 +5,7 @@ import time import pytest -from cuda.core.experimental import Device +from cuda.core import Device from helpers import IS_WINDOWS, IS_WSL from helpers.buffers import PatternGen, compare_equal_buffers, make_scratch_buffer from helpers.latch import LatchKernel diff --git a/cuda_core/tests/test_launcher.py b/cuda_core/tests/test_launcher.py index d2e0a89a28..ae3e5531c1 100644 --- a/cuda_core/tests/test_launcher.py +++ b/cuda_core/tests/test_launcher.py @@ -12,7 +12,7 @@ cp = None import numpy as np import pytest -from cuda.core.experimental import ( +from cuda.core import ( Device, DeviceMemoryResource, LaunchConfig, @@ -21,8 +21,8 @@ ProgramOptions, launch, ) -from cuda.core.experimental._memory import _SynchronousMemoryResource -from cuda.core.experimental._utils.cuda_utils import CUDAError +from cuda.core._memory import _SynchronousMemoryResource +from cuda.core._utils.cuda_utils import CUDAError from conftest import skipif_need_cuda_headers @@ -95,7 +95,7 @@ def test_launch_config_cluster_grid_conversion(init_cuda): def test_launch_config_native_conversion(init_cuda): """Test that _to_native_launch_config correctly converts grid from cluster units to block units.""" - from 
cuda.core.experimental._launch_config import _to_native_launch_config + from cuda.core._launch_config import _to_native_launch_config try: # Test case 1: 1D - Issue #867 example @@ -264,7 +264,7 @@ def test_cooperative_launch(): # # Commented out as this seems to be a sticky error... # config = LaunchConfig(grid=1, block=1) # launch(s, config, ker) - # from cuda.core.experimental._utils.cuda_utils import CUDAError + # from cuda.core._utils.cuda_utils import CUDAError # with pytest.raises(CUDAError) as e: # s.sync() # assert "CUDA_ERROR_LAUNCH_FAILED" in str(e) diff --git a/cuda_core/tests/test_linker.py b/cuda_core/tests/test_linker.py index e0c8d37b65..ad68201456 100644 --- a/cuda_core/tests/test_linker.py +++ b/cuda_core/tests/test_linker.py @@ -3,9 +3,9 @@ # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE import pytest -from cuda.core.experimental import Device, Linker, LinkerOptions, Program, ProgramOptions, _linker -from cuda.core.experimental._module import ObjectCode -from cuda.core.experimental._utils.cuda_utils import CUDAError +from cuda.core import Device, Linker, LinkerOptions, Program, ProgramOptions, _linker +from cuda.core._module import ObjectCode +from cuda.core._utils.cuda_utils import CUDAError ARCH = "sm_" + "".join(f"{i}" for i in Device().compute_capability) diff --git a/cuda_core/tests/test_memory.py b/cuda_core/tests/test_memory.py index be46802493..1e198334f8 100644 --- a/cuda_core/tests/test_memory.py +++ b/cuda_core/tests/test_memory.py @@ -17,7 +17,7 @@ import re import pytest -from cuda.core.experimental import ( +from cuda.core import ( Buffer, Device, DeviceMemoryResource, @@ -27,13 +27,13 @@ VirtualMemoryResource, VirtualMemoryResourceOptions, ) -from cuda.core.experimental import ( +from cuda.core import ( system as ccx_system, ) -from cuda.core.experimental._dlpack import DLDeviceType -from cuda.core.experimental._memory import IPCBufferDescriptor -from cuda.core.experimental._utils.cuda_utils import CUDAError, 
handle_return -from cuda.core.experimental.utils import StridedMemoryView +from cuda.core._dlpack import DLDeviceType +from cuda.core._memory import IPCBufferDescriptor +from cuda.core._utils.cuda_utils import CUDAError, handle_return +from cuda.core.utils import StridedMemoryView from helpers import IS_WINDOWS from helpers.buffers import DummyUnifiedMemoryResource @@ -136,7 +136,7 @@ def test_package_contents(): "VirtualMemoryResource", ] d = {} - exec("from cuda.core.experimental._memory import *", d) # noqa: S102 + exec("from cuda.core._memory import *", d) # noqa: S102 d = {k: v for k, v in d.items() if not k.startswith("__")} assert sorted(expected) == sorted(d.keys()) diff --git a/cuda_core/tests/test_memory_peer_access.py b/cuda_core/tests/test_memory_peer_access.py index 66c2af23f1..7f48c85089 100644 --- a/cuda_core/tests/test_memory_peer_access.py +++ b/cuda_core/tests/test_memory_peer_access.py @@ -1,139 +1,38 @@ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 -import cuda.core.experimental +import cuda.core import pytest -from cuda.core.experimental import DeviceMemoryResource -from cuda.core.experimental._utils.cuda_utils import CUDAError -from helpers.buffers import PatternGen, compare_buffer_to_constant, make_scratch_buffer +from cuda.core import Device NBYTES = 1024 -def test_peer_access_basic(mempool_device_x2): - """Basic tests for dmr.peer_accessible_by.""" - dev0, dev1 = mempool_device_x2 - zero_on_dev0 = make_scratch_buffer(dev0, 0, NBYTES) - one_on_dev0 = make_scratch_buffer(dev0, 1, NBYTES) - stream_on_dev0 = dev0.create_stream() - dmr_on_dev1 = DeviceMemoryResource(dev1) - buf_on_dev1 = dmr_on_dev1.allocate(NBYTES) +def _mempool_device_impl(num): + num_devices = len(cuda.core.system.devices) + if num_devices < num: + pytest.skip(f"Test requires at least {num} GPUs") - # No access at first. 
- assert 0 not in dmr_on_dev1.peer_accessible_by - with pytest.raises(CUDAError, match="CUDA_ERROR_INVALID_VALUE"): - one_on_dev0.copy_to(buf_on_dev1, stream=stream_on_dev0) + devs = [Device(i) for i in range(num)] + for i in reversed(range(num)): + devs[i].set_current() - with pytest.raises(CUDAError, match="CUDA_ERROR_INVALID_VALUE"): - zero_on_dev0.copy_from(buf_on_dev1, stream=stream_on_dev0) + if not all(devs[i].can_access_peer(j) for i in range(num) for j in range(num)): + pytest.skip("Test requires GPUs with peer access") - # Allow access to device 1's allocations from device 0. - dmr_on_dev1.peer_accessible_by = [dev0] - assert 0 in dmr_on_dev1.peer_accessible_by - compare_buffer_to_constant(zero_on_dev0, 0) - one_on_dev0.copy_to(buf_on_dev1, stream=stream_on_dev0) - zero_on_dev0.copy_from(buf_on_dev1, stream=stream_on_dev0) - stream_on_dev0.sync() - compare_buffer_to_constant(zero_on_dev0, 1) + if not all(devs[i].properties.memory_pools_supported for i in range(num)): + pytest.skip("Device does not support mempool operations") - # Revoke access - dmr_on_dev1.peer_accessible_by = [] - assert 0 not in dmr_on_dev1.peer_accessible_by - with pytest.raises(CUDAError, match="CUDA_ERROR_INVALID_VALUE"): - one_on_dev0.copy_to(buf_on_dev1, stream=stream_on_dev0) + return devs - with pytest.raises(CUDAError, match="CUDA_ERROR_INVALID_VALUE"): - zero_on_dev0.copy_from(buf_on_dev1, stream=stream_on_dev0) +@pytest.fixture +def mempool_device_x2(): + """Fixture that provides two devices if available, otherwise skips test.""" + return _mempool_device_impl(2) -def test_peer_access_property_x2(mempool_device_x2): - """The the dmr.peer_accessible_by property (but not its functionality).""" - # The peer access list is a sorted tuple and always excludes the self - # device. 
- dev0, dev1 = mempool_device_x2 - dmr = DeviceMemoryResource(dev0) - def check(expected): - assert isinstance(dmr.peer_accessible_by, tuple) - assert dmr.peer_accessible_by == expected - - # No access to begin with. - check(expected=()) - # fmt: off - dmr.peer_accessible_by = (0,) ; check(expected=()) # noqa: E702 - dmr.peer_accessible_by = (1,) ; check(expected=(1,)) # noqa: E702 - dmr.peer_accessible_by = (0, 1) ; check(expected=(1,)) # noqa: E702 - dmr.peer_accessible_by = () ; check(expected=()) # noqa: E702 - dmr.peer_accessible_by = [0, 1] ; check(expected=(1,)) # noqa: E702 - dmr.peer_accessible_by = set() ; check(expected=()) # noqa: E702 - dmr.peer_accessible_by = [1, 1, 1, 1, 1] ; check(expected=(1,)) # noqa: E702 - # fmt: on - - with pytest.raises(ValueError, match=r"device_id must be \>\= 0"): - dmr.peer_accessible_by = [-1] # device ID out of bounds - - num_devices = len(cuda.core.experimental.system.devices) - - with pytest.raises(ValueError, match=r"device_id must be within \[0, \d+\)"): - dmr.peer_accessible_by = [num_devices] # device ID out of bounds - - -def test_peer_access_transitions(mempool_device_x3): - """Advanced tests for dmr.peer_accessible_by.""" - - # Check all transitions between peer access states. The implementation - # performs transactions that add or remove access as needed. This test - # ensures that that is working as expected. - - # Doing everything from the point-of-view of device 0, there are four - # access states: - # - # [(), (1,), (2,), (1, 2)] - # - # and 4^2-4 = 12 non-identity transitions. - - devs = mempool_device_x3 # Three devices - - # Allocate per-device resources. - streams = [dev.create_stream() for dev in devs] - pgens = [PatternGen(devs[i], NBYTES, streams[i]) for i in range(3)] - dmrs = [DeviceMemoryResource(dev) for dev in devs] - bufs = [dmr.allocate(NBYTES) for dmr in dmrs] - - def verify_state(state, pattern_seed): - """ - Verify an access state from the POV of device 0. 
E.g., (1,) means - device 1 has access but device 2 does not. - """ - # Populate device 0's buffer with a new pattern. - devs[0].set_current() - pgens[0].fill_buffer(bufs[0], seed=pattern_seed) - streams[0].sync() - - for peer in [1, 2]: - devs[peer].set_current() - if peer in state: - # Peer device has access to 0's allocation - bufs[peer].copy_from(bufs[0], stream=streams[peer]) - # Check the result on the peer device. - pgens[peer].verify_buffer(bufs[peer], seed=pattern_seed) - else: - # Peer device has no access to 0's allocation - with pytest.raises(CUDAError, match="CUDA_ERROR_INVALID_VALUE"): - bufs[peer].copy_from(bufs[0], stream=streams[peer]) - - # For each transition, set the access state before and after, checking for - # the expected peer access capabilities at each stop. - pattern_seed = 0 - states = [(), (1,), (2,), (1, 2)] - transitions = [(s0, s1) for s0 in states for s1 in states if s0 != s1] - for init_state, final_state in transitions: - dmrs[0].peer_accessible_by = init_state - assert dmrs[0].peer_accessible_by == init_state - verify_state(init_state, pattern_seed) - pattern_seed += 1 - - dmrs[0].peer_accessible_by = final_state - assert dmrs[0].peer_accessible_by == final_state - verify_state(final_state, pattern_seed) - pattern_seed += 1 +@pytest.fixture +def mempool_device_x3(): + """Fixture that provides three devices if available, otherwise skips test.""" + return _mempool_device_impl(3) diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py index 901a57f7a4..a1934834fc 100644 --- a/cuda_core/tests/test_module.py +++ b/cuda_core/tests/test_module.py @@ -5,10 +5,10 @@ import pickle import warnings -import cuda.core.experimental +import cuda.core import pytest -from cuda.core.experimental import Device, ObjectCode, Program, ProgramOptions, system -from cuda.core.experimental._utils.cuda_utils import CUDAError, driver, get_binding_version, handle_return +from cuda.core import Device, ObjectCode, Program, ProgramOptions, 
system +from cuda.core._utils.cuda_utils import CUDAError, driver, get_binding_version, handle_return try: import numba @@ -41,17 +41,17 @@ def cuda12_4_prerequisite_check(): def test_kernel_attributes_init_disabled(): with pytest.raises(RuntimeError, match=r"^KernelAttributes cannot be instantiated directly\."): - cuda.core.experimental._module.KernelAttributes() # Ensure back door is locked. + cuda.core._module.KernelAttributes() # Ensure back door is locked. def test_kernel_occupancy_init_disabled(): with pytest.raises(RuntimeError, match=r"^KernelOccupancy cannot be instantiated directly\."): - cuda.core.experimental._module.KernelOccupancy() # Ensure back door is locked. + cuda.core._module.KernelOccupancy() # Ensure back door is locked. def test_kernel_init_disabled(): with pytest.raises(RuntimeError, match=r"^Kernel objects cannot be instantiated directly\."): - cuda.core.experimental._module.Kernel() # Ensure back door is locked. + cuda.core._module.Kernel() # Ensure back door is locked. 
def test_object_code_init_disabled(): @@ -387,7 +387,7 @@ def test_occupancy_max_active_clusters(get_saxpy_kernel_cubin, cluster): dev = Device() if dev.compute_capability < (9, 0): pytest.skip("Device with compute capability 90 or higher is required for cluster support") - launch_config = cuda.core.experimental.LaunchConfig(grid=128, block=64, cluster=cluster) + launch_config = cuda.core.LaunchConfig(grid=128, block=64, cluster=cluster) query_fn = kernel.occupancy.max_active_clusters max_active_clusters = query_fn(launch_config) assert isinstance(max_active_clusters, int) @@ -402,7 +402,7 @@ def test_occupancy_max_potential_cluster_size(get_saxpy_kernel_cubin): dev = Device() if dev.compute_capability < (9, 0): pytest.skip("Device with compute capability 90 or higher is required for cluster support") - launch_config = cuda.core.experimental.LaunchConfig(grid=128, block=64) + launch_config = cuda.core.LaunchConfig(grid=128, block=64) query_fn = kernel.occupancy.max_potential_cluster_size max_potential_cluster_size = query_fn(launch_config) assert isinstance(max_potential_cluster_size, int) diff --git a/cuda_core/tests/test_multiprocessing_warning.py b/cuda_core/tests/test_multiprocessing_warning.py index 945ea83964..214fa360b2 100644 --- a/cuda_core/tests/test_multiprocessing_warning.py +++ b/cuda_core/tests/test_multiprocessing_warning.py @@ -12,13 +12,13 @@ import warnings from unittest.mock import patch -from cuda.core.experimental import DeviceMemoryResource, DeviceMemoryResourceOptions, EventOptions -from cuda.core.experimental._event import _reduce_event -from cuda.core.experimental._memory._ipc import ( +from cuda.core import DeviceMemoryResource, DeviceMemoryResourceOptions, EventOptions +from cuda.core._event import _reduce_event +from cuda.core._memory._ipc import ( _deep_reduce_device_memory_resource, _reduce_allocation_handle, ) -from cuda.core.experimental._utils.cuda_utils import reset_fork_warning +from cuda.core._utils.cuda_utils import 
reset_fork_warning def test_warn_on_fork_method_device_memory_resource(ipc_device): diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py index 8a6526fcc2..e85fd690d9 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -6,10 +6,10 @@ import warnings import pytest -from cuda.core.experimental import _linker -from cuda.core.experimental._module import Kernel, ObjectCode -from cuda.core.experimental._program import Program, ProgramOptions -from cuda.core.experimental._utils.cuda_utils import CUDAError, driver, handle_return +from cuda.core import _linker +from cuda.core._module import Kernel, ObjectCode +from cuda.core._program import Program, ProgramOptions +from cuda.core._utils.cuda_utils import CUDAError, driver, handle_return cuda_driver_version = handle_return(driver.cuDriverGetVersion()) is_culink_backend = _linker._decide_nvjitlink_or_driver() @@ -18,7 +18,7 @@ def _is_nvvm_available(): """Check if NVVM is available.""" try: - from cuda.core.experimental._program import _get_nvvm_module + from cuda.core._program import _get_nvvm_module _get_nvvm_module() return True @@ -31,7 +31,7 @@ def _is_nvvm_available(): ) try: - from cuda.core.experimental._utils.cuda_utils import driver, handle_return, nvrtc + from cuda.core._utils.cuda_utils import driver, handle_return, nvrtc _cuda_driver_version = handle_return(driver.cuDriverGetVersion()) except Exception: @@ -91,7 +91,7 @@ def _get_libnvvm_version_for_tests(): _libnvvm_version_attempted = True try: - from cuda.core.experimental._program import _get_nvvm_module + from cuda.core._program import _get_nvvm_module nvvm = _get_nvvm_module() @@ -139,7 +139,7 @@ def nvvm_ir(): fallback assumes no version metadata will be present in the input nvvm ir """ - from cuda.core.experimental._program import _get_nvvm_module + from cuda.core._program import _get_nvvm_module nvvm = _get_nvvm_module() major, minor, debug_major, debug_minor = nvvm.ir_version() @@ -329,7 +329,7 
@@ def test_program_close(): @nvvm_available def test_nvvm_deferred_import(): """Test that our deferred NVVM import works correctly""" - from cuda.core.experimental._program import _get_nvvm_module + from cuda.core._program import _get_nvvm_module nvvm = _get_nvvm_module() assert nvvm is not None diff --git a/cuda_core/tests/test_stream.py b/cuda_core/tests/test_stream.py index 695a70e931..01b0b861af 100644 --- a/cuda_core/tests/test_stream.py +++ b/cuda_core/tests/test_stream.py @@ -2,10 +2,10 @@ # SPDX-License-Identifier: Apache-2.0 import pytest -from cuda.core.experimental import Device, Stream, StreamOptions -from cuda.core.experimental._event import Event -from cuda.core.experimental._stream import LEGACY_DEFAULT_STREAM, PER_THREAD_DEFAULT_STREAM -from cuda.core.experimental._utils.cuda_utils import driver +from cuda.core import Device, Stream, StreamOptions +from cuda.core._event import Event +from cuda.core._stream import LEGACY_DEFAULT_STREAM, PER_THREAD_DEFAULT_STREAM +from cuda.core._utils.cuda_utils import driver from helpers.misc import StreamWrapper diff --git a/cuda_core/tests/test_strided_layout.py b/cuda_core/tests/test_strided_layout.py index a0f63f7aaa..f2baaa03d3 100644 --- a/cuda_core/tests/test_strided_layout.py +++ b/cuda_core/tests/test_strided_layout.py @@ -9,7 +9,7 @@ import numpy as np import pytest -from cuda.core.experimental._layout import StridedLayout +from cuda.core._layout import StridedLayout from helpers.layout import ( DenseOrder, LayoutSpec, diff --git a/cuda_core/tests/test_system.py b/cuda_core/tests/test_system.py index d5195ed872..fb39d018c3 100644 --- a/cuda_core/tests/test_system.py +++ b/cuda_core/tests/test_system.py @@ -7,8 +7,8 @@ from cuda import cuda as driver from cuda import cudart as runtime -from cuda.core.experimental import Device, system -from cuda.core.experimental._utils.cuda_utils import handle_return +from cuda.core import Device, system +from cuda.core._utils.cuda_utils import handle_return def 
test_system_singleton(): diff --git a/cuda_core/tests/test_utils.py b/cuda_core/tests/test_utils.py index 927d7bc239..502adacc89 100644 --- a/cuda_core/tests/test_utils.py +++ b/cuda_core/tests/test_utils.py @@ -12,15 +12,16 @@ from numba import cuda as numba_cuda except ImportError: numba_cuda = None -import cuda.core.experimental +import cuda.core import numpy as np import pytest -from cuda.core.experimental import Device -from cuda.core.experimental.utils import StridedLayout, StridedMemoryView, args_viewable_as_strided_memory +from cuda.core import Device +from cuda.core._layout import StridedLayout +from cuda.core.utils import StridedMemoryView, args_viewable_as_strided_memory def test_cast_to_3_tuple_success(): - c3t = cuda.core.experimental._utils.cuda_utils.cast_to_3_tuple + c3t = cuda.core._utils.cuda_utils.cast_to_3_tuple assert c3t("", ()) == (1, 1, 1) assert c3t("", 2) == (2, 1, 1) assert c3t("", (2,)) == (2, 1, 1) @@ -44,7 +45,7 @@ def test_cast_to_3_tuple_success(): ) def test_cast_to_3_tuple_value_error(cfg, expected): with pytest.raises(ValueError, match=expected): - cuda.core.experimental._utils.cuda_utils.cast_to_3_tuple("Lbl", cfg) + cuda.core._utils.cuda_utils.cast_to_3_tuple("Lbl", cfg) def convert_strides_to_counts(strides, itemsize): diff --git a/cuda_python_test_helpers/cuda_python_test_helpers/__init__.py b/cuda_python_test_helpers/cuda_python_test_helpers/__init__.py index a661b4f1aa..e7829df406 100644 --- a/cuda_python_test_helpers/cuda_python_test_helpers/__init__.py +++ b/cuda_python_test_helpers/cuda_python_test_helpers/__init__.py @@ -9,7 +9,7 @@ from contextlib import suppress from typing import Union -from cuda.core.experimental._utils.cuda_utils import handle_return +from cuda.core._utils.cuda_utils import handle_return __all__ = [ "IS_WINDOWS",