diff --git a/cuda_bindings/cuda/bindings/utils/__init__.py b/cuda_bindings/cuda/bindings/utils/__init__.py index a2dfe7ce8b..e4fcb15e8f 100644 --- a/cuda_bindings/cuda/bindings/utils/__init__.py +++ b/cuda_bindings/cuda/bindings/utils/__init__.py @@ -3,6 +3,7 @@ from typing import Any, Callable from ._ptx_utils import get_minimal_required_cuda_ver_from_ptx_ver, get_ptx_ver +from ._version_check import warn_if_cuda_major_version_mismatch _handle_getters: dict[type, Callable[[Any], int]] = {} diff --git a/cuda_bindings/cuda/bindings/utils/_version_check.py b/cuda_bindings/cuda/bindings/utils/_version_check.py new file mode 100644 index 0000000000..1ebd7f3d92 --- /dev/null +++ b/cuda_bindings/cuda/bindings/utils/_version_check.py @@ -0,0 +1,61 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + +import os +import threading +import warnings + +# Track whether we've already checked major version compatibility +_major_version_compatibility_checked = False +_lock = threading.Lock() + + +def warn_if_cuda_major_version_mismatch(): + """Warn if the CUDA driver major version is older than cuda-bindings compile-time version. + + This function compares the CUDA major version that cuda-bindings was compiled + against with the CUDA major version supported by the installed driver. If the + compile-time major version is greater than the driver's major version, a warning + is issued. + + The check runs only once per process. Subsequent calls are no-ops. + + The warning can be suppressed by setting the environment variable + ``CUDA_PYTHON_DISABLE_MAJOR_VERSION_WARNING=1``. + """ + global _major_version_compatibility_checked + if _major_version_compatibility_checked: + return + with _lock: + if _major_version_compatibility_checked: + return + _major_version_compatibility_checked = True + + # Allow users to suppress the warning + if os.environ.get("CUDA_PYTHON_DISABLE_MAJOR_VERSION_WARNING"): + return + + # Import here to avoid circular imports and allow lazy loading + from cuda.bindings import driver + + # Get compile-time CUDA version from cuda-bindings + compile_version = driver.CUDA_VERSION # e.g., 13010 + compile_major = compile_version // 1000 + + # Get runtime driver version + err, runtime_version = driver.cuDriverGetVersion() + if err != driver.CUresult.CUDA_SUCCESS: + raise RuntimeError(f"Failed to query CUDA driver version: {err}") + + runtime_major = runtime_version // 1000 + + if compile_major > runtime_major: + warnings.warn( + f"cuda-bindings was built for CUDA major version {compile_major}, but the " + f"NVIDIA driver only supports up to CUDA {runtime_major}. Some cuda-bindings " + f"features may not work correctly. Consider updating your NVIDIA driver, " + f"or using a cuda-bindings version built for CUDA {runtime_major}. " + f"(Set CUDA_PYTHON_DISABLE_MAJOR_VERSION_WARNING=1 to suppress this warning.)", + UserWarning, + stacklevel=3, + ) diff --git a/cuda_bindings/docs/source/environment_variables.rst b/cuda_bindings/docs/source/environment_variables.rst index a212bfe764..7a49fb66a3 100644 --- a/cuda_bindings/docs/source/environment_variables.rst +++ b/cuda_bindings/docs/source/environment_variables.rst @@ -9,6 +9,8 @@ Runtime Environment Variables - ``CUDA_PYTHON_CUDA_PER_THREAD_DEFAULT_STREAM`` : When set to 1, the default stream is the per-thread default stream. When set to 0, the default stream is the legacy default stream. This defaults to 0, for the legacy default stream. See `Stream Synchronization Behavior `_ for an explanation of the legacy and per-thread default streams. +- ``CUDA_PYTHON_DISABLE_MAJOR_VERSION_WARNING`` : When set to 1, suppresses warnings about CUDA major version mismatches between ``cuda-bindings`` and the installed driver. + Build-Time Environment Variables -------------------------------- diff --git a/cuda_bindings/tests/test_version_check.py b/cuda_bindings/tests/test_version_check.py new file mode 100644 index 0000000000..9862d93cc9 --- /dev/null +++ b/cuda_bindings/tests/test_version_check.py @@ -0,0 +1,95 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + +import os +import warnings +from unittest import mock + +import pytest +from cuda.bindings import driver +from cuda.bindings.utils import _version_check, warn_if_cuda_major_version_mismatch + + +class TestVersionCompatibilityCheck: + """Tests for CUDA major version mismatch warning function.""" + + @pytest.fixture(autouse=True) + def reset_version_check(self, monkeypatch): + """Reset the version compatibility check flag for each test, restoring after.""" + monkeypatch.setattr(_version_check, "_major_version_compatibility_checked", False) + + def test_no_warning_when_driver_newer(self): + """No warning should be issued when driver version >= compile version.""" + # Mock compile version 12.9 and driver version 13.0 + with ( + mock.patch.object(driver, "CUDA_VERSION", 12090), + mock.patch.object(driver, "cuDriverGetVersion", return_value=(driver.CUresult.CUDA_SUCCESS, 13000)), + warnings.catch_warnings(record=True) as w, + ): + warnings.simplefilter("always") + warn_if_cuda_major_version_mismatch() + assert len(w) == 0 + + def test_no_warning_when_same_major_version(self): + """No warning should be issued when major versions match.""" + # Mock compile version 12.9 and driver version 12.8 + with ( + mock.patch.object(driver, "CUDA_VERSION", 12090), + mock.patch.object(driver, "cuDriverGetVersion", return_value=(driver.CUresult.CUDA_SUCCESS, 12080)), + warnings.catch_warnings(record=True) as w, + ): + warnings.simplefilter("always") + warn_if_cuda_major_version_mismatch() + assert len(w) == 0 + + def test_warning_when_compile_major_newer(self): + """Warning should be issued when compile major version > driver major version.""" + # Mock compile version 13.0 and driver version 12.8 + with ( + mock.patch.object(driver, "CUDA_VERSION", 13000), + mock.patch.object(driver, "cuDriverGetVersion", return_value=(driver.CUresult.CUDA_SUCCESS, 12080)), + warnings.catch_warnings(record=True) as w, + ): + warnings.simplefilter("always") + warn_if_cuda_major_version_mismatch() + assert len(w) == 1 + assert issubclass(w[0].category, UserWarning) + assert "cuda-bindings was built for CUDA major version 13" in str(w[0].message) + assert "only supports up to CUDA 12" in str(w[0].message) + + def test_warning_only_issued_once(self): + """Warning should only be issued once per process.""" + with ( + mock.patch.object(driver, "CUDA_VERSION", 13000), + mock.patch.object(driver, "cuDriverGetVersion", return_value=(driver.CUresult.CUDA_SUCCESS, 12080)), + warnings.catch_warnings(record=True) as w, + ): + warnings.simplefilter("always") + warn_if_cuda_major_version_mismatch() + warn_if_cuda_major_version_mismatch() + warn_if_cuda_major_version_mismatch() + # Only one warning despite multiple calls + assert len(w) == 1 + + def test_warning_suppressed_by_env_var(self): + """Warning should be suppressed when CUDA_PYTHON_DISABLE_MAJOR_VERSION_WARNING is set.""" + with ( + mock.patch.object(driver, "CUDA_VERSION", 13000), + mock.patch.object(driver, "cuDriverGetVersion", return_value=(driver.CUresult.CUDA_SUCCESS, 12080)), + mock.patch.dict(os.environ, {"CUDA_PYTHON_DISABLE_MAJOR_VERSION_WARNING": "1"}), + warnings.catch_warnings(record=True) as w, + ): + warnings.simplefilter("always") + warn_if_cuda_major_version_mismatch() + assert len(w) == 0 + + def test_error_when_driver_version_fails(self): + """Should raise RuntimeError if cuDriverGetVersion fails.""" + with ( + mock.patch.object(driver, "CUDA_VERSION", 13000), + mock.patch.object( + driver, "cuDriverGetVersion", return_value=(driver.CUresult.CUDA_ERROR_NOT_INITIALIZED, 0) + ), + pytest.raises(RuntimeError, match="Failed to query CUDA driver version"), + ): + warn_if_cuda_major_version_mismatch() diff --git a/cuda_core/cuda/core/_device.pyx b/cuda_core/cuda/core/_device.pyx index dc7b0b69c9..9bd7785efb 100644 --- a/cuda_core/cuda/core/_device.pyx +++ b/cuda_core/cuda/core/_device.pyx @@ -960,53 +960,12 @@ class Device: __slots__ = ("_device_id", "_memory_resource", "_has_inited", "_properties", "_uuid", "_context") def __new__(cls, device_id: Device | int | None = None): - # Handle device_id argument. if isinstance(device_id, Device): return device_id - else: - device_id = getattr(device_id, 'device_id', device_id) - - # Initialize CUDA. - global _is_cuInit - if _is_cuInit is False: - with _lock, nogil: - HANDLE_RETURN(cydriver.cuInit(0)) - _is_cuInit = True - # important: creating a Device instance does not initialize the GPU! - cdef cydriver.CUdevice dev - cdef cydriver.CUcontext ctx - if device_id is None: - with nogil: - err = cydriver.cuCtxGetDevice(&dev) - if err == cydriver.CUresult.CUDA_SUCCESS: - device_id = int(dev) - elif err == cydriver.CUresult.CUDA_ERROR_INVALID_CONTEXT: - # No context is current - verify and default to device 0 (cudart behavior) - assert cydriver.cuCtxGetCurrent(&ctx) == cydriver.CUresult.CUDA_SUCCESS and ctx == NULL - device_id = 0 - else: - HANDLE_RETURN(err) - elif device_id < 0: - raise ValueError(f"device_id must be >= 0, got {device_id}") - - # ensure Device is singleton - cdef int total - try: - devices = _tls.devices - except AttributeError: - with nogil: - HANDLE_RETURN(cydriver.cuDeviceGetCount(&total)) - devices = _tls.devices = [] - for i in range(total): - device = super().__new__(cls) - device._device_id = i - device._memory_resource = None - device._has_inited = False - device._properties = None - device._uuid = None - device._context = None - devices.append(device) + Device_ensure_cuda_initialized() + device_id = Device_resolve_device_id(device_id) + devices = Device_ensure_tls_devices(cls) try: return devices[device_id] @@ -1393,3 +1352,62 @@ class Device: """ self._check_context_initialized() return GraphBuilder._init(stream=self.create_stream(), is_stream_owner=True) + + +cdef inline int Device_ensure_cuda_initialized() except? -1: + """Initialize CUDA driver and check version compatibility (once per process).""" + global _is_cuInit + if _is_cuInit is False: + with _lock, nogil: + HANDLE_RETURN(cydriver.cuInit(0)) + _is_cuInit = True + try: + from cuda.bindings.utils import warn_if_cuda_major_version_mismatch + except ImportError: + pass + else: + warn_if_cuda_major_version_mismatch() + return 0 + + +cdef inline int Device_resolve_device_id(device_id) except? -1: + """Resolve device_id, defaulting to current device or 0.""" + cdef cydriver.CUdevice dev + cdef cydriver.CUcontext ctx + cdef cydriver.CUresult err + if device_id is None: + with nogil: + err = cydriver.cuCtxGetDevice(&dev) + if err == cydriver.CUresult.CUDA_SUCCESS: + return int(dev) + elif err == cydriver.CUresult.CUDA_ERROR_INVALID_CONTEXT: + with nogil: + HANDLE_RETURN(cydriver.cuCtxGetCurrent(&ctx)) + assert (ctx) == NULL + return 0 # cudart behavior + else: + HANDLE_RETURN(err) + elif device_id < 0: + raise ValueError(f"device_id must be >= 0, got {device_id}") + return device_id + + +cdef inline list Device_ensure_tls_devices(cls): + """Ensure thread-local Device singletons exist, creating if needed.""" + cdef int total + try: + return _tls.devices + except AttributeError: + with nogil: + HANDLE_RETURN(cydriver.cuDeviceGetCount(&total)) + devices = _tls.devices = [] + for dev_id in range(total): + device = super(Device, cls).__new__(cls) + device._device_id = dev_id + device._memory_resource = None + device._has_inited = False + device._properties = None + device._uuid = None + device._context = None + devices.append(device) + return devices