diff --git a/cuda_bindings/cuda/bindings/_path_finder/README.md b/cuda_bindings/cuda/bindings/_path_finder/README.md
index fa51b56fa7..1e115f2eda 100644
--- a/cuda_bindings/cuda/bindings/_path_finder/README.md
+++ b/cuda_bindings/cuda/bindings/_path_finder/README.md
@@ -31,12 +31,13 @@ strategy for locating NVIDIA shared libraries:
   - Falls back to native loader:
     - `dlopen()` on Linux
     - `LoadLibraryW()` on Windows
+  - Conda installations are expected to be discovered:
+    - Linux: Via `$ORIGIN/../lib` on `RPATH` (of the `python` binary;
+      note that this preempts `LD_LIBRARY_PATH` and `/etc/ld.so.conf.d/`)
+    - Windows: Via `%CONDA_PREFIX%\Library\bin` on system `PATH`
   - CTK installations with system config updates are expected to be discovered:
     - Linux: Via `/etc/ld.so.conf.d/*cuda*.conf`
     - Windows: Via `C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\vX.Y\bin` on system `PATH`
-  - Conda installations are expected to be discovered:
-    - Linux: Via `$ORIGIN/../lib` on `RPATH` (of the `python` binary)
-    - Windows: Via `%CONDA_PREFIX%\Library\bin` on system `PATH`

 3. **Environment variables**
    - Relies on `CUDA_HOME` or `CUDA_PATH` environment variables if set
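To make the priority order above concrete, the sketch below probes the conda and environment-variable locations the README lists, in the documented order. It is illustrative only: `candidate_dirs` is a hypothetical helper, the `lib`/`lib64`/`Library\bin` layout is simplified, and the real `path_finder` logic (wheel lookup, `ld.so.conf.d` parsing, caching) is more involved.

```python
# Illustrative only: a simplified probe of the locations listed in the README,
# in the documented priority order. Not part of cuda.bindings.
import os
import sys
from pathlib import Path


def candidate_dirs():  # hypothetical helper, not the actual path_finder code
    conda_prefix = os.environ.get("CONDA_PREFIX")
    if conda_prefix:
        if sys.platform == "win32":
            yield Path(conda_prefix) / "Library" / "bin"  # conda on Windows (on PATH)
        else:
            yield Path(conda_prefix) / "lib"  # conda on Linux ($ORIGIN/../lib via RPATH)
    for env_var in ("CUDA_HOME", "CUDA_PATH"):  # lowest priority: environment variables
        root = os.environ.get(env_var)
        if root:
            yield Path(root) / ("bin" if sys.platform == "win32" else "lib64")


if __name__ == "__main__":
    for d in candidate_dirs():
        print(d, "exists" if d.is_dir() else "missing")
```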
diff --git a/cuda_bindings/cuda/bindings/_path_finder/load_dl_windows.py b/cuda_bindings/cuda/bindings/_path_finder/load_dl_windows.py
index ec305be927..0d13680a6b 100644
--- a/cuda_bindings/cuda/bindings/_path_finder/load_dl_windows.py
+++ b/cuda_bindings/cuda/bindings/_path_finder/load_dl_windows.py
@@ -1,8 +1,6 @@
 # Copyright 2025 NVIDIA Corporation. All rights reserved.
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE

-import ctypes
-import ctypes.wintypes
 from typing import Optional

 import pywintypes
@@ -36,46 +34,11 @@ def add_dll_directory(dll_abs_path: str) -> None:


 def abs_path_for_dynamic_library(libname: str, handle: pywintypes.HANDLE) -> str:
-    """Get the absolute path of a loaded dynamic library on Windows.
-
-    Args:
-        handle: The library handle
-
-    Returns:
-        The absolute path to the DLL file
-
-    Raises:
-        OSError: If GetModuleFileNameW fails
-        RuntimeError: If the required path length is unreasonably long
-    """
-    MAX_ITERATIONS = 10  # Allows for extremely long paths (up to ~266,000 chars)
-    buf_size = 260  # Start with traditional MAX_PATH
-
-    for _ in range(MAX_ITERATIONS):
-        buf = ctypes.create_unicode_buffer(buf_size)
-        n_chars = ctypes.windll.kernel32.GetModuleFileNameW(ctypes.wintypes.HMODULE(handle), buf, buf_size)
-
-        if n_chars == 0:
-            raise OSError(
-                f"GetModuleFileNameW failed ({libname=!r}, {buf_size=}). "
-                "Long paths may require enabling the "
-                "Windows 10+ long path registry setting. See: "
-                "https://docs.python.org/3/using/windows.html#removing-the-max-path-limitation"
-            )
-        if n_chars < buf_size - 1:
-            return buf.value
-
-        buf_size *= 2  # Double the buffer size and try again
-
-    raise RuntimeError(
-        f"Failed to retrieve the full path after {MAX_ITERATIONS} attempts "
-        f"(final buffer size: {buf_size} characters). "
-        "This may indicate:\n"
-        "  1. An extremely long path requiring Windows long path support, or\n"
-        "  2. An invalid or corrupt library handle, or\n"
-        "  3. An unexpected system error.\n"
-        "See: https://docs.python.org/3/using/windows.html#removing-the-max-path-limitation"
-    )
+    """Get the absolute path of a loaded dynamic library on Windows."""
+    try:
+        return win32api.GetModuleFileName(handle)
+    except Exception as e:
+        raise RuntimeError(f"GetModuleFileName failed for {libname!r} (exception type: {type(e)})") from e


 def check_if_already_loaded_from_elsewhere(libname: str) -> Optional[LoadedDL]:
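The rewrite above works because pywin32 sizes the output buffer internally, which is what makes the old `ctypes.create_unicode_buffer()` grow-and-retry loop unnecessary. A minimal Windows-only sketch of the same call, assuming pywin32 is installed (the choice of `kernel32.dll` is arbitrary):

```python
# Windows-only sketch (assumes pywin32 is installed). It exercises the same
# win32api call that the new abs_path_for_dynamic_library() relies on.
import pywintypes
import win32api

handle = win32api.LoadLibrary("kernel32.dll")  # any DLL that is already loadable works here
try:
    print(win32api.GetModuleFileName(handle))  # e.g. C:\Windows\System32\KERNEL32.DLL
except pywintypes.error as e:
    # pywin32 raises pywintypes.error on API failure; the wrapper in this PR
    # catches Exception broadly and re-raises it as RuntimeError with context.
    print("GetModuleFileName failed:", e)
finally:
    win32api.FreeLibrary(handle)
```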
diff --git a/cuda_bindings/tests/run_python_code_safely.py b/cuda_bindings/tests/run_python_code_safely.py
deleted file mode 100644
index 349ed96826..0000000000
--- a/cuda_bindings/tests/run_python_code_safely.py
+++ /dev/null
@@ -1,89 +0,0 @@
-# Copyright 2025 NVIDIA Corporation. All rights reserved.
-# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
-
-import multiprocessing
-import queue  # for Empty
-import subprocess  # nosec B404
-import sys
-import traceback
-from io import StringIO
-
-
-class Worker:
-    def __init__(self, python_code, result_queue):
-        self.python_code = python_code
-        self.result_queue = result_queue
-
-    def __call__(self):
-        # Capture stdout/stderr
-        old_stdout = sys.stdout
-        old_stderr = sys.stderr
-        sys.stdout = StringIO()
-        sys.stderr = StringIO()
-
-        try:
-            exec(self.python_code, {"__name__": "__main__"})  # nosec B102
-            returncode = 0
-        except SystemExit as e:  # Handle sys.exit()
-            returncode = e.code if isinstance(e.code, int) else 0
-        except BaseException:
-            traceback.print_exc()
-            returncode = 1
-        finally:
-            # Collect outputs and restore streams
-            stdout = sys.stdout.getvalue()
-            stderr = sys.stderr.getvalue()
-            sys.stdout = old_stdout
-            sys.stderr = old_stderr
-            try:  # noqa: SIM105
-                self.result_queue.put((returncode, stdout, stderr))
-            except Exception:  # nosec B110
-                # If the queue is broken (e.g., parent gone), best effort logging
-                pass
-
-
-def run_python_code_safely(python_code, *, timeout=None):
-    """Run Python code in a spawned subprocess, capturing stdout/stderr/output."""
-    ctx = multiprocessing.get_context("spawn")
-    result_queue = ctx.Queue()
-    process = ctx.Process(target=Worker(python_code, result_queue))
-    process.start()
-
-    try:
-        process.join(timeout)
-        if process.is_alive():
-            process.terminate()
-            process.join()
-            return subprocess.CompletedProcess(
-                args=[sys.executable, "-c", python_code],
-                returncode=-9,
-                stdout="",
-                stderr=f"Process timed out after {timeout} seconds and was terminated.",
-            )
-
-        try:
-            returncode, stdout, stderr = result_queue.get(timeout=1.0)
-        except (queue.Empty, EOFError):
-            return subprocess.CompletedProcess(
-                args=[sys.executable, "-c", python_code],
-                returncode=-999,
-                stdout="",
-                stderr="Process exited or crashed before returning results.",
-            )
-
-        return subprocess.CompletedProcess(
-            args=[sys.executable, "-c", python_code],
-            returncode=returncode,
-            stdout=stdout,
-            stderr=stderr,
-        )
-
-    finally:
-        try:
-            result_queue.close()
-            result_queue.join_thread()
-        except Exception:  # nosec B110
-            pass
-        if process.is_alive():
-            process.kill()
-            process.join()
diff --git a/cuda_bindings/tests/spawned_process_runner.py b/cuda_bindings/tests/spawned_process_runner.py
new file mode 100644
index 0000000000..3a13362fe2
--- /dev/null
+++ b/cuda_bindings/tests/spawned_process_runner.py
@@ -0,0 +1,126 @@
+# Copyright 2025 NVIDIA Corporation. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
+
+import multiprocessing
+import queue  # for Empty
+import sys
+import traceback
+from dataclasses import dataclass
+from io import StringIO
+from typing import Any, Callable, Optional, Sequence
+
+PROCESS_KILLED = -9
+PROCESS_NO_RESULT = -999
+
+
+# Similar to https://docs.python.org/3/library/subprocess.html#subprocess.CompletedProcess
+# (args, check_returncode() are intentionally not supported here.)
+@dataclass
+class CompletedProcess:
+    returncode: int
+    stdout: str
+    stderr: str
+
+
+class ChildProcessWrapper:
+    def __init__(self, result_queue, target, args, kwargs):
+        self.target = target
+        self.args = () if args is None else args
+        self.kwargs = {} if kwargs is None else kwargs
+        self.result_queue = result_queue
+
+    def __call__(self):
+        # Capture stdout/stderr
+        old_stdout = sys.stdout
+        old_stderr = sys.stderr
+        sys.stdout = StringIO()
+        sys.stderr = StringIO()
+
+        try:
+            self.target(*self.args, **self.kwargs)
+            returncode = 0
+        except SystemExit as e:  # Handle sys.exit()
+            returncode = e.code if isinstance(e.code, int) else 0
+        except BaseException:
+            traceback.print_exc()
+            returncode = 1
+        finally:
+            # Collect outputs and restore streams
+            stdout = sys.stdout.getvalue()
+            stderr = sys.stderr.getvalue()
+            sys.stdout = old_stdout
+            sys.stderr = old_stderr
+            try:  # noqa: SIM105
+                self.result_queue.put((returncode, stdout, stderr))
+            except Exception:  # nosec B110
+                # If the queue is broken (e.g., parent gone), best effort logging
+                pass
+
+
+def run_in_spawned_child_process(
+    target: Callable[..., None],
+    *,
+    args: Optional[Sequence[Any]] = None,
+    kwargs: Optional[dict[str, Any]] = None,
+    timeout: Optional[float] = None,
+    rethrow: bool = False,
+) -> CompletedProcess:
+    """Run `target` in a spawned child process, capturing stdout/stderr.
+
+    The provided `target` must be defined at the top level of a module, and must
+    be importable in the spawned child process. Lambdas, closures, or interactively
+    defined functions (e.g., in Jupyter notebooks) will not work.
+
+    If `rethrow=True` and the child process exits with a nonzero code,
+    raises ChildProcessError with the captured stderr.
+    """
+    ctx = multiprocessing.get_context("spawn")
+    result_queue = ctx.Queue()
+    process = ctx.Process(target=ChildProcessWrapper(result_queue, target, args, kwargs))
+    process.start()
+
+    try:
+        process.join(timeout)
+        if process.is_alive():
+            process.terminate()
+            process.join()
+            result = CompletedProcess(
+                returncode=PROCESS_KILLED,
+                stdout="",
+                stderr=f"Process timed out after {timeout} seconds and was terminated.",
+            )
+        else:
+            try:
+                returncode, stdout, stderr = result_queue.get(timeout=1.0)
+            except (queue.Empty, EOFError):
+                result = CompletedProcess(
+                    returncode=PROCESS_NO_RESULT,
+                    stdout="",
+                    stderr="Process exited or crashed before returning results.",
+                )
+            else:
+                result = CompletedProcess(
+                    returncode=returncode,
+                    stdout=stdout,
+                    stderr=stderr,
+                )
+
+        if rethrow and result.returncode != 0:
+            raise ChildProcessError(
+                f"Child process exited with code {result.returncode}.\n"
+                "--- stderr-from-child-process ---\n"
+                f"{result.stderr}"
+                "\n"
+            )
+
+        return result
+
+    finally:
+        try:
+            result_queue.close()
+            result_queue.join_thread()
+        except Exception:  # nosec B110
+            pass
+        if process.is_alive():
+            process.kill()
+            process.join()
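For context, a minimal usage sketch of the new helper outside pytest; the `greet` function and its arguments are invented for illustration. Note that `target` must be a top-level, importable function because the `spawn` start method re-imports the defining module in the child:

```python
# Minimal usage sketch (not part of this PR). Assumes spawned_process_runner.py
# (which lives under cuda_bindings/tests) is importable from the current directory.
from spawned_process_runner import run_in_spawned_child_process


def greet(name, *, punctuation="!"):
    # Anything printed here is captured in CompletedProcess.stdout.
    print(f"Hello, {name}{punctuation}")


if __name__ == "__main__":
    result = run_in_spawned_child_process(
        greet, args=("CUDA",), kwargs={"punctuation": "?"}, timeout=10
    )
    assert result.returncode == 0
    assert result.stdout == "Hello, CUDA?\n"
    print(result)  # CompletedProcess(returncode=0, stdout='Hello, CUDA?\n', stderr='')
```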
diff --git a/cuda_bindings/tests/test_path_finder_load.py b/cuda_bindings/tests/test_path_finder_load.py
index 5c21e8a058..66defe2bac 100644
--- a/cuda_bindings/tests/test_path_finder_load.py
+++ b/cuda_bindings/tests/test_path_finder_load.py
@@ -5,7 +5,7 @@
 import sys

 import pytest
-from run_python_code_safely import run_python_code_safely
+import spawned_process_runner

 from cuda.bindings import path_finder
 from cuda.bindings._path_finder import supported_libs
@@ -38,46 +38,45 @@ def test_all_libnames_expected_lib_symbols_consistency():
     assert tuple(sorted(ALL_LIBNAMES)) == tuple(sorted(supported_libs.EXPECTED_LIB_SYMBOLS.keys()))


-def build_subprocess_failed_for_libname_message(libname, result):
+def build_child_process_failed_for_libname_message(libname, result):
     return (
-        f"Subprocess failed for {libname=!r} with exit code {result.returncode}\n"
-        f"--- stdout-from-subprocess ---\n{result.stdout}\n"
-        f"--- stderr-from-subprocess ---\n{result.stderr}\n"
+        f"Child process failed for {libname=!r} with exit code {result.returncode}\n"
+        f"--- stdout-from-child-process ---\n{result.stdout}\n"
+        f"--- stderr-from-child-process ---\n{result.stderr}\n"
     )


+def child_process_func(libname):
+    import os
+
+    from cuda.bindings._path_finder.load_nvidia_dynamic_library import _load_nvidia_dynamic_library_no_cache
+    from cuda.bindings.path_finder import _load_nvidia_dynamic_library
+
+    loaded_dl_fresh = _load_nvidia_dynamic_library(libname)
+    if loaded_dl_fresh.was_already_loaded_from_elsewhere:
+        raise RuntimeError("loaded_dl_fresh.was_already_loaded_from_elsewhere")
+
+    loaded_dl_from_cache = _load_nvidia_dynamic_library(libname)
+    if loaded_dl_from_cache is not loaded_dl_fresh:
+        raise RuntimeError("loaded_dl_from_cache is not loaded_dl_fresh")
+
+    loaded_dl_no_cache = _load_nvidia_dynamic_library_no_cache(libname)
+    if not loaded_dl_no_cache.was_already_loaded_from_elsewhere:
+        raise RuntimeError("loaded_dl_no_cache.was_already_loaded_from_elsewhere")
+    if not os.path.samefile(loaded_dl_no_cache.abs_path, loaded_dl_fresh.abs_path):
+        raise RuntimeError(f"not os.path.samefile({loaded_dl_no_cache.abs_path=!r}, {loaded_dl_fresh.abs_path=!r})")
+
+    print(f"{loaded_dl_fresh.abs_path!r}")
+
+
 @pytest.mark.parametrize("libname", TEST_FIND_OR_LOAD_LIBNAMES)
 def test_find_or_load_nvidia_dynamic_library(info_summary_append, libname):
-    # We intentionally run each dynamic library operation in a subprocess
+    # We intentionally run each dynamic library operation in a child process
     # to ensure isolation of global dynamic linking state (e.g., dlopen handles).
-    # Without subprocesses, loading/unloading libraries during testing could
+    # Without child processes, loading/unloading libraries during testing could
     # interfere across test cases and lead to nondeterministic or platform-specific failures.
-    #
-    # Defining the subprocess code snippets as strings ensures each subprocess
-    # runs a minimal, independent script tailored to the specific libname and API being tested.
-    code = f"""\
-import os
-from cuda.bindings.path_finder import _load_nvidia_dynamic_library
-from cuda.bindings._path_finder.load_nvidia_dynamic_library import _load_nvidia_dynamic_library_no_cache
-
-loaded_dl_fresh = _load_nvidia_dynamic_library({libname!r})
-if loaded_dl_fresh.was_already_loaded_from_elsewhere:
-    raise RuntimeError("loaded_dl_fresh.was_already_loaded_from_elsewhere")
-
-loaded_dl_from_cache = _load_nvidia_dynamic_library({libname!r})
-if loaded_dl_from_cache is not loaded_dl_fresh:
-    raise RuntimeError("loaded_dl_from_cache is not loaded_dl_fresh")
-
-loaded_dl_no_cache = _load_nvidia_dynamic_library_no_cache({libname!r})
-if not loaded_dl_no_cache.was_already_loaded_from_elsewhere:
-    raise RuntimeError("loaded_dl_no_cache.was_already_loaded_from_elsewhere")
-if not os.path.samefile(loaded_dl_no_cache.abs_path, loaded_dl_fresh.abs_path):
-    raise RuntimeError(f"not os.path.samefile({{loaded_dl_no_cache.abs_path=!r}}, {{loaded_dl_fresh.abs_path=!r}})")
-
-print(f"{{loaded_dl_fresh.abs_path!r}}")
-"""
-    result = run_python_code_safely(code, timeout=30)
+    result = spawned_process_runner.run_in_spawned_child_process(child_process_func, args=(libname,), timeout=30)
     if result.returncode == 0:
         info_summary_append(f"abs_path={result.stdout.rstrip()}")
     else:
-        raise RuntimeError(build_subprocess_failed_for_libname_message(libname, result))
+        raise RuntimeError(build_child_process_failed_for_libname_message(libname, result))
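The comment in the updated test explains why each load runs in a child process. Below is a small, illustrative demonstration (not part of the test suite) of that isolation, using the new runner to show that every call gets a fresh interpreter:

```python
# Illustrative only: each run_in_spawned_child_process() call starts a brand-new
# interpreter, so no dlopen/dynamic-linking state can leak between calls.
import os

from spawned_process_runner import run_in_spawned_child_process


def report_pid():
    print(os.getpid())


if __name__ == "__main__":
    first = run_in_spawned_child_process(report_pid, timeout=10)
    second = run_in_spawned_child_process(report_pid, timeout=10)
    assert first.stdout != f"{os.getpid()}\n"  # child is not the parent
    assert first.stdout != second.stdout  # each call is a fresh process (PID reuse aside)
```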
diff --git a/cuda_bindings/tests/test_spawned_process_runner.py b/cuda_bindings/tests/test_spawned_process_runner.py
new file mode 100644
index 0000000000..644ed8a839
--- /dev/null
+++ b/cuda_bindings/tests/test_spawned_process_runner.py
@@ -0,0 +1,21 @@
+# Copyright 2025 NVIDIA Corporation. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
+
+# Note: This only covers what is not covered already in test_path_finder_load.py
+
+import pytest
+from spawned_process_runner import run_in_spawned_child_process
+
+
+def child_crashes():
+    raise RuntimeError("this is an intentional failure")
+
+
+def test_rethrow_child_exception():
+    with pytest.raises(ChildProcessError) as excinfo:
+        run_in_spawned_child_process(child_crashes, rethrow=True)
+
+    msg = str(excinfo.value)
+    assert "Child process exited with code 1" in msg
+    assert "this is an intentional failure" in msg
+    assert "--- stderr-from-child-process ---" in msg
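A hypothetical companion test, not included in this PR, sketching how the timeout path could also be covered, using the `PROCESS_KILLED` sentinel and the timeout message defined in `spawned_process_runner.py`:

```python
# Hypothetical additional test (not part of this PR): a child that sleeps past
# the timeout should be terminated and reported with the PROCESS_KILLED sentinel.
import time

from spawned_process_runner import PROCESS_KILLED, run_in_spawned_child_process


def child_sleeps_forever():
    time.sleep(300)  # far longer than the timeout below


def test_timeout_kills_child():
    result = run_in_spawned_child_process(child_sleeps_forever, timeout=2)
    assert result.returncode == PROCESS_KILLED
    assert "timed out after 2 seconds" in result.stderr
```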