From ad30969e3da99765673b5f181ae0afdcce715a3c Mon Sep 17 00:00:00 2001 From: Michael Davis Date: Sat, 20 Dec 2025 11:02:43 -0800 Subject: [PATCH] revert 5609 changes --- .gitignore | 4 - CMakeLists.txt | 34 --- README.md | 21 +- python/pyproject.toml | 2 +- python/setup.py | 23 -- python/tools/prereqs/__init__.py | 96 ------ python/tools/prereqs/build_tools.py | 140 --------- python/tools/prereqs/compiler.py | 102 ------- python/tools/prereqs/exceptions.py | 23 -- python/tools/prereqs/git.py | 137 --------- python/tools/prereqs/llvm.py | 238 --------------- python/tools/prereqs/nccl.py | 311 -------------------- python/tools/prereqs/platform.py | 129 -------- python/tools/prereqs/python_packages.py | 375 ------------------------ python/tools/prereqs/python_version.py | 114 ------- python/tools/prereqs/requirements.py | 269 ----------------- python/tools/prereqs/validate.py | 168 ----------- python/utils.py | 34 --- requirements.txt | 4 +- setup.py | 30 -- 20 files changed, 6 insertions(+), 2248 deletions(-) delete mode 100644 python/tools/prereqs/__init__.py delete mode 100644 python/tools/prereqs/build_tools.py delete mode 100644 python/tools/prereqs/compiler.py delete mode 100644 python/tools/prereqs/exceptions.py delete mode 100644 python/tools/prereqs/git.py delete mode 100644 python/tools/prereqs/llvm.py delete mode 100644 python/tools/prereqs/nccl.py delete mode 100644 python/tools/prereqs/platform.py delete mode 100644 python/tools/prereqs/python_packages.py delete mode 100644 python/tools/prereqs/python_version.py delete mode 100644 python/tools/prereqs/requirements.py delete mode 100644 python/tools/prereqs/validate.py diff --git a/.gitignore b/.gitignore index 89cb16f952f..8b78beed82e 100644 --- a/.gitignore +++ b/.gitignore @@ -5,10 +5,6 @@ bin build .lintbin -# Project-local LLVM installation (created by following prerequisite validation instructions) -.llvm/ -llvm.sh - nvfuser_common/version.py nvfuser_common/include nvfuser_comon/lib diff --git a/CMakeLists.txt b/CMakeLists.txt index 33aabf07f23..d3671597992 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -41,13 +41,6 @@ endif() message(STATUS "Setting NVFUSER_DISTRIBUTED=${NVFUSER_DISTRIBUTED}") -# NCCL include directory for distributed builds -# This can be set by the Python build system when using pip-bundled NCCL -set(NCCL_INCLUDE_DIR "" CACHE PATH "Directory containing nccl.h header") -if(NVFUSER_DISTRIBUTED AND NCCL_INCLUDE_DIR) - message(STATUS "Using NCCL include dir: ${NCCL_INCLUDE_DIR}") -endif() - # We try to update which C++ standard we use together in lockstep across all # built libraries, and these variables control which that is. Generally we are # on C++20, but we still support a version of CUDA (11) that does not recognize @@ -443,29 +436,6 @@ list(APPEND NVFUSER_SRCS # Add LLVM JIT related dependencies set(LLVM_MINIMUM_VERSION "18.1") - -# Check for project-local LLVM installations first (no sudo required) -# This searches .llvm/ directory for version-specific installations -if(NOT LLVM_DIR) - file(GLOB LLVM_CANDIDATES - "${CMAKE_SOURCE_DIR}/.llvm/*/lib/cmake/llvm" - "${CMAKE_SOURCE_DIR}/third_party/llvm/*/lib/cmake/llvm" - ) - # Sort to get highest version first (assumes version numbers in paths) - list(SORT LLVM_CANDIDATES COMPARE NATURAL ORDER DESCENDING) - foreach(CANDIDATE ${LLVM_CANDIDATES}) - if(EXISTS "${CANDIDATE}/LLVMConfig.cmake") - # Extract version from path to check if it meets minimum - string(REGEX MATCH "([0-9]+\\.[0-9]+\\.[0-9]+)" CANDIDATE_VERSION "${CANDIDATE}") - if(CANDIDATE_VERSION VERSION_GREATER_EQUAL LLVM_MINIMUM_VERSION) - set(LLVM_DIR "${CANDIDATE}" CACHE PATH "Path to LLVM CMake config") - message(STATUS "Found project-local LLVM ${CANDIDATE_VERSION} at ${LLVM_DIR}") - break() - endif() - endif() - endforeach() -endif() - find_package(LLVM REQUIRED CONFIG) if(${LLVM_VERSION} VERSION_LESS ${LLVM_MINIMUM_VERSION}) message(FATAL_ERROR "LLVM ${LLVM_VERSION} does not meet the minimum version required: ${LLVM_MINIMUM_VERSION}") @@ -558,10 +528,6 @@ target_include_directories(codegen_internal SYSTEM PUBLIC PRIVATE ${CUDA_INCLUDE_DIRS} ) -# Add NCCL include path for distributed builds (from pip-bundled nvidia-nccl-cu*) -if(NVFUSER_DISTRIBUTED AND NCCL_INCLUDE_DIR) - target_include_directories(codegen_internal SYSTEM PRIVATE ${NCCL_INCLUDE_DIR}) -endif() target_include_directories(codegen_internal PUBLIC "$" "$" diff --git a/README.md b/README.md index e65419b70da..b83ef898a5e 100644 --- a/README.md +++ b/README.md @@ -55,33 +55,20 @@ pip install -r python/requirements.txt pip install --no-build-isolation -e python -v ``` -### Prerequisite Validation - -Prerequisites are automatically validated before building. If validation fails, follow the error message instructions. - -**Validated**: Python, CMake, Ninja, pybind11, PyTorch (CUDA), System CUDA toolkit, Git submodules, GCC, LLVM - -**Skip validation** (for CI or custom setups): -```bash -NVFUSER_BUILD_SKIP_VALIDATION=1 pip install --no-build-isolation -e python -v -``` - -See `python/setup.py` for all build environment variables. - Supported compilers: **GCC:** -GCC 13+ is required (C++20 `` header support). Recommended versions: +We support all "supported releases" of gcc as specified in [the official site](https://gcc.gnu.org/). +As of 3/2/2025, they are: +- gcc 12.4 - gcc 13.3 - gcc 14.2 -Note: GCC 12 is not supported due to missing `` header in libstdc++. - **Clang:** -- clang 19+ +- clang 16+ Supported C++ standard: diff --git a/python/pyproject.toml b/python/pyproject.toml index 5f3df6e0407..d7813c1ed06 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -1,3 +1,3 @@ [build-system] -requires = ["setuptools>=42", "wheel", "ninja", "cmake>=3.18", "pybind11[global]>=2.0"] +requires = ["setuptools>=42", "wheel", "ninja", "cmake>=3.18"] build-backend = "setuptools.build_meta:__legacy__" diff --git a/python/setup.py b/python/setup.py index b11e7dcebc0..f8246fb201c 100644 --- a/python/setup.py +++ b/python/setup.py @@ -66,12 +66,7 @@ # NVFUSER_BUILD_CPP_STANDARD=STANDARD # Specify the C++ standard to use for building nvfuser. The default is C++20. # -# NVFUSER_BUILD_SKIP_VALIDATION -# Skip prerequisite validation checks. Use this for CI or when you know -# all prerequisites are satisfied. Validation will still run if not set. -# -import os import sys from utils import ( @@ -101,24 +96,6 @@ def main(): # Override build config from environment variables override_build_config_from_env(config) - # Prerequisite validation (can be skipped with NVFUSER_BUILD_SKIP_VALIDATION) - if not os.environ.get("NVFUSER_BUILD_SKIP_VALIDATION"): - try: - from tools.prereqs import validate_prerequisites - - validate_prerequisites() - except ImportError as e: - # Prerequisite validation not available (shouldn't happen in dev) - print(f"WARNING: Could not import prerequisite validation: {e}") - except Exception as e: - # Prerequisite check failed - print(f"\n{e}\n", file=sys.stderr) - sys.exit(1) - else: - print( - "[nvFuser] Skipping prerequisite validation (NVFUSER_BUILD_SKIP_VALIDATION set)" - ) - if "clean" in sys.argv: # only disables BUILD_SETUP, but keep the argument for setuptools config.build_setup = False diff --git a/python/tools/prereqs/__init__.py b/python/tools/prereqs/__init__.py deleted file mode 100644 index 326cd4977c3..00000000000 --- a/python/tools/prereqs/__init__.py +++ /dev/null @@ -1,96 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025-present NVIDIA CORPORATION & AFFILIATES. -# All rights reserved. -# SPDX-License-Identifier: BSD-3-Clause -""" -nvFuser Prerequisite Validation Package - -This package provides utilities for validating build prerequisites before -attempting to build nvFuser from source. It helps provide clear, actionable -error messages when prerequisites are missing or have incorrect versions. - -Version requirements are centralized in requirements.py. See PYTHON, CMAKE, -NINJA, PYTORCH, CUDA, PYBIND11, GCC, LLVM constants for current versions. - -Key Components: - - PrerequisiteMissingError: Exception raised when prerequisites are missing - - detect_platform(): Detect OS, architecture, and Linux distribution - - format_platform_info(): Format platform information as readable string - - check_python_version(): Validate Python meets minimum version - - check_cmake_version(): Validate CMake meets minimum version - - check_ninja_installed(): Validate Ninja build system (any version) - - check_pybind11_installed(): Validate pybind11 with CMake support - - check_torch_installed(): Validate PyTorch with CUDA support - - check_git_submodules_initialized(): Validate git submodules are initialized - - validate_compiler(): Validate C++ compiler (GCC 13+ or Clang 19+) - - check_nccl_available(): Validate NCCL headers/library for distributed builds - - check_llvm_installed(): Validate LLVM for build-time linking - -Usage: - from tools.prereqs import PrerequisiteMissingError, detect_platform - - platform_info = detect_platform() - if platform_info['os'] != 'Linux': - raise PrerequisiteMissingError("nvFuser requires Linux") -""" - -from .exceptions import PrerequisiteMissingError -from .platform import detect_platform, format_platform_info -from .python_version import check_python_version -from .build_tools import check_cmake_version, check_ninja_installed -from .python_packages import check_pybind11_installed, check_torch_installed -from .git import check_git_submodules_initialized -from .compiler import validate_compiler -from .nccl import check_nccl_available -from .llvm import check_llvm_installed -from .validate import validate_prerequisites -from .requirements import ( - Requirement, - parse_version, - format_version, - PYTHON, - CMAKE, - NINJA, - PYTORCH, - CUDA, - PYBIND11, - GCC, - CLANG, - LLVM, - CUDA_AVAILABLE, - pytorch_index_url, - llvm_download_url, - pytorch_install_instructions, -) - -__all__ = [ - "PrerequisiteMissingError", - "detect_platform", - "format_platform_info", - "check_python_version", - "check_cmake_version", - "check_ninja_installed", - "check_pybind11_installed", - "check_torch_installed", - "check_git_submodules_initialized", - "validate_compiler", - "check_nccl_available", - "check_llvm_installed", - "validate_prerequisites", - # Central requirements - "Requirement", - "parse_version", - "format_version", - "PYTHON", - "CMAKE", - "NINJA", - "PYTORCH", - "CUDA", - "PYBIND11", - "GCC", - "CLANG", - "LLVM", - "CUDA_AVAILABLE", - "pytorch_index_url", - "llvm_download_url", - "pytorch_install_instructions", -] diff --git a/python/tools/prereqs/build_tools.py b/python/tools/prereqs/build_tools.py deleted file mode 100644 index e2c97f70e9d..00000000000 --- a/python/tools/prereqs/build_tools.py +++ /dev/null @@ -1,140 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025-present NVIDIA CORPORATION & AFFILIATES. -# All rights reserved. -# SPDX-License-Identifier: BSD-3-Clause -""" -Build tool validation for nvFuser build system. - -Validates that CMake and Ninja build tools are installed with minimum required -versions. These tools are essential for configuring and building nvFuser. -""" - -import re -import shutil -import subprocess -from typing import Tuple - -from .exceptions import PrerequisiteMissingError -from .requirements import CMAKE, NINJA, format_version - - -def check_cmake_version() -> Tuple[int, int, int]: - """ - Check that CMake meets nvFuser's minimum requirement. - - CMake is required for modern CUDA support features used by nvFuser. - - Returns: - Tuple[int, int, int]: CMake version as (major, minor, patch) tuple - - Raises: - PrerequisiteMissingError: If CMake is not installed or version is below minimum - - Example: - >>> version = check_cmake_version() - [nvFuser] CMake: 3.22.1 ✓ - >>> version - (3, 22, 1) - """ - # Check if cmake exists in PATH - if not shutil.which("cmake"): - raise PrerequisiteMissingError( - f"ERROR: {CMAKE.name} is not installed.\n\n" - f"{CMAKE.name} {CMAKE.min_display} is required to configure the nvFuser build.\n" - f"{CMAKE.name} {CMAKE.min_display} provides modern CUDA support features.\n\n" - f"Install all build dependencies:\n" - f" pip install -r requirements.txt\n\n" - f"Or install {CMAKE.name} individually:\n" - f" pip install 'cmake>={CMAKE.min_str}'" - ) - - # Get CMake version - try: - result = subprocess.run( - ["cmake", "--version"], capture_output=True, text=True, check=True - ) - except subprocess.CalledProcessError as e: - raise PrerequisiteMissingError( - f"ERROR: Failed to check {CMAKE.name} version: {e}\n\n" - f"Install {CMAKE.name}:\n" - f" pip install cmake" - ) - - # Parse version string - # Expected format: "cmake version 3.22.1" (first line) - version_line = result.stdout.strip().split("\n")[0] - - # Extract version numbers using regex - version_match = re.search(r"(\d+)\.(\d+)\.(\d+)", version_line) - if not version_match: - raise PrerequisiteMissingError( - f"ERROR: Could not parse {CMAKE.name} version from: {version_line}\n\n" - f"Please ensure {CMAKE.name} is installed correctly:\n" - f" pip install cmake" - ) - - major, minor, patch = map(int, version_match.groups()) - detected = (major, minor, patch) - - # Check minimum version requirement - if not CMAKE.check(detected): - raise PrerequisiteMissingError( - f"ERROR: {CMAKE.name} {CMAKE.min_display} is required to build nvFuser.\n" - f"Found: {CMAKE.name} {format_version(detected)}\n\n" - f"{CMAKE.name} {CMAKE.min_display} is required for modern CUDA support features.\n\n" - f"Install all build dependencies:\n" - f" pip install -r requirements.txt\n\n" - f"Or upgrade {CMAKE.name} individually:\n" - f" pip install --upgrade 'cmake>={CMAKE.min_str}'" - ) - - return (major, minor, patch) - - -def check_ninja_installed() -> str: - """ - Check that Ninja build system is installed. - - Ninja provides fast parallel builds and is recommended for nvFuser. - Any version is accepted. - - Returns: - str: Ninja version string - - Raises: - PrerequisiteMissingError: If Ninja is not installed - - Example: - >>> version = check_ninja_installed() - [nvFuser] Ninja: 1.11.1 ✓ - >>> version - '1.11.1' - """ - # Check if ninja exists in PATH - if not shutil.which("ninja"): - raise PrerequisiteMissingError( - f"ERROR: {NINJA.name} build system is not installed.\n\n" - f"{NINJA.name} is required for fast parallel builds of nvFuser.\n\n" - f"Install all build dependencies:\n" - f" pip install -r requirements.txt\n\n" - f"Or install {NINJA.name} individually:\n" - f" pip install ninja" - ) - - # Get Ninja version - try: - result = subprocess.run( - ["ninja", "--version"], capture_output=True, text=True, check=True - ) - except subprocess.CalledProcessError as e: - raise PrerequisiteMissingError( - f"ERROR: Failed to check {NINJA.name} version: {e}\n\n" - f"Install {NINJA.name}:\n" - f" pip install ninja" - ) - - # Parse version string - # Expected format: "1.11.1" (just the version number) - version_str = result.stdout.strip() - - # Note: NINJA.min_version is None, so any version is accepted - return version_str diff --git a/python/tools/prereqs/compiler.py b/python/tools/prereqs/compiler.py deleted file mode 100644 index 0be0ff1ea02..00000000000 --- a/python/tools/prereqs/compiler.py +++ /dev/null @@ -1,102 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025-present NVIDIA CORPORATION & AFFILIATES. -# All rights reserved. -# SPDX-License-Identifier: BSD-3-Clause -""" -C++ compiler validation for nvFuser build. - -nvFuser requires GCC 13+ or Clang 19+ to build because the source code uses -the C++20 header. This module detects the compiler type and version, -and verifies header availability through a compile test. - -Note: CI may use Clang via update-alternatives (gcc -> clang), so we detect -the actual compiler from version output, not just the command name. -""" - -import re -import subprocess -from typing import Optional, Tuple - -from .exceptions import PrerequisiteMissingError -from .requirements import GCC, CLANG, format_version - - -def get_compiler_info() -> Optional[Tuple[str, Tuple[int, int, int]]]: - """ - Get C++ compiler type and version. - - Runs 'gcc --version' and detects whether it's actually GCC or Clang - (CI uses update-alternatives to make gcc point to clang). - - Returns: - Optional[Tuple[str, Tuple[int, int, int]]]: - ("gcc", (major, minor, patch)) or ("clang", (major, minor, patch)), - or None if no compiler found - """ - try: - result = subprocess.run( - ["gcc", "--version"], capture_output=True, text=True, check=False - ) - - if result.returncode != 0: - return None - - output = result.stdout.lower() - first_line = result.stdout.splitlines()[0] if result.stdout else "" - - # Check if it's actually Clang (via update-alternatives) - if "clang" in output: - match = re.search(r"clang version\s+(\d+)\.(\d+)\.(\d+)", output) - if match: - return ( - "clang", - (int(match.group(1)), int(match.group(2)), int(match.group(3))), - ) - return None - - # Parse as GCC - match = re.search(r"(\d+)\.(\d+)\.(\d+)", first_line) - if match: - return ( - "gcc", - (int(match.group(1)), int(match.group(2)), int(match.group(3))), - ) - - return None - - except FileNotFoundError: - return None - - -def validate_compiler() -> Tuple[str, Tuple[int, int, int]]: - """ - Validate that C++ compiler meets requirements (GCC 13+ or Clang 19+). - - Returns: - Tuple[str, Tuple[int, int, int]]: (compiler_type, version_tuple) - - Raises: - PrerequisiteMissingError: If compiler not found or version too low - """ - info = get_compiler_info() - - if info is None: - raise PrerequisiteMissingError( - f"ERROR: No C++ compiler found. nvFuser requires GCC {GCC.min_display} or Clang {CLANG.min_display}.\n\n" - f"To install GCC {GCC.min_version[0]} on Ubuntu:\n" - f" sudo apt install gcc-{GCC.min_version[0]} g++-{GCC.min_version[0]}\n" - ) - - compiler_type, version = info - - # Check version based on compiler type - if compiler_type == "clang": - req = CLANG - else: - req = GCC - - if not req.check(version): - raise PrerequisiteMissingError( - f"ERROR: nvFuser requires {req.name} {req.min_display} to build.\n" - f"Found: {req.name} {format_version(version)}\n\n" - ) - return (compiler_type, version) diff --git a/python/tools/prereqs/exceptions.py b/python/tools/prereqs/exceptions.py deleted file mode 100644 index 2830ed5c48c..00000000000 --- a/python/tools/prereqs/exceptions.py +++ /dev/null @@ -1,23 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025-present NVIDIA CORPORATION & AFFILIATES. -# All rights reserved. -# SPDX-License-Identifier: BSD-3-Clause -""" -Custom exceptions for nvFuser prerequisite validation. - -These exceptions provide structured error handling for build prerequisite -checks, enabling clear and actionable error messages. -""" - - -class PrerequisiteMissingError(Exception): - """ - Raised when a prerequisite for building nvFuser is missing or has an incorrect version. - - This exception should include: - - What prerequisite is missing or incorrect - - Why it's required - - Exact commands to install or fix it - - Platform-specific guidance when applicable - """ - - pass diff --git a/python/tools/prereqs/git.py b/python/tools/prereqs/git.py deleted file mode 100644 index 2cd5b1712fa..00000000000 --- a/python/tools/prereqs/git.py +++ /dev/null @@ -1,137 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025-present NVIDIA CORPORATION & AFFILIATES. -# All rights reserved. -# SPDX-License-Identifier: BSD-3-Clause -""" -Git submodules validation for nvFuser build. - -nvFuser has third-party dependencies managed as git submodules (cutlass, -flatbuffers, googletest, benchmark). Uninitialized submodules cause cryptic -"file not found" CMake errors during configuration. - -This module detects uninitialized git submodules and provides actionable -instructions to initialize them. -""" - -import subprocess -from pathlib import Path -from typing import List, Optional, Tuple - -from .exceptions import PrerequisiteMissingError - - -def _find_repo_root() -> Optional[Path]: - """ - Find git repository root using git's own detection. - - This uses 'git rev-parse --show-toplevel' which is more reliable than - walking directories looking for .git (handles git worktrees correctly). - - Returns: - Optional[Path]: Repository root path, or None if not in a git repository - """ - try: - result = subprocess.run( - ["git", "rev-parse", "--show-toplevel"], - capture_output=True, - text=True, - check=True, - ) - return Path(result.stdout.strip()) - except (subprocess.CalledProcessError, FileNotFoundError): - # Not in a git repository, or git not installed - return None - - -def check_git_submodules_initialized() -> List[Tuple[str, str]]: - """ - Check that all git submodules in the repository are initialized. - - Returns: - List[Tuple[str, str]]: List of (submodule_path, commit_hash) for - initialized submodules - - Raises: - PrerequisiteMissingError: If any submodules are uninitialized - - Example: - >>> submodules = check_git_submodules_initialized() - [nvFuser] Git submodules: 4 initialized ✓ - >>> submodules - [('third_party/benchmark', '0d98dba29d66...'), ...] - """ - # Find the git repository root - repo_root = _find_repo_root() - - if repo_root is None: - # Not in a git repository - this is acceptable (e.g., pip installed package) - # Don't raise error, just skip the check silently - return [] - - try: - # Run git submodule status from repository root - result = subprocess.run( - ["git", "submodule", "status"], - cwd=repo_root, - capture_output=True, - text=True, - check=False, - ) - - if result.returncode != 0: - # git command failed - probably not a git repo or git not installed - # This is acceptable in some scenarios (pip install from tarball) - return [] - - # Parse submodule status output - # Format: " ()" for initialized - # "- ()" for uninitialized - lines = result.stdout.strip().splitlines() - - if not lines: - # No submodules defined - return [] - - initialized = [] - uninitialized = [] - - for line in lines: - if not line: - continue - - # Check first character - status_char = line[0] - # Parse rest of line: ... - parts = line[1:].split(maxsplit=2) - if len(parts) >= 2: - commit, path = parts[0], parts[1] - - if status_char == "-": - uninitialized.append(path) - else: - initialized.append((path, commit)) - - # If any uninitialized, raise detailed error - if uninitialized: - error_msg = ( - "ERROR: Git submodules are not initialized.\n\n" - "nvFuser requires the following third-party dependencies as git submodules:\n" - ) - for path in uninitialized: - error_msg += f" - {path}\n" - - error_msg += ( - f"\nUninitialized submodules cause CMake configuration errors like:\n" - f" 'CMake Error: Cannot find source file: third_party/.../file.h'\n\n" - f"To initialize all submodules:\n" - f" cd {repo_root}\n" - f" git submodule update --init --recursive\n\n" - f"This will download and initialize all required dependencies.\n" - ) - - raise PrerequisiteMissingError(error_msg) - - return initialized - - except FileNotFoundError: - # git command not found - acceptable in some scenarios - return [] diff --git a/python/tools/prereqs/llvm.py b/python/tools/prereqs/llvm.py deleted file mode 100644 index 0e37332a4ba..00000000000 --- a/python/tools/prereqs/llvm.py +++ /dev/null @@ -1,238 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025-present NVIDIA CORPORATION & AFFILIATES. -# All rights reserved. -# SPDX-License-Identifier: BSD-3-Clause -""" -LLVM prerequisite validation for nvFuser build. - -nvFuser requires LLVM to build because it links against LLVM libraries -during compilation for runtime Host IR JIT compilation. The build will fail -during CMake configuration or linking if LLVM is missing or too old. - -Ubuntu 22.04 ships with LLVM 14 by default, which is too old. Users must -install a newer LLVM either from prebuilt binaries (no sudo required) or from -the LLVM APT repository. -""" - -import os -import shutil -import subprocess -from pathlib import Path -from typing import Optional, Tuple - -from .exceptions import PrerequisiteMissingError -from .requirements import LLVM, parse_version, llvm_download_url - - -def _find_llvm_config() -> Optional[str]: - """ - Locate llvm-config binary in order of priority. - - Priority: - 1. LLVM_CONFIG environment variable - 2. LLVM_DIR/bin/llvm-config environment variable (CMake convention) - 3. LLVM_ROOT/bin/llvm-config environment variable - 4. llvm-config on PATH - 5. System known locations - 6. Project-local locations (scanning for compatible versions) - - Returns: - Optional[str]: Path to llvm-config if found, None otherwise - - Example: - >>> llvm_config = _find_llvm_config() - >>> llvm_config - '/home/user/nvfuser/.llvm/18.1.8/bin/llvm-config' - """ - candidates = [] - llvm_major = LLVM.min_version[0] # e.g., 18 - - # 1. Explicit LLVM_CONFIG env var - if llvm_config_env := os.environ.get("LLVM_CONFIG"): - candidates.append(llvm_config_env) - - # 2. LLVM_DIR (CMake convention) - # CMake typically sets LLVM_DIR to lib/cmake/llvm or similar - # Try multiple navigation patterns for robustness - if llvm_dir := os.environ.get("LLVM_DIR"): - llvm_dir_path = Path(llvm_dir) - candidates.append( - llvm_dir_path / ".." / ".." / ".." / "bin" / "llvm-config" - ) # lib/cmake/llvm -> root/bin - candidates.append( - llvm_dir_path / ".." / ".." / "bin" / "llvm-config" - ) # cmake/llvm -> root/bin - candidates.append( - llvm_dir_path / "bin" / "llvm-config" - ) # if LLVM_DIR points to root - - # 3. LLVM_ROOT (alternative convention) - if llvm_root := os.environ.get("LLVM_ROOT"): - candidates.append(os.path.join(llvm_root, "bin", "llvm-config")) - - # 4. PATH lookup - if llvm_in_path := shutil.which("llvm-config"): - candidates.append(llvm_in_path) - - # 5. System known locations (use minimum major version) - system_paths = [ - f"/usr/lib/llvm-{llvm_major}/bin/llvm-config", - f"/usr/local/llvm-{llvm_major}/bin/llvm-config", - "/opt/llvm/bin/llvm-config", - ] - candidates.extend(system_paths) - - # 6. Project-local locations (wildcards for minor version variations) - # Navigate from python/tools/prereqs to repo root (3 levels up) - repo_root = Path(__file__).resolve().parents[3] - project_paths = [] - - # Check for compatible versions in project locations - for parent in [repo_root / ".llvm", repo_root / "third_party" / "llvm"]: - if parent.exists(): - # Scan for compatible versions (minimum and above) - for major in range(llvm_major, llvm_major + 3): # e.g., 18, 19, 20 - for child in parent.glob(f"{major}.*"): - if child.is_dir(): - project_paths.append(child / "bin" / "llvm-config") - - candidates.extend([str(p) for p in project_paths]) - - # Try each candidate - for candidate in candidates: - if candidate: - candidate_path = Path(candidate) - if candidate_path.exists() and os.access(candidate_path, os.X_OK): - return str(candidate_path) - - return None - - -def _parse_llvm_version(version_str: str) -> Optional[Tuple[int, ...]]: - """ - Parse LLVM version string into tuple. - - LLVM version format examples: - - "18.1.8" - - "18.1.8git" - - "19.0.0" - - Args: - version_str: Version string from llvm-config --version - - Returns: - Optional[Tuple[int, ...]]: Version tuple or None if parse fails - - Example: - >>> _parse_llvm_version("18.1.8") - (18, 1, 8) - >>> _parse_llvm_version("18.1.8git") - (18, 1, 8) - """ - try: - return parse_version(version_str) - except ValueError: - return None - - -def raise_installation_error( - repo_root, llvm_major, download_url, tarball_name, dir_name -): - raise PrerequisiteMissingError( - f"ERROR: {LLVM.name} not found.\n\n" - f"nvFuser requires {LLVM.name} {LLVM.min_display} to build (for runtime Host IR JIT).\n" - f"llvm-config must be in PATH or at a known location.\n\n" - f"Installation options:\n\n" - f"Option 1: Download prebuilt binaries (recommended, no sudo needed, project-local):\n" - f" cd {repo_root} # your nvfuser repo root\n" - f" mkdir -p .llvm\n" - f" cd .llvm\n" - f" wget {download_url}\n" - f" tar -xf {tarball_name}\n" - f" mv {dir_name} {LLVM.recommended_str}\n" - f" # Then set environment variable:\n" - f" export LLVM_CONFIG=$(pwd)/{LLVM.recommended_str}/bin/llvm-config\n" - f" # Install legacy library libtinfo5 if missing\n" - f" wget http://mirrors.kernel.org/ubuntu/pool/universe/n/ncurses/libtinfo5_6.3-2ubuntu0.1_amd64.deb\n" - f" sudo apt install ./libtinfo5_6.3-2ubuntu0.1_amd64.deb\n\n" - f"Option 2: Install from LLVM APT repository (requires sudo):\n" - f" # Install prerequisites\n" - f" sudo apt install libzstd1 libzstd-dev lsb-release wget software-properties-common gnupg\n" - f" wget https://apt.llvm.org/llvm.sh\n" - f" chmod +x llvm.sh\n" - f" sudo ./llvm.sh {llvm_major}\n" - f" # llvm-config-{llvm_major} will be installed at /usr/lib/llvm-{llvm_major}/bin/llvm-config\n" - f" export LLVM_CONFIG=/usr/lib/llvm-{llvm_major}/bin/llvm-config\n" - ) - - -def check_llvm_installed() -> str: - """ - Validate that LLVM meets minimum version requirement for building nvFuser. - - This is the main validation function that should be called during - nvFuser's setup process. It checks: - 1. llvm-config is available (in PATH or common locations) - 2. LLVM version meets minimum requirement - - Returns: - str: LLVM version string (e.g., "18.1.8") - - Raises: - PrerequisiteMissingError: If LLVM not found or version below minimum - - Example: - >>> version = check_llvm_installed() - [nvFuser] LLVM: 18.1.8 ✓ - >>> version - '18.1.8' - """ - # Calculate repo root for error messages - repo_root = Path(__file__).resolve().parents[3] - llvm_major = LLVM.min_version[0] - download_url = llvm_download_url() - tarball_name = download_url.split("/")[-1] - dir_name = tarball_name.replace(".tar.xz", "") - - # Find llvm-config - llvm_config = _find_llvm_config() - - if not llvm_config: - raise_installation_error( - repo_root, llvm_major, download_url, tarball_name, dir_name - ) - - # Get version - try: - result = subprocess.run( - [llvm_config, "--version"], capture_output=True, text=True, check=True - ) - version_str = result.stdout.strip() - except subprocess.CalledProcessError as e: - raise PrerequisiteMissingError( - f"ERROR: Failed to get {LLVM.name} version from: {llvm_config}\n\n" - f"Command: {llvm_config} --version\n" - f"Error: {e.stderr}\n" - ) - except FileNotFoundError: - raise PrerequisiteMissingError( - f"ERROR: llvm-config found at {llvm_config} but cannot be executed.\n\n" - f"The file may not have execute permissions or may be corrupted.\n" - ) - - # Parse version - version_tuple = _parse_llvm_version(version_str) - - if version_tuple is None: - raise PrerequisiteMissingError( - f"ERROR: Could not parse {LLVM.name} version from: {version_str}\n\n" - f"llvm-config location: {llvm_config}\n" - f"Expected version format: {LLVM.recommended_str} or similar\n" - ) - - # Check version requirement - if not LLVM.check(version_tuple): - raise_installation_error( - repo_root, llvm_major, download_url, tarball_name, dir_name - ) - - return version_str diff --git a/python/tools/prereqs/nccl.py b/python/tools/prereqs/nccl.py deleted file mode 100644 index 2de8d2f507d..00000000000 --- a/python/tools/prereqs/nccl.py +++ /dev/null @@ -1,311 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025-present NVIDIA CORPORATION & AFFILIATES. -# All rights reserved. -# SPDX-License-Identifier: BSD-3-Clause -""" -NCCL header and library detection for nvFuser distributed build. - -nvFuser's distributed/multi-GPU support requires NCCL headers to compile. -When NVFUSER_DISTRIBUTED is enabled (default), nvFuser includes PyTorch's -ProcessGroupNCCL.hpp which in turn includes as a system header. - -Include chain: - nvFuser: csrc/multidevice/communication.cpp - └── #include - └── #include - └── #include (system include) - -NCCL can be found in several locations: -1. Bundled with PyTorch pip package: nvidia-nccl-cu* provides headers and libs - at {site-packages}/nvidia/nccl/include and {site-packages}/nvidia/nccl/lib -2. System installation: apt install libnccl-dev -3. CUDA toolkit: sometimes bundled with CUDA -4. Custom installation: via NCCL_ROOT or NCCL_INCLUDE_DIR env vars - -This module checks these locations to detect NCCL before the build starts. -Detection is skipped if: -- NVFUSER_BUILD_WITHOUT_DISTRIBUTED is set, OR -- PyTorch was built without distributed support (torch._C._has_distributed() == False) -""" - -import os -import platform -import sys -from pathlib import Path -from typing import List, Optional, Tuple - -from .exceptions import PrerequisiteMissingError - - -def _get_multiarch_lib_path() -> Path: - """ - Get the architecture-specific library path for Debian/Ubuntu multiarch. - - Returns the correct path based on the current CPU architecture: - - x86_64 → /usr/lib/x86_64-linux-gnu - - aarch64 → /usr/lib/aarch64-linux-gnu - - Returns: - Path: Architecture-specific library directory - - Example: - >>> _get_multiarch_lib_path() - PosixPath('/usr/lib/x86_64-linux-gnu') # on x86_64 - PosixPath('/usr/lib/aarch64-linux-gnu') # on ARM64 - """ - machine = platform.machine() - - if machine == "x86_64": - return Path("/usr/lib/x86_64-linux-gnu") - elif machine == "aarch64": - return Path("/usr/lib/aarch64-linux-gnu") - else: - # Fallback for unknown architectures - return Path("/usr/lib") - - -def _get_pip_nccl_paths() -> Tuple[Optional[Path], Optional[Path]]: - """ - Find NCCL headers and library from pip-installed nvidia-nccl-cu* package. - - PyTorch's pip package depends on nvidia-nccl-cu* which bundles: - - {site-packages}/nvidia/nccl/include/nccl.h - - {site-packages}/nvidia/nccl/lib/libnccl.so.2 - - Note: Similar logic exists in utils.py::get_pip_nccl_include_dir() for the - build system. This function returns both include AND lib paths for complete - validation, while utils.py only needs the include path for CMake. The - duplication is intentional to keep validation and build logic independent. - - Returns: - Tuple of (include_path, lib_path) or (None, None) if not found - - Example: - >>> inc, lib = _get_pip_nccl_paths() - >>> inc - PosixPath('/path/to/site-packages/nvidia/nccl/include') - """ - # Search all site-packages directories - for site_path in sys.path: - if not site_path: - continue - nccl_include = Path(site_path) / "nvidia" / "nccl" / "include" - nccl_lib = Path(site_path) / "nvidia" / "nccl" / "lib" - - header = nccl_include / "nccl.h" - # Check for versioned library (libnccl.so.2) or unversioned - lib_exists = (nccl_lib / "libnccl.so.2").exists() or ( - nccl_lib / "libnccl.so" - ).exists() - - if header.exists() and lib_exists: - return nccl_include, nccl_lib - - return None, None - - -def _get_nccl_search_paths() -> Tuple[List[Path], List[Path]]: - """ - Get NCCL header and library search paths matching compiler/CMake logic. - - Search order: - 1. Pip-installed nvidia-nccl-cu* package (highest priority - bundled with PyTorch) - 2. Explicit environment variable overrides (NCCL_INCLUDE_DIR, NCCL_LIB_DIR) - 3. NCCL_ROOT based paths - 4. CUDA toolkit paths (NCCL sometimes bundled with CUDA) - 5. Standard system paths - - Returns: - Tuple[List[Path], List[Path]]: (include_paths, library_paths) - - Example: - >>> inc_paths, lib_paths = _get_nccl_search_paths() - >>> inc_paths[0] - PosixPath('/path/to/site-packages/nvidia/nccl/include') - """ - include_paths: List[Path] = [] - library_paths: List[Path] = [] - - # 1. Pip-installed nvidia-nccl-cu* (bundled with PyTorch) - # This is the most common case for pip-installed PyTorch users - pip_inc, pip_lib = _get_pip_nccl_paths() - if pip_inc: - include_paths.append(pip_inc) - if pip_lib: - library_paths.append(pip_lib) - - # 2. Explicit NCCL_INCLUDE_DIR and NCCL_LIB_DIR (user override) - # These match PyTorch's FindNCCL.cmake behavior - if nccl_include := os.environ.get("NCCL_INCLUDE_DIR"): - include_paths.append(Path(nccl_include)) - - if nccl_lib := os.environ.get("NCCL_LIB_DIR"): - library_paths.append(Path(nccl_lib)) - - # 3. NCCL_ROOT based paths (PyTorch convention) - for env_var in ["NCCL_ROOT", "NCCL_ROOT_DIR"]: - if nccl_root := os.environ.get(env_var): - root = Path(nccl_root) - include_paths.append(root / "include") - library_paths.append(root / "lib") - library_paths.append(root / "lib64") - - # 4. CUDA toolkit paths (some install NCCL alongside CUDA) - # PyTorch's FindNCCL.cmake adds CUDA_TOOLKIT_ROOT_DIR to NCCL_ROOT - for cuda_env in ["CUDA_HOME", "CUDA_PATH", "CUDA_TOOLKIT_ROOT_DIR"]: - if cuda_root := os.environ.get(cuda_env): - root = Path(cuda_root) - include_paths.append(root / "include") - library_paths.append(root / "lib64") - library_paths.append(root / "lib") - - # 5. Standard system paths (compiler defaults) - # These are where apt install libnccl-dev places files - system_include_paths = [ - Path("/usr/include"), - Path("/usr/local/include"), - Path("/usr/local/cuda/include"), - ] - include_paths.extend(system_include_paths) - - system_library_paths = [ - _get_multiarch_lib_path(), # Debian/Ubuntu multiarch (arch-aware) - Path("/usr/lib64"), # RHEL/CentOS - Path("/usr/lib"), - Path("/usr/local/lib"), - Path("/usr/local/cuda/lib64"), - ] - library_paths.extend(system_library_paths) - - return include_paths, library_paths - - -def _find_nccl_header(search_paths: List[Path]) -> Optional[Path]: - """ - Search for nccl.h in the given paths. - - Args: - search_paths: List of directories to search - - Returns: - Path to directory containing nccl.h, or None if not found - """ - for path in search_paths: - header = path / "nccl.h" - if header.exists() and header.is_file(): - return path - return None - - -def _find_nccl_library(search_paths: List[Path]) -> Optional[Path]: - """ - Search for NCCL shared library in the given paths. - - Looks for libnccl.so or libnccl.so.2 (versioned). - - Args: - search_paths: List of directories to search - - Returns: - Path to directory containing libnccl.so, or None if not found - """ - library_names = ["libnccl.so", "libnccl.so.2"] - - for path in search_paths: - for lib_name in library_names: - lib = path / lib_name - if lib.exists(): - return path - return None - - -def check_nccl_available() -> Optional[Tuple[str, str]]: - """ - Check if NCCL headers and library are available when distributed is enabled. - - This function replicates the compiler's header search and CMake's library - detection to ensure validation accurately predicts build success. - - This check is skipped when: - - NVFUSER_BUILD_WITHOUT_DISTRIBUTED env var is set, OR - - PyTorch was built without distributed support (torch._C._has_distributed() == False) - - Returns: - Optional[Tuple[str, str]]: (header_path, library_path) if found, - None if distributed is disabled - - Raises: - PrerequisiteMissingError: If NCCL not found and distributed is enabled - - Example: - >>> result = check_nccl_available() - >>> result - ('/usr/include', '/usr/lib/x86_64-linux-gnu') - - >>> # With distributed disabled: - >>> os.environ['NVFUSER_BUILD_WITHOUT_DISTRIBUTED'] = '1' - >>> check_nccl_available() - None - """ - # Check if distributed is disabled via env var - if os.environ.get("NVFUSER_BUILD_WITHOUT_DISTRIBUTED"): - return None - - # Check if PyTorch was built without distributed support - # This mirrors CMake's USE_DISTRIBUTED check (see gen_nvfuser_version.py) - import torch - - if not torch._C._has_distributed(): - return None - - # Get search paths - include_paths, library_paths = _get_nccl_search_paths() - - # Search for header - header_dir = _find_nccl_header(include_paths) - - # Search for library - library_dir = _find_nccl_library(library_paths) - - # Both must be found for distributed build to succeed - if header_dir is None or library_dir is None: - # Build descriptive error message - missing_parts = [] - if header_dir is None: - missing_parts.append("nccl.h header") - if library_dir is None: - missing_parts.append("libnccl.so library") - - missing_str = " and ".join(missing_parts) - - # Format searched paths for error message (limit to first 5 for readability) - inc_paths_str = "\n".join(f" - {p}" for p in include_paths[:5]) - lib_paths_str = "\n".join(f" - {p}" for p in library_paths[:5]) - - # Check if pip NCCL was expected but missing - pip_note = "" - pip_inc, pip_lib = _get_pip_nccl_paths() - if pip_inc is None: - pip_note = ( - "Note: NCCL is usually bundled with PyTorch's pip package (nvidia-nccl-cu*).\n" - "If you installed PyTorch via pip, try reinstalling it:\n" - " pip install --force-reinstall torch\n\n" - ) - - raise PrerequisiteMissingError( - f"ERROR: NCCL {missing_str} not found.\n\n" - "nvFuser's distributed/multi-GPU support requires NCCL.\n" - "The build will fail because PyTorch headers include .\n\n" - f"{pip_note}" - "Options:\n\n" - "Option 1: Install NCCL system-wide:\n" - " sudo apt install libnccl-dev\n\n" - "Option 2: Build without distributed support:\n" - " export NVFUSER_BUILD_WITHOUT_DISTRIBUTED=1\n" - " pip install --no-build-isolation -e . -v\n\n" - "Searched include paths:\n" - f"{inc_paths_str}\n\n" - "Searched library paths:\n" - f"{lib_paths_str}" - ) - - return (str(header_dir), str(library_dir)) diff --git a/python/tools/prereqs/platform.py b/python/tools/prereqs/platform.py deleted file mode 100644 index 18558c05b9d..00000000000 --- a/python/tools/prereqs/platform.py +++ /dev/null @@ -1,129 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025-present NVIDIA CORPORATION & AFFILIATES. -# All rights reserved. -# SPDX-License-Identifier: BSD-3-Clause -""" -Platform detection utilities for nvFuser build system. - -Detects OS, architecture, and Linux distribution to provide platform-specific -error messages and installation guidance. -""" - -import platform -from typing import Dict, Optional - - -def detect_platform() -> Dict[str, Optional[str]]: - """ - Detect the current platform and return structured information. - - Returns: - dict: Platform information with keys: - - 'os': Operating system (Linux, Darwin, Windows, etc.) - - 'arch': Architecture (x86_64, aarch64, arm64, etc.) - - 'distro': Linux distribution ID (ubuntu, debian, rhel, etc.) or None - - 'distro_version': Distribution version (22.04, 20.04, etc.) or None - - 'distro_name': Human-readable distribution name or None - - 'ubuntu_based': Boolean indicating if this is Ubuntu-based distro - - Example: - >>> info = detect_platform() - >>> print(info['os']) - 'Linux' - >>> print(info['distro']) - 'ubuntu' - """ - system = platform.system() - machine = platform.machine() - - # Initialize distro information - distro_info = {} - distro_id = None - distro_version = None - distro_name = None - ubuntu_based = False - - # Detect Linux distribution from /etc/os-release - if system == "Linux": - try: - with open("/etc/os-release") as f: - for line in f: - line = line.strip() - if "=" in line: - key, value = line.split("=", 1) - # Remove quotes from value - distro_info[key] = value.strip('"').strip("'") - - distro_id = distro_info.get("ID", "unknown") - distro_version = distro_info.get("VERSION_ID", "unknown") - distro_name = distro_info.get("NAME", "unknown") - - # Check if Ubuntu-based (useful for PPA availability) - ubuntu_based = distro_id in ( - "ubuntu", - "debian", - "linuxmint", - "pop", - "zorin", - ) - - except FileNotFoundError: - # /etc/os-release doesn't exist (not a standard Linux or very old system) - distro_id = "unknown" - distro_version = "unknown" - distro_name = "unknown" - except Exception as e: - # Other errors reading/parsing the file - distro_id = f"error: {e}" - distro_version = "unknown" - distro_name = "unknown" - - return { - "os": system, - "arch": machine, - "distro": distro_id, - "distro_version": distro_version, - "distro_name": distro_name, - "ubuntu_based": ubuntu_based, - } - - -def format_platform_info( - platform_info: Optional[Dict[str, Optional[str]]] = None -) -> str: - """ - Format platform information as a human-readable string. - - Args: - platform_info: Platform information dict from detect_platform(). - If None, will call detect_platform() automatically. - - Returns: - str: Formatted platform string like "Linux x86_64 (Ubuntu 22.04)" - - Example: - >>> print(format_platform_info()) - 'Linux x86_64 (Ubuntu 22.04)' - """ - if platform_info is None: - platform_info = detect_platform() - - os_name = platform_info["os"] - arch = platform_info["arch"] - - # Build distro info if available - distro_parts = [] - if platform_info.get("distro") and platform_info["distro"] not in ( - "unknown", - "error", - ): - distro_parts.append(platform_info["distro"].capitalize()) - if ( - platform_info.get("distro_version") - and platform_info["distro_version"] != "unknown" - ): - distro_parts.append(platform_info["distro_version"]) - - if distro_parts: - return f"{os_name} {arch} ({' '.join(distro_parts)})" - else: - return f"{os_name} {arch}" diff --git a/python/tools/prereqs/python_packages.py b/python/tools/prereqs/python_packages.py deleted file mode 100644 index 72f771bba76..00000000000 --- a/python/tools/prereqs/python_packages.py +++ /dev/null @@ -1,375 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025-present NVIDIA CORPORATION & AFFILIATES. -# All rights reserved. -# SPDX-License-Identifier: BSD-3-Clause -""" -Python package validation for nvFuser build system. - -Validates that required Python packages (pybind11, PyTorch) are installed with -correct versions and features. This module handles pybind11 and PyTorch validation. -""" - -import re -import shutil -import subprocess -from typing import Optional, Tuple - -from .exceptions import PrerequisiteMissingError -from .requirements import ( - PYTORCH, - CUDA, - PYBIND11, - format_version, - parse_version, - pytorch_install_instructions, - CUDA_AVAILABLE, -) - - -def _detect_system_cuda() -> Optional[str]: - """ - Detect system CUDA toolkit version via nvcc. - - Returns: - Optional[str]: CUDA version string (e.g., "12.5", "13.0"), or None if not found - - Example: - >>> version = _detect_system_cuda() - >>> version - '12.5' - """ - # Check if nvcc exists - if not shutil.which("nvcc"): - return None - - try: - result = subprocess.run( - ["nvcc", "--version"], capture_output=True, text=True, check=True - ) - - # Parse version from output - # Example: "Cuda compilation tools, release 12.5, V12.5.40" - # or: "release 13.0, V13.0.76" - for line in result.stdout.splitlines(): - match = re.search(r"release (\d+\.\d+)", line.lower()) - if match: - return match.group(1) - - return None - - except (subprocess.CalledProcessError, FileNotFoundError): - return None - - -def _get_torch_install_instructions( - upgrade: bool = False, force_reinstall: bool = False -) -> str: - """ - Generate PyTorch installation instructions with appropriate pip flags. - - Args: - upgrade: If True, adds --upgrade flag - force_reinstall: If True, adds --force-reinstall flag - - Returns: - Formatted installation instructions for available CUDA versions - """ - # Use centralized pytorch_install_instructions with force_reinstall handling - if force_reinstall: - # Custom handling for force_reinstall since the centralized function - # only supports upgrade flag - from .requirements import pytorch_index_url - - lines = [] - for cuda in CUDA_AVAILABLE: - lines.append(f" # For CUDA {format_version(cuda)}:") - lines.append( - f" pip install --force-reinstall torch --index-url {pytorch_index_url(cuda)}" - ) - return "\n".join(lines) - - return pytorch_install_instructions(upgrade=upgrade) - - -def _get_torch_install_for_cuda_major(cuda_major: int) -> str: - """ - Generate PyTorch install command for a specific CUDA major version. - - Finds the best matching CUDA version from CUDA_AVAILABLE. - - Args: - cuda_major: CUDA major version (e.g., 12, 13) - - Returns: - Formatted pip install command string - """ - from .requirements import pytorch_index_url - - # Find matching CUDA version from available versions - matching = [cuda for cuda in CUDA_AVAILABLE if cuda[0] == cuda_major] - if matching: - # Use the first matching version (they're sorted newest first) - cuda = matching[0] - return f" pip install torch --index-url {pytorch_index_url(cuda)}\n" - else: - # Fallback: suggest checking PyTorch website - return f" # Check https://pytorch.org for {CUDA.name} {cuda_major} wheels\n" - - -def check_pybind11_installed() -> str: - """ - Check that pybind11 is installed with CMake support. - - pybind11 with CMake support is required for building nvFuser's Python bindings. - The [global] extra is recommended as it provides CMake integration files. - - In pybind11 3.0+, CMake configuration files are included in the base package, - so [global] is optional but still recommended. - - Returns: - str: pybind11 version string - - Raises: - PrerequisiteMissingError: If pybind11 is not installed, version is too old, - or CMake support is missing - - Example: - >>> version = check_pybind11_installed() - [nvFuser] pybind11: 2.13.6 with CMake support ✓ - >>> version - '2.13.6' - """ - # Check if pybind11 is installed - try: - import pybind11 - except ImportError: - raise PrerequisiteMissingError( - f"ERROR: {PYBIND11.name} is not installed.\n\n" - f"{PYBIND11.name} {PYBIND11.min_display} is required to build nvFuser's Python bindings.\n" - f"The [global] extra provides CMake integration.\n\n" - f"Install all build dependencies:\n" - f" pip install -r requirements.txt\n\n" - f"Or install {PYBIND11.name} individually:\n" - f" pip install 'pybind11[global]>={PYBIND11.min_str}'" - ) - - # Check version - version = pybind11.__version__ - - # Parse version using centralized parser - try: - detected = parse_version(version) - except ValueError: - raise PrerequisiteMissingError( - f"ERROR: Could not parse {PYBIND11.name} version: {version}\n\n" - f"Install all build dependencies:\n" - f" pip install -r requirements.txt\n\n" - f"Or reinstall {PYBIND11.name} individually:\n" - f" pip install --force-reinstall 'pybind11[global]>={PYBIND11.min_str}'" - ) - - # Check minimum version requirement - if not PYBIND11.check(detected): - raise PrerequisiteMissingError( - f"ERROR: {PYBIND11.name} {PYBIND11.min_display} is required to build nvFuser.\n" - f"Found: {PYBIND11.name} {version}\n\n" - f"Install all build dependencies:\n" - f" pip install -r requirements.txt\n\n" - f"Or upgrade {PYBIND11.name} individually:\n" - f" pip install --upgrade 'pybind11[global]>={PYBIND11.min_str}'" - ) - - # Check for CMake support - # In pybind11 2.x and earlier, CMake support may not be available - # In pybind11 3.0+, CMake files are included in the base package - # The [global] extra is recommended but optional in 3.0+ - - # Check if get_cmake_dir() exists and returns a valid path - try: - cmake_dir = pybind11.get_cmake_dir() - except AttributeError: - # Very old pybind11 without CMake support at all - raise PrerequisiteMissingError( - f"ERROR: {PYBIND11.name} is installed without CMake support.\n\n" - f"Found: {PYBIND11.name} {version} (too old)\n\n" - f"Install all build dependencies:\n" - f" pip install -r requirements.txt\n\n" - f"Or upgrade {PYBIND11.name} individually:\n" - f" pip install --upgrade 'pybind11[global]>={PYBIND11.min_str}'" - ) - - # Verify the cmake directory exists and contains config files - import os - - if not os.path.exists(cmake_dir) or not os.path.exists( - os.path.join(cmake_dir, "pybind11Config.cmake") - ): - raise PrerequisiteMissingError( - f"ERROR: {PYBIND11.name} CMake configuration is missing or invalid.\n\n" - f"Found: {PYBIND11.name} {version} (CMake dir: {cmake_dir})\n\n" - f"Install all build dependencies:\n" - f" pip install -r requirements.txt\n\n" - f"Or reinstall {PYBIND11.name} individually:\n" - f" pip install --force-reinstall 'pybind11[global]>={PYBIND11.min_str}'" - ) - - return version - - -def check_torch_installed() -> Tuple[str, str]: - """ - Check that PyTorch with CUDA support is installed. - - Note: CUDA versions earlier than the minimum have known compatibility issues - with nvFuser (missing Float8 types). Use the minimum CUDA version or newer. - - nvFuser requires PyTorch compiled with CUDA support. CPU-only PyTorch - builds are not supported. The CUDA version must match the system CUDA toolkit - that will be used to build nvFuser. - - Returns: - Tuple[str, str]: (torch_version, cuda_version_str) - - Raises: - PrerequisiteMissingError: If PyTorch is not installed, version is too old, - is CPU-only, or has CUDA below minimum - - Example: - >>> version, cuda = check_torch_installed() - [nvFuser] PyTorch: X.Y.Z with CUDA X.Y ✓ - >>> version, cuda - ('X.Y.Z', 'X.Y') # Actual versions detected at runtime - """ - # Check if PyTorch is installed - try: - import torch - except ImportError: - raise PrerequisiteMissingError( - f"ERROR: {PYTORCH.name} is not installed.\n\n" - f"nvFuser requires {PYTORCH.name} {PYTORCH.min_display} with {CUDA.name} {CUDA.min_display} support.\n" - f"The {CUDA.name} version must match your system CUDA toolkit.\n" - f"Check your system {CUDA.name} version: nvcc --version\n\n" - f"Install {PYTORCH.name} with {CUDA.name} support:\n" - f"{_get_torch_install_instructions()}\n\n" - f"Visit https://pytorch.org for more installation options." - ) - - # Get PyTorch version (remove any +cu130 suffix) - torch_version = torch.__version__.split("+")[0] - - # Parse version using centralized parser - try: - torch_detected = parse_version(torch_version) - except ValueError: - raise PrerequisiteMissingError( - f"ERROR: Could not parse {PYTORCH.name} version: {torch.__version__}\n\n" - f"Please reinstall {PYTORCH.name}:\n" - f"{_get_torch_install_instructions(force_reinstall=True)}" - ) - - # Check minimum version requirement - if not PYTORCH.check(torch_detected): - raise PrerequisiteMissingError( - f"ERROR: {PYTORCH.name} {PYTORCH.min_display} is required to build nvFuser.\n" - f"Found: {PYTORCH.name} {torch_version}\n\n" - f"Upgrade {PYTORCH.name} (match your system {CUDA.name} version):\n" - f"{_get_torch_install_instructions(upgrade=True)}" - ) - - # Check if PyTorch has CUDA support (not CPU-only) - cuda_version_str = torch.version.cuda - if cuda_version_str is None: - raise PrerequisiteMissingError( - f"ERROR: {PYTORCH.name} is CPU-only. nvFuser requires {CUDA.name}-enabled {PYTORCH.name}.\n\n" - f"You have installed {PYTORCH.name} without {CUDA.name} support. This is a common mistake.\n" - f"nvFuser needs {PYTORCH.name} compiled with {CUDA.name} {CUDA.min_display} to build and run correctly.\n" - f"The {CUDA.name} version must match your system CUDA toolkit.\n" - f"Check your system {CUDA.name} version: nvcc --version\n\n" - f"Install {PYTORCH.name} with {CUDA.name} support:\n" - f"{_get_torch_install_instructions()}" - ) - - # Parse CUDA version using centralized parser - try: - cuda_detected = parse_version(cuda_version_str) - except ValueError: - raise PrerequisiteMissingError( - f"ERROR: Could not parse {CUDA.name} version from {PYTORCH.name}: {cuda_version_str}\n\n" - f"Please reinstall {PYTORCH.name} with {CUDA.name} {CUDA.min_display}:\n" - f"{_get_torch_install_instructions(force_reinstall=True)}" - ) - - # Check CUDA version requirement - # CUDA versions earlier than minimum have known issues (missing Float8 types) - if not CUDA.check(cuda_detected): - raise PrerequisiteMissingError( - f"ERROR: {PYTORCH.name} with {CUDA.name} {CUDA.min_display} is required to build nvFuser.\n" - f"Found: {PYTORCH.name} {torch_version} with {CUDA.name} {cuda_version_str}\n\n" - f"{CUDA.name} versions earlier than {CUDA.min_str} have known compatibility issues with nvFuser\n" - f"(missing Float8 types cause build errors).\n\n" - f"Please upgrade {PYTORCH.name} to {CUDA.name} {CUDA.min_display}:\n" - f"{_get_torch_install_instructions(upgrade=True)}" - ) - - # Detect and validate system CUDA toolkit - system_cuda = _detect_system_cuda() - cuda_major = cuda_detected[0] - cuda_minor = cuda_detected[1] if len(cuda_detected) > 1 else 0 - - if system_cuda is None: - # System CUDA not found - this is a problem - raise PrerequisiteMissingError( - f"ERROR: System {CUDA.name} toolkit not found.\n\n" - f"{PYTORCH.name} has {CUDA.name} {cuda_version_str} support, but nvcc is not in PATH.\n" - f"nvFuser needs the {CUDA.name} toolkit (nvcc compiler) to build.\n\n" - f"Install {CUDA.name} toolkit {cuda_major}.{cuda_minor} (major version must match {PYTORCH.name}):\n" - f" # Check available versions:\n" - f" # https://developer.nvidia.com/cuda-downloads\n" - f" # For Ubuntu 22.04 with {CUDA.name} {cuda_major}.{cuda_minor}:\n" - f" wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.0-1_all.deb\n" - f" sudo dpkg -i cuda-keyring_1.0-1_all.deb\n" - f" sudo apt update\n" - f" sudo apt install cuda-toolkit-{cuda_major}-{cuda_minor} # Match {PYTORCH.name} {CUDA.name} major version\n" - ) - - # Parse system CUDA version - try: - system_cuda_detected = parse_version(system_cuda) - system_cuda_major = system_cuda_detected[0] - system_cuda_minor = ( - system_cuda_detected[1] if len(system_cuda_detected) > 1 else 0 - ) - except ValueError: - raise PrerequisiteMissingError( - f"ERROR: Could not parse {CUDA.name} versions.\n" - f"{PYTORCH.name} {CUDA.name}: {cuda_version_str}\n" - f"System {CUDA.name}: {system_cuda}\n\n" - f"Please verify your {CUDA.name} installations are correct." - ) - - # Validate major version match (REQUIRED) - if cuda_major != system_cuda_major: - raise PrerequisiteMissingError( - f"ERROR: {CUDA.name} version mismatch between {PYTORCH.name} and system.\n\n" - f"{PYTORCH.name} {CUDA.name}: {cuda_version_str} (major: {cuda_major})\n" - f"System {CUDA.name}: {system_cuda} (major: {system_cuda_major})\n\n" - f"nvFuser requires the {CUDA.name} major versions to match.\n" - f"Code compiled with {CUDA.name} {system_cuda_major} cannot link with {PYTORCH.name} built for {CUDA.name} {cuda_major}.\n\n" - f"Solutions:\n" - f" 1. Install {PYTORCH.name} matching your system {CUDA.name} {system_cuda_major}:\n" - f"{_get_torch_install_for_cuda_major(system_cuda_major)}" - f" OR\n" - f" 2. Install system {CUDA.name} toolkit matching {PYTORCH.name} {CUDA.name} {cuda_major}:\n" - f" See: https://developer.nvidia.com/cuda-downloads\n" - ) - - # Check minor version (WARNING only, not error) - if cuda_minor != system_cuda_minor: - print(f"[nvFuser] WARNING: {CUDA.name} minor version mismatch") - print(f" {PYTORCH.name} {CUDA.name}: {cuda_version_str}") - print(f" System {CUDA.name}: {system_cuda}") - print(f" Major versions match ({cuda_major}), but minor versions differ.") - print( - " Build should work, but consider matching minor versions for best compatibility." - ) - - return torch_version, cuda_version_str diff --git a/python/tools/prereqs/python_version.py b/python/tools/prereqs/python_version.py deleted file mode 100644 index 8785015da2d..00000000000 --- a/python/tools/prereqs/python_version.py +++ /dev/null @@ -1,114 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025-present NVIDIA CORPORATION & AFFILIATES. -# All rights reserved. -# SPDX-License-Identifier: BSD-3-Clause -""" -Python version validation for nvFuser build. - -nvFuser requires Python 3.8+ for modern type hints and language features. -This module detects the Python version and provides actionable error messages -when the version is too old. -""" - -import sys -from typing import Tuple - -from .exceptions import PrerequisiteMissingError -from .platform import detect_platform -from .requirements import PYTHON, format_version - - -def check_python_version() -> Tuple[int, int, int]: - """ - Check that Python version meets nvFuser's minimum requirement. - - Returns: - Tuple[int, int, int]: Python version as (major, minor, patch) tuple - - Raises: - PrerequisiteMissingError: If Python version is below minimum - - Example: - >>> version = check_python_version() - [nvFuser] Python: 3.10.12 ✓ - >>> version - (3, 10, 12) - """ - version = sys.version_info - major, minor, patch = version.major, version.minor, version.micro - detected = (major, minor, patch) - - # Check minimum version requirement - if not PYTHON.check(detected): - platform_info = detect_platform() - recommended = PYTHON.recommended_str - error_msg = ( - f"ERROR: {PYTHON.name} {PYTHON.min_display} is required to build nvFuser.\n" - f"Found: {PYTHON.name} {format_version(detected)}\n\n" - f"nvFuser uses modern Python features including:\n" - f" - Type hints (PEP 484, 585, 604)\n" - f" - Assignment expressions (PEP 572)\n" - f" - Positional-only parameters (PEP 570)\n\n" - f"{PYTHON.name} {PYTHON.min_display} is required; {PYTHON.name} {recommended} is recommended and used in the commands below.\n\n" - f"To install {PYTHON.name} {recommended}:\n" - ) - - # Add platform-specific installation guidance - if platform_info["os"] == "Linux": - if platform_info.get("ubuntu_based", False): - error_msg += ( - f"\n" - f"On Ubuntu or Ubuntu-based distros:\n" - f" # Step 1: Install {PYTHON.name} {recommended} and venv support\n" - f" sudo apt update\n" - f" sudo apt install python{recommended} python3-venv python{recommended}-venv python{recommended}-dev\n" - f" # Note: Some packages may not exist on all releases; install what's available\n" - f"\n" - f" # Step 2: Create virtual environment\n" - f" python{recommended} -m venv nvfuser_env\n" - f" source nvfuser_env/bin/activate\n" - f" python -m pip install --upgrade pip\n" - f"\n" - f" If python{recommended}-venv is not available, install the generic python3-venv package\n" - f" or follow your distribution's Python setup guide.\n" - ) - else: - error_msg += ( - f"\n" - f"On other Linux distributions:\n" - f" # Step 1: Install {PYTHON.name} {recommended}+ and development headers using your package manager\n" - f" # Example (RHEL/CentOS/Fedora):\n" - f" # sudo yum install python{recommended} python{recommended}-devel\n" - f"\n" - f" # Step 2: Create virtual environment\n" - f" python{recommended} -m venv nvfuser_env\n" - f" source nvfuser_env/bin/activate\n" - f" python -m pip install --upgrade pip\n" - f"\n" - f" If your distro does not package {PYTHON.name} {recommended}, consider using pyenv, Conda,\n" - f" or your distro's documented method to install a newer Python.\n" - ) - elif platform_info["os"] == "Darwin": - error_msg += ( - f"\n" - f"On macOS:\n" - f" # Step 1: Install {PYTHON.name} {recommended} via Homebrew\n" - f" brew install python@{recommended}\n" - f"\n" - f" # Step 2: Create virtual environment\n" - f" python{recommended} -m venv nvfuser_env\n" - f" source nvfuser_env/bin/activate\n" - f" python -m pip install --upgrade pip\n" - ) - - # Add conda as alternative (common in ML/PyTorch community) - error_msg += ( - f"\n" - f"Alternative - using conda/miniconda:\n" - f" conda create -n nvfuser python={recommended}\n" - f" conda activate nvfuser\n" - f" python -m pip install --upgrade pip\n" - ) - - raise PrerequisiteMissingError(error_msg) - - return (major, minor, patch) diff --git a/python/tools/prereqs/requirements.py b/python/tools/prereqs/requirements.py deleted file mode 100644 index 69cc669536b..00000000000 --- a/python/tools/prereqs/requirements.py +++ /dev/null @@ -1,269 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025-present NVIDIA CORPORATION & AFFILIATES. -# All rights reserved. -# SPDX-License-Identifier: BSD-3-Clause -""" -Central source of truth for nvFuser build requirements. - -UPDATE VERSIONS HERE when requirements change. All validation modules -import from this file, so changes propagate automatically. - -Example: - from .requirements import CUDA, LLVM, parse_version - - detected = parse_version("18.1.8") - if not LLVM.check(detected): - raise PrerequisiteMissingError(f"{LLVM.name} {LLVM.min_display} required") -""" - -import platform -import re -from dataclasses import dataclass -from typing import Optional, Tuple - - -# ============================================================================= -# VERSION CONVERSION UTILITIES -# ============================================================================= - - -def parse_version(version_str: str) -> Tuple[int, ...]: - """ - Parse version string to tuple. - - Args: - version_str: Version string like "3.8", "18.1.8", "13", "18.1.8git" - - Returns: - Tuple of integers: (3, 8), (18, 1, 8), (13,), (18, 1, 8) - - Examples: - >>> parse_version("3.8") - (3, 8) - >>> parse_version("18.1.8") - (18, 1, 8) - >>> parse_version("13") - (13,) - >>> parse_version("18.1.8git") # strips non-numeric suffix - (18, 1, 8) - - Raises: - ValueError: If version string cannot be parsed - """ - # Strip common suffixes like "git", "rc1", "+cu128", etc. - clean = re.match(r"^[\d.]+", version_str.strip()) - if not clean: - raise ValueError(f"Cannot parse version: {version_str}") - - parts = clean.group().rstrip(".").split(".") - return tuple(int(p) for p in parts if p) - - -def format_version(version: Tuple[int, ...]) -> str: - """ - Format version tuple to string. - - Args: - version: Tuple of integers like (3, 8), (18, 1, 8), (13,) - - Returns: - Version string: "3.8", "18.1.8", "13" - - Examples: - >>> format_version((3, 8)) - '3.8' - >>> format_version((18, 1, 8)) - '18.1.8' - >>> format_version((13,)) - '13' - """ - return ".".join(map(str, version)) - - -# ============================================================================= -# REQUIREMENT DATACLASS -# ============================================================================= - - -@dataclass -class Requirement: - """ - A version requirement with optional recommended version for downloads. - - Attributes: - name: Human-readable name (e.g., "CMake", "LLVM") - min_version: Minimum required version tuple, or None for "any version" - recommended: Recommended version tuple for download URLs (optional) - - Examples: - >>> CMAKE = Requirement("CMake", (3, 18)) - >>> CMAKE.min_str - '3.18' - >>> CMAKE.min_display - '3.18+' - >>> CMAKE.check((3, 22, 1)) - True - - >>> NINJA = Requirement("Ninja", None) # Any version - >>> NINJA.min_display - 'any version' - >>> NINJA.check((1, 0, 0)) - True - - >>> LLVM = Requirement("LLVM", (18, 1), recommended=(18, 1, 8)) - >>> LLVM.min_str - '18.1' - >>> LLVM.recommended_str - '18.1.8' - """ - - name: str - min_version: Optional[Tuple[int, ...]] - recommended: Optional[Tuple[int, ...]] = None - - @property - def min_str(self) -> str: - """Minimum version as string: '3.18' or 'any'""" - if self.min_version is None: - return "any" - return format_version(self.min_version) - - @property - def min_display(self) -> str: - """Minimum version for display: '3.18+' or 'any version'""" - if self.min_version is None: - return "any version" - return f"{self.min_str}+" - - @property - def recommended_str(self) -> str: - """Recommended version as string, falls back to min_str""" - if self.recommended is None: - return self.min_str - return format_version(self.recommended) - - def check(self, detected: Tuple[int, ...]) -> bool: - """ - Check if detected version meets minimum requirement. - - Args: - detected: Detected version tuple (e.g., from parse_version) - - Returns: - True if detected >= min_version (or min_version is None) - - Note: - Compares only as many parts as min_version specifies. - So (3, 22, 1) >= (3, 18) compares (3, 22) >= (3, 18) -> True - """ - if self.min_version is None: - return True - # Compare only as many parts as min_version specifies - return detected[: len(self.min_version)] >= self.min_version - - -# ============================================================================= -# VERSION REQUIREMENTS - UPDATE THESE WHEN VERSIONS CHANGE -# ============================================================================= - -PYTHON = Requirement("Python", (3, 8), recommended=(3, 10)) -CMAKE = Requirement("CMake", (3, 18)) -NINJA = Requirement("Ninja", None) # Any version accepted -PYTORCH = Requirement("PyTorch", (2, 0)) -CUDA = Requirement("CUDA", (12, 8)) # Minimum PyTorch CUDA version -PYBIND11 = Requirement("pybind11", (2, 0)) -GCC = Requirement("GCC", (13,)) # Major version only; requires header -CLANG = Requirement("Clang", (19,)) # Major version only; Clang 19 has support -LLVM = Requirement("LLVM", (18, 1), recommended=(18, 1, 8)) - - -# ============================================================================= -# AVAILABLE CUDA VERSIONS - For install instructions -# ============================================================================= - -# PyTorch wheel CUDA versions currently available (newest first) -CUDA_AVAILABLE = [(13, 0), (12, 8)] - - -# ============================================================================= -# URL GENERATORS -# ============================================================================= - - -def cuda_wheel_suffix(cuda: Tuple[int, int]) -> str: - """ - Convert CUDA version tuple to PyTorch wheel suffix. - - Examples: - >>> cuda_wheel_suffix((12, 8)) - 'cu128' - >>> cuda_wheel_suffix((13, 0)) - 'cu130' - """ - return f"cu{cuda[0]}{cuda[1]}" - - -def pytorch_index_url(cuda: Tuple[int, int]) -> str: - """ - Generate PyTorch wheel index URL for a CUDA version. - - Examples: - >>> pytorch_index_url((12, 8)) - 'https://download.pytorch.org/whl/cu128' - >>> pytorch_index_url((13, 0)) - 'https://download.pytorch.org/whl/cu130' - """ - return f"https://download.pytorch.org/whl/{cuda_wheel_suffix(cuda)}" - - -def llvm_download_url(version: Tuple[int, ...] = None) -> str: - """ - Generate LLVM prebuilt binary download URL. - - Args: - version: LLVM version tuple, defaults to LLVM.recommended - - Returns: - GitHub release URL for Ubuntu 18.04 x86_64 binary - - Example: - >>> llvm_download_url() - 'https://github.com/llvm/llvm-project/releases/download/llvmorg-18.1.8/clang+llvm-18.1.8-x86_64-linux-gnu-ubuntu-18.04.tar.xz' - """ - v = format_version(version) if version else LLVM.recommended_str - machine = platform.machine() - if machine == "x86_64": - return ( - f"https://github.com/llvm/llvm-project/releases/download/" - f"llvmorg-{v}/clang+llvm-{v}-x86_64-linux-gnu-ubuntu-18.04.tar.xz" - ) - elif machine == "aarch64": - return ( - f"https://github.com/llvm/llvm-project/releases/download/" - f"llvmorg-{v}/clang+llvm-{v}-aarch64-linux-gnu.tar.xz" - ) - elif machine.startswith("arm64"): - # 64-bit ARM (macos) - return ( - f"https://github.com/llvm/llvm-project/releases/download/" - f"llvmorg-{v}/clang+llvm-{v}-arm64-apple-macos11.tar.xz" - ) - else: - raise NotImplementedError(f"LLVM does not support machine type: {machine}") - - -def pytorch_install_instructions(upgrade: bool = False) -> str: - """ - Generate PyTorch installation instructions for all available CUDA versions. - - Args: - upgrade: If True, adds --upgrade flag - - Returns: - Multi-line string with pip install commands - """ - flag = " --upgrade" if upgrade else "" - lines = [] - for cuda in CUDA_AVAILABLE: - lines.append(f" # For CUDA {format_version(cuda)}:") - lines.append(f" pip install{flag} torch --index-url {pytorch_index_url(cuda)}") - return "\n".join(lines) diff --git a/python/tools/prereqs/validate.py b/python/tools/prereqs/validate.py deleted file mode 100644 index 3f5f55f6e97..00000000000 --- a/python/tools/prereqs/validate.py +++ /dev/null @@ -1,168 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025-present NVIDIA CORPORATION & AFFILIATES. -# All rights reserved. -# SPDX-License-Identifier: BSD-3-Clause -""" -Prerequisite validation orchestrator for nvFuser build. - -This module coordinates all prerequisite checks in the correct order and -provides a summary of the validation results. It is designed to be called -from setup.py before the build begins. - -The validation follows a fail-fast approach: if any prerequisite check fails, -it raises PrerequisiteMissingError immediately with actionable error messages. -""" - -import sys -from typing import Any, Dict - -from .platform import detect_platform, format_platform_info -from .python_version import check_python_version -from .build_tools import check_cmake_version, check_ninja_installed -from .python_packages import check_pybind11_installed, check_torch_installed -from .git import check_git_submodules_initialized -from .compiler import validate_compiler -from .nccl import check_nccl_available -from .llvm import check_llvm_installed -from .requirements import ( - PYTHON, - CMAKE, - NINJA, - PYTORCH, - CUDA, - PYBIND11, - GCC, - CLANG, - LLVM, - format_version, -) - - -def validate_prerequisites() -> Dict[str, Any]: - """ - Validate all nvFuser build prerequisites in the correct order. - - This function runs all prerequisite checks sequentially and collects - metadata about the system. If any check fails, it raises PrerequisiteMissingError - with detailed instructions on how to fix the issue. - - Check order (fail-fast after platform detection): - 1. Platform detection (informational only) - 2. Python - 3. CMake - 4. Ninja - 5. PyTorch with CUDA (includes system CUDA validation) - 6. pybind11 - 7. Git submodules initialized - 8. C++ compiler (GCC 13+ or Clang 19+) with header - 9. NCCL headers/library (if distributed enabled) - 10. LLVM - - Returns: - Dict[str, Any]: Dictionary containing metadata about all detected prerequisites - - Raises: - PrerequisiteMissingError: If any prerequisite is missing or has wrong version - - Example: - >>> metadata = validate_prerequisites() - [nvFuser] Platform: Linux x86_64, Ubuntu 22.04 - [nvFuser] ✓ Python X.Y.Z >= {PYTHON.min_str} - [nvFuser] ✓ CMake X.Y.Z >= {CMAKE.min_str} - [nvFuser] ✓ Ninja X.Y.Z (any version) - [nvFuser] ✓ PyTorch X.Y with CUDA X.Y >= {PYTORCH.min_str} with CUDA {CUDA.min_str} - [nvFuser] ✓ pybind11 X.Y.Z >= {PYBIND11.min_str} with CMake support - [nvFuser] ✓ Git submodules: N initialized - [nvFuser] ✓ GCC X.Y.Z >= {GCC.min_str} with header - [nvFuser] ✓ NCCL found (headers: /path/to/nccl/include) - [nvFuser] ✓ LLVM X.Y.Z >= {LLVM.min_str} - - ✓✓✓ All prerequisites validated ✓✓✓ - - Note: Version requirements are defined in requirements.py. - - >>> metadata.keys() - dict_keys(['platform', 'python', 'cmake', 'ninja', 'torch', 'cuda', - 'pybind11', 'git_submodules', 'compiler', 'nccl', 'llvm']) - """ - # Prominent banner - start of validation - print("\n" + "=" * 60) - print("[nvFuser] Validating build prerequisites...") - print("=" * 60) - sys.stdout.flush() - - # Platform detection (informational only - doesn't fail) - platform_info = detect_platform() - platform_str = format_platform_info(platform_info) - print(f"[nvFuser] Platform: {platform_str}") - - # Python version check - python_ver = check_python_version() - print(f"[nvFuser] ✓ {PYTHON.name} {format_version(python_ver)} >= {PYTHON.min_str}") - - # Build tools checks - cmake_ver = check_cmake_version() - print(f"[nvFuser] ✓ {CMAKE.name} {format_version(cmake_ver)} >= {CMAKE.min_str}") - - ninja_ver = check_ninja_installed() - ninja_display = f">= {NINJA.min_str}" if NINJA.min_version else "(any version)" - print(f"[nvFuser] ✓ {NINJA.name} {ninja_ver} {ninja_display}") - - # PyTorch and CUDA check (includes system CUDA validation) - torch_ver, cuda_ver = check_torch_installed() - print( - f"[nvFuser] ✓ {PYTORCH.name} {torch_ver} with {CUDA.name} {cuda_ver} >= {PYTORCH.min_str} with {CUDA.name} {CUDA.min_str}" - ) - # System CUDA validation messages are printed by check_torch_installed() - - # pybind11 check - pybind11_ver = check_pybind11_installed() - print( - f"[nvFuser] ✓ {PYBIND11.name} {pybind11_ver} >= {PYBIND11.min_str} with CMake support" - ) - - # Git submodules check - submodules = check_git_submodules_initialized() - if submodules: - print(f"[nvFuser] ✓ Git submodules: {len(submodules)} initialized") - else: - print("[nvFuser] ✓ Git submodules: N/A (not a git repository)") - - # C++ compiler validation (GCC 13+ or Clang 19+) - compiler_type, compiler_ver = validate_compiler() - req = CLANG if compiler_type == "clang" else GCC - print( - f"[nvFuser] ✓ {req.name} {format_version(compiler_ver)} >= {req.min_str} with header" - ) - - # NCCL check (only when distributed is enabled) - nccl_result = check_nccl_available() - if nccl_result: - nccl_inc, nccl_lib = nccl_result - print(f"[nvFuser] ✓ NCCL found (headers: {nccl_inc})") - else: - print("[nvFuser] ✓ NCCL: skipped (distributed disabled)") - - # LLVM check - llvm_ver = check_llvm_installed() - print(f"[nvFuser] ✓ {LLVM.name} {llvm_ver} >= {LLVM.min_str}") - - # Success summary with prominent banner - print("\n" + "=" * 60) - print("✓✓✓ All prerequisites validated ✓✓✓") - print("=" * 60 + "\n") - sys.stdout.flush() - - # Return collected metadata - return { - "platform": platform_info, - "python": python_ver, - "cmake": cmake_ver, - "ninja": ninja_ver, - "torch": torch_ver, - "cuda": cuda_ver, - "pybind11": pybind11_ver, - "git_submodules": submodules, - "compiler": (compiler_type, compiler_ver), - "nccl": nccl_result, - "llvm": llvm_ver, - } diff --git a/python/utils.py b/python/utils.py index 027db531050..701f6f33317 100644 --- a/python/utils.py +++ b/python/utils.py @@ -9,8 +9,6 @@ import sys import shutil from dataclasses import dataclass, field -from pathlib import Path -from typing import Optional import setuptools.command.build_ext @@ -413,32 +411,6 @@ def get_cmake_bin(): return "cmake" -def get_pip_nccl_include_dir() -> Optional[str]: - """ - Find NCCL include directory from pip-installed nvidia-nccl-cu* package. - - PyTorch's pip package depends on nvidia-nccl-cu* which bundles NCCL headers - at {site-packages}/nvidia/nccl/include/nccl.h. This path is needed for - compiling nvFuser's distributed support. - - Note: Similar logic exists in tools/prereqs/nccl.py::_get_pip_nccl_paths() - for validation. That function returns both include AND lib paths for complete - prerequisite checking, while this function only needs the include path to pass - to CMake. The duplication is intentional to keep validation and build logic - independent. - - Returns: - Path to NCCL include directory if found, None otherwise - """ - for site_path in sys.path: - if not site_path: - continue - nccl_include = Path(site_path) / "nvidia" / "nccl" / "include" - if (nccl_include / "nccl.h").exists(): - return str(nccl_include) - return None - - def cmake(config, relative_path): from tools.memory import get_available_memory_gb @@ -505,12 +477,6 @@ def on_or_off(flag: bool) -> str: "-B", cmake_build_dir, ] - # Add NCCL include path from pip-bundled nvidia-nccl-cu* package if available - # This is needed for compiling distributed support when using pip-installed PyTorch - if not config.build_without_distributed: - nccl_include = get_pip_nccl_include_dir() - if nccl_include: - cmd_str.append(f"-DNCCL_INCLUDE_DIR={nccl_include}") if config.nvmmh_include_dir: cmd_str.append(f"-DNVMMH_INCLUDE_DIR={config.nvmmh_include_dir}") if not config.no_ninja: diff --git a/requirements.txt b/requirements.txt index 8f6c22da50c..7f3639c99fc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,2 @@ -# Build dependencies (required before running setup.py) -cmake>=3.18 +pybind11[global] ninja -pybind11[global]>=2.0 diff --git a/setup.py b/setup.py index 25db80b1e11..e1cd19eb726 100644 --- a/setup.py +++ b/setup.py @@ -66,7 +66,6 @@ # TODO Remove tools/gen_nvfuser_version.py symbolic link to python/tools/gen_nvfuser_version.py # TODO Remove tools/memory.py symbolic link to python/tools/memory.py -import os import sys @@ -89,17 +88,6 @@ def version_tag(config): def main(): - # Deprecation warning - recommend pip install method - print( - "\n" + "=" * 70 + "\n" - "DEPRECATED: 'python setup.py develop' is deprecated.\n" - "Recommended: pip install --no-build-isolation -e python -v\n" - + "=" * 70 - + "\n", - file=sys.stderr, - ) - sys.stdout.flush() - # Parse arguments using argparse config, forward_args = create_build_config() @@ -107,24 +95,6 @@ def main(): # only disables BUILD_SETUP, but keep the argument for setuptools config.build_setup = False - # Prerequisite validation (can be skipped with NVFUSER_BUILD_SKIP_VALIDATION) - if config.build_setup and not os.environ.get("NVFUSER_BUILD_SKIP_VALIDATION"): - try: - from python.tools.prereqs import validate_prerequisites - - validate_prerequisites() - except ImportError as e: - # Prerequisite validation not available (shouldn't happen in dev) - print(f"WARNING: Could not import prerequisite validation: {e}") - except Exception as e: - # Prerequisite check failed - print(f"\n{e}\n", file=sys.stderr) - sys.exit(1) - elif os.environ.get("NVFUSER_BUILD_SKIP_VALIDATION"): - print( - "[nvFuser] Skipping prerequisite validation (NVFUSER_BUILD_SKIP_VALIDATION set)" - ) - if config.cpp_standard < 20: raise ValueError("nvfuser requires C++20 standard or higher")