diff --git a/bitsandbytes/cextension.py b/bitsandbytes/cextension.py
index 3d08c9084..e322693b5 100644
--- a/bitsandbytes/cextension.py
+++ b/bitsandbytes/cextension.py
@@ -120,7 +120,7 @@ def get_native_library() -> BNBNativeLibrary:
     hip_major, hip_minor = map(int, torch.version.hip.split(".")[0:2])
     HIP_ENVIRONMENT, BNB_HIP_VERSION = True, hip_major * 100 + hip_minor
     BNB_HIP_VERSION_SHORT = f"{hip_major}{hip_minor}"
-    BNB_BACKEND = "ROCM"
+    BNB_BACKEND = "ROCm"
 else:
     HIP_ENVIRONMENT, BNB_HIP_VERSION = False, 0
     BNB_HIP_VERSION_SHORT = ""
diff --git a/bitsandbytes/diagnostics/cuda.py b/bitsandbytes/diagnostics/cuda.py
index 6c66c6219..014b753a9 100644
--- a/bitsandbytes/diagnostics/cuda.py
+++ b/bitsandbytes/diagnostics/cuda.py
@@ -5,7 +5,7 @@
 
 import torch
 
-from bitsandbytes.cextension import BNB_BACKEND, HIP_ENVIRONMENT, get_cuda_bnb_library_path
+from bitsandbytes.cextension import HIP_ENVIRONMENT, get_cuda_bnb_library_path
 from bitsandbytes.consts import NONPYTORCH_DOC_URL
 from bitsandbytes.cuda_specs import CUDASpecs
 from bitsandbytes.diagnostics.utils import print_dedented
@@ -32,16 +32,18 @@
     "_",  # current Python interpreter
 }
 
-CUDA_RUNTIME_LIB_PATTERNS = (
-    "cudart64*.dll",  # Windows
-    "libcudart*.so*",  # libcudart.so, libcudart.so.11.0, libcudart.so.12.0, libcudart.so.12.1, libcudart.so.12.2 etc.
-    "nvcuda*.dll",  # Windows
-)
+logger = logging.getLogger(__name__)
 
-if HIP_ENVIRONMENT:
-    CUDA_RUNTIME_LIB_PATTERNS = ("libamdhip64.so*",)
 
-logger = logging.getLogger(__name__)
+def get_runtime_lib_patterns() -> tuple:
+    if HIP_ENVIRONMENT:
+        return ("libamdhip64.so*",)
+    else:
+        return (
+            "cudart64*.dll",  # Windows
+            "libcudart*.so*",  # libcudart.so, libcudart.so.11.0, libcudart.so.12.0, libcudart.so.12.1, libcudart.so.12.2 etc.
+            "nvcuda*.dll",  # Windows
+        )
 
 
 def find_cuda_libraries_in_path_list(paths_list_candidate: str) -> Iterable[Path]:
@@ -58,8 +60,8 @@ def find_cuda_libraries_in_path_list(paths_list_candidate: str) -> Iterable[Path]
                 continue
         except OSError:  # Assume an esoteric error trying to poke at the directory
             pass
-        for lib_pattern in CUDA_RUNTIME_LIB_PATTERNS:
-            for pth in dir.rglob(lib_pattern):
+        for lib_pattern in get_runtime_lib_patterns():
+            for pth in dir.glob(lib_pattern):
                 if pth.is_file() and not pth.is_symlink():
                     yield pth
     except (OSError, PermissionError):
@@ -107,59 +109,38 @@ def find_cudart_libraries() -> Iterator[Path]:
         yield from find_cuda_libraries_in_path_list(value)
 
 
-def print_cuda_diagnostics(cuda_specs: CUDASpecs) -> None:
-    if not HIP_ENVIRONMENT:
-        print(
-            f"PyTorch settings found: CUDA_VERSION={cuda_specs.cuda_version_string}, "
-            f"Highest Compute Capability: {cuda_specs.highest_compute_capability}.",
-        )
-    else:
-        print(f"PyTorch settings found: ROCM_VERSION={cuda_specs.cuda_version_string}")
+def _print_cuda_diagnostics(cuda_specs: CUDASpecs) -> None:
+    print(
+        f"PyTorch settings found: CUDA_VERSION={cuda_specs.cuda_version_string}, "
+        f"Highest Compute Capability: {cuda_specs.highest_compute_capability}.",
+    )
 
     binary_path = get_cuda_bnb_library_path(cuda_specs)
     if not binary_path.exists():
-        if not HIP_ENVIRONMENT:
-            print_dedented(
-                f"""
-                Library not found: {binary_path}. Maybe you need to compile it from source?
-                If you compiled from source, try again with `make CUDA_VERSION=DETECTED_CUDA_VERSION`,
-                for example, `make CUDA_VERSION=113`.
-
-                The CUDA version for the compile might depend on your conda install, if using conda.
-                Inspect CUDA version via `conda list | grep cuda`.
- """, - ) - else: - print_dedented( - f""" - Library not found: {binary_path}. - Maybe you need to compile it from source? If you compiled from source, check that ROCM_VERSION - in PyTorch Settings matches your ROCM install. If not, reinstall PyTorch for your ROCm version - and rebuild bitsandbytes. - """, - ) + print_dedented( + f""" + Library not found: {binary_path}. Maybe you need to compile it from source? + If you compiled from source, try again with `make CUDA_VERSION=DETECTED_CUDA_VERSION`, + for example, `make CUDA_VERSION=113`. + + The CUDA version for the compile might depend on your conda install, if using conda. + Inspect CUDA version via `conda list | grep cuda`. + """, + ) cuda_major, cuda_minor = cuda_specs.cuda_version_tuple - if not HIP_ENVIRONMENT: - if cuda_major < 11: - print_dedented( - """ - WARNING: CUDA versions lower than 11 are currently not supported for LLM.int8(). - You will be only to use 8-bit optimizers and quantization routines! - """, - ) - - print(f"To manually override the PyTorch CUDA version please see: {NONPYTORCH_DOC_URL}") - else: - if (cuda_major, cuda_minor) < (6, 1): - print_dedented( - """ - WARNING: bitandbytes is fully supported only from ROCm 6.1. - """, - ) + if cuda_major < 11: + print_dedented( + """ + WARNING: CUDA versions lower than 11 are currently not supported for LLM.int8(). + You will be only to use 8-bit optimizers and quantization routines! + """, + ) + + print(f"To manually override the PyTorch CUDA version please see: {NONPYTORCH_DOC_URL}") # 7.5 is the minimum CC for cublaslt - if not cuda_specs.has_cublaslt and not HIP_ENVIRONMENT: + if not cuda_specs.has_cublaslt: print_dedented( """ WARNING: Compute capability < 7.5 detected! Only slow 8-bit matmul is supported for your GPU! @@ -173,44 +154,88 @@ def print_cuda_diagnostics(cuda_specs: CUDASpecs) -> None: # (2) Multiple CUDA versions installed -def print_cuda_runtime_diagnostics() -> None: +def _print_hip_diagnostics(cuda_specs: CUDASpecs) -> None: + print(f"PyTorch settings found: ROCM_VERSION={cuda_specs.cuda_version_string}") + + binary_path = get_cuda_bnb_library_path(cuda_specs) + if not binary_path.exists(): + print_dedented( + f""" + Library not found: {binary_path}. + Maybe you need to compile it from source? If you compiled from source, check that ROCM_VERSION + in PyTorch Settings matches your ROCm install. If not, reinstall PyTorch for your ROCm version + and rebuild bitsandbytes. + """, + ) + + hip_major, hip_minor = cuda_specs.cuda_version_tuple + if (hip_major, hip_minor) < (6, 1): + print_dedented( + """ + WARNING: bitsandbytes is fully supported only from ROCm 6.1. + """, + ) + + +def print_diagnostics(cuda_specs: CUDASpecs) -> None: + if HIP_ENVIRONMENT: + _print_hip_diagnostics(cuda_specs) + else: + _print_cuda_diagnostics(cuda_specs) + + +def _print_cuda_runtime_diagnostics() -> None: cudart_paths = list(find_cudart_libraries()) if not cudart_paths: - print(f"{BNB_BACKEND} SETUP: WARNING! {BNB_BACKEND} runtime files not found in any environmental path.") + print("WARNING! CUDA runtime files not found in any environmental path.") elif len(cudart_paths) > 1: - backend_version = torch.version.cuda if not HIP_ENVIRONMENT else torch.version.hip print_dedented( f""" - Found duplicate {BNB_BACKEND} runtime files (see below). + Found duplicate CUDA runtime files (see below). + + We select the PyTorch default CUDA runtime, which is {torch.version.cuda}, + but this might mismatch with the CUDA version that is needed for bitsandbytes. 
+            To override this behavior set the `BNB_CUDA_VERSION=` environmental variable.
+
+            For example, if you want to use the CUDA version 122,
+                BNB_CUDA_VERSION=122 python ...
+
+            OR set the environmental variable in your .bashrc:
+                export BNB_CUDA_VERSION=122
 
-            We select the PyTorch default {BNB_BACKEND} runtime, which is {backend_version},
-            but this might mismatch with the {BNB_BACKEND} version that is needed for bitsandbytes.
+            In the case of a manual override, make sure you set LD_LIBRARY_PATH, e.g.
+            export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-11.2,
             """,
         )
+        for pth in cudart_paths:
+            print(f"* Found CUDA runtime at: {pth}")
+
+
+def _print_hip_runtime_diagnostics() -> None:
+    cudart_paths = list(find_cudart_libraries())
+    if not cudart_paths:
+        print("WARNING! ROCm runtime files not found in any environmental path.")
+    elif len(cudart_paths) > 1:
+        print_dedented(
+            f"""
+            Found duplicate ROCm runtime files (see below).
+
+            We select the PyTorch default ROCm runtime, which is {torch.version.hip},
+            but this might mismatch with the ROCm version that is needed for bitsandbytes.
+
+            To resolve it, install PyTorch built for the ROCm version you want to use
+
+            and set LD_LIBRARY_PATH to your ROCm install path, e.g.
+            export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/rocm-6.1.2/lib,
+            """,
+        )
-        if not HIP_ENVIRONMENT:
-            print_dedented(
-                """
-                To override this behavior set the `BNB_CUDA_VERSION=` environmental variable.
-
-                For example, if you want to use the CUDA version 122,
-                    BNB_CUDA_VERSION=122 python ...
-
-                OR set the environmental variable in your .bashrc:
-                    export BNB_CUDA_VERSION=122
-
-                In the case of a manual override, make sure you set LD_LIBRARY_PATH, e.g.
-                export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-11.2,
-                """,
-            )
-        else:
-            print_dedented(
-                """
-                To resolve it, install PyTorch built for the ROCm version you want to use
-
-                and set LD_LIBRARY_PATH to your ROCm install path, e.g.
-                export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/rocm-6.1.2,
-                """,
-            )
 
     for pth in cudart_paths:
-        print(f"* Found {BNB_BACKEND} runtime at: {pth}")
+        print(f"* Found ROCm runtime at: {pth}")
+
+
+def print_runtime_diagnostics() -> None:
+    if HIP_ENVIRONMENT:
+        _print_hip_runtime_diagnostics()
+    else:
+        _print_cuda_runtime_diagnostics()
diff --git a/bitsandbytes/diagnostics/main.py b/bitsandbytes/diagnostics/main.py
index 9165cbeed..8dc43ed2a 100644
--- a/bitsandbytes/diagnostics/main.py
+++ b/bitsandbytes/diagnostics/main.py
@@ -7,8 +7,8 @@
 from bitsandbytes.consts import PACKAGE_GITHUB_URL
 from bitsandbytes.cuda_specs import get_cuda_specs
 from bitsandbytes.diagnostics.cuda import (
-    print_cuda_diagnostics,
-    print_cuda_runtime_diagnostics,
+    print_diagnostics,
+    print_runtime_diagnostics,
 )
 from bitsandbytes.diagnostics.utils import print_dedented, print_header
 
@@ -63,8 +63,8 @@ def main():
     print(f"2. {BNB_BACKEND} not installed")
     print(f"3. You have multiple conflicting {BNB_BACKEND} libraries")
     if cuda_specs:
-        print_cuda_diagnostics(cuda_specs)
-        print_cuda_runtime_diagnostics()
+        print_diagnostics(cuda_specs)
+        print_runtime_diagnostics()
     print_header("")
     print_header("DEBUG INFO END")
     print_header("")
diff --git a/csrc/ops.hip b/csrc/ops.hip
index a808d5ecb..4fdc3cbfa 100644
--- a/csrc/ops.hip
+++ b/csrc/ops.hip
@@ -618,7 +618,7 @@ template int igemmlt(hipblasLtHandl
     if (returnedAlgoCount == 0)
     {
       has_error = 1;
-      printf("Error: Matmul Algo Heurisitic didn't return algorithms\n");
+      fprintf(stderr, "Error: Matmul Algo Heuristic didn't return algorithms\n");
     }
     else
     {
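
Note: the patch replaces the per-function HIP_ENVIRONMENT branching with backend-specific helpers behind two small dispatchers. A minimal sketch (not part of the patch) of how the new entry points are driven, mirroring the call site in bitsandbytes/diagnostics/main.py above; it assumes an environment where bitsandbytes imports cleanly, and relies on the existing behavior that get_cuda_specs() returns None when torch sees no device, which the call site already guards for:

    # Sketch only: exercises the dispatchers introduced by this patch.
    from bitsandbytes.cuda_specs import get_cuda_specs
    from bitsandbytes.diagnostics.cuda import print_diagnostics, print_runtime_diagnostics

    cuda_specs = get_cuda_specs()  # None when no CUDA/ROCm device is visible to torch
    if cuda_specs:
        print_diagnostics(cuda_specs)  # picks _print_hip_diagnostics under HIP, else the CUDA variant
        print_runtime_diagnostics()    # likewise selects the ROCm or CUDA duplicate-runtime check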