diff --git a/fastdeploy/config.py b/fastdeploy/config.py
index c4a25a1504e..3f4d326e154 100644
--- a/fastdeploy/config.py
+++ b/fastdeploy/config.py
@@ -1807,6 +1807,15 @@ def check(self):
                 int(envs.FD_DISABLED_RECOVER) == 0
             ), "FD_DISABLED_RECOVER is not supported while ENABLE_V1_KVCACHE_SCHEDULER is turned on."
 
+        if self.eplb_config is not None and self.eplb_config.enable_eplb:
+            try:
+                import cuda  # noqa
+            except ImportError:
+                raise ImportError(
+                    "cuda-python not installed. Install the version matching your CUDA toolkit:\n"
+                    "  CUDA 12.x → pip install cuda-python==12.*\n"
+                )
+
     def print(self):
         """
         print all config
diff --git a/fastdeploy/eplb/async_expert_loader.py b/fastdeploy/eplb/async_expert_loader.py
index 6e0355276b8..9ab0bfae08a 100644
--- a/fastdeploy/eplb/async_expert_loader.py
+++ b/fastdeploy/eplb/async_expert_loader.py
@@ -22,7 +22,11 @@
 
 import numpy as np
 import paddle
-from cuda import cudart
+
+try:
+    from cuda import cudart
+except ImportError:
+    cudart = None
 
 from fastdeploy.config import EPLBConfig
 
@@ -90,6 +94,12 @@ def create_mmap(model_name: List, ep_rank: int, ep_size: int, shm_uuid: str, epl
     shm_ptr = ctypes.cast(shm_ptr, ctypes.POINTER(ctypes.c_int8))
     addr = ctypes.addressof(shm_ptr.contents)
 
+    if cudart is None:
+        raise ImportError(
+            "cuda-python not installed. Install the version matching your CUDA toolkit:\n"
+            "  CUDA 12.x → pip install cuda-python==12.*\n"
+        )
+
     # Register memory with CUDA
     (ret,) = cudart.cudaHostRegister(addr, shm_size, 0)
     if ret != cudart.cudaError_t.cudaSuccess:
diff --git a/requirements.txt b/requirements.txt
index 2e016837dd4..e5f614d8a87 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -46,4 +46,3 @@ msgspec
 einops
 setproctitle
 aistudio_sdk
-cuda-python==12.8
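
For context, a minimal standalone sketch of the pattern this patch applies: cuda-python is imported lazily at module load, and an actionable error is raised only on the code path that actually needs cudart (here, the EPLB expert loader). The helper name `require_cudart` is illustrative and not part of the patch.

# Sketch only: mirrors the guarded-import pattern used in async_expert_loader.py above.
try:
    from cuda import cudart  # provided by the optional cuda-python package
except ImportError:
    cudart = None  # defer the failure until a CUDA-dependent path is hit


def require_cudart():
    """Return cudart, or raise a helpful error if cuda-python is missing."""
    if cudart is None:
        raise ImportError(
            "cuda-python not installed. Install the version matching your CUDA toolkit:\n"
            "  CUDA 12.x → pip install cuda-python==12.*\n"
        )
    return cudart

With this shape, importing the module never fails on machines without cuda-python; only enabling EPLB (or calling create_mmap) surfaces the install hint, which is why the hard pin could be dropped from requirements.txt.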