From 8d84dab8f955d6ad5addec09d5a29a75de88c7bb Mon Sep 17 00:00:00 2001 From: yuanlehome Date: Thu, 27 Nov 2025 11:32:51 +0800 Subject: [PATCH 1/3] fix cuda-python requirement --- fastdeploy/config.py | 10 ++++++++++ fastdeploy/eplb/async_expert_loader.py | 3 ++- requirements.txt | 1 - 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/fastdeploy/config.py b/fastdeploy/config.py index c4a25a1504e..01b1e73e3bf 100644 --- a/fastdeploy/config.py +++ b/fastdeploy/config.py @@ -1807,6 +1807,16 @@ def check(self): int(envs.FD_DISABLED_RECOVER) == 0 ), "FD_DISABLED_RECOVER is not supported while ENABLE_V1_KVCACHE_SCHEDULER is turned on." + if self.eplb_config is not None and self.eplb_config.enable_eplb: + try: + import cuda # noqa + except Exception: + raise ImportError( + "cuda-python not installed. Install the version matching your CUDA toolkit:\n" + " CUDA 12.x → pip install cuda-python==12.*\n" + " CUDA 11.x → pip install cuda-python==11.*" + ) + def print(self): """ print all config diff --git a/fastdeploy/eplb/async_expert_loader.py b/fastdeploy/eplb/async_expert_loader.py index 6e0355276b8..63616231e09 100644 --- a/fastdeploy/eplb/async_expert_loader.py +++ b/fastdeploy/eplb/async_expert_loader.py @@ -22,7 +22,6 @@ import numpy as np import paddle -from cuda import cudart from fastdeploy.config import EPLBConfig @@ -90,6 +89,8 @@ def create_mmap(model_name: List, ep_rank: int, ep_size: int, shm_uuid: str, epl shm_ptr = ctypes.cast(shm_ptr, ctypes.POINTER(ctypes.c_int8)) addr = ctypes.addressof(shm_ptr.contents) + from cuda import cudart + # Register memory with CUDA (ret,) = cudart.cudaHostRegister(addr, shm_size, 0) if ret != cudart.cudaError_t.cudaSuccess: diff --git a/requirements.txt b/requirements.txt index 2e016837dd4..e5f614d8a87 100644 --- a/requirements.txt +++ b/requirements.txt @@ -46,4 +46,3 @@ msgspec einops setproctitle aistudio_sdk -cuda-python==12.8 From 5e337bd91bd51bb6c94ff5992842c50decd25211 Mon Sep 17 00:00:00 2001 From: yuanlehome Date: Thu, 27 Nov 2025 11:34:58 +0800 Subject: [PATCH 2/3] update --- fastdeploy/config.py | 1 - 1 file changed, 1 deletion(-) diff --git a/fastdeploy/config.py b/fastdeploy/config.py index 01b1e73e3bf..0208fb43866 100644 --- a/fastdeploy/config.py +++ b/fastdeploy/config.py @@ -1814,7 +1814,6 @@ def check(self): raise ImportError( "cuda-python not installed. Install the version matching your CUDA toolkit:\n" " CUDA 12.x → pip install cuda-python==12.*\n" - " CUDA 11.x → pip install cuda-python==11.*" ) def print(self): From 82da6cd5da7d91745beb6e6213d419f6b2c798c1 Mon Sep 17 00:00:00 2001 From: yuanlehome Date: Thu, 27 Nov 2025 11:48:41 +0800 Subject: [PATCH 3/3] fix --- fastdeploy/config.py | 2 +- fastdeploy/eplb/async_expert_loader.py | 11 ++++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/fastdeploy/config.py b/fastdeploy/config.py index 0208fb43866..3f4d326e154 100644 --- a/fastdeploy/config.py +++ b/fastdeploy/config.py @@ -1810,7 +1810,7 @@ def check(self): if self.eplb_config is not None and self.eplb_config.enable_eplb: try: import cuda # noqa - except Exception: + except ImportError: raise ImportError( "cuda-python not installed. Install the version matching your CUDA toolkit:\n" " CUDA 12.x → pip install cuda-python==12.*\n" diff --git a/fastdeploy/eplb/async_expert_loader.py b/fastdeploy/eplb/async_expert_loader.py index 63616231e09..9ab0bfae08a 100644 --- a/fastdeploy/eplb/async_expert_loader.py +++ b/fastdeploy/eplb/async_expert_loader.py @@ -23,6 +23,11 @@ import numpy as np import paddle +try: + from cuda import cudart +except ImportError: + cudart = None + from fastdeploy.config import EPLBConfig REARRANGE_EXPERT_MAGIC_NUM = 147183647 @@ -89,7 +94,11 @@ def create_mmap(model_name: List, ep_rank: int, ep_size: int, shm_uuid: str, epl shm_ptr = ctypes.cast(shm_ptr, ctypes.POINTER(ctypes.c_int8)) addr = ctypes.addressof(shm_ptr.contents) - from cuda import cudart + if cudart is None: + raise ImportError( + "cuda-python not installed. Install the version matching your CUDA toolkit:\n" + " CUDA 12.x → pip install cuda-python==12.*\n" + ) # Register memory with CUDA (ret,) = cudart.cudaHostRegister(addr, shm_size, 0)