From 8d84dab8f955d6ad5addec09d5a29a75de88c7bb Mon Sep 17 00:00:00 2001
From: yuanlehome <yuanlehome@163.com>
Date: Thu, 27 Nov 2025 11:32:51 +0800
Subject: [PATCH 1/3] fix cuda-python requirement

---
 fastdeploy/config.py                   | 10 ++++++++++
 fastdeploy/eplb/async_expert_loader.py |  3 ++-
 requirements.txt                       |  1 -
 3 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/fastdeploy/config.py b/fastdeploy/config.py
index c4a25a1504e..01b1e73e3bf 100644
--- a/fastdeploy/config.py
+++ b/fastdeploy/config.py
@@ -1807,6 +1807,16 @@ def check(self):
                 int(envs.FD_DISABLED_RECOVER) == 0
             ), "FD_DISABLED_RECOVER is not supported while ENABLE_V1_KVCACHE_SCHEDULER is turned on."
 
+        if self.eplb_config is not None and self.eplb_config.enable_eplb:
+            try:
+                import cuda  # noqa
+            except Exception:
+                raise ImportError(
+                    "cuda-python not installed. Install the version matching your CUDA toolkit:\n"
+                    "  CUDA 12.x → pip install cuda-python==12.*\n"
+                    "  CUDA 11.x → pip install cuda-python==11.*"
+                )
+
     def print(self):
         """
         print all config
diff --git a/fastdeploy/eplb/async_expert_loader.py b/fastdeploy/eplb/async_expert_loader.py
index 6e0355276b8..63616231e09 100644
--- a/fastdeploy/eplb/async_expert_loader.py
+++ b/fastdeploy/eplb/async_expert_loader.py
@@ -22,7 +22,6 @@
 
 import numpy as np
 import paddle
-from cuda import cudart
 
 from fastdeploy.config import EPLBConfig
 
@@ -90,6 +89,8 @@ def create_mmap(model_name: List, ep_rank: int, ep_size: int, shm_uuid: str, epl
         shm_ptr = ctypes.cast(shm_ptr, ctypes.POINTER(ctypes.c_int8))
         addr = ctypes.addressof(shm_ptr.contents)
 
+        from cuda import cudart
+
         # Register memory with CUDA
         (ret,) = cudart.cudaHostRegister(addr, shm_size, 0)
         if ret != cudart.cudaError_t.cudaSuccess:
diff --git a/requirements.txt b/requirements.txt
index 2e016837dd4..e5f614d8a87 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -46,4 +46,3 @@ msgspec
 einops
 setproctitle
 aistudio_sdk
-cuda-python==12.8

From 5e337bd91bd51bb6c94ff5992842c50decd25211 Mon Sep 17 00:00:00 2001
From: yuanlehome <yuanlehome@163.com>
Date: Thu, 27 Nov 2025 11:34:58 +0800
Subject: [PATCH 2/3] drop CUDA 11.x install hint from cuda-python error message

---
 fastdeploy/config.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fastdeploy/config.py b/fastdeploy/config.py
index 01b1e73e3bf..0208fb43866 100644
--- a/fastdeploy/config.py
+++ b/fastdeploy/config.py
@@ -1814,7 +1814,6 @@ def check(self):
                 raise ImportError(
                     "cuda-python not installed. Install the version matching your CUDA toolkit:\n"
                     "  CUDA 12.x → pip install cuda-python==12.*\n"
-                    "  CUDA 11.x → pip install cuda-python==11.*"
                 )
 
     def print(self):

From 82da6cd5da7d91745beb6e6213d419f6b2c798c1 Mon Sep 17 00:00:00 2001
From: yuanlehome <yuanlehome@163.com>
Date: Thu, 27 Nov 2025 11:48:41 +0800
Subject: [PATCH 3/3] catch only ImportError; guard module-level cudart import

---
 fastdeploy/config.py                   |  2 +-
 fastdeploy/eplb/async_expert_loader.py | 11 ++++++++++-
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/fastdeploy/config.py b/fastdeploy/config.py
index 0208fb43866..3f4d326e154 100644
--- a/fastdeploy/config.py
+++ b/fastdeploy/config.py
@@ -1810,7 +1810,7 @@ def check(self):
         if self.eplb_config is not None and self.eplb_config.enable_eplb:
             try:
                 import cuda  # noqa
-            except Exception:
+            except ImportError:
                 raise ImportError(
                     "cuda-python not installed. Install the version matching your CUDA toolkit:\n"
                     "  CUDA 12.x → pip install cuda-python==12.*\n"
diff --git a/fastdeploy/eplb/async_expert_loader.py b/fastdeploy/eplb/async_expert_loader.py
index 63616231e09..9ab0bfae08a 100644
--- a/fastdeploy/eplb/async_expert_loader.py
+++ b/fastdeploy/eplb/async_expert_loader.py
@@ -23,6 +23,11 @@
 import numpy as np
 import paddle
 
+try:
+    from cuda import cudart
+except ImportError:
+    cudart = None
+
 from fastdeploy.config import EPLBConfig
 
 REARRANGE_EXPERT_MAGIC_NUM = 147183647
@@ -89,7 +94,11 @@ def create_mmap(model_name: List, ep_rank: int, ep_size: int, shm_uuid: str, epl
         shm_ptr = ctypes.cast(shm_ptr, ctypes.POINTER(ctypes.c_int8))
         addr = ctypes.addressof(shm_ptr.contents)
 
-        from cuda import cudart
+        if cudart is None:
+            raise ImportError(
+                "cuda-python not installed. Install the version matching your CUDA toolkit:\n"
+                "  CUDA 12.x → pip install cuda-python==12.*\n"
+            )
 
         # Register memory with CUDA
         (ret,) = cudart.cudaHostRegister(addr, shm_size, 0)