From 9b8df10a361e09a49b3cb5e8099b7d8819bc187b Mon Sep 17 00:00:00 2001
From: SigureMo <sigure.qaq@gmail.com>
Date: Sat, 6 Dec 2025 09:03:49 +0800
Subject: [PATCH 1/3] [BugFix] Fix some parameters being placed on CPU in PaddleOCR-VL

---
 .../model_executor/models/paddleocr_vl/projector.py  |  6 +++++-
 .../model_executor/models/paddleocr_vl/siglip.py     | 12 ++++++++++--
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/fastdeploy/model_executor/models/paddleocr_vl/projector.py b/fastdeploy/model_executor/models/paddleocr_vl/projector.py
index f1b5ef60928..434e416fc52 100644
--- a/fastdeploy/model_executor/models/paddleocr_vl/projector.py
+++ b/fastdeploy/model_executor/models/paddleocr_vl/projector.py
@@ -20,6 +20,8 @@
 import paddle
 import paddle.nn as nn
 
+from fastdeploy.model_executor.utils import h2d_copy
+
 
 class GELUActivation(nn.Layer):
     """
@@ -97,6 +99,8 @@ def forward(self, image_features, image_grid_thw):
 
     def weight_loader(self, param, loaded_weight, loaded_shard_id: Optional[str] = None):
         loaded_weight = loaded_weight.transpose([1, 0])
+        if not param._is_initialized():
+            param.initialize()
         assert param.shape == loaded_weight.shape, (
             f" Attempted to load weight ({loaded_weight.shape}) " f"into parameter ({param.shape})"
         )
@@ -106,4 +110,4 @@ def weight_loader(self, param, loaded_weight, loaded_shard_id: Optional[str] = N
                 loaded_weight = loaded_weight.view(param.dtype)
             else:
                 loaded_weight = loaded_weight.cast(param.dtype)
-        param.copy_(loaded_weight, False)
+        h2d_copy(param, loaded_weight)
diff --git a/fastdeploy/model_executor/models/paddleocr_vl/siglip.py b/fastdeploy/model_executor/models/paddleocr_vl/siglip.py
index 0bb256cd51f..982cfb266fc 100644
--- a/fastdeploy/model_executor/models/paddleocr_vl/siglip.py
+++ b/fastdeploy/model_executor/models/paddleocr_vl/siglip.py
@@ -23,6 +23,7 @@
 import paddle.nn.functional as F
 from paddleformers.transformers.model_utils import PretrainedModel
 
+from fastdeploy.model_executor.layers.utils import get_tensor
 from fastdeploy.model_executor.utils import h2d_copy, slice_fn
 
 from .config import PaddleOCRVisionConfig
@@ -69,6 +70,7 @@ def __init__(self, config):
             self.flash_attn_kwargs = {"scale": self.scale, "training": False}
 
     def qkv_weight_loader(self, param, loaded_weight, loaded_shard_id: Optional[str] = None):
+        # loaded_weight = get_tensor(loaded_weight)
         # Tensor parallelism splits the weight along the output_dim
         if loaded_weight.dim() == 2:
             loaded_weight = loaded_weight.transpose([1, 0])
@@ -100,6 +102,8 @@ def qkv_weight_loader(self, param, loaded_weight, loaded_shard_id: Optional[str]
 
     def out_proj_weight_loader(self, param, loaded_weight, loaded_shard_id: Optional[str] = None):
         loaded_weight = loaded_weight.transpose([1, 0])
+        if not param._is_initialized():
+            param.initialize()
         assert param.shape == loaded_weight.shape, (
             f" Attempted to load weight ({loaded_weight.shape}) " f"into parameter ({param.shape})"
         )
@@ -109,7 +113,8 @@ def out_proj_weight_loader(self, param, loaded_weight, loaded_shard_id: Optional
                 loaded_weight = loaded_weight.view(param.dtype)
             else:
                 loaded_weight = loaded_weight.cast(param.dtype)
-        param.copy_(loaded_weight, False)
+        h2d_copy(param, loaded_weight)
+
 
     def forward(
         self,
@@ -287,6 +292,8 @@ def __init__(self, config):
 
     def weight_loader(self, param, loaded_weight, loaded_shard_id: Optional[str] = None):
         loaded_weight = loaded_weight.transpose([1, 0])
+        if not param._is_initialized():
+            param.initialize()
         assert param.shape == loaded_weight.shape, (
             f" Attempted to load weight ({loaded_weight.shape}) " f"into parameter ({param.shape})"
         )
@@ -296,7 +303,8 @@ def weight_loader(self, param, loaded_weight, loaded_shard_id: Optional[str] = N
                 loaded_weight = loaded_weight.view(param.dtype)
             else:
                 loaded_weight = loaded_weight.cast(param.dtype)
-        param.copy_(loaded_weight, False)
+        h2d_copy(param, loaded_weight)
+
 
     def forward(self, hidden_states: paddle.Tensor) -> paddle.Tensor:
         hidden_states = self.fc1(hidden_states)

From d9a5a5f14141d529aee4cd3b5a38aab6fe382d63 Mon Sep 17 00:00:00 2001
From: SigureMo <sigure.qaq@gmail.com>
Date: Sat, 6 Dec 2025 09:10:22 +0800
Subject: [PATCH 2/3] clean log

---
 fastdeploy/model_executor/models/paddleocr_vl/siglip.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fastdeploy/model_executor/models/paddleocr_vl/siglip.py b/fastdeploy/model_executor/models/paddleocr_vl/siglip.py
index 982cfb266fc..a7e21f2565c 100644
--- a/fastdeploy/model_executor/models/paddleocr_vl/siglip.py
+++ b/fastdeploy/model_executor/models/paddleocr_vl/siglip.py
@@ -70,7 +70,6 @@ def __init__(self, config):
             self.flash_attn_kwargs = {"scale": self.scale, "training": False}
 
     def qkv_weight_loader(self, param, loaded_weight, loaded_shard_id: Optional[str] = None):
-        # loaded_weight = get_tensor(loaded_weight)
         # Tensor parallelism splits the weight along the output_dim
         if loaded_weight.dim() == 2:
             loaded_weight = loaded_weight.transpose([1, 0])

From 332913970975ddd0b3817f8c124540fb3b950769 Mon Sep 17 00:00:00 2001
From: SigureMo <sigure.qaq@gmail.com>
Date: Sat, 6 Dec 2025 09:21:09 +0800
Subject: [PATCH 3/3] fix codestyle

---
 fastdeploy/model_executor/models/paddleocr_vl/siglip.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/fastdeploy/model_executor/models/paddleocr_vl/siglip.py b/fastdeploy/model_executor/models/paddleocr_vl/siglip.py
index a7e21f2565c..452d8dd1f76 100644
--- a/fastdeploy/model_executor/models/paddleocr_vl/siglip.py
+++ b/fastdeploy/model_executor/models/paddleocr_vl/siglip.py
@@ -23,7 +23,6 @@
 import paddle.nn.functional as F
 from paddleformers.transformers.model_utils import PretrainedModel
 
-from fastdeploy.model_executor.layers.utils import get_tensor
 from fastdeploy.model_executor.utils import h2d_copy, slice_fn
 
 from .config import PaddleOCRVisionConfig
@@ -114,7 +113,6 @@ def out_proj_weight_loader(self, param, loaded_weight, loaded_shard_id: Optional
                 loaded_weight = loaded_weight.cast(param.dtype)
         h2d_copy(param, loaded_weight)
 
-
     def forward(
         self,
         hidden_states: paddle.Tensor,  # [B, L, D]
@@ -304,7 +302,6 @@ def weight_loader(self, param, loaded_weight, loaded_shard_id: Optional[str] = N
                 loaded_weight = loaded_weight.cast(param.dtype)
         h2d_copy(param, loaded_weight)
 
-
     def forward(self, hidden_states: paddle.Tensor) -> paddle.Tensor:
         hidden_states = self.fc1(hidden_states)
         hidden_states = self.activation_fn(hidden_states[0])