From 9b8df10a361e09a49b3cb5e8099b7d8819bc187b Mon Sep 17 00:00:00 2001 From: SigureMo Date: Sat, 6 Dec 2025 09:03:49 +0800 Subject: [PATCH 1/3] [BugFix] Fix some parameter place on CPU in PaddleOCR-VL --- .../model_executor/models/paddleocr_vl/projector.py | 6 +++++- .../model_executor/models/paddleocr_vl/siglip.py | 12 ++++++++++-- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/fastdeploy/model_executor/models/paddleocr_vl/projector.py b/fastdeploy/model_executor/models/paddleocr_vl/projector.py index f1b5ef60928..434e416fc52 100644 --- a/fastdeploy/model_executor/models/paddleocr_vl/projector.py +++ b/fastdeploy/model_executor/models/paddleocr_vl/projector.py @@ -20,6 +20,8 @@ import paddle import paddle.nn as nn +from fastdeploy.model_executor.utils import h2d_copy + class GELUActivation(nn.Layer): """ @@ -97,6 +99,8 @@ def forward(self, image_features, image_grid_thw): def weight_loader(self, param, loaded_weight, loaded_shard_id: Optional[str] = None): loaded_weight = loaded_weight.transpose([1, 0]) + if not param._is_initialized(): + param.initialize() assert param.shape == loaded_weight.shape, ( f" Attempted to load weight ({loaded_weight.shape}) " f"into parameter ({param.shape})" ) @@ -106,4 +110,4 @@ def weight_loader(self, param, loaded_weight, loaded_shard_id: Optional[str] = N loaded_weight = loaded_weight.view(param.dtype) else: loaded_weight = loaded_weight.cast(param.dtype) - param.copy_(loaded_weight, False) + h2d_copy(param, loaded_weight) diff --git a/fastdeploy/model_executor/models/paddleocr_vl/siglip.py b/fastdeploy/model_executor/models/paddleocr_vl/siglip.py index 0bb256cd51f..982cfb266fc 100644 --- a/fastdeploy/model_executor/models/paddleocr_vl/siglip.py +++ b/fastdeploy/model_executor/models/paddleocr_vl/siglip.py @@ -23,6 +23,7 @@ import paddle.nn.functional as F from paddleformers.transformers.model_utils import PretrainedModel +from fastdeploy.model_executor.layers.utils import get_tensor from fastdeploy.model_executor.utils import h2d_copy, slice_fn from .config import PaddleOCRVisionConfig @@ -69,6 +70,7 @@ def __init__(self, config): self.flash_attn_kwargs = {"scale": self.scale, "training": False} def qkv_weight_loader(self, param, loaded_weight, loaded_shard_id: Optional[str] = None): + # loaded_weight = get_tensor(loaded_weight) # Tensor parallelism splits the weight along the output_dim if loaded_weight.dim() == 2: loaded_weight = loaded_weight.transpose([1, 0]) @@ -100,6 +102,8 @@ def qkv_weight_loader(self, param, loaded_weight, loaded_shard_id: Optional[str] def out_proj_weight_loader(self, param, loaded_weight, loaded_shard_id: Optional[str] = None): loaded_weight = loaded_weight.transpose([1, 0]) + if not param._is_initialized(): + param.initialize() assert param.shape == loaded_weight.shape, ( f" Attempted to load weight ({loaded_weight.shape}) " f"into parameter ({param.shape})" ) @@ -109,7 +113,8 @@ def out_proj_weight_loader(self, param, loaded_weight, loaded_shard_id: Optional loaded_weight = loaded_weight.view(param.dtype) else: loaded_weight = loaded_weight.cast(param.dtype) - param.copy_(loaded_weight, False) + h2d_copy(param, loaded_weight) + def forward( self, @@ -287,6 +292,8 @@ def __init__(self, config): def weight_loader(self, param, loaded_weight, loaded_shard_id: Optional[str] = None): loaded_weight = loaded_weight.transpose([1, 0]) + if not param._is_initialized(): + param.initialize() assert param.shape == loaded_weight.shape, ( f" Attempted to load weight ({loaded_weight.shape}) " f"into parameter ({param.shape})" ) @@ -296,7 +303,8 @@ def weight_loader(self, param, loaded_weight, loaded_shard_id: Optional[str] = N loaded_weight = loaded_weight.view(param.dtype) else: loaded_weight = loaded_weight.cast(param.dtype) - param.copy_(loaded_weight, False) + h2d_copy(param, loaded_weight) + def forward(self, hidden_states: paddle.Tensor) -> paddle.Tensor: hidden_states = self.fc1(hidden_states) From d9a5a5f14141d529aee4cd3b5a38aab6fe382d63 Mon Sep 17 00:00:00 2001 From: SigureMo Date: Sat, 6 Dec 2025 09:10:22 +0800 Subject: [PATCH 2/3] clean log --- fastdeploy/model_executor/models/paddleocr_vl/siglip.py | 1 - 1 file changed, 1 deletion(-) diff --git a/fastdeploy/model_executor/models/paddleocr_vl/siglip.py b/fastdeploy/model_executor/models/paddleocr_vl/siglip.py index 982cfb266fc..a7e21f2565c 100644 --- a/fastdeploy/model_executor/models/paddleocr_vl/siglip.py +++ b/fastdeploy/model_executor/models/paddleocr_vl/siglip.py @@ -70,7 +70,6 @@ def __init__(self, config): self.flash_attn_kwargs = {"scale": self.scale, "training": False} def qkv_weight_loader(self, param, loaded_weight, loaded_shard_id: Optional[str] = None): - # loaded_weight = get_tensor(loaded_weight) # Tensor parallelism splits the weight along the output_dim if loaded_weight.dim() == 2: loaded_weight = loaded_weight.transpose([1, 0]) From 332913970975ddd0b3817f8c124540fb3b950769 Mon Sep 17 00:00:00 2001 From: SigureMo Date: Sat, 6 Dec 2025 09:21:09 +0800 Subject: [PATCH 3/3] fix codestyle --- fastdeploy/model_executor/models/paddleocr_vl/siglip.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/fastdeploy/model_executor/models/paddleocr_vl/siglip.py b/fastdeploy/model_executor/models/paddleocr_vl/siglip.py index a7e21f2565c..452d8dd1f76 100644 --- a/fastdeploy/model_executor/models/paddleocr_vl/siglip.py +++ b/fastdeploy/model_executor/models/paddleocr_vl/siglip.py @@ -23,7 +23,6 @@ import paddle.nn.functional as F from paddleformers.transformers.model_utils import PretrainedModel -from fastdeploy.model_executor.layers.utils import get_tensor from fastdeploy.model_executor.utils import h2d_copy, slice_fn from .config import PaddleOCRVisionConfig @@ -114,7 +113,6 @@ def out_proj_weight_loader(self, param, loaded_weight, loaded_shard_id: Optional loaded_weight = loaded_weight.cast(param.dtype) h2d_copy(param, loaded_weight) - def forward( self, hidden_states: paddle.Tensor, # [B, L, D] @@ -304,7 +302,6 @@ def weight_loader(self, param, loaded_weight, loaded_shard_id: Optional[str] = N loaded_weight = loaded_weight.cast(param.dtype) h2d_copy(param, loaded_weight) - def forward(self, hidden_states: paddle.Tensor) -> paddle.Tensor: hidden_states = self.fc1(hidden_states) hidden_states = self.activation_fn(hidden_states[0])