From 70f84f22cb36e3ed51ffcd2851e89810c18d3553 Mon Sep 17 00:00:00 2001
From: yuanlehome
Date: Tue, 2 Dec 2025 22:16:03 +0800
Subject: [PATCH 1/2] fix skip_quant

---
 fastdeploy/model_executor/layers/linear.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fastdeploy/model_executor/layers/linear.py b/fastdeploy/model_executor/layers/linear.py
index 7b1dc794aa4..37657f33b06 100644
--- a/fastdeploy/model_executor/layers/linear.py
+++ b/fastdeploy/model_executor/layers/linear.py
@@ -508,7 +508,7 @@ class QKVParallelLinear(ColumnParallelLinear):
     QKVParallelLinear Layer.
     """
 
-    def __init__(self, fd_config, prefix, with_bias=False, add_bias=True):
+    def __init__(self, fd_config, prefix, with_bias=False, add_bias=True, skip_quant=False):
         """
         Initialize the QKV Linear layer with given parameters.
 
@@ -542,6 +542,7 @@ def __init__(self, fd_config, prefix, with_bias=False, add_bias=True):
             output_size=output_size,
             with_bias=with_bias,
             add_bias=add_bias,
+            skip_quant=skip_quant,
         )
 
     def _get_shard_size_mapping(self, loaded_shard_id: str):

From fc7e9e1f47d6eddf5d7556a49c940b6ba7bf54c9 Mon Sep 17 00:00:00 2001
From: yuanlehome
Date: Tue, 2 Dec 2025 22:29:15 +0800
Subject: [PATCH 2/2] fix

---
 fastdeploy/model_executor/layers/linear.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fastdeploy/model_executor/layers/linear.py b/fastdeploy/model_executor/layers/linear.py
index 37657f33b06..00fec6ba7f1 100644
--- a/fastdeploy/model_executor/layers/linear.py
+++ b/fastdeploy/model_executor/layers/linear.py
@@ -721,7 +721,6 @@ def __init__(
             skip_quant (bool): Whether to skip quantization. Defaults to False.
         """
         self.fd_config = fd_config
-        self.skip_quant = False
         self.nranks = fd_config.parallel_config.tensor_parallel_size
         self.tp_group = fd_config.parallel_config.tp_group
         self.hidden_size = fd_config.model_config.hidden_size