diff --git a/colossalai/shardformer/layer/qkv_fused_linear.py b/colossalai/shardformer/layer/qkv_fused_linear.py index 0f6595a7c4d6..000934ad91a2 100644 --- a/colossalai/shardformer/layer/qkv_fused_linear.py +++ b/colossalai/shardformer/layer/qkv_fused_linear.py @@ -695,6 +695,7 @@ def from_native_module( process_group=process_group, weight=module.weight, bias_=module.bias, + n_fused=n_fused, *args, **kwargs, )