From ca3893fb671b8db22bd44dee6f25a3adedea591f Mon Sep 17 00:00:00 2001 From: "Lai, Yejing" Date: Sun, 23 Feb 2025 22:55:37 -0800 Subject: [PATCH] Add deepseekV3 AutoTP Signed-off-by: Lai, Yejing --- deepspeed/module_inject/auto_tp.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/deepspeed/module_inject/auto_tp.py b/deepspeed/module_inject/auto_tp.py index d148c26968b3..b089ec420d47 100755 --- a/deepspeed/module_inject/auto_tp.py +++ b/deepspeed/module_inject/auto_tp.py @@ -137,7 +137,8 @@ def is_load_module(module): "LPLayerNorm", "SharedEmbedding", "OPTLearnedPositionalEmbedding", "LlamaRMSNorm", "FalconLinear", "MistralRMSNorm", "T5LayerNorm", "MixtralRMSNorm", "Phi3RotaryEmbedding", "Phi3SuScaledRotaryEmbedding", "Phi3RMSNorm", "YuanRMSNorm", "YuanRotaryEmbedding", "Phi3LongRoPEScaledRotaryEmbedding", "Qwen2RMSNorm", - "DeepseekV2RMSNorm", "DeepseekV2YarnRotaryEmbedding", "MoEGate" + "DeepseekV2RMSNorm", "DeepseekV3RMSNorm", "DeepseekV2YarnRotaryEmbedding", "DeepseekV3YarnRotaryEmbedding", + "MoEGate" ] return module.__class__ in load_layers or module._get_name() in load_layer_names