From 79510f41066b307d65946c7c11143d65bb1f762e Mon Sep 17 00:00:00 2001
From: haze188
Date: Mon, 1 Jul 2024 06:10:10 +0000
Subject: [PATCH] [misc] fix typos

---
 colossalai/shardformer/policies/deepseek.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/colossalai/shardformer/policies/deepseek.py b/colossalai/shardformer/policies/deepseek.py
index f8f39e66c121..1d64c643ebdb 100644
--- a/colossalai/shardformer/policies/deepseek.py
+++ b/colossalai/shardformer/policies/deepseek.py
@@ -39,11 +39,11 @@ def module_policy(self) -> Dict[Union[str, nn.Module], ModulePolicyDescription]:
         if self.shard_config.enable_sequence_parallelism:
             self.shard_config.enable_sequence_parallelism = False
             raise NotImplementedError(
-                "Mixtral dosen't support sequence parallelism now, will ignore the sequence parallelism flag."
+                "Deepseek doesn't support sequence parallelism now, will ignore the sequence parallelism flag."
             )
 
         if self.shard_config.enable_tensor_parallelism:
-            raise NotImplementedError("Tensor parallelism is not supported for Mixtral model now.")
+            raise NotImplementedError("Tensor parallelism is not supported for Deepseek model now.")
 
         if getattr(self.shard_config, "ep_group", None) is None:
             raise ValueError("You must pass in ep_group via shard_config for expert parallel!")
@@ -117,7 +117,7 @@ def get_held_layers(self) -> List[Module]:
         """Get pipeline layers for current stage."""
         assert self.pipeline_stage_manager is not None
 
-        if self.model.__class__.__name__ == "MixtralModel":
+        if self.model.__class__.__name__ == "DeepseekModel":
             module = self.model
         else:
             module = self.model.model
@@ -145,7 +145,7 @@ def module_policy(self):
         # set None as default
         self.set_pipeline_forward(
             model_cls=DeepseekModel,
-            new_forward=DeepseekPipelineForwards.mixtral_model_forward,
+            new_forward=DeepseekPipelineForwards.deepseek_model_forward,
             policy=policy,
         )
         return policy