diff --git a/deepspeed/runtime/zero/stage3.py b/deepspeed/runtime/zero/stage3.py index 8b7aee16c4ee..21a850050652 100755 --- a/deepspeed/runtime/zero/stage3.py +++ b/deepspeed/runtime/zero/stage3.py @@ -2405,7 +2405,7 @@ def _model_parallel_all_reduce(self, tensor, op): """ Perform all reduce within model parallel group, if any. """ if self.model_parallel_group is None: - torch.distributed.all_reduce(tensor=tensor, op=op) + pass else: torch.distributed.all_reduce(tensor=tensor, op=op,