def init_process_group(self, backend, timeout, init_method, rank, world_size):
    """Create the default ``torch.distributed`` process group if none exists.

    ``backend``, ``timeout``, ``init_method``, ``rank`` and ``world_size`` are
    forwarded unchanged to ``torch.distributed.init_process_group``. The call
    is skipped when a process group is already initialized. In either case
    ``self.using_mpi`` is then set to whether the active backend is ``'mpi'``.
    """
    if not torch.distributed.is_initialized():
        # PyTorch 2.4 made libuv the default TCPStore backend, but Windows torch
        # builds do not come with libuv by default.
        # More information here: https://pytorch.org/tutorials/intermediate/TCPStore_libuv_backend.html
        #
        # ``init_process_group`` has no ``use_libuv`` keyword, so the opt-out is
        # done through the ``USE_LIBUV`` environment variable described in the
        # tutorial above. ``setdefault`` keeps any value that is already set.
        # The OS check comes first so the version helper only runs on Windows.
        if os.name == "nt" and required_torch_version(min_version=2.4):
            os.environ.setdefault("USE_LIBUV", "0")
        torch.distributed.init_process_group(backend,
                                             timeout=timeout,
                                             init_method=init_method,
                                             rank=rank,
                                             world_size=world_size)
    self.using_mpi = torch.distributed.get_backend() == 'mpi'