diff --git a/colossalai/context/process_group_initializer/initializer_1d.py b/colossalai/context/process_group_initializer/initializer_1d.py index 4c05028041ce..ddfb6c292578 100644 --- a/colossalai/context/process_group_initializer/initializer_1d.py +++ b/colossalai/context/process_group_initializer/initializer_1d.py @@ -44,7 +44,7 @@ def init_dist_group(self): for i in range(self.num_group): ranks = [i * self.tensor_parallel_size + j for j in range(self.tensor_parallel_size)] group = dist.new_group(ranks) - group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() != 'gloo' else group + group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() == 'gloo' else group if self.rank in ranks: local_rank = ranks.index(self.rank) diff --git a/colossalai/context/process_group_initializer/initializer_2d.py b/colossalai/context/process_group_initializer/initializer_2d.py index fe0ba553d6f3..6c5f218c1265 100644 --- a/colossalai/context/process_group_initializer/initializer_2d.py +++ b/colossalai/context/process_group_initializer/initializer_2d.py @@ -56,7 +56,7 @@ def init_dist_group(self): for j in range(self.summa_dim): ranks = [i * self.tensor_parallel_size + j * self.summa_dim + k for k in range(self.summa_dim)] group = dist.new_group(ranks) - group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() != 'gloo' else group + group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() == 'gloo' else group if self.rank in ranks: local_rank = ranks.index(self.rank) @@ -105,7 +105,7 @@ def init_dist_group(self): for j in range(self.summa_dim): ranks = [i * self.tensor_parallel_size + j + k * self.summa_dim for k in range(self.summa_dim)] group = dist.new_group(ranks) - group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() != 'gloo' else group + group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() == 'gloo' else group if self.rank in ranks: local_rank = ranks.index(self.rank) diff --git a/colossalai/context/process_group_initializer/initializer_2p5d.py b/colossalai/context/process_group_initializer/initializer_2p5d.py index 6b6fdc5d715c..19dafaed798f 100644 --- a/colossalai/context/process_group_initializer/initializer_2p5d.py +++ b/colossalai/context/process_group_initializer/initializer_2p5d.py @@ -74,7 +74,7 @@ def init_dist_group(self): for i in range(self.tesseract_dim) ] group = dist.new_group(ranks) - group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() != 'gloo' else group + group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() == 'gloo' else group if self.rank in ranks: local_rank = ranks.index(self.rank) @@ -128,7 +128,7 @@ def init_dist_group(self): for j in range(self.tesseract_dim) ] group = dist.new_group(ranks) - group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() != 'gloo' else group + group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() == 'gloo' else group if self.rank in ranks: local_rank = ranks.index(self.rank) @@ -182,7 +182,7 @@ def init_dist_group(self): for k in range(self.tesseract_dep) ] group = dist.new_group(ranks) - group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() != 'gloo' else group + group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() == 'gloo' else group if self.rank in ranks: local_rank = ranks.index(self.rank) @@ -237,7 +237,7 @@ def init_dist_group(self): for j in range(self.tesseract_dim) ] group = dist.new_group(ranks) - group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() != 'gloo' else group + group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() == 'gloo' else group if self.rank in ranks: local_rank = ranks.index(self.rank) diff --git a/colossalai/context/process_group_initializer/initializer_3d.py b/colossalai/context/process_group_initializer/initializer_3d.py index b752b8f45654..7d65a1dc7916 100644 --- a/colossalai/context/process_group_initializer/initializer_3d.py +++ b/colossalai/context/process_group_initializer/initializer_3d.py @@ -62,7 +62,7 @@ def init_dist_group(self): for k in range(self.depth): ranks = [h * self.depth**3 + i + self.depth * (j + self.depth * k) for j in range(self.depth)] group = dist.new_group(ranks) - group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() != 'gloo' else group + group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() == 'gloo' else group if self.rank in ranks: local_rank = ranks.index(self.rank) @@ -113,7 +113,7 @@ def init_dist_group(self): for j in range(self.depth): ranks = [h * self.depth**3 + i + self.depth * (j + self.depth * k) for i in range(self.depth)] group = dist.new_group(ranks) - group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() != 'gloo' else group + group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() == 'gloo' else group if self.rank in ranks: local_rank = ranks.index(self.rank) @@ -164,7 +164,7 @@ def init_dist_group(self): for j in range(self.depth): ranks = [h * self.depth**3 + i + self.depth * (j + self.depth * k) for k in range(self.depth)] group = dist.new_group(ranks) - group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() != 'gloo' else group + group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() == 'gloo' else group if self.rank in ranks: local_rank = ranks.index(self.rank) @@ -217,7 +217,7 @@ def init_dist_group(self): for i in range(self.depth) ] group = dist.new_group(ranks) - group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() != 'gloo' else group + group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() == 'gloo' else group if self.rank in ranks: local_rank = ranks.index(self.rank) @@ -270,7 +270,7 @@ def init_dist_group(self): for i in range(self.depth) ] group = dist.new_group(ranks) - group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() != 'gloo' else group + group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() == 'gloo' else group if self.rank in ranks: local_rank = ranks.index(self.rank) diff --git a/colossalai/context/process_group_initializer/initializer_data.py b/colossalai/context/process_group_initializer/initializer_data.py index 0b8b0d91fcb9..1bae59b94f9a 100644 --- a/colossalai/context/process_group_initializer/initializer_data.py +++ b/colossalai/context/process_group_initializer/initializer_data.py @@ -42,7 +42,7 @@ def init_dist_group(self): for i in range(self.num_data_parallel_group): ranks = [i + j * self.num_data_parallel_group for j in range(self.data_parallel_size)] group = dist.new_group(ranks) - group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() != 'gloo' else group + group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() == 'gloo' else group if self.rank in ranks: local_rank = ranks.index(self.rank) diff --git a/colossalai/context/process_group_initializer/initializer_model.py b/colossalai/context/process_group_initializer/initializer_model.py index 99b9cc0d4edc..6d889bbd39ad 100644 --- a/colossalai/context/process_group_initializer/initializer_model.py +++ b/colossalai/context/process_group_initializer/initializer_model.py @@ -43,7 +43,7 @@ def init_dist_group(self): for i in range(self.num_group): ranks = [i * self.model_parallel_size + j for j in range(self.model_parallel_size)] group = dist.new_group(ranks) - group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() != 'gloo' else group + group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() == 'gloo' else group if self.rank in ranks: local_rank = ranks.index(self.rank) diff --git a/colossalai/context/process_group_initializer/initializer_pipeline.py b/colossalai/context/process_group_initializer/initializer_pipeline.py index edd1a3706c68..8b418fedcd7b 100644 --- a/colossalai/context/process_group_initializer/initializer_pipeline.py +++ b/colossalai/context/process_group_initializer/initializer_pipeline.py @@ -40,7 +40,7 @@ def init_dist_group(self): range(i * self.data_group_size + j, (i + 1) * self.data_group_size, self.pipeline_stage_size)) pipe_group_size = len(pipe_ranks) pipe_group = dist.new_group(pipe_ranks) - group_cpu = dist.new_group(pipe_ranks, backend='gloo') if dist.get_backend() != 'gloo' else pipe_group + group_cpu = dist.new_group(pipe_ranks, backend='gloo') if dist.get_backend() == 'gloo' else pipe_group if self.rank in pipe_ranks: local_rank = pipe_ranks.index(self.rank) diff --git a/colossalai/context/process_group_initializer/initializer_sequence.py b/colossalai/context/process_group_initializer/initializer_sequence.py index 682fe4bb7633..f45f5823efdc 100644 --- a/colossalai/context/process_group_initializer/initializer_sequence.py +++ b/colossalai/context/process_group_initializer/initializer_sequence.py @@ -45,7 +45,7 @@ def init_dist_group(self): for i in range(self.num_group): ranks = [i * self.dp_size + j for j in range(self.dp_size)] group = dist.new_group(ranks) - group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() != 'gloo' else group + group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() == 'gloo' else group if self.rank in ranks: local_rank = ranks.index(self.rank) diff --git a/colossalai/context/process_group_initializer/initializer_tensor.py b/colossalai/context/process_group_initializer/initializer_tensor.py index d2b5be9cfffb..8cc7907f9816 100644 --- a/colossalai/context/process_group_initializer/initializer_tensor.py +++ b/colossalai/context/process_group_initializer/initializer_tensor.py @@ -42,7 +42,7 @@ def init_dist_group(self): for i in range(self.num_tensor_parallel_group): ranks = [i * self.tensor_parallel_size + j for j in range(self.tensor_parallel_size)] group = dist.new_group(ranks) - group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() != 'gloo' else group + group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() == 'gloo' else group if self.rank in ranks: local_rank = ranks.index(self.rank)