From 93b45a34ece27f91241e0eabe139d65650ca5804 Mon Sep 17 00:00:00 2001 From: Haofan Wang Date: Tue, 10 Jan 2023 13:42:46 +0800 Subject: [PATCH 1/9] Update initializer_1d.py --- colossalai/context/process_group_initializer/initializer_1d.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/colossalai/context/process_group_initializer/initializer_1d.py b/colossalai/context/process_group_initializer/initializer_1d.py index 4c05028041ce..ddfb6c292578 100644 --- a/colossalai/context/process_group_initializer/initializer_1d.py +++ b/colossalai/context/process_group_initializer/initializer_1d.py @@ -44,7 +44,7 @@ def init_dist_group(self): for i in range(self.num_group): ranks = [i * self.tensor_parallel_size + j for j in range(self.tensor_parallel_size)] group = dist.new_group(ranks) - group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() != 'gloo' else group + group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() == 'gloo' else group if self.rank in ranks: local_rank = ranks.index(self.rank) From ad20ab3c62188f3808f4e3f9d5ccd913972660fb Mon Sep 17 00:00:00 2001 From: Haofan Wang Date: Tue, 10 Jan 2023 13:43:56 +0800 Subject: [PATCH 2/9] Update initializer_2d.py --- .../context/process_group_initializer/initializer_2d.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/colossalai/context/process_group_initializer/initializer_2d.py b/colossalai/context/process_group_initializer/initializer_2d.py index fe0ba553d6f3..6c5f218c1265 100644 --- a/colossalai/context/process_group_initializer/initializer_2d.py +++ b/colossalai/context/process_group_initializer/initializer_2d.py @@ -56,7 +56,7 @@ def init_dist_group(self): for j in range(self.summa_dim): ranks = [i * self.tensor_parallel_size + j * self.summa_dim + k for k in range(self.summa_dim)] group = dist.new_group(ranks) - group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() != 'gloo' else group + group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() == 'gloo' else group if self.rank in ranks: local_rank = ranks.index(self.rank) @@ -105,7 +105,7 @@ def init_dist_group(self): for j in range(self.summa_dim): ranks = [i * self.tensor_parallel_size + j + k * self.summa_dim for k in range(self.summa_dim)] group = dist.new_group(ranks) - group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() != 'gloo' else group + group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() == 'gloo' else group if self.rank in ranks: local_rank = ranks.index(self.rank) From e8a5f0fe0d32dde5af9d57872cd11d935ecc315b Mon Sep 17 00:00:00 2001 From: Haofan Wang Date: Tue, 10 Jan 2023 13:44:46 +0800 Subject: [PATCH 3/9] Update initializer_2p5d.py --- .../context/process_group_initializer/initializer_2p5d.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/colossalai/context/process_group_initializer/initializer_2p5d.py b/colossalai/context/process_group_initializer/initializer_2p5d.py index 6b6fdc5d715c..19dafaed798f 100644 --- a/colossalai/context/process_group_initializer/initializer_2p5d.py +++ b/colossalai/context/process_group_initializer/initializer_2p5d.py @@ -74,7 +74,7 @@ def init_dist_group(self): for i in range(self.tesseract_dim) ] group = dist.new_group(ranks) - group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() != 'gloo' else group + group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() == 'gloo' else group if self.rank in ranks: local_rank = ranks.index(self.rank) @@ -128,7 +128,7 @@ def init_dist_group(self): for j in range(self.tesseract_dim) ] group = dist.new_group(ranks) - group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() != 'gloo' else group + group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() == 'gloo' else group if self.rank in ranks: local_rank = ranks.index(self.rank) @@ -182,7 +182,7 @@ def init_dist_group(self): for k in range(self.tesseract_dep) ] group = dist.new_group(ranks) - group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() != 'gloo' else group + group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() == 'gloo' else group if self.rank in ranks: local_rank = ranks.index(self.rank) @@ -237,7 +237,7 @@ def init_dist_group(self): for j in range(self.tesseract_dim) ] group = dist.new_group(ranks) - group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() != 'gloo' else group + group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() == 'gloo' else group if self.rank in ranks: local_rank = ranks.index(self.rank) From 554275688485b04ad26146a03602004a414da232 Mon Sep 17 00:00:00 2001 From: Haofan Wang Date: Tue, 10 Jan 2023 13:45:25 +0800 Subject: [PATCH 4/9] Update initializer_3d.py --- .../process_group_initializer/initializer_3d.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/colossalai/context/process_group_initializer/initializer_3d.py b/colossalai/context/process_group_initializer/initializer_3d.py index b752b8f45654..7d65a1dc7916 100644 --- a/colossalai/context/process_group_initializer/initializer_3d.py +++ b/colossalai/context/process_group_initializer/initializer_3d.py @@ -62,7 +62,7 @@ def init_dist_group(self): for k in range(self.depth): ranks = [h * self.depth**3 + i + self.depth * (j + self.depth * k) for j in range(self.depth)] group = dist.new_group(ranks) - group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() != 'gloo' else group + group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() == 'gloo' else group if self.rank in ranks: local_rank = ranks.index(self.rank) @@ -113,7 +113,7 @@ def init_dist_group(self): for j in range(self.depth): ranks = [h * self.depth**3 + i + self.depth * (j + self.depth * k) for i in range(self.depth)] group = dist.new_group(ranks) - group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() != 'gloo' else group + group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() == 'gloo' else group if self.rank in ranks: local_rank = ranks.index(self.rank) @@ -164,7 +164,7 @@ def init_dist_group(self): for j in range(self.depth): ranks = [h * self.depth**3 + i + self.depth * (j + self.depth * k) for k in range(self.depth)] group = dist.new_group(ranks) - group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() != 'gloo' else group + group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() == 'gloo' else group if self.rank in ranks: local_rank = ranks.index(self.rank) @@ -217,7 +217,7 @@ def init_dist_group(self): for i in range(self.depth) ] group = dist.new_group(ranks) - group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() != 'gloo' else group + group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() == 'gloo' else group if self.rank in ranks: local_rank = ranks.index(self.rank) @@ -270,7 +270,7 @@ def init_dist_group(self): for i in range(self.depth) ] group = dist.new_group(ranks) - group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() != 'gloo' else group + group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() == 'gloo' else group if self.rank in ranks: local_rank = ranks.index(self.rank) From 2704cccf73810931ce5d3caac6e1ca6cdb32d85c Mon Sep 17 00:00:00 2001 From: Haofan Wang Date: Tue, 10 Jan 2023 13:45:40 +0800 Subject: [PATCH 5/9] Update initializer_data.py --- .../context/process_group_initializer/initializer_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/colossalai/context/process_group_initializer/initializer_data.py b/colossalai/context/process_group_initializer/initializer_data.py index 0b8b0d91fcb9..1bae59b94f9a 100644 --- a/colossalai/context/process_group_initializer/initializer_data.py +++ b/colossalai/context/process_group_initializer/initializer_data.py @@ -42,7 +42,7 @@ def init_dist_group(self): for i in range(self.num_data_parallel_group): ranks = [i + j * self.num_data_parallel_group for j in range(self.data_parallel_size)] group = dist.new_group(ranks) - group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() != 'gloo' else group + group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() == 'gloo' else group if self.rank in ranks: local_rank = ranks.index(self.rank) From ce0edc392fc2e689f0f6fd930d27314ece784953 Mon Sep 17 00:00:00 2001 From: Haofan Wang Date: Tue, 10 Jan 2023 13:46:00 +0800 Subject: [PATCH 6/9] Update initializer_model.py --- .../context/process_group_initializer/initializer_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/colossalai/context/process_group_initializer/initializer_model.py b/colossalai/context/process_group_initializer/initializer_model.py index 99b9cc0d4edc..6d889bbd39ad 100644 --- a/colossalai/context/process_group_initializer/initializer_model.py +++ b/colossalai/context/process_group_initializer/initializer_model.py @@ -43,7 +43,7 @@ def init_dist_group(self): for i in range(self.num_group): ranks = [i * self.model_parallel_size + j for j in range(self.model_parallel_size)] group = dist.new_group(ranks) - group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() != 'gloo' else group + group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() == 'gloo' else group if self.rank in ranks: local_rank = ranks.index(self.rank) From 46292ddd085d1331253ee91787e114ea4184ba02 Mon Sep 17 00:00:00 2001 From: Haofan Wang Date: Tue, 10 Jan 2023 13:46:17 +0800 Subject: [PATCH 7/9] Update initializer_pipeline.py --- .../context/process_group_initializer/initializer_pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/colossalai/context/process_group_initializer/initializer_pipeline.py b/colossalai/context/process_group_initializer/initializer_pipeline.py index edd1a3706c68..8b418fedcd7b 100644 --- a/colossalai/context/process_group_initializer/initializer_pipeline.py +++ b/colossalai/context/process_group_initializer/initializer_pipeline.py @@ -40,7 +40,7 @@ def init_dist_group(self): range(i * self.data_group_size + j, (i + 1) * self.data_group_size, self.pipeline_stage_size)) pipe_group_size = len(pipe_ranks) pipe_group = dist.new_group(pipe_ranks) - group_cpu = dist.new_group(pipe_ranks, backend='gloo') if dist.get_backend() != 'gloo' else pipe_group + group_cpu = dist.new_group(pipe_ranks, backend='gloo') if dist.get_backend() == 'gloo' else pipe_group if self.rank in pipe_ranks: local_rank = pipe_ranks.index(self.rank) From ce76f986fc9288e5656f65b97fa26f702b692f44 Mon Sep 17 00:00:00 2001 From: Haofan Wang Date: Tue, 10 Jan 2023 13:46:53 +0800 Subject: [PATCH 8/9] Update initializer_sequence.py --- .../context/process_group_initializer/initializer_sequence.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/colossalai/context/process_group_initializer/initializer_sequence.py b/colossalai/context/process_group_initializer/initializer_sequence.py index 682fe4bb7633..f45f5823efdc 100644 --- a/colossalai/context/process_group_initializer/initializer_sequence.py +++ b/colossalai/context/process_group_initializer/initializer_sequence.py @@ -45,7 +45,7 @@ def init_dist_group(self): for i in range(self.num_group): ranks = [i * self.dp_size + j for j in range(self.dp_size)] group = dist.new_group(ranks) - group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() != 'gloo' else group + group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() == 'gloo' else group if self.rank in ranks: local_rank = ranks.index(self.rank) From 9fa9f5dcf489da04477f8d3d11f85b4b3d47aa2d Mon Sep 17 00:00:00 2001 From: Haofan Wang Date: Tue, 10 Jan 2023 13:47:10 +0800 Subject: [PATCH 9/9] Update initializer_tensor.py --- .../context/process_group_initializer/initializer_tensor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/colossalai/context/process_group_initializer/initializer_tensor.py b/colossalai/context/process_group_initializer/initializer_tensor.py index d2b5be9cfffb..8cc7907f9816 100644 --- a/colossalai/context/process_group_initializer/initializer_tensor.py +++ b/colossalai/context/process_group_initializer/initializer_tensor.py @@ -42,7 +42,7 @@ def init_dist_group(self): for i in range(self.num_tensor_parallel_group): ranks = [i * self.tensor_parallel_size + j for j in range(self.tensor_parallel_size)] group = dist.new_group(ranks) - group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() != 'gloo' else group + group_cpu = dist.new_group(ranks, backend='gloo') if dist.get_backend() == 'gloo' else group if self.rank in ranks: local_rank = ranks.index(self.rank)