From 074e00fac3999b9fa11436c105d4bc7046c7b334 Mon Sep 17 00:00:00 2001 From: yaozhewei Date: Wed, 19 Jan 2022 04:30:03 +0500 Subject: [PATCH 1/3] fix cifar10 moe example with new moe api --- cifar/cifar10_deepspeed.py | 25 +++++-------------------- 1 file changed, 5 insertions(+), 20 deletions(-) diff --git a/cifar/cifar10_deepspeed.py b/cifar/cifar10_deepspeed.py index d51d27b80..e7a5710cd 100755 --- a/cifar/cifar10_deepspeed.py +++ b/cifar/cifar10_deepspeed.py @@ -211,26 +211,11 @@ def forward(self, x): def create_moe_param_groups(model): - from deepspeed.moe.utils import is_moe_param - - params_with_weight_decay = {'params': [], 'name': 'weight_decay_params'} - moe_params_with_weight_decay = { - 'params': [], - 'moe': True, - 'name': 'weight_decay_moe_params' - } - - for module_ in model.modules(): - moe_params_with_weight_decay['params'].extend([ - p for n, p in list(module_._parameters.items()) - if p is not None and is_moe_param(p) - ]) - params_with_weight_decay['params'].extend([ - p for n, p in list(module_._parameters.items()) - if p is not None and not is_moe_param(p) - ]) - - return params_with_weight_decay, moe_params_with_weight_decay + from deepspeed.moe.utils import split_params_into_different_moe_groups_for_optimizer + + params_with_weight_decay = {'params': model.parameters(), 'name': 'weight_decay_params'} + + return split_params_into_different_moe_groups_for_optimizer(params_with_weight_decay) parameters = filter(lambda p: p.requires_grad, net.parameters()) From ad3577e2df386ff8fa0c093ee933f80c293aa409 Mon Sep 17 00:00:00 2001 From: yaozhewei Date: Wed, 19 Jan 2022 04:34:39 +0500 Subject: [PATCH 2/3] shorten the naming --- cifar/cifar10_deepspeed.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cifar/cifar10_deepspeed.py b/cifar/cifar10_deepspeed.py index e7a5710cd..95373ce1d 100755 --- a/cifar/cifar10_deepspeed.py +++ b/cifar/cifar10_deepspeed.py @@ -213,7 +213,7 @@ def forward(self, x): def create_moe_param_groups(model): from deepspeed.moe.utils import split_params_into_different_moe_groups_for_optimizer - params_with_weight_decay = {'params': model.parameters(), 'name': 'weight_decay_params'} + params_with_weight_decay = {'params': model.parameters(), 'name': 'parameters'} return split_params_into_different_moe_groups_for_optimizer(params_with_weight_decay) From 644905f53416dd94a00c8dc6386454b8835898b7 Mon Sep 17 00:00:00 2001 From: yaozhewei Date: Wed, 19 Jan 2022 05:28:30 +0500 Subject: [PATCH 3/3] simplify the name --- cifar/cifar10_deepspeed.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cifar/cifar10_deepspeed.py b/cifar/cifar10_deepspeed.py index 95373ce1d..ed509b8e1 100755 --- a/cifar/cifar10_deepspeed.py +++ b/cifar/cifar10_deepspeed.py @@ -213,9 +213,9 @@ def forward(self, x): def create_moe_param_groups(model): from deepspeed.moe.utils import split_params_into_different_moe_groups_for_optimizer - params_with_weight_decay = {'params': model.parameters(), 'name': 'parameters'} + parameters = {'params': model.parameters(), 'name': 'parameters'} - return split_params_into_different_moe_groups_for_optimizer(params_with_weight_decay) + return split_params_into_different_moe_groups_for_optimizer(parameters) parameters = filter(lambda p: p.requires_grad, net.parameters())