From 6c6c5e53323ba2c1911bb6074e00c7d7886b5e36 Mon Sep 17 00:00:00 2001
From: qunyang
Date: Thu, 21 Mar 2024 09:46:35 +0200
Subject: [PATCH] BF16 optimizer: Clear lp grads after updating hp grads in hook

---
 deepspeed/runtime/bf16_optimizer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/deepspeed/runtime/bf16_optimizer.py b/deepspeed/runtime/bf16_optimizer.py
index 4ec603af1505..11ef224e7b7e 100644
--- a/deepspeed/runtime/bf16_optimizer.py
+++ b/deepspeed/runtime/bf16_optimizer.py
@@ -532,7 +532,7 @@ def _load_hp_checkpoint_state(self, checkpoint_dir):
 
     def accumulate_hp_grads_and_remove_lp(self, lp_param, group_idx, param_idx):
         assert self.immediate_grad_update
-        self._update_hp_grad(lp_param, group_idx, param_idx, clear_lp_grads=False)
+        self._update_hp_grad(lp_param, group_idx, param_idx, clear_lp_grads=True)
 
     def create_grad_acc_hooks(self):
         self.grad_accs = []