diff --git a/deepspeed/runtime/zero/stage_1_and_2.py b/deepspeed/runtime/zero/stage_1_and_2.py index 6cfcc418e71a..68385af04ba2 100755 --- a/deepspeed/runtime/zero/stage_1_and_2.py +++ b/deepspeed/runtime/zero/stage_1_and_2.py @@ -1308,7 +1308,7 @@ def complete_grad_norm_calculation_for_cpu_offload(self, params): total_norm = total_norm_cuda[0].item()**(1. / norm_type) if total_norm == float('inf') or total_norm == -float('inf') or total_norm != total_norm: - total_norm = -1 + total_norm = -1. return total_norm