From 5f506ab0e54a7c04a077896e355aafe77f2755d3 Mon Sep 17 00:00:00 2001
From: ver217
Date: Wed, 3 Nov 2021 13:27:29 +0800
Subject: [PATCH] fix: gradient clipping of FP16 optimizer

---
 colossalai/nn/optimizer/_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/colossalai/nn/optimizer/_utils.py b/colossalai/nn/optimizer/_utils.py
index 1be8ffc1b2c7..6cd92bb38c34 100644
--- a/colossalai/nn/optimizer/_utils.py
+++ b/colossalai/nn/optimizer/_utils.py
@@ -106,7 +106,7 @@ def clip_grad_norm_fp32(parameters, max_norm, norm_type=2):
             tensor_parallel_norm = _calc_lp(tensor_parallel_grads, norm_type)
             no_tensor_parallel_grads = _calc_lp(
                 no_tensor_parallel_grads, norm_type)
-        if gpc.is_initialized(ParallelMode.TENSOR):
+        if gpc.is_initialized(ParallelMode.TENSOR) and len(tensor_parallel_grads) > 0:
             # Sum across all model-parallel GPUs.
             torch.distributed.all_reduce(tensor_parallel_norm,
                                          op=torch.distributed.ReduceOp.SUM,
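
The added len(tensor_parallel_grads) > 0 check keeps the tensor-parallel all-reduce from running when a rank has no tensor-parallel gradients to clip. Below is a minimal, self-contained sketch of the guarded pattern, under the assumption that _calc_lp behaves like the helper in _utils.py and returns a plain Python float (0.0) for an empty gradient list, which torch.distributed.all_reduce cannot accept; _calc_lp here is a simplified stand-in, reduce_tensor_parallel_norm is a hypothetical wrapper, and dist.is_initialized() stands in for gpc.is_initialized(ParallelMode.TENSOR). It is an illustration, not the ColossalAI source.

import torch
import torch.distributed as dist


def _calc_lp(grads, norm_type):
    # Accumulate sum of ||g||_p ** p over the gradient list. With an empty
    # list this returns the plain Python float 0.0, not a tensor.
    norm = 0.0
    for grad in grads:
        norm = norm + torch.norm(grad, norm_type) ** norm_type
    return norm


def reduce_tensor_parallel_norm(tensor_parallel_grads, norm_type, group=None):
    tensor_parallel_norm = _calc_lp(tensor_parallel_grads, norm_type)
    # Mirrors the patched condition: only all-reduce when tensor-parallel
    # gradients actually exist. Otherwise _calc_lp returned the float 0.0,
    # and torch.distributed.all_reduce requires a tensor argument.
    if dist.is_initialized() and len(tensor_parallel_grads) > 0:
        dist.all_reduce(tensor_parallel_norm,
                        op=dist.ReduceOp.SUM,
                        group=group)
    return tensor_parallel_norm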