diff --git a/deepspeed/pt/deepspeed_light.py b/deepspeed/pt/deepspeed_light.py index 740727c837e5..fee9ec9cefde 100755 --- a/deepspeed/pt/deepspeed_light.py +++ b/deepspeed/pt/deepspeed_light.py @@ -629,9 +629,8 @@ def backward(self, loss, allreduce_gradients=True): self.gradient_accumulation_steps()) self.summary_events = [ (f'Train/Samples/train_loss', - loss.mean().item() * self.gradient_accumulation_steps(), - self.sample_count) - ] + loss.mean().item(), + self.sample_count)] for event in self.summary_events: # write_summary_events self.summary_writer.add_scalar(event[0], event[1], event[2]) self.summary_writer.flush()