From 0faf3eca2763c085cd283a971e0d98d4ca1fd3e2 Mon Sep 17 00:00:00 2001 From: Nate Wildermuth Date: Sun, 22 Mar 2020 17:52:14 -0500 Subject: [PATCH 1/2] Fix incorrect tensorboard loss calculation The loss is scaled by the gradient_accumulation_steps value unnecessarily. --- deepspeed/pt/deepspeed_light.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deepspeed/pt/deepspeed_light.py b/deepspeed/pt/deepspeed_light.py index abbb5cd339ea..a1ebb82cf6a1 100755 --- a/deepspeed/pt/deepspeed_light.py +++ b/deepspeed/pt/deepspeed_light.py @@ -625,7 +625,7 @@ def backward(self, loss, allreduce_gradients=True): self.gradient_accumulation_steps()) self.summary_events = [ (f'Train/Samples/train_loss', - loss.mean().item() * self.gradient_accumulation_steps(), + loss.mean().item(), self.sample_count) ] for event in self.summary_events: # write_summary_events From 232b73c6fecaf12fdebac723ead2769065e1901f Mon Sep 17 00:00:00 2001 From: Shaden Smith Date: Thu, 26 Mar 2020 10:35:33 -0700 Subject: [PATCH 2/2] Pleasing the formatter. --- deepspeed/pt/deepspeed_light.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/deepspeed/pt/deepspeed_light.py b/deepspeed/pt/deepspeed_light.py index 7d90b6bbd000..fee9ec9cefde 100755 --- a/deepspeed/pt/deepspeed_light.py +++ b/deepspeed/pt/deepspeed_light.py @@ -630,8 +630,7 @@ def backward(self, loss, allreduce_gradients=True): self.summary_events = [ (f'Train/Samples/train_loss', loss.mean().item(), - self.sample_count) - ] + self.sample_count)] for event in self.summary_events: # write_summary_events self.summary_writer.add_scalar(event[0], event[1], event[2]) self.summary_writer.flush()