From 4b93a385e6bc2b3e99ca63a8cb3b92e9e311daaf Mon Sep 17 00:00:00 2001
From: Stas Bekman
Date: Sat, 20 Feb 2021 09:45:27 -0800
Subject: [PATCH 1/2] fix log(0) & 1/log(1) bugs

---
 deepspeed/runtime/lr_schedules.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/deepspeed/runtime/lr_schedules.py b/deepspeed/runtime/lr_schedules.py
index 515233851a1d..19b1963d9033 100755
--- a/deepspeed/runtime/lr_schedules.py
+++ b/deepspeed/runtime/lr_schedules.py
@@ -706,6 +706,7 @@ def __init__(self,
         self.min_lrs = self._format_param(self.optimizer, warmup_min_lr, "min_lr")
         self.max_lrs = self._format_param(self.optimizer, warmup_max_lr, "max_lr")
         self.delta_lrs = [big - small for big, small in zip(self.max_lrs, self.min_lrs)]
+        warmup_num_steps = max(2, warmup_num_steps)
         self.warmup_num_steps = warmup_num_steps
         self.inverse_log_warm_up = 1.0 / math.log(warmup_num_steps)
         self.last_batch_iteration = last_batch_iteration

From 2eab8eb2cd931b0500b6606b4e3828cc80ddabc9 Mon Sep 17 00:00:00 2001
From: Stas Bekman
Date: Sat, 20 Feb 2021 09:56:27 -0800
Subject: [PATCH 2/2] simplify

---
 deepspeed/runtime/lr_schedules.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/deepspeed/runtime/lr_schedules.py b/deepspeed/runtime/lr_schedules.py
index 19b1963d9033..7846da12fdbd 100755
--- a/deepspeed/runtime/lr_schedules.py
+++ b/deepspeed/runtime/lr_schedules.py
@@ -706,9 +706,8 @@ def __init__(self,
         self.min_lrs = self._format_param(self.optimizer, warmup_min_lr, "min_lr")
         self.max_lrs = self._format_param(self.optimizer, warmup_max_lr, "max_lr")
         self.delta_lrs = [big - small for big, small in zip(self.max_lrs, self.min_lrs)]
-        warmup_num_steps = max(2, warmup_num_steps)
-        self.warmup_num_steps = warmup_num_steps
-        self.inverse_log_warm_up = 1.0 / math.log(warmup_num_steps)
+        self.warmup_num_steps = max(2, warmup_num_steps)
+        self.inverse_log_warm_up = 1.0 / math.log(self.warmup_num_steps)
         self.last_batch_iteration = last_batch_iteration
 
     def get_lr(self):