From 65a4d4423a495919abe07225818cfb353ad0ac4d Mon Sep 17 00:00:00 2001
From: Mingyan Jiang <1829166702@qq.com>
Date: Fri, 28 Jul 2023 17:25:55 +0800
Subject: [PATCH 1/2] [doc] fix gradient accumulation doc

---
 docs/source/en/features/gradient_accumulation_with_booster.md | 2 ++
 .../zh-Hans/features/gradient_accumulation_with_booster.md    | 3 +++
 2 files changed, 5 insertions(+)

diff --git a/docs/source/en/features/gradient_accumulation_with_booster.md b/docs/source/en/features/gradient_accumulation_with_booster.md
index 201e3bc2b643..7bc4eb47bcd7 100644
--- a/docs/source/en/features/gradient_accumulation_with_booster.md
+++ b/docs/source/en/features/gradient_accumulation_with_booster.md
@@ -103,10 +103,12 @@ for idx, (img, label) in enumerate(train_dataloader):
         with sync_context:
             output = model(img)
             train_loss = criterion(output, label)
+            train_loss = train_loss / GRADIENT_ACCUMULATION
             booster.backward(train_loss, optimizer)
     else:
         output = model(img)
         train_loss = criterion(output, label)
+        train_loss = train_loss / GRADIENT_ACCUMULATION
         booster.backward(train_loss, optimizer)
         optimizer.step()
         optimizer.zero_grad()
diff --git a/docs/source/zh-Hans/features/gradient_accumulation_with_booster.md b/docs/source/zh-Hans/features/gradient_accumulation_with_booster.md
index a8422060f0ea..4d355dacc740 100644
--- a/docs/source/zh-Hans/features/gradient_accumulation_with_booster.md
+++ b/docs/source/zh-Hans/features/gradient_accumulation_with_booster.md
@@ -98,6 +98,7 @@ model, optimizer, criterion, train_dataloader, _ = booster.boost(model=model,
 使用booster构建一个普通的训练循环,验证梯度累积。 `param_by_iter` 记录分布训练的信息。
 ```python
 optimizer.zero_grad()
+loss = 0
 for idx, (img, label) in enumerate(train_dataloader):
     sync_context = booster.no_sync(model)
     img = img.cuda()
@@ -106,10 +107,12 @@ label = label.cuda()
         with sync_context:
             output = model(img)
             train_loss = criterion(output, label)
+            train_loss = train_loss / GRADIENT_ACCUMULATION
             booster.backward(train_loss, optimizer)
     else:
         output = model(img)
         train_loss = criterion(output, label)
+        train_loss = train_loss / GRADIENT_ACCUMULATION
         booster.backward(train_loss, optimizer)
         optimizer.step()
         optimizer.zero_grad()

From 8cd947c4f57aa144e6006291b2332b14c54acf73 Mon Sep 17 00:00:00 2001
From: Mingyan Jiang <1829166702@qq.com>
Date: Fri, 28 Jul 2023 17:30:58 +0800
Subject: [PATCH 2/2] [doc] fix gradient accumulation doc

---
 .../zh-Hans/features/gradient_accumulation_with_booster.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/docs/source/zh-Hans/features/gradient_accumulation_with_booster.md b/docs/source/zh-Hans/features/gradient_accumulation_with_booster.md
index 4d355dacc740..d121b161b9ff 100644
--- a/docs/source/zh-Hans/features/gradient_accumulation_with_booster.md
+++ b/docs/source/zh-Hans/features/gradient_accumulation_with_booster.md
@@ -98,7 +98,6 @@ model, optimizer, criterion, train_dataloader, _ = booster.boost(model=model,
 使用booster构建一个普通的训练循环,验证梯度累积。 `param_by_iter` 记录分布训练的信息。
 ```python
 optimizer.zero_grad()
-loss = 0
 for idx, (img, label) in enumerate(train_dataloader):
     sync_context = booster.no_sync(model)
     img = img.cuda()
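
Note on the change: both patches converge on the standard gradient-accumulation pattern that the docs document — scale each micro-batch loss by `1 / GRADIENT_ACCUMULATION` so the gradients summed across micro-batches match those of one full batch, and step the optimizer only on the synchronizing iteration. Below is a minimal, self-contained sketch of that pattern in plain PyTorch, without ColossalAI's booster; the dummy model, the stand-in dataloader, and the `(idx + 1) % GRADIENT_ACCUMULATION` stepping condition are illustrative assumptions, not taken from the patched docs.

```python
import torch

GRADIENT_ACCUMULATION = 4

# Dummy model, loss, and optimizer standing in for the doc's training setup.
model = torch.nn.Linear(10, 2)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

# Random tensors standing in for train_dataloader (illustrative shapes).
train_dataloader = [(torch.randn(8, 10), torch.randint(0, 2, (8,)))
                    for _ in range(2 * GRADIENT_ACCUMULATION)]

optimizer.zero_grad()
for idx, (img, label) in enumerate(train_dataloader):
    output = model(img)
    # Scale each micro-batch loss so the gradients accumulated by backward()
    # sum to the gradient of the mean loss over the effective large batch.
    train_loss = criterion(output, label) / GRADIENT_ACCUMULATION
    train_loss.backward()
    # Step and reset only once every GRADIENT_ACCUMULATION micro-batches
    # (illustrative stepping condition, not from the patched docs).
    if (idx + 1) % GRADIENT_ACCUMULATION == 0:
        optimizer.step()
        optimizer.zero_grad()
```

The division is what the patches add to the docs: `backward()` sums gradients across calls, so without it the accumulated gradient corresponds to a loss `GRADIENT_ACCUMULATION` times too large, which effectively multiplies the learning rate.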