From 77ed98df3983d2e0aafbd832a3260e811d7dde8d Mon Sep 17 00:00:00 2001 From: Olatunji Ruwase Date: Thu, 6 Jan 2022 19:03:23 +0000 Subject: [PATCH 1/2] Improve reproducibility --- HelloDeepSpeed/train_bert.py | 4 ++++ HelloDeepSpeed/train_bert_ds.py | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/HelloDeepSpeed/train_bert.py b/HelloDeepSpeed/train_bert.py index 14d61f00c..45e536298 100644 --- a/HelloDeepSpeed/train_bert.py +++ b/HelloDeepSpeed/train_bert.py @@ -6,6 +6,7 @@ from functools import partial from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, TypeVar, Union +import random import datasets import fire import loguru @@ -788,4 +789,7 @@ def train( if __name__ == "__main__": + torch.manual_seed(42) + np.random.seed(0) + random.seed(0) fire.Fire(train) diff --git a/HelloDeepSpeed/train_bert_ds.py b/HelloDeepSpeed/train_bert_ds.py index 421d03daf..dfb5f272a 100644 --- a/HelloDeepSpeed/train_bert_ds.py +++ b/HelloDeepSpeed/train_bert_ds.py @@ -10,6 +10,7 @@ from functools import partial from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, TypeVar, Union +import random import datasets import fire import loguru @@ -802,4 +803,7 @@ def train( if __name__ == "__main__": + torch.manual_seed(42) + np.random.seed(0) + random.seed(0) fire.Fire(train) From f3ab12029d060e1947c6b0fd55c8711af01ec63a Mon Sep 17 00:00:00 2001 From: Olatunji Ruwase Date: Thu, 6 Jan 2022 19:23:35 +0000 Subject: [PATCH 2/2] Repro tips link --- HelloDeepSpeed/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/HelloDeepSpeed/README.md b/HelloDeepSpeed/README.md index 9bc7adac0..bbc8761de 100644 --- a/HelloDeepSpeed/README.md +++ b/HelloDeepSpeed/README.md @@ -31,7 +31,7 @@ One of the most important parts of training ML models is for the experiments to * Save all the hyperparameters associated with the experiment (be it taken from a config or parsed from the command line) -* Seed your random generators +* Seed your random 
generators. Some useful tips can be found [here](https://pytorch.org/docs/stable/notes/randomness.html?highlight=reproducibility).
 * Specify all the packages and their versions. This can be a `requirements.txt` file, a conda `env.yaml` file or a `pyproject.toml` file. If you want complete reproducibility, you can also include a `Dockerfile` to specify the environment to run the experiment in.