diff --git a/HelloDeepSpeed/README.md b/HelloDeepSpeed/README.md
index 9bc7adac0..bbc8761de 100644
--- a/HelloDeepSpeed/README.md
+++ b/HelloDeepSpeed/README.md
@@ -31,7 +31,7 @@ One of the most important parts of training ML models is for the experiments to
 
 * Save all the hyperparameters associated with the experiment (be it taken from a config or parsed from the command line)
 
-* Seed your random generators
+* Seed your random generators. Some useful tips can be found [here](https://pytorch.org/docs/stable/notes/randomness.html?highlight=reproducibility).
 
 * Specify all the packages and their versions. This can be a `requirements.txt` file, a conda `env.yaml` file or a `pyproject.toml` file. If you want complete reproducibility, you can also include a `Dockerfile` to specify the environment to run the experiment in.
 
diff --git a/HelloDeepSpeed/train_bert.py b/HelloDeepSpeed/train_bert.py
index 14d61f00c..45e536298 100644
--- a/HelloDeepSpeed/train_bert.py
+++ b/HelloDeepSpeed/train_bert.py
@@ -6,6 +6,7 @@ from functools import partial
 from typing import (Any, Callable, Dict, Iterable, List, Optional, Tuple,
                     TypeVar, Union)
 
+import random
 import datasets
 import fire
 import loguru
@@ -788,4 +789,7 @@ def train(
 
 
 if __name__ == "__main__":
+    torch.manual_seed(42)
+    np.random.seed(0)
+    random.seed(0)
     fire.Fire(train)
diff --git a/HelloDeepSpeed/train_bert_ds.py b/HelloDeepSpeed/train_bert_ds.py
index 421d03daf..dfb5f272a 100644
--- a/HelloDeepSpeed/train_bert_ds.py
+++ b/HelloDeepSpeed/train_bert_ds.py
@@ -10,6 +10,7 @@ from functools import partial
 from typing import (Any, Callable, Dict, Iterable, List, Optional, Tuple,
                     TypeVar, Union)
 
+import random
 import datasets
 import fire
 import loguru
@@ -802,4 +803,7 @@ def train(
 
 
 if __name__ == "__main__":
+    torch.manual_seed(42)
+    np.random.seed(0)
+    random.seed(0)
     fire.Fire(train)