From acfaa9d0fc65f82c4416c3c5b34e272a4c1dd869 Mon Sep 17 00:00:00 2001
From: binmakeswell
Date: Fri, 28 Apr 2023 13:43:19 +0800
Subject: [PATCH 1/3] [chat] set default gemini strategy

---
 applications/Chat/examples/README.md             | 12 ++++++------
 applications/Chat/examples/train_prompts.py      |  2 +-
 applications/Chat/examples/train_prompts.sh      |  4 ++--
 applications/Chat/examples/train_reward_model.py |  2 +-
 applications/Chat/examples/train_rm.sh           |  2 +-
 applications/Chat/examples/train_sft.py          |  2 +-
 applications/Chat/examples/train_sft.sh          |  2 +-
 7 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/applications/Chat/examples/README.md b/applications/Chat/examples/README.md
index e3880c7e4c0c..d2132ae57a56 100644
--- a/applications/Chat/examples/README.md
+++ b/applications/Chat/examples/README.md
@@ -57,7 +57,7 @@ You can also use the following cmd to start a supervised instructs fine-tuning w
 torchrun --standalone --nproc_per_node=4 train_sft.py \
     --pretrain "/path/to/LLaMa-7B/" \
     --model 'llama' \
-    --strategy colossalai_zero2 \
+    --strategy colossalai_gemini \
     --log_interval 10 \
     --save_path /path/to/Coati-7B \
     --dataset /path/to/data.json \
@@ -69,7 +69,7 @@ torchrun --standalone --nproc_per_node=4 train_sft.py \
     --grad_checkpoint
 ```
 ### Arg List
-- --strategy: the strategy using for training, choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'], default='naive'
+- --strategy: the strategy using for training, choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'], default='colossalai_gemini'
 - --model: model type, choices=['gpt2', 'bloom', 'opt', 'llama'], default='bloom'
 - --pretrain: pretrain model, type=str, default=None
 - --max_datasets_size: the max size of dataset, type=int, default=None
@@ -92,7 +92,7 @@ You can also use the following cmd to start training a reward model.
 torchrun --standalone --nproc_per_node=4 train_reward_model.py \
     --pretrain "/path/to/LLaMa-7B/" \
     --model 'llama' \
-    --strategy colossalai_zero2 \
+    --strategy colossalai_gemini \
     --loss_fn 'log_exp'\
     --save_path 'rmstatic.pt' \
 ```
@@ -117,7 +117,7 @@ Model performance in [Anthropics paper](https://arxiv.org/abs/2204.05862):
 We also train the reward model based on LLaMA-7B, which reaches the ACC of 72.06% after 1 epoch, performing almost the same as Anthropic's best RM.
 
 ### Arg List
-- --strategy: the strategy using for training, choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'], default='naive'
+- --strategy: the strategy using for training, choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'], default='colossalai_gemini'
 - --model: model type, choices=['gpt2', 'bloom', 'opt', 'llama'], default='bloom'
 - --pretrain: pretrain model, type=str, default=None
 - --model_path: the path of rm model(if continue to train), type=str, default=None
@@ -147,7 +147,7 @@ You can also use the cmd following to start PPO training.
 torchrun --standalone --nproc_per_node=4 train_prompts.py \
     --pretrain "/path/to/LLaMa-7B/" \
     --model 'llama' \
-    --strategy colossalai_zero2 \
+    --strategy colossalai_gemini \
     --prompt_dataset /path/to/your/prompt_dataset \
     --pretrain_dataset /path/to/your/pretrain_dataset \
     --rm_pretrain /your/pretrain/rm/defination \
@@ -158,7 +158,7 @@ Prompt dataset: the instruction dataset mentioned in the above figure which incl
 Pretrain dataset: the pretrain dataset including the instruction and corresponding response, e.g. you can use the [InstructWild Data](https://github.com/XueFuzhao/InstructionWild/tree/main/data) in stage 1 supervised instructs tuning.
 
 ### Arg List
-- --strategy: the strategy using for training, choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'], default='naive'
+- --strategy: the strategy using for training, choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'], default='colossalai_gemini'
 - --model: model type of actor, choices=['gpt2', 'bloom', 'opt', 'llama'], default='bloom'
 - --pretrain: pretrain model, type=str, default=None
 - --rm_model: reward model type, type=str, choices=['gpt2', 'bloom', 'opt', 'llama'], default=None
diff --git a/applications/Chat/examples/train_prompts.py b/applications/Chat/examples/train_prompts.py
index f4563630aad6..6f5ac00032d2 100644
--- a/applications/Chat/examples/train_prompts.py
+++ b/applications/Chat/examples/train_prompts.py
@@ -208,7 +208,7 @@ def main(args):
     parser.add_argument('--pretrain_dataset', type=str, default=None, help='path to the pretrained dataset')
     parser.add_argument('--strategy',
                         choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'],
-                        default='naive',
+                        default='colossalai_gemini',
                         help='strategy to use')
     parser.add_argument('--model', default='gpt2', choices=['gpt2', 'bloom', 'opt', 'llama', 'roberta'])
     parser.add_argument('--pretrain', type=str, default=None)
diff --git a/applications/Chat/examples/train_prompts.sh b/applications/Chat/examples/train_prompts.sh
index 7f3b2636ca32..fee85518a5f6 100755
--- a/applications/Chat/examples/train_prompts.sh
+++ b/applications/Chat/examples/train_prompts.sh
@@ -15,6 +15,6 @@ set_n_least_used_CUDA_VISIBLE_DEVICES() {
 
 set_n_least_used_CUDA_VISIBLE_DEVICES 2
 
-# torchrun --standalone --nproc_per_node=2 train_prompts.py prompts.csv --strategy colossalai_zero2
+# torchrun --standalone --nproc_per_node=2 train_prompts.py prompts.csv --strategy colossalai_gemini
 
-torchrun --standalone --nproc_per_node=2 train_prompts.py --prompt_dataset /path/to/data.json --strategy colossalai_zero2
+torchrun --standalone --nproc_per_node=2 train_prompts.py --prompt_dataset /path/to/data.json --strategy colossalai_gemini
diff --git a/applications/Chat/examples/train_reward_model.py b/applications/Chat/examples/train_reward_model.py
index 5198c98dbd15..0aa51c404579 100644
--- a/applications/Chat/examples/train_reward_model.py
+++ b/applications/Chat/examples/train_reward_model.py
@@ -189,7 +189,7 @@ def train(args):
     parser = argparse.ArgumentParser()
     parser.add_argument('--strategy',
                         choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'],
-                        default='naive')
+                        default='colossalai_gemini')
     parser.add_argument('--model', choices=['gpt2', 'bloom', 'opt', 'deberta', 'llama', 'roberta'], default='bloom')
     parser.add_argument('--pretrain', type=str, default=None)
     parser.add_argument('--model_path', type=str, default=None)
diff --git a/applications/Chat/examples/train_rm.sh b/applications/Chat/examples/train_rm.sh
index 80abe62d2a3f..d7ff06967d9c 100755
--- a/applications/Chat/examples/train_rm.sh
+++ b/applications/Chat/examples/train_rm.sh
@@ -18,7 +18,7 @@ set_n_least_used_CUDA_VISIBLE_DEVICES 2
 torchrun --standalone --nproc_per_node=2 train_reward_model.py \
     --pretrain \
     --model 'bloom' \
-    --strategy colossalai_zero2 \
+    --strategy colossalai_gemini \
     --loss_fn 'log_sig'\
     --save_path \
     --dataset 'Anthropic/hh-rlhf'\
diff --git a/applications/Chat/examples/train_sft.py b/applications/Chat/examples/train_sft.py
index b35d228dc593..f75d253e8047 100644
--- a/applications/Chat/examples/train_sft.py
+++ b/applications/Chat/examples/train_sft.py
@@ -176,7 +176,7 @@ def train(args):
     parser = argparse.ArgumentParser()
     parser.add_argument('--strategy',
                         choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2', 'colossalai_zero2_cpu'],
-                        default='naive')
+                        default='colossalai_gemini')
     parser.add_argument('--model', choices=['gpt2', 'bloom', 'opt', 'llama'], default='bloom')
     parser.add_argument('--pretrain', type=str, default=None)
     parser.add_argument('--dataset', type=str, default=None)
diff --git a/applications/Chat/examples/train_sft.sh b/applications/Chat/examples/train_sft.sh
index 73710d1b19f8..0d33d2950fab 100755
--- a/applications/Chat/examples/train_sft.sh
+++ b/applications/Chat/examples/train_sft.sh
@@ -1,7 +1,7 @@
 torchrun --standalone --nproc_per_node=4 train_sft.py \
     --pretrain "/path/to/LLaMa-7B/" \
     --model 'llama' \
-    --strategy colossalai_zero2 \
+    --strategy colossalai_gemini \
     --log_interval 10 \
     --save_path /path/to/Coati-7B \
     --dataset /path/to/data.json \

From 12cb582c14920400a68de8caebc257b0bdede1bf Mon Sep 17 00:00:00 2001
From: binmakeswell
Date: Fri, 28 Apr 2023 13:53:16 +0800
Subject: [PATCH 2/3] [chat] set default zero2 strategy

---
 applications/Chat/examples/README.md             | 10 +++++-----
 applications/Chat/examples/train_prompts.py      |  2 +-
 applications/Chat/examples/train_prompts.sh      |  4 ++--
 applications/Chat/examples/train_reward_model.py |  2 +-
 applications/Chat/examples/train_rm.sh           |  2 +-
 applications/Chat/examples/train_sft.py          |  2 +-
 applications/Chat/examples/train_sft.sh          |  2 +-
 7 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/applications/Chat/examples/README.md b/applications/Chat/examples/README.md
index d2132ae57a56..1d04f87691d7 100644
--- a/applications/Chat/examples/README.md
+++ b/applications/Chat/examples/README.md
@@ -57,7 +57,7 @@ You can also use the following cmd to start a supervised instructs fine-tuning w
 torchrun --standalone --nproc_per_node=4 train_sft.py \
     --pretrain "/path/to/LLaMa-7B/" \
     --model 'llama' \
-    --strategy colossalai_gemini \
+    --strategy colossalai_zero2 \
     --log_interval 10 \
     --save_path /path/to/Coati-7B \
     --dataset /path/to/data.json \
@@ -69,7 +69,7 @@ torchrun --standalone --nproc_per_node=4 train_sft.py \
     --grad_checkpoint
 ```
 ### Arg List
-- --strategy: the strategy using for training, choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'], default='colossalai_gemini'
+- --strategy: the strategy using for training, choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'], default='colossalai_zero2'
 - --model: model type, choices=['gpt2', 'bloom', 'opt', 'llama'], default='bloom'
 - --pretrain: pretrain model, type=str, default=None
 - --max_datasets_size: the max size of dataset, type=int, default=None
@@ -117,7 +117,7 @@ Model performance in [Anthropics paper](https://arxiv.org/abs/2204.05862):
 We also train the reward model based on LLaMA-7B, which reaches the ACC of 72.06% after 1 epoch, performing almost the same as Anthropic's best RM.
 
 ### Arg List
-- --strategy: the strategy using for training, choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'], default='colossalai_gemini'
+- --strategy: the strategy using for training, choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'], default='colossalai_zero2'
 - --model: model type, choices=['gpt2', 'bloom', 'opt', 'llama'], default='bloom'
 - --pretrain: pretrain model, type=str, default=None
 - --model_path: the path of rm model(if continue to train), type=str, default=None
@@ -147,7 +147,7 @@ You can also use the cmd following to start PPO training.
 torchrun --standalone --nproc_per_node=4 train_prompts.py \
     --pretrain "/path/to/LLaMa-7B/" \
     --model 'llama' \
-    --strategy colossalai_gemini \
+    --strategy colossalai_zero2 \
     --prompt_dataset /path/to/your/prompt_dataset \
     --pretrain_dataset /path/to/your/pretrain_dataset \
     --rm_pretrain /your/pretrain/rm/defination \
@@ -158,7 +158,7 @@ Prompt dataset: the instruction dataset mentioned in the above figure which incl
 Pretrain dataset: the pretrain dataset including the instruction and corresponding response, e.g. you can use the [InstructWild Data](https://github.com/XueFuzhao/InstructionWild/tree/main/data) in stage 1 supervised instructs tuning.
 
 ### Arg List
-- --strategy: the strategy using for training, choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'], default='colossalai_gemini'
+- --strategy: the strategy using for training, choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'], default='colossalai_zero2'
 - --model: model type of actor, choices=['gpt2', 'bloom', 'opt', 'llama'], default='bloom'
 - --pretrain: pretrain model, type=str, default=None
 - --rm_model: reward model type, type=str, choices=['gpt2', 'bloom', 'opt', 'llama'], default=None
diff --git a/applications/Chat/examples/train_prompts.py b/applications/Chat/examples/train_prompts.py
index 6f5ac00032d2..a584991cd34e 100644
--- a/applications/Chat/examples/train_prompts.py
+++ b/applications/Chat/examples/train_prompts.py
@@ -208,7 +208,7 @@ def main(args):
     parser.add_argument('--pretrain_dataset', type=str, default=None, help='path to the pretrained dataset')
     parser.add_argument('--strategy',
                         choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'],
-                        default='colossalai_gemini',
+                        default='colossalai_zero2',
                         help='strategy to use')
     parser.add_argument('--model', default='gpt2', choices=['gpt2', 'bloom', 'opt', 'llama', 'roberta'])
     parser.add_argument('--pretrain', type=str, default=None)
diff --git a/applications/Chat/examples/train_prompts.sh b/applications/Chat/examples/train_prompts.sh
index fee85518a5f6..7f3b2636ca32 100755
--- a/applications/Chat/examples/train_prompts.sh
+++ b/applications/Chat/examples/train_prompts.sh
@@ -15,6 +15,6 @@ set_n_least_used_CUDA_VISIBLE_DEVICES() {
 
 set_n_least_used_CUDA_VISIBLE_DEVICES 2
 
-# torchrun --standalone --nproc_per_node=2 train_prompts.py prompts.csv --strategy colossalai_gemini
+# torchrun --standalone --nproc_per_node=2 train_prompts.py prompts.csv --strategy colossalai_zero2
 
-torchrun --standalone --nproc_per_node=2 train_prompts.py --prompt_dataset /path/to/data.json --strategy colossalai_gemini
+torchrun --standalone --nproc_per_node=2 train_prompts.py --prompt_dataset /path/to/data.json --strategy colossalai_zero2
diff --git a/applications/Chat/examples/train_reward_model.py b/applications/Chat/examples/train_reward_model.py
index 0aa51c404579..48b12336fa67 100644
--- a/applications/Chat/examples/train_reward_model.py
+++ b/applications/Chat/examples/train_reward_model.py
@@ -189,7 +189,7 @@ def train(args):
     parser = argparse.ArgumentParser()
     parser.add_argument('--strategy',
                         choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'],
-                        default='colossalai_gemini')
+                        default='colossalai_zero2')
     parser.add_argument('--model', choices=['gpt2', 'bloom', 'opt', 'deberta', 'llama', 'roberta'], default='bloom')
     parser.add_argument('--pretrain', type=str, default=None)
     parser.add_argument('--model_path', type=str, default=None)
diff --git a/applications/Chat/examples/train_rm.sh b/applications/Chat/examples/train_rm.sh
index d7ff06967d9c..80abe62d2a3f 100755
--- a/applications/Chat/examples/train_rm.sh
+++ b/applications/Chat/examples/train_rm.sh
@@ -18,7 +18,7 @@ set_n_least_used_CUDA_VISIBLE_DEVICES 2
 torchrun --standalone --nproc_per_node=2 train_reward_model.py \
     --pretrain \
     --model 'bloom' \
-    --strategy colossalai_gemini \
+    --strategy colossalai_zero2 \
     --loss_fn 'log_sig'\
     --save_path \
     --dataset 'Anthropic/hh-rlhf'\
diff --git a/applications/Chat/examples/train_sft.py b/applications/Chat/examples/train_sft.py
index f75d253e8047..96914644d433 100644
--- a/applications/Chat/examples/train_sft.py
+++ b/applications/Chat/examples/train_sft.py
@@ -176,7 +176,7 @@ def train(args):
     parser = argparse.ArgumentParser()
     parser.add_argument('--strategy',
                         choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2', 'colossalai_zero2_cpu'],
-                        default='colossalai_gemini')
+                        default='colossalai_zero2')
     parser.add_argument('--model', choices=['gpt2', 'bloom', 'opt', 'llama'], default='bloom')
     parser.add_argument('--pretrain', type=str, default=None)
     parser.add_argument('--dataset', type=str, default=None)
diff --git a/applications/Chat/examples/train_sft.sh b/applications/Chat/examples/train_sft.sh
index 0d33d2950fab..73710d1b19f8 100755
--- a/applications/Chat/examples/train_sft.sh
+++ b/applications/Chat/examples/train_sft.sh
@@ -1,7 +1,7 @@
 torchrun --standalone --nproc_per_node=4 train_sft.py \
     --pretrain "/path/to/LLaMa-7B/" \
     --model 'llama' \
-    --strategy colossalai_gemini \
+    --strategy colossalai_zero2 \
     --log_interval 10 \
     --save_path /path/to/Coati-7B \
     --dataset /path/to/data.json \

From 3774a8350223a60dfdab342327a7d241525e5d4e Mon Sep 17 00:00:00 2001
From: binmakeswell
Date: Fri, 28 Apr 2023 13:54:19 +0800
Subject: [PATCH 3/3] [chat] set default zero2 strategy

---
 applications/Chat/examples/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/applications/Chat/examples/README.md b/applications/Chat/examples/README.md
index 1d04f87691d7..e76007147b47 100644
--- a/applications/Chat/examples/README.md
+++ b/applications/Chat/examples/README.md
@@ -92,7 +92,7 @@ You can also use the following cmd to start training a reward model.
 torchrun --standalone --nproc_per_node=4 train_reward_model.py \
     --pretrain "/path/to/LLaMa-7B/" \
     --model 'llama' \
-    --strategy colossalai_gemini \
+    --strategy colossalai_zero2 \
     --loss_fn 'log_exp'\
     --save_path 'rmstatic.pt' \
 ```
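After the series lands, every example script resolves its strategy through the same argparse pattern shown in the hunks above. The following is a minimal, self-contained sketch of that pattern (a hypothetical standalone excerpt, not the full `train_sft.py`; only the `--strategy` argument and its default are taken from the diffs):

```python
import argparse

# Sketch of the shared --strategy argument after this series; the rest of
# each training script (model setup, data loading, etc.) is omitted.
parser = argparse.ArgumentParser()
parser.add_argument('--strategy',
                    choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'],
                    default='colossalai_zero2',
                    help='strategy to use')
args = parser.parse_args()

# With no --strategy flag on the command line, args.strategy is
# 'colossalai_zero2', so plain `torchrun ... train_sft.py` picks ZeRO-2.
print(args.strategy)
```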