From f82f432ea0f7a524d5a2a7b412cc0bcbcb28e965 Mon Sep 17 00:00:00 2001 From: Camille Zhong <44392324+Camille7777@users.noreply.github.com> Date: Wed, 22 Mar 2023 17:18:13 +0800 Subject: [PATCH 1/4] Add RoBERTa for RLHF Stage 2 & 3 (test) RoBERTa for RLHF Stage 2 & 3 (still in testing) Revert "Add RoBERTa for RLHF Stage 2 & 3 (test)" This reverts commit 06741d894dcbe958acd4e10d771f22275e20e368. Add RoBERTa for RLHF stage 2 & 3 1. add roberta folder under model folder 2. add roberta option in train_reward_model.py 3. add some test in testci Update test_ci.sh Revert "Update test_ci.sh" This reverts commit 9c7352b81766f3177d31eeec0ec178a301df966a. Add RoBERTa for RLHF Stage 2 & 3 (test) RoBERTa for RLHF Stage 2 & 3 (still in testing) Revert "Add RoBERTa for RLHF Stage 2 & 3 (test)" This reverts commit 06741d894dcbe958acd4e10d771f22275e20e368. Add RoBERTa for RLHF stage 2 & 3 1. add roberta folder under model folder 2. add roberta option in train_reward_model.py 3. add some test in testci Update test_ci.sh Revert "Update test_ci.sh" This reverts commit 9c7352b81766f3177d31eeec0ec178a301df966a. update roberta with coati chat ci update Revert "chat ci update" This reverts commit 17ae7ae01fa752bd3289fc39069868fde99cf846. 
From c686cf2688b56fccc8b8e4d5ced124990fad6ba4 Mon Sep 17 00:00:00 2001 From: Camille Zhong <44392324+Camille7777@users.noreply.github.com> Date: Wed, 26 Apr 2023 16:44:10 +0800 Subject: [PATCH 2/4] Update README.md Update README.md --- applications/Chat/examples/README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/applications/Chat/examples/README.md b/applications/Chat/examples/README.md index af8ded005600..f95015d8d0c5 100644 --- a/applications/Chat/examples/README.md +++ b/applications/Chat/examples/README.md @@ -151,6 +151,10 @@ torchrun --standalone --nproc_per_node=4 train_prompts.py \ --rm_pretrain /your/pretrain/rm/defination \ --rm_path /your/rm/model/path ``` + +Prompt dataset: the instruction dataset mentioned in the above figure which includes the instructions. +Pretrain dataset: the pretrain dataset including both the instruction and response, e.g. [alpaca_data.json](https://github.com/tatsu-lab/stanford_alpaca). + ### Arg List - --strategy: the strategy using for training, choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'], default='naive' - --model: model type of actor, choices=['gpt2', 'bloom', 'opt', 'llama'], default='bloom' From cf36b11a04ea9553526dc3ad0cb2d95e341121cd Mon Sep 17 00:00:00 2001 From: Camille Zhong <44392324+Camille7777@users.noreply.github.com> Date: Thu, 27 Apr 2023 11:08:56 +0800 Subject: [PATCH 3/4] update readme --- applications/Chat/examples/README.md | 8 ++++---- applications/Chat/examples/train_prompts.py | 4 ++-- applications/Chat/examples/train_prompts.sh | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/applications/Chat/examples/README.md b/applications/Chat/examples/README.md index f95015d8d0c5..0083dc37227f 100644 --- a/applications/Chat/examples/README.md +++ b/applications/Chat/examples/README.md @@ -146,14 +146,14 @@ torchrun --standalone --nproc_per_node=4 train_prompts.py \ --pretrain "/path/to/LLaMa-7B/" \ --model 'llama' \ --strategy colossalai_zero2 \ - 
--prompt_path /path/to/your/prompt_dataset \ + --prompt_dataset /path/to/your/prompt_dataset \ --pretrain_dataset /path/to/your/pretrain_dataset \ --rm_pretrain /your/pretrain/rm/defination \ --rm_path /your/rm/model/path ``` -Prompt dataset: the instruction dataset mentioned in the above figure which includes the instructions. -Pretrain dataset: the pretrain dataset including both the instruction and response, e.g. [alpaca_data.json](https://github.com/tatsu-lab/stanford_alpaca). +Prompt dataset: the instruction dataset mentioned in the above figure which includes the instructions, e.g. you can use [seed_prompts_ch.jsonl](https://github.com/XueFuzhao/InstructionWild/blob/main/data/seed_prompts_ch.jsonl) or [seed_prompts_en.jsonl](https://github.com/XueFuzhao/InstructionWild/blob/main/data/seed_prompts_en.jsonl) in InstructionWild. +Pretrain dataset: the pretrain dataset including the instruction and corresponding response, e.g. you can use the [InstructionWild Data](https://github.com/XueFuzhao/InstructionWild/tree/main/data) in stage 1 supervised instruction tuning. 
### Arg List - --strategy: the strategy using for training, choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'], default='naive' @@ -163,7 +163,7 @@ Pretrain dataset: the pretrain dataset including both the instruction and respon - --rm_pretrain: pretrain model for reward model, type=str, default=None - --rm_path: the path of rm model, type=str, default=None - --save_path: path to save the model, type=str, default='output' -- --prompt_path: path of the prompt dataset, type=str, default=None +- --prompt_dataset: path of the prompt dataset, type=str, default=None - --pretrain_dataset: path of the ptx dataset, type=str, default=None - --need_optim_ckpt: whether to save optim ckpt, type=bool, default=False - --num_episodes: num of episodes for training, type=int, default=10 diff --git a/applications/Chat/examples/train_prompts.py b/applications/Chat/examples/train_prompts.py index c0455f3a72c2..292caa1b36b1 100644 --- a/applications/Chat/examples/train_prompts.py +++ b/applications/Chat/examples/train_prompts.py @@ -139,7 +139,7 @@ def main(args): data_collator = DataCollatorForSupervisedDataset(tokenizer=tokenizer) - prompt_dataset = PromptDataset(tokenizer=tokenizer, data_path=args.prompt_path, max_datasets_size=16384) + prompt_dataset = PromptDataset(tokenizer=tokenizer, data_path=args.prompt_dataset, max_datasets_size=16384) if dist.is_initialized() and dist.get_world_size() > 1: prompt_sampler = DistributedSampler(prompt_dataset, shuffle=True, seed=42, drop_last=True) else: @@ -204,7 +204,7 @@ def main(args): if __name__ == '__main__': parser = argparse.ArgumentParser() - parser.add_argument('--prompt_path', type=str, default=None, help='path to the prompt dataset') + parser.add_argument('--prompt_dataset', type=str, default=None, help='path to the prompt dataset') parser.add_argument('--pretrain_dataset', type=str, default=None, help='path to the pretrained dataset') parser.add_argument('--strategy', choices=['naive', 'ddp', 'colossalai_gemini', 
'colossalai_zero2'], diff --git a/applications/Chat/examples/train_prompts.sh b/applications/Chat/examples/train_prompts.sh index 8e1ce67ecc64..7f3b2636ca32 100755 --- a/applications/Chat/examples/train_prompts.sh +++ b/applications/Chat/examples/train_prompts.sh @@ -17,4 +17,4 @@ set_n_least_used_CUDA_VISIBLE_DEVICES 2 # torchrun --standalone --nproc_per_node=2 train_prompts.py prompts.csv --strategy colossalai_zero2 -torchrun --standalone --nproc_per_node=2 train_prompts.py --prompt_path /path/to/data.json --strategy colossalai_zero2 +torchrun --standalone --nproc_per_node=2 train_prompts.py --prompt_dataset /path/to/data.json --strategy colossalai_zero2 From ccdcd4ece266afca0ca893a0c163afffb375b7e6 Mon Sep 17 00:00:00 2001 From: Camille Zhong <44392324+Camille7777@users.noreply.github.com> Date: Thu, 27 Apr 2023 11:38:34 +0800 Subject: [PATCH 4/4] Update test_ci.sh --- applications/Chat/examples/test_ci.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/applications/Chat/examples/test_ci.sh b/applications/Chat/examples/test_ci.sh index 32f5858a51b6..a86e2295a62c 100755 --- a/applications/Chat/examples/test_ci.sh +++ b/applications/Chat/examples/test_ci.sh @@ -99,7 +99,7 @@ torchrun --standalone --nproc_per_node=2 ${BASE}/train_reward_model.py \ rm -rf ${BASE}/rm_ckpt.pt -torchrun --standalone --nproc_per_node=2 ${BASE}/train_prompts.py --prompt_path $PROMPT_PATH --pretrain_dataset $PRETRAIN_DATASET \ +torchrun --standalone --nproc_per_node=2 ${BASE}/train_prompts.py --prompt_dataset $PROMPT_PATH --pretrain_dataset $PRETRAIN_DATASET \ --strategy colossalai_zero2 --num_episodes 1 --max_timesteps 2 \ --update_timesteps 2 --max_epochs 1 --train_batch_size 2 \ --pretrain 'facebook/opt-350m' --model opt \ @@ -108,7 +108,7 @@ torchrun --standalone --nproc_per_node=2 ${BASE}/train_prompts.py --prompt_path --save_path ${BASE}/actor_checkpoint_prompts.pt rm -rf ${BASE}/rm_ckpt_opt.pt -torchrun --standalone --nproc_per_node=2 ${BASE}/train_prompts.py 
--prompt_path $PROMPT_PATH --pretrain_dataset $PRETRAIN_DATASET \ +torchrun --standalone --nproc_per_node=2 ${BASE}/train_prompts.py --prompt_dataset $PROMPT_PATH --pretrain_dataset $PRETRAIN_DATASET \ --strategy colossalai_zero2 --num_episodes 1 --max_timesteps 2 \ --update_timesteps 2 --max_epochs 1 --train_batch_size 2 \ --pretrain 'gpt2' --model gpt2 \