From acfaa9d0fc65f82c4416c3c5b34e272a4c1dd869 Mon Sep 17 00:00:00 2001
From: binmakeswell
Date: Fri, 28 Apr 2023 13:43:19 +0800
Subject: [PATCH 1/3] [chat] set default gemini strategy

---
 applications/Chat/examples/README.md             | 12 ++++++------
 applications/Chat/examples/train_prompts.py      |  2 +-
 applications/Chat/examples/train_prompts.sh      |  4 ++--
 applications/Chat/examples/train_reward_model.py |  2 +-
 applications/Chat/examples/train_rm.sh           |  2 +-
 applications/Chat/examples/train_sft.py          |  2 +-
 applications/Chat/examples/train_sft.sh          |  2 +-
 7 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/applications/Chat/examples/README.md b/applications/Chat/examples/README.md
index e3880c7e4c0c..d2132ae57a56 100644
--- a/applications/Chat/examples/README.md
+++ b/applications/Chat/examples/README.md
@@ -57,7 +57,7 @@ You can also use the following cmd to start a supervised instructs fine-tuning w
 torchrun --standalone --nproc_per_node=4 train_sft.py \
     --pretrain "/path/to/LLaMa-7B/" \
     --model 'llama' \
-    --strategy colossalai_zero2 \
+    --strategy colossalai_gemini \
     --log_interval 10 \
     --save_path /path/to/Coati-7B \
     --dataset /path/to/data.json \
@@ -69,7 +69,7 @@ torchrun --standalone --nproc_per_node=4 train_sft.py \
     --grad_checkpoint
 ```
 ### Arg List
-- --strategy: the strategy using for training, choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'], default='naive'
+- --strategy: the strategy using for training, choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'], default='colossalai_gemini'
 - --model: model type, choices=['gpt2', 'bloom', 'opt', 'llama'], default='bloom'
 - --pretrain: pretrain model, type=str, default=None
 - --max_datasets_size: the max size of dataset, type=int, default=None
@@ -92,7 +92,7 @@ You can also use the following cmd to start training a reward model.
 torchrun --standalone --nproc_per_node=4 train_reward_model.py \
     --pretrain "/path/to/LLaMa-7B/" \
     --model 'llama' \
-    --strategy colossalai_zero2 \
+    --strategy colossalai_gemini \
     --loss_fn 'log_exp'\
     --save_path 'rmstatic.pt' \
 ```
@@ -117,7 +117,7 @@ Model performance in [Anthropics paper](https://arxiv.org/abs/2204.05862):
 We also train the reward model based on LLaMA-7B, which reaches the ACC of 72.06% after 1 epoch, performing almost the same as Anthropic's best RM.
 
 ### Arg List
-- --strategy: the strategy using for training, choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'], default='naive'
+- --strategy: the strategy using for training, choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'], default='colossalai_gemini'
 - --model: model type, choices=['gpt2', 'bloom', 'opt', 'llama'], default='bloom'
 - --pretrain: pretrain model, type=str, default=None
 - --model_path: the path of rm model(if continue to train), type=str, default=None
@@ -147,7 +147,7 @@ You can also use the cmd following to start PPO training.
 torchrun --standalone --nproc_per_node=4 train_prompts.py \
     --pretrain "/path/to/LLaMa-7B/" \
     --model 'llama' \
-    --strategy colossalai_zero2 \
+    --strategy colossalai_gemini \
     --prompt_dataset /path/to/your/prompt_dataset \
     --pretrain_dataset /path/to/your/pretrain_dataset \
     --rm_pretrain /your/pretrain/rm/defination \
@@ -158,7 +158,7 @@ Prompt dataset: the instruction dataset mentioned in the above figure which incl
 Pretrain dataset: the pretrain dataset including the instruction and corresponding response, e.g. you can use the [InstructWild Data](https://github.com/XueFuzhao/InstructionWild/tree/main/data) in stage 1 supervised instructs tuning.
 
 ### Arg List
-- --strategy: the strategy using for training, choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'], default='naive'
+- --strategy: the strategy using for training, choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'], default='colossalai_gemini'
 - --model: model type of actor, choices=['gpt2', 'bloom', 'opt', 'llama'], default='bloom'
 - --pretrain: pretrain model, type=str, default=None
 - --rm_model: reward model type, type=str, choices=['gpt2', 'bloom', 'opt', 'llama'], default=None
diff --git a/applications/Chat/examples/train_prompts.py b/applications/Chat/examples/train_prompts.py
index f4563630aad6..6f5ac00032d2 100644
--- a/applications/Chat/examples/train_prompts.py
+++ b/applications/Chat/examples/train_prompts.py
@@ -208,7 +208,7 @@ def main(args):
     parser.add_argument('--pretrain_dataset', type=str, default=None, help='path to the pretrained dataset')
     parser.add_argument('--strategy',
                         choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'],
-                        default='naive',
+                        default='colossalai_gemini',
                         help='strategy to use')
     parser.add_argument('--model', default='gpt2', choices=['gpt2', 'bloom', 'opt', 'llama', 'roberta'])
     parser.add_argument('--pretrain', type=str, default=None)
diff --git a/applications/Chat/examples/train_prompts.sh b/applications/Chat/examples/train_prompts.sh
index 7f3b2636ca32..fee85518a5f6 100755
--- a/applications/Chat/examples/train_prompts.sh
+++ b/applications/Chat/examples/train_prompts.sh
@@ -15,6 +15,6 @@ set_n_least_used_CUDA_VISIBLE_DEVICES() {
 
 set_n_least_used_CUDA_VISIBLE_DEVICES 2
 
-# torchrun --standalone --nproc_per_node=2 train_prompts.py prompts.csv --strategy colossalai_zero2
+# torchrun --standalone --nproc_per_node=2 train_prompts.py prompts.csv --strategy colossalai_gemini
 
-torchrun --standalone --nproc_per_node=2 train_prompts.py --prompt_dataset /path/to/data.json --strategy colossalai_zero2
+torchrun --standalone --nproc_per_node=2 train_prompts.py --prompt_dataset /path/to/data.json --strategy colossalai_gemini
diff --git a/applications/Chat/examples/train_reward_model.py b/applications/Chat/examples/train_reward_model.py
index 5198c98dbd15..0aa51c404579 100644
--- a/applications/Chat/examples/train_reward_model.py
+++ b/applications/Chat/examples/train_reward_model.py
@@ -189,7 +189,7 @@ def train(args):
     parser = argparse.ArgumentParser()
     parser.add_argument('--strategy',
                         choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'],
-                        default='naive')
+                        default='colossalai_gemini')
     parser.add_argument('--model', choices=['gpt2', 'bloom', 'opt', 'deberta', 'llama', 'roberta'], default='bloom')
     parser.add_argument('--pretrain', type=str, default=None)
     parser.add_argument('--model_path', type=str, default=None)
diff --git a/applications/Chat/examples/train_rm.sh b/applications/Chat/examples/train_rm.sh
index 80abe62d2a3f..d7ff06967d9c 100755
--- a/applications/Chat/examples/train_rm.sh
+++ b/applications/Chat/examples/train_rm.sh
@@ -18,7 +18,7 @@ set_n_least_used_CUDA_VISIBLE_DEVICES 2
 torchrun --standalone --nproc_per_node=2 train_reward_model.py \
     --pretrain \
     --model 'bloom' \
-    --strategy colossalai_zero2 \
+    --strategy colossalai_gemini \
     --loss_fn 'log_sig'\
     --save_path \
     --dataset 'Anthropic/hh-rlhf'\
diff --git a/applications/Chat/examples/train_sft.py b/applications/Chat/examples/train_sft.py
index b35d228dc593..f75d253e8047 100644
--- a/applications/Chat/examples/train_sft.py
+++ b/applications/Chat/examples/train_sft.py
@@ -176,7 +176,7 @@ def train(args):
     parser = argparse.ArgumentParser()
     parser.add_argument('--strategy',
                         choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2', 'colossalai_zero2_cpu'],
-                        default='naive')
+                        default='colossalai_gemini')
     parser.add_argument('--model', choices=['gpt2', 'bloom', 'opt', 'llama'], default='bloom')
     parser.add_argument('--pretrain', type=str, default=None)
     parser.add_argument('--dataset', type=str, default=None)
diff --git a/applications/Chat/examples/train_sft.sh b/applications/Chat/examples/train_sft.sh
index 73710d1b19f8..0d33d2950fab 100755
--- a/applications/Chat/examples/train_sft.sh
+++ b/applications/Chat/examples/train_sft.sh
@@ -1,7 +1,7 @@
 torchrun --standalone --nproc_per_node=4 train_sft.py \
     --pretrain "/path/to/LLaMa-7B/" \
     --model 'llama' \
-    --strategy colossalai_zero2 \
+    --strategy colossalai_gemini \
     --log_interval 10 \
     --save_path /path/to/Coati-7B \
     --dataset /path/to/data.json \

From 12cb582c14920400a68de8caebc257b0bdede1bf Mon Sep 17 00:00:00 2001
From: binmakeswell
Date: Fri, 28 Apr 2023 13:53:16 +0800
Subject: [PATCH 2/3] [chat] set default zero2 strategy

---
 applications/Chat/examples/README.md             | 10 +++++-----
 applications/Chat/examples/train_prompts.py      |  2 +-
 applications/Chat/examples/train_prompts.sh      |  4 ++--
 applications/Chat/examples/train_reward_model.py |  2 +-
 applications/Chat/examples/train_rm.sh           |  2 +-
 applications/Chat/examples/train_sft.py          |  2 +-
 applications/Chat/examples/train_sft.sh          |  2 +-
 7 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/applications/Chat/examples/README.md b/applications/Chat/examples/README.md
index d2132ae57a56..1d04f87691d7 100644
--- a/applications/Chat/examples/README.md
+++ b/applications/Chat/examples/README.md
@@ -57,7 +57,7 @@ You can also use the following cmd to start a supervised instructs fine-tuning w
 torchrun --standalone --nproc_per_node=4 train_sft.py \
     --pretrain "/path/to/LLaMa-7B/" \
     --model 'llama' \
-    --strategy colossalai_gemini \
+    --strategy colossalai_zero2 \
     --log_interval 10 \
     --save_path /path/to/Coati-7B \
     --dataset /path/to/data.json \
@@ -69,7 +69,7 @@ torchrun --standalone --nproc_per_node=4 train_sft.py \
     --grad_checkpoint
 ```
 ### Arg List
-- --strategy: the strategy using for training, choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'], default='colossalai_gemini'
+- --strategy: the strategy using for training, choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'], default='colossalai_zero2'
 - --model: model type, choices=['gpt2', 'bloom', 'opt', 'llama'], default='bloom'
 - --pretrain: pretrain model, type=str, default=None
 - --max_datasets_size: the max size of dataset, type=int, default=None
@@ -117,7 +117,7 @@ Model performance in [Anthropics paper](https://arxiv.org/abs/2204.05862):
 We also train the reward model based on LLaMA-7B, which reaches the ACC of 72.06% after 1 epoch, performing almost the same as Anthropic's best RM.
 
 ### Arg List
-- --strategy: the strategy using for training, choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'], default='colossalai_gemini'
+- --strategy: the strategy using for training, choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'], default='colossalai_zero2'
 - --model: model type, choices=['gpt2', 'bloom', 'opt', 'llama'], default='bloom'
 - --pretrain: pretrain model, type=str, default=None
 - --model_path: the path of rm model(if continue to train), type=str, default=None
@@ -147,7 +147,7 @@ You can also use the cmd following to start PPO training.
 torchrun --standalone --nproc_per_node=4 train_prompts.py \
     --pretrain "/path/to/LLaMa-7B/" \
     --model 'llama' \
-    --strategy colossalai_gemini \
+    --strategy colossalai_zero2 \
     --prompt_dataset /path/to/your/prompt_dataset \
     --pretrain_dataset /path/to/your/pretrain_dataset \
     --rm_pretrain /your/pretrain/rm/defination \
@@ -158,7 +158,7 @@ Prompt dataset: the instruction dataset mentioned in the above figure which incl
 Pretrain dataset: the pretrain dataset including the instruction and corresponding response, e.g. you can use the [InstructWild Data](https://github.com/XueFuzhao/InstructionWild/tree/main/data) in stage 1 supervised instructs tuning.
 
 ### Arg List
-- --strategy: the strategy using for training, choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'], default='colossalai_gemini'
+- --strategy: the strategy using for training, choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'], default='colossalai_zero2'
 - --model: model type of actor, choices=['gpt2', 'bloom', 'opt', 'llama'], default='bloom'
 - --pretrain: pretrain model, type=str, default=None
 - --rm_model: reward model type, type=str, choices=['gpt2', 'bloom', 'opt', 'llama'], default=None
diff --git a/applications/Chat/examples/train_prompts.py b/applications/Chat/examples/train_prompts.py
index 6f5ac00032d2..a584991cd34e 100644
--- a/applications/Chat/examples/train_prompts.py
+++ b/applications/Chat/examples/train_prompts.py
@@ -208,7 +208,7 @@ def main(args):
     parser.add_argument('--pretrain_dataset', type=str, default=None, help='path to the pretrained dataset')
     parser.add_argument('--strategy',
                         choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'],
-                        default='colossalai_gemini',
+                        default='colossalai_zero2',
                         help='strategy to use')
     parser.add_argument('--model', default='gpt2', choices=['gpt2', 'bloom', 'opt', 'llama', 'roberta'])
     parser.add_argument('--pretrain', type=str, default=None)
diff --git a/applications/Chat/examples/train_prompts.sh b/applications/Chat/examples/train_prompts.sh
index fee85518a5f6..7f3b2636ca32 100755
--- a/applications/Chat/examples/train_prompts.sh
+++ b/applications/Chat/examples/train_prompts.sh
@@ -15,6 +15,6 @@ set_n_least_used_CUDA_VISIBLE_DEVICES() {
 
 set_n_least_used_CUDA_VISIBLE_DEVICES 2
 
-# torchrun --standalone --nproc_per_node=2 train_prompts.py prompts.csv --strategy colossalai_gemini
+# torchrun --standalone --nproc_per_node=2 train_prompts.py prompts.csv --strategy colossalai_zero2
 
-torchrun --standalone --nproc_per_node=2 train_prompts.py --prompt_dataset /path/to/data.json --strategy colossalai_gemini
+torchrun --standalone --nproc_per_node=2 train_prompts.py --prompt_dataset /path/to/data.json --strategy colossalai_zero2
diff --git a/applications/Chat/examples/train_reward_model.py b/applications/Chat/examples/train_reward_model.py
index 0aa51c404579..48b12336fa67 100644
--- a/applications/Chat/examples/train_reward_model.py
+++ b/applications/Chat/examples/train_reward_model.py
@@ -189,7 +189,7 @@ def train(args):
     parser = argparse.ArgumentParser()
     parser.add_argument('--strategy',
                         choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'],
-                        default='colossalai_gemini')
+                        default='colossalai_zero2')
     parser.add_argument('--model', choices=['gpt2', 'bloom', 'opt', 'deberta', 'llama', 'roberta'], default='bloom')
     parser.add_argument('--pretrain', type=str, default=None)
     parser.add_argument('--model_path', type=str, default=None)
diff --git a/applications/Chat/examples/train_rm.sh b/applications/Chat/examples/train_rm.sh
index d7ff06967d9c..80abe62d2a3f 100755
--- a/applications/Chat/examples/train_rm.sh
+++ b/applications/Chat/examples/train_rm.sh
@@ -18,7 +18,7 @@ set_n_least_used_CUDA_VISIBLE_DEVICES 2
 torchrun --standalone --nproc_per_node=2 train_reward_model.py \
     --pretrain \
     --model 'bloom' \
-    --strategy colossalai_gemini \
+    --strategy colossalai_zero2 \
     --loss_fn 'log_sig'\
     --save_path \
     --dataset 'Anthropic/hh-rlhf'\
diff --git a/applications/Chat/examples/train_sft.py b/applications/Chat/examples/train_sft.py
index f75d253e8047..96914644d433 100644
--- a/applications/Chat/examples/train_sft.py
+++ b/applications/Chat/examples/train_sft.py
@@ -176,7 +176,7 @@ def train(args):
     parser = argparse.ArgumentParser()
     parser.add_argument('--strategy',
                         choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2', 'colossalai_zero2_cpu'],
-                        default='colossalai_gemini')
+                        default='colossalai_zero2')
     parser.add_argument('--model', choices=['gpt2', 'bloom', 'opt', 'llama'], default='bloom')
     parser.add_argument('--pretrain', type=str, default=None)
     parser.add_argument('--dataset', type=str, default=None)
diff --git a/applications/Chat/examples/train_sft.sh b/applications/Chat/examples/train_sft.sh
index 0d33d2950fab..73710d1b19f8 100755
--- a/applications/Chat/examples/train_sft.sh
+++ b/applications/Chat/examples/train_sft.sh
@@ -1,7 +1,7 @@
 torchrun --standalone --nproc_per_node=4 train_sft.py \
     --pretrain "/path/to/LLaMa-7B/" \
     --model 'llama' \
-    --strategy colossalai_gemini \
+    --strategy colossalai_zero2 \
     --log_interval 10 \
     --save_path /path/to/Coati-7B \
     --dataset /path/to/data.json \

From 3774a8350223a60dfdab342327a7d241525e5d4e Mon Sep 17 00:00:00 2001
From: binmakeswell
Date: Fri, 28 Apr 2023 13:54:19 +0800
Subject: [PATCH 3/3] [chat] set default zero2 strategy

---
 applications/Chat/examples/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/applications/Chat/examples/README.md b/applications/Chat/examples/README.md
index 1d04f87691d7..e76007147b47 100644
--- a/applications/Chat/examples/README.md
+++ b/applications/Chat/examples/README.md
@@ -92,7 +92,7 @@ You can also use the following cmd to start training a reward model.
 torchrun --standalone --nproc_per_node=4 train_reward_model.py \
     --pretrain "/path/to/LLaMa-7B/" \
     --model 'llama' \
-    --strategy colossalai_gemini \
+    --strategy colossalai_zero2 \
     --loss_fn 'log_exp'\
     --save_path 'rmstatic.pt' \
 ```
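After the series lands, every example script resolves its strategy through the same argparse pattern shown in the hunks above. The following is a minimal, self-contained sketch of that pattern (a hypothetical standalone excerpt, not the full `train_sft.py`; only the `--strategy` argument and its default are taken from the diffs):

```python
import argparse

# Sketch of the shared --strategy argument after this series; the rest of
# each training script (model setup, data loading, etc.) is omitted.
parser = argparse.ArgumentParser()
parser.add_argument('--strategy',
                    choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'],
                    default='colossalai_zero2',
                    help='strategy to use')
args = parser.parse_args()

# With no --strategy flag on the command line, args.strategy is
# 'colossalai_zero2', so plain `torchrun ... train_sft.py` picks ZeRO-2.
print(args.strategy)
```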