Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
994b40c
run the base
Mar 16, 2023
0390f6e
working on dist ppo
Mar 16, 2023
c1df61b
sync
Mar 20, 2023
32837c3
Merge remote-tracking branch 'upstream/main' into chatgpt_dist_ppo
Mar 20, 2023
518f837
detached trainer
Mar 20, 2023
b707ba2
update detached trainer. no maker update function
Mar 20, 2023
1311924
facing init problem
Mar 21, 2023
29976fa
1 maker 1 trainer detached run. but no model update
Mar 21, 2023
ea4761a
Merge branch 'hpcaitech:main' into detached_ppo
CsRic Mar 21, 2023
523e209
facing cuda problem
Mar 22, 2023
45361c2
fix save functions
Mar 22, 2023
886cc98
Merge branch 'detached_ppo' of https://github.com/CsRic/ColossalAI in…
Mar 22, 2023
42aa4c7
verified maker update
Mar 22, 2023
26d82b5
nothing
Mar 23, 2023
517ff22
add ignore
Mar 23, 2023
dc91d58
Merge remote-tracking branch 'upstream/main' into detached_ppo
Mar 23, 2023
b91348d
analyze loss issue
Mar 24, 2023
b882fdd
fix detached ppo loss issue
Mar 24, 2023
ebd2be9
remove some debug codes
Mar 24, 2023
650ec5b
facing 2m1t stuck issue
Mar 27, 2023
b40974b
Merge remote-tracking branch 'upstream/main' into detached_ppo
Mar 27, 2023
f791fb7
2m1t verified
Mar 27, 2023
f468724
do not use torchrun
Mar 27, 2023
12b94f7
working on 2m2t
Mar 27, 2023
05df7d7
working on 2m2t
Mar 27, 2023
0773697
initialize strategy in ray actor env
Mar 28, 2023
9451a54
facing actor's init order issue
Mar 28, 2023
9626518
facing ddp model update issue (need unwrap ddp)
Mar 28, 2023
d637032
unwrap ddp actor
Mar 29, 2023
7dadc80
checking 1m2t stuck problem
Mar 29, 2023
09f611d
nothing
Mar 29, 2023
459639c
set timeout for trainer choosing. It solves the stuck problem!
Mar 29, 2023
65363e1
delete some debug output
Mar 29, 2023
2f8036b
rename to sync with upstream
Mar 30, 2023
7e5c8f2
rename to sync with upstream
Mar 30, 2023
c36d58a
merge upstream
Mar 30, 2023
c0649c3
coati rename
Mar 30, 2023
db29760
Merge branch 'hpcaitech:main' into detached_ppo
CsRic Mar 31, 2023
3c6f68c
nothing
Mar 31, 2023
334956a
merge
Mar 31, 2023
35e4602
I am going to detach the replaybuffer from trainer and make it a Ray …
Apr 3, 2023
04069cd
experience_maker_holder performs target-revolving _send_experience() …
Apr 4, 2023
117f08c
Merge 'upstream/main'
Apr 4, 2023
b0a002e
Merge remote-tracking branch 'upstream/main' into detached_ppo
Apr 12, 2023
9051002
Merge remote-tracking branch 'upstream/main' into detached_ppo
Apr 13, 2023
3a4d0e7
move code to ray subfolder
Apr 13, 2023
95a6c72
working on pipeline inference
Apr 13, 2023
19fab46
apply comments
Apr 14, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions applications/Chat/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -144,3 +144,5 @@ docs/.build

# wandb log
example/wandb/

examples/awesome-chatgpt-prompts/
2 changes: 2 additions & 0 deletions applications/Chat/coati/ray/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from .src.detached_replay_buffer import DetachedReplayBuffer
from .src.detached_trainer_ppo import DetachedPPOTrainer
153 changes: 153 additions & 0 deletions applications/Chat/coati/ray/example/1m1t.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
import argparse
from copy import deepcopy

import pandas as pd
import torch
from coati.trainer import PPOTrainer


from coati.ray.src.experience_maker_holder import ExperienceMakerHolder
from coati.ray.src.detached_trainer_ppo import DetachedPPOTrainer

from coati.trainer.strategies import ColossalAIStrategy, DDPStrategy, NaiveStrategy
from coati.experience_maker import NaiveExperienceMaker
from torch.optim import Adam
from transformers import AutoTokenizer, BloomTokenizerFast
from transformers.models.gpt2.tokenization_gpt2 import GPT2Tokenizer

from colossalai.nn.optimizer import HybridAdam

import ray
import os
import socket

def get_free_port():
    """Ask the OS for a currently unused TCP port and return its number."""
    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        # Binding to port 0 lets the kernel choose a free ephemeral port.
        probe.bind(('', 0))
        _, port = probe.getsockname()
    finally:
        probe.close()
    return port


def get_local_ip():
    """Return the local IP of the interface used to reach the internet.

    "Connecting" a UDP socket sends no packets; it only makes the OS pick
    the outbound interface, whose address we then read back.
    """
    probe = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    try:
        probe.connect(('8.8.8.8', 80))
        ip, _ = probe.getsockname()
    finally:
        probe.close()
    return ip

def main(args):
    """Run detached PPO with one experience maker and one trainer.

    Spawns two named Ray actors -- a DetachedPPOTrainer ("trainer1") and an
    ExperienceMakerHolder ("maker1") -- wires them to each other by actor
    name, runs both loops to completion, then saves checkpoints.

    Args:
        args: parsed CLI namespace (see the argparse block in ``__main__``).
    """
    master_addr = str(get_local_ip())

    # Distributed env for the trainer process group (single rank).
    trainer_port = str(get_free_port())
    env_info_trainer = {
        'local_rank': '0',
        'rank': '0',
        'world_size': '1',
        'master_port': trainer_port,
        'master_addr': master_addr,
    }

    # Distributed env for the experience-maker process group (single rank).
    maker_port = str(get_free_port())
    env_info_maker = {
        'local_rank': '0',
        'rank': '0',
        'world_size': '1',
        'master_port': maker_port,
        'master_addr': master_addr,
    }

    # configure tokenizer
    if args.model == 'gpt2':
        tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
        tokenizer.pad_token = tokenizer.eos_token
    elif args.model == 'bloom':
        tokenizer = BloomTokenizerFast.from_pretrained(args.pretrain)
        tokenizer.pad_token = tokenizer.eos_token
    elif args.model == 'opt':
        tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m")
    else:
        raise ValueError(f'Unsupported model "{args.model}"')

    # configure Trainer (named actor so the maker can look it up by name)
    trainer_ref = DetachedPPOTrainer.options(name="trainer1", num_gpus=1, max_concurrency=2).remote(
        experience_maker_holder_name_list=["maker1"],
        strategy=args.trainer_strategy,
        model=args.model,
        env_info=env_info_trainer,
        pretrained=args.pretrain,
        lora_rank=args.lora_rank,
        train_batch_size=args.train_batch_size,
        buffer_limit=16,
        experience_batch_size=args.experience_batch_size,
        max_epochs=args.max_epochs,
        # generation kwargs:
        max_length=128,
        do_sample=True,
        temperature=1.0,
        top_k=50,
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=tokenizer.eos_token_id,
        debug=args.debug,
    )

    # configure Experience Maker (named actor the trainer pushes weights to)
    experience_holder_ref = ExperienceMakerHolder.options(name="maker1", num_gpus=1, max_concurrency=2).remote(
        detached_trainer_name_list=["trainer1"],
        strategy=args.maker_strategy,
        env_info=env_info_maker,
        experience_batch_size=args.experience_batch_size,
        kl_coef=0.1,
        # generation kwargs:
        max_length=128,
        do_sample=True,
        temperature=1.0,
        top_k=50,
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=tokenizer.eos_token_id,
        debug=args.debug,
    )

    # The trainer sends its actor and critic to the experience holder;
    # block until that handshake completes before starting either loop.
    ray.get(trainer_ref.initialize_remote_makers.remote())

    # configure sampler
    dataset = pd.read_csv(args.prompt_path)['prompt']

    def tokenize_fn(texts):
        # MUST pad to max length to ensure inputs of all ranks have the same
        # length. Different lengths may lead to a hang when using gemini, as
        # generation step counts would diverge across ranks.
        batch = tokenizer(texts, return_tensors='pt', max_length=96, padding='max_length', truncation=True)
        return {k: v.cuda() for k, v in batch.items()}

    trainer_done_ref = trainer_ref.fit.remote(num_episodes=args.num_episodes,
                                              max_timesteps=args.max_timesteps,
                                              update_timesteps=args.update_timesteps)
    # The trainer consumes episodes * (timesteps / update interval) * epochs
    # experience batches; the +3 spare batches are for fault tolerance.
    num_exp_per_maker = args.num_episodes * args.max_timesteps // args.update_timesteps * args.max_epochs + 3
    maker_done_ref = experience_holder_ref.workingloop.remote(dataset, tokenize_fn, times=num_exp_per_maker)

    ray.get([trainer_done_ref, maker_done_ref])

    # Save the model checkpoint after fitting. Block on the remote save with
    # ray.get: otherwise the driver may exit (tearing down the actor) before
    # the checkpoint file is fully written.
    ray.get(trainer_ref.strategy_save_actor.remote(args.save_path, only_rank0=True))
    # save optimizer checkpoint on all ranks
    if args.need_optim_ckpt:
        ray.get(trainer_ref.strategy_save_actor_optim.remote(
            'actor_optim_checkpoint_prompts_%d.pt' % (torch.cuda.current_device()),
            only_rank0=False))

if __name__ == '__main__':
    cli = argparse.ArgumentParser()
    # Data, strategies, and model selection.
    cli.add_argument('prompt_path')
    cli.add_argument('--trainer_strategy',
                     choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'],
                     default='naive')
    cli.add_argument('--maker_strategy',
                     choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'],
                     default='naive')
    cli.add_argument('--model', default='gpt2', choices=['gpt2', 'bloom', 'opt'])
    cli.add_argument('--pretrain', type=str, default=None)
    cli.add_argument('--save_path', type=str, default='actor_checkpoint_prompts.pt')
    cli.add_argument('--need_optim_ckpt', type=bool, default=False)
    # Training schedule and batch sizes.
    cli.add_argument('--num_episodes', type=int, default=10)
    cli.add_argument('--max_timesteps', type=int, default=10)
    cli.add_argument('--update_timesteps', type=int, default=10)
    cli.add_argument('--max_epochs', type=int, default=5)
    cli.add_argument('--train_batch_size', type=int, default=8)
    cli.add_argument('--experience_batch_size', type=int, default=8)
    cli.add_argument('--lora_rank', type=int, default=0, help="low-rank adaptation matrices rank")
    cli.add_argument('--debug', action='store_true')

    args = cli.parse_args()
    # Join the Ray namespace shared with the named trainer/maker actors.
    ray.init(namespace=os.environ["RAY_NAMESPACE"])
    main(args)
23 changes: 23 additions & 0 deletions applications/Chat/coati/ray/example/1m1t.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Select the n GPUs with the least memory currently in use and export them
# via CUDA_VISIBLE_DEVICES. With no argument, n defaults to 9999, which in
# practice selects every GPU.
set_n_least_used_CUDA_VISIBLE_DEVICES() {
    local n=${1:-"9999"}
    echo "GPU Memory Usage:"
    # Query per-GPU used memory, drop the CSV header, number rows from 0
    # (row number == GPU index), echo the table to the terminal via tee,
    # sort ascending by used memory, and keep the indices of the n
    # least-loaded GPUs.
    local FIRST_N_GPU_IDS=$(nvidia-smi --query-gpu=memory.used --format=csv \
        | tail -n +2 \
        | nl -v 0 \
        | tee /dev/tty \
        | sort -g -k 2 \
        | awk '{print $1}' \
        | head -n $n)
    # Join the selected indices with commas, as CUDA_VISIBLE_DEVICES expects.
    export CUDA_VISIBLE_DEVICES=$(echo $FIRST_N_GPU_IDS | sed 's/ /,/g')
    echo "Now CUDA_VISIBLE_DEVICES is set to:"
    echo "CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES"
}

# Two GPUs: one for the trainer actor, one for the experience maker actor.
set_n_least_used_CUDA_VISIBLE_DEVICES 2

# Ray namespace under which 1m1t.py registers its named actors.
export RAY_NAMESPACE="admin"

python 1m1t.py "/path/to/prompts.csv" \
    --trainer_strategy colossalai_zero2 --maker_strategy naive --lora_rank 2 --pretrain "facebook/opt-350m" --model 'opt' \
    --num_episodes 10 --max_timesteps 10 --update_timesteps 10 \
    --max_epochs 10 --debug
186 changes: 186 additions & 0 deletions applications/Chat/coati/ray/example/1m2t.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
import argparse
from copy import deepcopy

import pandas as pd
import torch
from coati.trainer import PPOTrainer


from coati.ray.src.experience_maker_holder import ExperienceMakerHolder
from coati.ray.src.detached_trainer_ppo import DetachedPPOTrainer

from coati.trainer.strategies import ColossalAIStrategy, DDPStrategy, NaiveStrategy
from coati.experience_maker import NaiveExperienceMaker
from torch.optim import Adam
from transformers import AutoTokenizer, BloomTokenizerFast
from transformers.models.gpt2.tokenization_gpt2 import GPT2Tokenizer

from colossalai.nn.optimizer import HybridAdam

import ray
import os
import socket


def get_free_port():
    """Ask the OS for a currently unused TCP port and return its number."""
    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        # Binding to port 0 lets the kernel choose a free ephemeral port.
        probe.bind(('', 0))
        _, port = probe.getsockname()
    finally:
        probe.close()
    return port


def get_local_ip():
    """Return the local IP of the interface used to reach the internet.

    "Connecting" a UDP socket sends no packets; it only makes the OS pick
    the outbound interface, whose address we then read back.
    """
    probe = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    try:
        probe.connect(('8.8.8.8', 80))
        ip, _ = probe.getsockname()
    finally:
        probe.close()
    return ip

def main(args):
    """Run detached PPO with one experience maker feeding two trainers.

    Spawns three named Ray actors -- DetachedPPOTrainer "trainer1" and
    "trainer2" forming a 2-rank trainer group, and ExperienceMakerHolder
    "maker1" serving both -- runs all loops to completion, then saves
    checkpoints.

    Args:
        args: parsed CLI namespace (see the argparse block in ``__main__``).
    """
    master_addr = str(get_local_ip())

    # Distributed env for the 2-rank trainer process group (shared port).
    trainer_port = str(get_free_port())
    env_info_trainer_1 = {
        'local_rank': '0',
        'rank': '0',
        'world_size': '2',
        'master_port': trainer_port,
        'master_addr': master_addr,
    }
    env_info_trainer_2 = {
        'local_rank': '0',
        'rank': '1',
        'world_size': '2',
        'master_port': trainer_port,
        'master_addr': master_addr,
    }

    # Distributed env for the experience-maker process group.
    # NOTE(review): world_size is '2' but only one maker is launched here --
    # confirm the maker strategy does not block waiting for a second rank.
    maker_port = str(get_free_port())
    env_info_maker_1 = {
        'local_rank': '0',
        'rank': '0',
        'world_size': '2',
        'master_port': maker_port,
        'master_addr': master_addr,
    }
    print([env_info_trainer_1, env_info_trainer_2, env_info_maker_1])

    # Actors below pass their own namespace via .options(namespace=...).
    ray.init(dashboard_port=1145)

    # configure tokenizer
    if args.model == 'gpt2':
        tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
        tokenizer.pad_token = tokenizer.eos_token
    elif args.model == 'bloom':
        tokenizer = BloomTokenizerFast.from_pretrained(args.pretrain)
        tokenizer.pad_token = tokenizer.eos_token
    elif args.model == 'opt':
        tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m")
    else:
        raise ValueError(f'Unsupported model "{args.model}"')

    # configure Trainers (named actors so the maker can look them up by name)
    trainer_1_ref = DetachedPPOTrainer.options(name="trainer1", namespace=os.environ["RAY_NAMESPACE"], num_gpus=1, max_concurrency=2).remote(
        experience_maker_holder_name_list=["maker1"],
        strategy=args.trainer_strategy,
        model=args.model,
        env_info=env_info_trainer_1,
        pretrained=args.pretrain,
        lora_rank=args.lora_rank,
        train_batch_size=args.train_batch_size,
        buffer_limit=16,
        experience_batch_size=args.experience_batch_size,
        max_epochs=args.max_epochs,
        # generation kwargs:
        max_length=128,
        do_sample=True,
        temperature=1.0,
        top_k=50,
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=tokenizer.eos_token_id,
        debug=args.debug,
    )

    trainer_2_ref = DetachedPPOTrainer.options(name="trainer2", namespace=os.environ["RAY_NAMESPACE"], num_gpus=1, max_concurrency=2).remote(
        experience_maker_holder_name_list=["maker1"],
        strategy=args.trainer_strategy,
        model=args.model,
        env_info=env_info_trainer_2,
        pretrained=args.pretrain,
        lora_rank=args.lora_rank,
        train_batch_size=args.train_batch_size,
        buffer_limit=16,
        experience_batch_size=args.experience_batch_size,
        max_epochs=args.max_epochs,
        # generation kwargs:
        max_length=128,
        do_sample=True,
        temperature=1.0,
        top_k=50,
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=tokenizer.eos_token_id,
        debug=args.debug,
    )

    # configure Experience Maker serving both trainers
    experience_holder_1_ref = ExperienceMakerHolder.options(name="maker1", namespace=os.environ["RAY_NAMESPACE"], num_gpus=1, max_concurrency=2).remote(
        detached_trainer_name_list=["trainer1", "trainer2"],
        strategy=args.maker_strategy,
        env_info=env_info_maker_1,
        experience_batch_size=args.experience_batch_size,
        kl_coef=0.1,
        # generation kwargs:
        max_length=128,
        do_sample=True,
        temperature=1.0,
        top_k=50,
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=tokenizer.eos_token_id,
        debug=args.debug,
    )

    # The trainer sends its actor and critic to the experience holder;
    # block until that handshake completes before starting any loop.
    # TODO: balance duty
    ray.get(trainer_1_ref.initialize_remote_makers.remote())

    # configure sampler
    dataset = pd.read_csv(args.prompt_path)['prompt']

    def tokenize_fn(texts):
        # MUST pad to max length to ensure inputs of all ranks have the same
        # length. Different lengths may lead to a hang when using gemini, as
        # generation step counts would diverge across ranks.
        batch = tokenizer(texts, return_tensors='pt', max_length=96, padding='max_length', truncation=True)
        return {k: v.cuda() for k, v in batch.items()}

    trainer_1_done_ref = trainer_1_ref.fit.remote(num_episodes=args.num_episodes,
                                                  max_timesteps=args.max_timesteps,
                                                  update_timesteps=args.update_timesteps)
    trainer_2_done_ref = trainer_2_ref.fit.remote(num_episodes=args.num_episodes,
                                                  max_timesteps=args.max_timesteps,
                                                  update_timesteps=args.update_timesteps)
    # The single maker serves two trainers, hence the *2; the +3 spare
    # batches are for fault tolerance.
    num_exp_per_maker = args.num_episodes * args.max_timesteps // args.update_timesteps * args.max_epochs * 2 + 3
    maker_1_done_ref = experience_holder_1_ref.workingloop.remote(dataset, tokenize_fn, times=num_exp_per_maker)

    ray.get([trainer_1_done_ref, trainer_2_done_ref, maker_1_done_ref])

    # Save the model checkpoint after fitting. Block on the remote saves
    # with ray.get: otherwise the driver may exit (tearing down the actors)
    # before the checkpoint files are fully written.
    # NOTE(review): both trainers target the same args.save_path, so one
    # save can overwrite the other -- confirm this is intended.
    ray.get([
        trainer_1_ref.strategy_save_actor.remote(args.save_path, only_rank0=True),
        trainer_2_ref.strategy_save_actor.remote(args.save_path, only_rank0=True),
    ])
    # save optimizer checkpoint on all ranks
    if args.need_optim_ckpt:
        ray.get([
            trainer_1_ref.strategy_save_actor_optim.remote(
                'actor_optim_checkpoint_prompts_%d.pt' % (torch.cuda.current_device()),
                only_rank0=False),
            trainer_2_ref.strategy_save_actor_optim.remote(
                'actor_optim_checkpoint_prompts_%d.pt' % (torch.cuda.current_device()),
                only_rank0=False),
        ])


if __name__ == '__main__':
    cli = argparse.ArgumentParser()
    # Data, strategies, and model selection.
    cli.add_argument('prompt_path')
    cli.add_argument('--trainer_strategy',
                     choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'],
                     default='naive')
    cli.add_argument('--maker_strategy',
                     choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'],
                     default='naive')
    cli.add_argument('--model', default='gpt2', choices=['gpt2', 'bloom', 'opt'])
    cli.add_argument('--pretrain', type=str, default=None)
    cli.add_argument('--save_path', type=str, default='actor_checkpoint_prompts.pt')
    cli.add_argument('--need_optim_ckpt', type=bool, default=False)
    # Training schedule and batch sizes.
    cli.add_argument('--num_episodes', type=int, default=10)
    cli.add_argument('--max_timesteps', type=int, default=10)
    cli.add_argument('--update_timesteps', type=int, default=10)
    cli.add_argument('--max_epochs', type=int, default=5)
    cli.add_argument('--train_batch_size', type=int, default=8)
    cli.add_argument('--experience_batch_size', type=int, default=8)
    cli.add_argument('--lora_rank', type=int, default=0, help="low-rank adaptation matrices rank")
    cli.add_argument('--debug', action='store_true')

    main(cli.parse_args())
23 changes: 23 additions & 0 deletions applications/Chat/coati/ray/example/1m2t.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Select the n GPUs with the least memory currently in use and export them
# via CUDA_VISIBLE_DEVICES. With no argument, n defaults to 9999, which in
# practice selects every GPU.
set_n_least_used_CUDA_VISIBLE_DEVICES() {
    local n=${1:-"9999"}
    echo "GPU Memory Usage:"
    # Query per-GPU used memory, drop the CSV header, number rows from 0
    # (row number == GPU index), echo the table to the terminal via tee,
    # sort ascending by used memory, and keep the indices of the n
    # least-loaded GPUs.
    local FIRST_N_GPU_IDS=$(nvidia-smi --query-gpu=memory.used --format=csv \
        | tail -n +2 \
        | nl -v 0 \
        | tee /dev/tty \
        | sort -g -k 2 \
        | awk '{print $1}' \
        | head -n $n)
    # Join the selected indices with commas, as CUDA_VISIBLE_DEVICES expects.
    export CUDA_VISIBLE_DEVICES=$(echo $FIRST_N_GPU_IDS | sed 's/ /,/g')
    echo "Now CUDA_VISIBLE_DEVICES is set to:"
    echo "CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES"
}

# NOTE(review): 1m2t launches three GPU actors (two trainers + one maker)
# but only two GPUs are exposed here -- confirm this is intended.
set_n_least_used_CUDA_VISIBLE_DEVICES 2

# Ray namespace under which 1m2t.py registers its named actors.
export RAY_NAMESPACE="admin"

python 1m2t.py "/path/to/prompts.csv" --model gpt2 \
    --maker_strategy naive --trainer_strategy ddp --lora_rank 2 \
    --num_episodes 10 --max_timesteps 10 --update_timesteps 10 \
    --max_epochs 10 #--debug
Loading