3 changes: 1 addition & 2 deletions applications/Chat/coati/models/generation.py
@@ -76,8 +76,7 @@ def sample(model: nn.Module,
         # update generated ids, model inputs for next step
         input_ids = torch.cat([input_ids, next_tokens[:, None]], dim=-1)
         if update_model_kwargs_fn is not None:
-            model_kwargs = update_model_kwargs_fn(outputs, **model_kwargs)
-
+            model_kwargs = update_model_kwargs_fn(outputs, model_kwargs)
         # if eos_token was found in one sentence, set sentence to finished
         if eos_token_id is not None:
             unfinished_sequences = unfinished_sequences.mul((next_tokens != eos_token_id).long())
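The fix changes the callback contract: update_model_kwargs_fn now receives the kwargs dict as one positional argument and returns the updated dict, rather than having the dict exploded into keyword arguments. A minimal sketch of a callback that fits the new contract (my illustration, not the coati source):

    import torch

    def update_model_kwargs_fn(outputs, model_kwargs):
        # carry the KV cache forward so the next step only processes the new token
        model_kwargs["past_key_values"] = outputs.get("past_key_values")
        # grow the attention mask by one position for the token just sampled
        if model_kwargs.get("attention_mask") is not None:
            mask = model_kwargs["attention_mask"]
            model_kwargs["attention_mask"] = torch.cat([mask, mask.new_ones((mask.shape[0], 1))], dim=-1)
        return model_kwargs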
71 changes: 42 additions & 29 deletions applications/Chat/coati/ray/example/1m1t.py
@@ -1,25 +1,26 @@
 import argparse
+import os
+import socket
 from copy import deepcopy
 
 import pandas as pd
+import ray
 import torch
-from coati.trainer import PPOTrainer
-
-
-from coati.ray.src.experience_maker_holder import ExperienceMakerHolder
+from coati.experience_maker import NaiveExperienceMaker
 from coati.ray.src.detached_trainer_ppo import DetachedPPOTrainer
-
+from coati.ray.src.experience_maker_holder import ExperienceMakerHolder
+from coati.trainer import PPOTrainer
+from coati.trainer.callbacks.performance_evaluator import (
+    ExperienceMakerPerformanceEvaluator,
+    TrainerPerformaceEvaluator,
+)
 from coati.trainer.strategies import ColossalAIStrategy, DDPStrategy, NaiveStrategy
-from coati.experience_maker import NaiveExperienceMaker
 from torch.optim import Adam
 from transformers import AutoTokenizer, BloomTokenizerFast
 from transformers.models.gpt2.tokenization_gpt2 import GPT2Tokenizer
 
 from colossalai.nn.optimizer import HybridAdam
 
-import ray
-import os
-import socket
 
 def get_free_port():
     with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
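Both example scripts lean on two small networking helpers that this view truncates. A likely completion, pieced together from the fragments visible here and in 1m1t_quantize.py below (the getsockname()[1] return value is my assumption):

    import socket

    def get_free_port():
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            s.bind(('', 0))    # port 0 asks the OS for any free port
            return s.getsockname()[1]

    def get_local_ip():
        with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
            s.connect(('8.8.8.8', 80))    # UDP connect sends no packet; it only selects a route
            return s.getsockname()[0]

A port found this way can be taken by another process before it is reused; that race is acceptable for an example script.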
@@ -31,24 +31,29 @@ def get_local_ip():
     with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
         s.connect(('8.8.8.8', 80))
         return s.getsockname()[0]
-
+
+
 def main(args):
     master_addr = str(get_local_ip())
     # trainer_env_info
     trainer_port = str(get_free_port())
-    env_info_trainer = {'local_rank' : '0',
-                        'rank' : '0',
-                        'world_size' : '1',
-                        'master_port' : trainer_port,
-                        'master_addr' : master_addr}
-
+    env_info_trainer = {
+        'local_rank': '0',
+        'rank': '0',
+        'world_size': '1',
+        'master_port': trainer_port,
+        'master_addr': master_addr
+    }
+
     # maker_env_info
     maker_port = str(get_free_port())
-    env_info_maker = {'local_rank' : '0',
-                      'rank' : '0',
-                      'world_size' : '1',
-                      'master_port' : maker_port,
-                      'master_addr' : master_addr}
+    env_info_maker = {
+        'local_rank': '0',
+        'rank': '0',
+        'world_size': '1',
+        'master_port': maker_port,
+        'master_addr': master_addr
+    }
 
     # configure tokenizer
     if args.model == 'gpt2':
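The reformatted env_info dicts carry the standard torch.distributed rendezvous settings (rank, world size, master address and port), one set per process group so the trainer and the experience maker do not collide. The diff does not show how the receiving actors consume them; a sketch under the common convention (my assumption, not coati code) of exporting them as environment variables before init_process_group:

    import os

    import torch.distributed as dist

    def setup_distributed(env_info: dict):
        os.environ["RANK"] = env_info["rank"]
        os.environ["LOCAL_RANK"] = env_info["local_rank"]
        os.environ["WORLD_SIZE"] = env_info["world_size"]
        os.environ["MASTER_ADDR"] = env_info["master_addr"]
        os.environ["MASTER_PORT"] = env_info["master_port"]
        # env:// reads the five variables above to rendezvous the group
        dist.init_process_group(backend="nccl", init_method="env://")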
@@ -67,37 +73,39 @@ def main(args):
         experience_maker_holder_name_list=["maker1"],
         strategy=args.trainer_strategy,
         model=args.model,
-        env_info = env_info_trainer,
+        env_info=env_info_trainer,
         pretrained=args.pretrain,
         lora_rank=args.lora_rank,
         train_batch_size=args.train_batch_size,
         buffer_limit=16,
         experience_batch_size=args.experience_batch_size,
         max_epochs=args.max_epochs,
-        #kwargs:
+        # kwargs:
         max_length=128,
         do_sample=True,
         temperature=1.0,
         top_k=50,
         pad_token_id=tokenizer.pad_token_id,
         eos_token_id=tokenizer.eos_token_id,
+        eval_performance=True,
         debug=args.debug,
     )
 
     # configure Experience Maker
     experience_holder_ref = ExperienceMakerHolder.options(name="maker1", num_gpus=1, max_concurrency=2).remote(
         detached_trainer_name_list=["trainer1"],
         strategy=args.maker_strategy,
-        env_info = env_info_maker,
+        env_info=env_info_maker,
         experience_batch_size=args.experience_batch_size,
         kl_coef=0.1,
-        #kwargs:
+        # kwargs:
         max_length=128,
         do_sample=True,
         temperature=1.0,
         top_k=50,
         pad_token_id=tokenizer.pad_token_id,
         eos_token_id=tokenizer.eos_token_id,
+        eval_performance=True,
         debug=args.debug,
     )
 
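Both actors follow the same Ray pattern: .options(name=...) registers a named, addressable actor, so each side can reach the other by name alone ("trainer1", "maker1"), and the maker's max_concurrency=2 lets a second call, such as a weight sync, run while its working loop occupies the first slot. A stripped-down sketch of the pattern; the Worker class here is hypothetical:

    import ray

    @ray.remote
    class Worker:
        def __init__(self, peer_names):
            self.peer_names = peer_names

        def peers(self):
            # resolve peers lazily by name, as the trainer does with ["maker1"]
            return [ray.get_actor(name) for name in self.peer_names]

    ray.init()
    # name= makes the actor reachable through ray.get_actor("worker1");
    # max_concurrency=2 allows two method calls to run on it at once
    worker = Worker.options(name="worker1", max_concurrency=2).remote(peer_names=[])
    handle = ray.get_actor("worker1")    # same actor, looked up by name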
Expand All @@ -113,19 +121,24 @@ def tokenize_fn(texts):
         batch = tokenizer(texts, return_tensors='pt', max_length=96, padding='max_length', truncation=True)
         return {k: v.cuda() for k, v in batch.items()}
 
-    trainer_done_ref = trainer_ref.fit.remote(num_episodes=args.num_episodes, max_timesteps=args.max_timesteps, update_timesteps=args.update_timesteps)
-    num_exp_per_maker = args.num_episodes * args.max_timesteps // args.update_timesteps * args.max_epochs + 3    # +3 for fault tolerance
+    trainer_done_ref = trainer_ref.fit.remote(num_episodes=args.num_episodes,
+                                              max_timesteps=args.max_timesteps,
+                                              update_timesteps=args.update_timesteps)
+    num_exp_per_maker = args.num_episodes * args.max_timesteps // args.update_timesteps * \
+        args.max_epochs + 3    # +3 for fault tolerance
     maker_done_ref = experience_holder_ref.workingloop.remote(dataset, tokenize_fn, times=num_exp_per_maker)
 
     ray.get([trainer_done_ref, maker_done_ref])
 
     # save model checkpoint after fitting
     trainer_ref.strategy_save_actor.remote(args.save_path, only_rank0=True)
     # save optimizer checkpoint on all ranks
    if args.need_optim_ckpt:
-        trainer_ref.strategy_save_actor_optim.remote('actor_optim_checkpoint_prompts_%d.pt' % (torch.cuda.current_device()),
+        trainer_ref.strategy_save_actor_optim.remote('actor_optim_checkpoint_prompts_%d.pt' %
+                                                     (torch.cuda.current_device()),
                                                      only_rank0=False)
 
+
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
     parser.add_argument('prompt_path')
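The rewrapped num_exp_per_maker expression above is unchanged arithmetic: each episode triggers max_timesteps // update_timesteps training updates, each update consumes max_epochs experience batches, and the maker produces three spare batches (the "+3 for fault tolerance" in the comment). One worked instance with illustrative values:

    num_episodes, max_timesteps, update_timesteps, max_epochs = 10, 10, 10, 3
    # * and // share precedence and associate left: ((10 * 10) // 10) * 3 + 3
    num_exp_per_maker = num_episodes * max_timesteps // update_timesteps * max_epochs + 3
    print(num_exp_per_maker)    # 33 experience batches for the whole run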
63 changes: 36 additions & 27 deletions applications/Chat/coati/ray/example/1m1t_quantize.py
@@ -1,16 +1,16 @@
 import argparse
-import pandas as pd
-import torch
-import ray
 import os
 import socket
 
-from coati.ray.src.experience_maker_holder import ExperienceMakerHolder
+import pandas as pd
+import ray
+import torch
 from coati.ray.src.detached_trainer_ppo import DetachedPPOTrainer
-
+from coati.ray.src.experience_maker_holder import ExperienceMakerHolder
 from transformers import AutoTokenizer, BloomTokenizerFast
 from transformers.models.gpt2.tokenization_gpt2 import GPT2Tokenizer
 
+
 def get_free_port():
     with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
         s.bind(('', 0))
Expand All @@ -21,25 +21,29 @@ def get_local_ip():
     with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
         s.connect(('8.8.8.8', 80))
         return s.getsockname()[0]
 
 
 def main(args):
     master_addr = str(get_local_ip())
     # trainer_env_info
     trainer_port = str(get_free_port())
-    env_info_trainer = {'local_rank' : '0',
-                        'rank' : '0',
-                        'world_size' : '1',
-                        'master_port' : trainer_port,
-                        'master_addr' : master_addr}
-
+    env_info_trainer = {
+        'local_rank': '0',
+        'rank': '0',
+        'world_size': '1',
+        'master_port': trainer_port,
+        'master_addr': master_addr
+    }
+
     # maker_env_info
     maker_port = str(get_free_port())
-    env_info_maker = {'local_rank' : '0',
-                      'rank' : '0',
-                      'world_size' : '1',
-                      'master_port' : maker_port,
-                      'master_addr' : master_addr}
+    env_info_maker = {
+        'local_rank': '0',
+        'rank': '0',
+        'world_size': '1',
+        'master_port': maker_port,
+        'master_addr': master_addr
+    }
 
     # configure tokenizer
     if args.model == 'gpt2':
Expand All @@ -58,46 +62,46 @@ def main(args):
         experience_maker_holder_name_list=["maker1"],
         strategy=args.trainer_strategy,
         model=args.model,
-        env_info = env_info_trainer,
+        env_info=env_info_trainer,
         pretrained=args.pretrain,
         lora_rank=args.lora_rank,
         train_batch_size=args.train_batch_size,
         buffer_limit=16,
         experience_batch_size=args.experience_batch_size,
         max_epochs=args.max_epochs,
-        #kwargs:
+        # kwargs:
         max_length=128,
         do_sample=True,
         temperature=1.0,
         top_k=50,
         pad_token_id=tokenizer.pad_token_id,
         eos_token_id=tokenizer.eos_token_id,
         debug=args.debug,
+        eval_performance=True,
     )
 
     # configure Experience Maker
     experience_holder_ref = ExperienceMakerHolder.options(name="maker1", num_gpus=1, max_concurrency=2).remote(
         detached_trainer_name_list=["trainer1"],
         strategy=args.maker_strategy,
-        env_info = env_info_maker,
+        env_info=env_info_maker,
         experience_batch_size=args.experience_batch_size,
         kl_coef=0.1,
-        #kwargs:
+        # kwargs:
         max_length=128,
         do_sample=True,
         temperature=1.0,
         top_k=50,
         pad_token_id=tokenizer.pad_token_id,
         eos_token_id=tokenizer.eos_token_id,
         debug=args.debug,
+        eval_performance=True,
     )
 
     # a 'jump wire' to set quantized initial_model and reward_model
-
-
     # trainer send its actor and critic to experience holders.
     # ray.get(trainer_ref.initialize_remote_makers.remote())
 
     # configure sampler
     dataset = pd.read_csv(args.prompt_path)['prompt']
 
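The 'jump wire' placeholder marks where a quantized initial_model and reward_model would be wired into the maker, and the weight push from trainer to makers is left commented out. For orientation only, a hypothetical shape such a push could take; update_experience_maker is an invented method name, and the script's real entry point is the commented-out trainer_ref.initialize_remote_makers.remote():

    import ray

    def push_weights(actor_state_dict, critic_state_dict, maker_names):
        # hypothetical: ship fresh policy/critic weights to every named maker
        makers = [ray.get_actor(name) for name in maker_names]
        done = [m.update_experience_maker.remote(actor_state_dict, critic_state_dict)
                for m in makers]
        ray.get(done)    # block until every maker has swapped in the new weights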
Expand All @@ -107,19 +111,24 @@ def tokenize_fn(texts):
         batch = tokenizer(texts, return_tensors='pt', max_length=96, padding='max_length', truncation=True)
         return {k: v.cuda() for k, v in batch.items()}
 
-    trainer_done_ref = trainer_ref.fit.remote(num_episodes=args.num_episodes, max_timesteps=args.max_timesteps, update_timesteps=args.update_timesteps)
-    num_exp_per_maker = args.num_episodes * args.max_timesteps // args.update_timesteps * args.max_epochs + 3    # +3 for fault tolerance
+    trainer_done_ref = trainer_ref.fit.remote(num_episodes=args.num_episodes,
+                                              max_timesteps=args.max_timesteps,
+                                              update_timesteps=args.update_timesteps)
+    num_exp_per_maker = args.num_episodes * args.max_timesteps // args.update_timesteps * \
+        args.max_epochs + 3    # +3 for fault tolerance
     maker_done_ref = experience_holder_ref.workingloop.remote(dataset, tokenize_fn, times=num_exp_per_maker)
 
     ray.get([trainer_done_ref, maker_done_ref])
 
     # save model checkpoint after fitting
     trainer_ref.strategy_save_actor.remote(args.save_path, only_rank0=True)
     # save optimizer checkpoint on all ranks
     if args.need_optim_ckpt:
-        trainer_ref.strategy_save_actor_optim.remote('actor_optim_checkpoint_prompts_%d.pt' % (torch.cuda.current_device()),
+        trainer_ref.strategy_save_actor_optim.remote('actor_optim_checkpoint_prompts_%d.pt' %
+                                                     (torch.cuda.current_device()),
                                                      only_rank0=False)
 
+
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
     parser.add_argument('prompt_path')