From 994b40ce478fb85c36397886373a38b8149e96f2 Mon Sep 17 00:00:00 2001 From: csric Date: Thu, 16 Mar 2023 14:52:20 +0800 Subject: [PATCH 01/36] run the base --- applications/ChatGPT/examples/train_prompts.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/applications/ChatGPT/examples/train_prompts.sh b/applications/ChatGPT/examples/train_prompts.sh index db73ac8e8e85..dcb0aa7f07ec 100755 --- a/applications/ChatGPT/examples/train_prompts.sh +++ b/applications/ChatGPT/examples/train_prompts.sh @@ -15,4 +15,7 @@ set_n_least_used_CUDA_VISIBLE_DEVICES() { set_n_least_used_CUDA_VISIBLE_DEVICES 2 -torchrun --standalone --nproc_per_node=2 train_prompts.py prompts.csv --strategy colossalai_zero2 +# torchrun --standalone --nproc_per_node=2 train_prompts.py prompts.csv --strategy colossalai_zero2 + +torchrun --standalone --nproc_per_node=2 train_prompts.py "/home/lccsr/HPC-AI/ColossalAI/applications/ChatGPT/examples/awesome-chatgpt-prompts/prompts.csv" \ + --strategy colossalai_zero2 --lora_rank 2 From 0390f6e8069ced2d450f18524928d749bbf0e9fa Mon Sep 17 00:00:00 2001 From: csric Date: Thu, 16 Mar 2023 18:31:18 +0800 Subject: [PATCH 02/36] working on dist ppo --- .../chatgpt/experience_maker/distributed.py | 48 ++++++++++ .../chatgpt/replay_buffer/distributed.py | 93 +++++++++++++++++++ applications/ChatGPT/examples/1m1t.py | 0 3 files changed, 141 insertions(+) create mode 100644 applications/ChatGPT/chatgpt/experience_maker/distributed.py create mode 100644 applications/ChatGPT/chatgpt/replay_buffer/distributed.py create mode 100644 applications/ChatGPT/examples/1m1t.py diff --git a/applications/ChatGPT/chatgpt/experience_maker/distributed.py b/applications/ChatGPT/chatgpt/experience_maker/distributed.py new file mode 100644 index 000000000000..8cee1d792e26 --- /dev/null +++ b/applications/ChatGPT/chatgpt/experience_maker/distributed.py @@ -0,0 +1,48 @@ +import torch +from typing import Any, Callable, Dict, List, Optional, Union +from .naive import NaiveExperienceMaker, Experience, ExperienceMaker +from ..replay_buffer.distributed import DistReplayBuffer +import ray +from torch import Tensor + +class ExperienceMakerSender: + ''' + Args: + dist_replay_buffer_name_list: str list to get ray actor handles + experience_maker: experience maker + ''' + + def __init__(self, dist_replay_buffer_name_list: List[str], experience_maker : NaiveExperienceMaker): + self.experience_maker = experience_maker + self.target_buffer_list = [] + for name in dist_replay_buffer_name_list: + self.target_buffer_list.append(ray.get_actor(name)) + + # copy from ../trainer/base.py + def _make_experience(self, inputs: Union[Tensor, Dict[str, Tensor]]) -> Experience: + if isinstance(inputs, Tensor): + return self.experience_maker.make_experience(inputs, **self.generate_kwargs) + elif isinstance(inputs, dict): + return self.experience_maker.make_experience(**inputs, **self.generate_kwargs) + else: + raise ValueError(f'Unsupported input type "{type(inputs)}"') + + def update_target_buffer_list(self, new_target_buffer_list): + self.target_buffer_list = new_target_buffer_list + + def make_and_send(self, inputs): + experience = self._make_experience(inputs) + # choose a buffer that has the least experience batch + chosen_buffer = None + min_length = None + while chosen_buffer is None: + for target_buffer in self.target_buffer_list: + temp_length = ray.get(target_buffer.get_length.remote()) + if min_length is None: + min_length = temp_length + chosen_buffer = target_buffer + else: + if temp_length < min_length: + 
min_length = temp_length + chosen_buffer = target_buffer + target_buffer.append.remote(experience) diff --git a/applications/ChatGPT/chatgpt/replay_buffer/distributed.py b/applications/ChatGPT/chatgpt/replay_buffer/distributed.py new file mode 100644 index 000000000000..4337b1e6d623 --- /dev/null +++ b/applications/ChatGPT/chatgpt/replay_buffer/distributed.py @@ -0,0 +1,93 @@ +import torch +import random +from typing import List +from .base import ReplayBuffer +from torch.multiprocessing import Queue +import ray +from chatgpt.experience_maker.base import Experience +from .utils import BufferItem, make_experience_batch, split_experience_batch +from threading import Lock +import copy + +@ray.remote +class DistReplayBuffer(ReplayBuffer): + ''' + Distributed replay buffer. Share Experience across workers on the same node. + Therefore a trainer node is expected to have only one instance. + Please set name attribute when initializing: + + DistReplayBuffer.options(..., name="xxx", ...).remote() + + So an ExperienceMakerSender can get the actor handle by name. + It is ExperienceMakerSender's obligation to call append(exp) method, remotely. + + Args: + sample_batch_size: Batch size when sampling. Exp won't enqueue until they formed a batch. + tp_world_size: Number of workers in the same tp group + limit: Limit of number of experience sample BATCHs. A number <= 0 means unlimited. Defaults to 0. + cpu_offload: Whether to offload experience to cpu when sampling. Defaults to True. + ''' + + + def __init__(self, sample_batch_size: int, tp_world_size: int, limit : int = 0, cpu_offload: bool = True) -> None: + super().__init__(sample_batch_size, limit) + self.cpu_offload = cpu_offload + self.sample_batch_size = sample_batch_size + self.limit = limit + self.items = Queue(self.limit) + self.batch_collector : List[BufferItem] = None + + + ''' + Workers in the same tp group share this buffer. They need same sample for one step. + Therefore a held_sample should be returned tp_world_size times before it could be dropped. + worker_state records wheter a worker got the held_sample + ''' + self.tp_world_size = tp_world_size + self.worker_state = [False] * self.tp_world_size + self.held_sample = None + self.worker_state_lock = Lock() + + @torch.no_grad() + def append(self, experience: Experience) -> None: + ''' + Expected to be called remotely. 
+ ''' + if self.cpu_offload: + experience.to_device(torch.device('cpu')) + items = split_experience_batch(experience) + self.batch_collector.extend(items) + while len(self.batch_collector) >= self.sample_batch_size: + items = self.batch_collector[:self.sample_batch_size] + experience = make_experience_batch(items) + self.items.put(experience) + self.batch_collector = self.batch_collector[self.sample_batch_size:] + + def clear(self) -> None: + self.items.close() + self.items = Queue(self.limit) + self.worker_state = [False] * self.tp_world_size + + @torch.no_grad() + def sample(self, worker_rank, to_device = "cpu") -> Experience: + self.worker_state_lock.acquire() + if not any(self.worker_state): + self.held_sample = self._sample_and_erase() + self.worker_state[worker_rank] = True + self.worker_state_lock.release() + + ret = copy.deepcopy(self.held_sample) + ret.to_device(to_device) + + self.worker_state_lock.acquire() + if all(self.worker_state): + self.worker_state = [False] * self.tp_world_size + self.worker_state_lock.release() + return ret + + @torch.no_grad() + def _sample_and_erase(self) -> Experience: + return self.items.get() + + def get_length(self) -> int: + return self.items.qsize() \ No newline at end of file diff --git a/applications/ChatGPT/examples/1m1t.py b/applications/ChatGPT/examples/1m1t.py new file mode 100644 index 000000000000..e69de29bb2d1 From c1df61b05d1ab955c54c436c5832c18363b5554c Mon Sep 17 00:00:00 2001 From: csric Date: Mon, 20 Mar 2023 14:03:07 +0800 Subject: [PATCH 03/36] sync --- .../ChatGPT/chatgpt/experience_maker/distributed.py | 10 +++++++++- applications/ChatGPT/chatgpt/replay_buffer/__init__.py | 3 ++- .../ChatGPT/chatgpt/replay_buffer/distributed.py | 10 ++++++---- 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/applications/ChatGPT/chatgpt/experience_maker/distributed.py b/applications/ChatGPT/chatgpt/experience_maker/distributed.py index 8cee1d792e26..84f9deaf40d3 100644 --- a/applications/ChatGPT/chatgpt/experience_maker/distributed.py +++ b/applications/ChatGPT/chatgpt/experience_maker/distributed.py @@ -5,7 +5,7 @@ import ray from torch import Tensor -class ExperienceMakerSender: +class ExperienceMakerHolder: ''' Args: dist_replay_buffer_name_list: str list to get ray actor handles @@ -46,3 +46,11 @@ def make_and_send(self, inputs): min_length = temp_length chosen_buffer = target_buffer target_buffer.append.remote(experience) + + def update_experience_maker(self): + # TODO: parameter update + ''' + self.experience_maker.actor.update() + self.experience_maker.critic.update() + ''' + pass \ No newline at end of file diff --git a/applications/ChatGPT/chatgpt/replay_buffer/__init__.py b/applications/ChatGPT/chatgpt/replay_buffer/__init__.py index 1ebf60382913..89f9e422e33a 100644 --- a/applications/ChatGPT/chatgpt/replay_buffer/__init__.py +++ b/applications/ChatGPT/chatgpt/replay_buffer/__init__.py @@ -1,4 +1,5 @@ from .base import ReplayBuffer from .naive import NaiveReplayBuffer +from .distributed import DistReplayBuffer -__all__ = ['ReplayBuffer', 'NaiveReplayBuffer'] +__all__ = ['ReplayBuffer', 'NaiveReplayBuffer', 'DistReplayBuffer'] diff --git a/applications/ChatGPT/chatgpt/replay_buffer/distributed.py b/applications/ChatGPT/chatgpt/replay_buffer/distributed.py index 4337b1e6d623..85a034b5020c 100644 --- a/applications/ChatGPT/chatgpt/replay_buffer/distributed.py +++ b/applications/ChatGPT/chatgpt/replay_buffer/distributed.py @@ -2,7 +2,8 @@ import random from typing import List from .base import ReplayBuffer -from 
torch.multiprocessing import Queue +# from torch.multiprocessing import Queue +from ray.util.queue import Queue import ray from chatgpt.experience_maker.base import Experience from .utils import BufferItem, make_experience_batch, split_experience_batch @@ -27,8 +28,8 @@ class DistReplayBuffer(ReplayBuffer): limit: Limit of number of experience sample BATCHs. A number <= 0 means unlimited. Defaults to 0. cpu_offload: Whether to offload experience to cpu when sampling. Defaults to True. ''' - - + + def __init__(self, sample_batch_size: int, tp_world_size: int, limit : int = 0, cpu_offload: bool = True) -> None: super().__init__(sample_batch_size, limit) self.cpu_offload = cpu_offload @@ -64,7 +65,8 @@ def append(self, experience: Experience) -> None: self.batch_collector = self.batch_collector[self.sample_batch_size:] def clear(self) -> None: - self.items.close() + # self.items.close() + self.items.shutdown() self.items = Queue(self.limit) self.worker_state = [False] * self.tp_world_size From 518f8378f69b7e7223f9fb6815630ba1f4841450 Mon Sep 17 00:00:00 2001 From: csric Date: Mon, 20 Mar 2023 17:14:40 +0800 Subject: [PATCH 04/36] detached trainer --- .../{distributed.py => detached.py} | 34 +++--- .../ChatGPT/chatgpt/replay_buffer/__init__.py | 4 +- .../{distributed.py => detached.py} | 28 ++--- .../ChatGPT/chatgpt/trainer/__init__.py | 3 +- .../ChatGPT/chatgpt/trainer/detached_base.py | 100 ++++++++++++++++ .../ChatGPT/chatgpt/trainer/detached_ppo.py | 108 ++++++++++++++++++ 6 files changed, 240 insertions(+), 37 deletions(-) rename applications/ChatGPT/chatgpt/experience_maker/{distributed.py => detached.py} (55%) rename applications/ChatGPT/chatgpt/replay_buffer/{distributed.py => detached.py} (78%) create mode 100644 applications/ChatGPT/chatgpt/trainer/detached_base.py create mode 100644 applications/ChatGPT/chatgpt/trainer/detached_ppo.py diff --git a/applications/ChatGPT/chatgpt/experience_maker/distributed.py b/applications/ChatGPT/chatgpt/experience_maker/detached.py similarity index 55% rename from applications/ChatGPT/chatgpt/experience_maker/distributed.py rename to applications/ChatGPT/chatgpt/experience_maker/detached.py index 84f9deaf40d3..62bbf664b09c 100644 --- a/applications/ChatGPT/chatgpt/experience_maker/distributed.py +++ b/applications/ChatGPT/chatgpt/experience_maker/detached.py @@ -1,22 +1,22 @@ import torch from typing import Any, Callable, Dict, List, Optional, Union from .naive import NaiveExperienceMaker, Experience, ExperienceMaker -from ..replay_buffer.distributed import DistReplayBuffer +from ..replay_buffer.detached import DetachedReplayBuffer import ray from torch import Tensor class ExperienceMakerHolder: ''' Args: - dist_replay_buffer_name_list: str list to get ray actor handles + detached_trainer_name_list: str list to get ray actor handles experience_maker: experience maker ''' - def __init__(self, dist_replay_buffer_name_list: List[str], experience_maker : NaiveExperienceMaker): + def __init__(self, detached_trainer_name_list: List[str], experience_maker : ExperienceMaker): self.experience_maker = experience_maker - self.target_buffer_list = [] - for name in dist_replay_buffer_name_list: - self.target_buffer_list.append(ray.get_actor(name)) + self.target_trainer_list = [] + for name in detached_trainer_name_list: + self.target_trainer_list.append(ray.get_actor(name)) # copy from ../trainer/base.py def _make_experience(self, inputs: Union[Tensor, Dict[str, Tensor]]) -> Experience: @@ -27,25 +27,27 @@ def _make_experience(self, inputs: Union[Tensor, 
Dict[str, Tensor]]) -> Experien else: raise ValueError(f'Unsupported input type "{type(inputs)}"') - def update_target_buffer_list(self, new_target_buffer_list): - self.target_buffer_list = new_target_buffer_list + def update_target_trainer_list(self, detached_trainer_name_list): + self.target_trainer_list = [] + for name in detached_trainer_name_list: + self.target_trainer_list.append(ray.get_actor(name)) def make_and_send(self, inputs): experience = self._make_experience(inputs) - # choose a buffer that has the least experience batch - chosen_buffer = None + # choose a trainer that has the least experience batch in its detached_replay_buffer + chosen_trainer = None min_length = None - while chosen_buffer is None: - for target_buffer in self.target_buffer_list: - temp_length = ray.get(target_buffer.get_length.remote()) + while chosen_trainer is None: + for target_trainer in self.target_trainer_list: + temp_length = ray.get(target_trainer.get_buffer_length.remote()) if min_length is None: min_length = temp_length - chosen_buffer = target_buffer + chosen_trainer = target_trainer else: if temp_length < min_length: min_length = temp_length - chosen_buffer = target_buffer - target_buffer.append.remote(experience) + chosen_trainer = target_trainer + chosen_trainer.buffer_append.remote(experience) def update_experience_maker(self): # TODO: parameter update diff --git a/applications/ChatGPT/chatgpt/replay_buffer/__init__.py b/applications/ChatGPT/chatgpt/replay_buffer/__init__.py index 89f9e422e33a..9815e4fddd11 100644 --- a/applications/ChatGPT/chatgpt/replay_buffer/__init__.py +++ b/applications/ChatGPT/chatgpt/replay_buffer/__init__.py @@ -1,5 +1,5 @@ from .base import ReplayBuffer from .naive import NaiveReplayBuffer -from .distributed import DistReplayBuffer +from .detached import DetachedReplayBuffer -__all__ = ['ReplayBuffer', 'NaiveReplayBuffer', 'DistReplayBuffer'] +__all__ = ['ReplayBuffer', 'NaiveReplayBuffer', 'DetachedReplayBuffer'] diff --git a/applications/ChatGPT/chatgpt/replay_buffer/distributed.py b/applications/ChatGPT/chatgpt/replay_buffer/detached.py similarity index 78% rename from applications/ChatGPT/chatgpt/replay_buffer/distributed.py rename to applications/ChatGPT/chatgpt/replay_buffer/detached.py index 85a034b5020c..0ebbd3e13c01 100644 --- a/applications/ChatGPT/chatgpt/replay_buffer/distributed.py +++ b/applications/ChatGPT/chatgpt/replay_buffer/detached.py @@ -1,6 +1,6 @@ import torch import random -from typing import List +from typing import List, Any from .base import ReplayBuffer # from torch.multiprocessing import Queue from ray.util.queue import Queue @@ -10,17 +10,11 @@ from threading import Lock import copy -@ray.remote -class DistReplayBuffer(ReplayBuffer): +class DetachedReplayBuffer(ReplayBuffer): ''' - Distributed replay buffer. Share Experience across workers on the same node. - Therefore a trainer node is expected to have only one instance. - Please set name attribute when initializing: - - DistReplayBuffer.options(..., name="xxx", ...).remote() - - So an ExperienceMakerSender can get the actor handle by name. - It is ExperienceMakerSender's obligation to call append(exp) method, remotely. + Detached replay buffer. Share Experience across workers on the same node. + Therefore a trainer node is expected to have only one instance. + It is ExperienceMakerHolder's duty to call append(exp) method, remotely. Args: sample_batch_size: Batch size when sampling. Exp won't enqueue until they formed a batch. 
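The tensor-parallel gating described in the docstring above can be distilled into a small standalone sketch (hypothetical names, independent of the classes in this patch): every rank in a tp group must see the same batch, so a drawn sample is held until each rank has read it exactly once.

    import copy
    import threading

    class SharedSampleGate:
        '''Minimal sketch of the held_sample protocol used by the buffer.'''
        def __init__(self, tp_world_size):
            self.tp_world_size = tp_world_size
            self.worker_state = [False] * tp_world_size
            self.held_sample = None
            self.lock = threading.Lock()

        def sample(self, worker_rank, fetch_fn):
            with self.lock:
                if not any(self.worker_state):
                    self.held_sample = fetch_fn()  # first reader draws a fresh batch
                self.worker_state[worker_rank] = True
                ret = copy.deepcopy(self.held_sample)
                if all(self.worker_state):         # last reader resets the gate
                    self.worker_state = [False] * self.tp_world_size
            return ret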
@@ -29,18 +23,16 @@ class DistReplayBuffer(ReplayBuffer): cpu_offload: Whether to offload experience to cpu when sampling. Defaults to True. ''' - - def __init__(self, sample_batch_size: int, tp_world_size: int, limit : int = 0, cpu_offload: bool = True) -> None: + def __init__(self, sample_batch_size: int, tp_world_size: int = 1, limit : int = 0, cpu_offload: bool = True) -> None: super().__init__(sample_batch_size, limit) self.cpu_offload = cpu_offload self.sample_batch_size = sample_batch_size self.limit = limit self.items = Queue(self.limit) self.batch_collector : List[BufferItem] = None - - + ''' - Workers in the same tp group share this buffer. They need same sample for one step. + Workers in the same tp group share this buffer and need same sample for one step. Therefore a held_sample should be returned tp_world_size times before it could be dropped. worker_state records wheter a worker got the held_sample ''' @@ -71,7 +63,7 @@ def clear(self) -> None: self.worker_state = [False] * self.tp_world_size @torch.no_grad() - def sample(self, worker_rank, to_device = "cpu") -> Experience: + def sample(self, worker_rank = 0, to_device = "cpu") -> Experience: self.worker_state_lock.acquire() if not any(self.worker_state): self.held_sample = self._sample_and_erase() @@ -89,7 +81,7 @@ def sample(self, worker_rank, to_device = "cpu") -> Experience: @torch.no_grad() def _sample_and_erase(self) -> Experience: - return self.items.get() + return self.items.get(block=True) def get_length(self) -> int: return self.items.qsize() \ No newline at end of file diff --git a/applications/ChatGPT/chatgpt/trainer/__init__.py b/applications/ChatGPT/chatgpt/trainer/__init__.py index c47c76347ee5..65601778c46e 100644 --- a/applications/ChatGPT/chatgpt/trainer/__init__.py +++ b/applications/ChatGPT/chatgpt/trainer/__init__.py @@ -2,4 +2,5 @@ from .ppo import PPOTrainer from .rm import RewardModelTrainer -__all__ = ['Trainer', 'PPOTrainer', 'RewardModelTrainer'] +__all__ = ['Trainer', 'PPOTrainer', 'RewardModelTrainer', + 'DetachedTrainer', 'DetachedPPOTrainer',] diff --git a/applications/ChatGPT/chatgpt/trainer/detached_base.py b/applications/ChatGPT/chatgpt/trainer/detached_base.py new file mode 100644 index 000000000000..e81cded01fc1 --- /dev/null +++ b/applications/ChatGPT/chatgpt/trainer/detached_base.py @@ -0,0 +1,100 @@ +from abc import ABC, abstractmethod +from typing import Any, Callable, Dict, List, Optional, Union + +import torch +from chatgpt.experience_maker import Experience +from chatgpt.replay_buffer import DetachedReplayBuffer +from torch import Tensor +from tqdm import tqdm + +from .callbacks import Callback +from .strategies import Strategy +from .utils import is_rank_0 + +import ray + + +# @ray.remote +class DetachedTrainer(ABC): + ''' + Base class for detached rlhf trainers. + 'detach' means that the experience maker is detached compared to a normal Trainer. + Please set name attribute during init: + >>> trainer = DetachedTrainer.options(..., name = "xxx", ...).remote() + So an ExperienceMakerHolder can reach the detached_replay_buffer by Actor's name. 
+ Args: + detached_strategy (DetachedStrategy): the strategy to use for training + detached_replay_buffer_ref (ObjectRef[DetachedReplayBuffer]): the replay buffer to use for training + experience_batch_size (int, defaults to 8): the batch size to use for experience generation + max_epochs (int, defaults to 1): the number of epochs of training process + tokenizer (Callable, optional): the tokenizer to use for tokenizing the input + data_loader_pin_memory (bool, defaults to True): whether to pin memory for data loader + callbacks (List[Callback], defaults to []): the callbacks to call during training process + generate_kwargs (dict, optional): the kwargs to use while model generating + ''' + + def __init__(self, + strategy: Strategy,# TODO: DetachedStrategy + detached_replay_buffer: DetachedReplayBuffer, + experience_batch_size: int = 8, + max_epochs: int = 1, + tokenizer: Optional[Callable[[Any], dict]] = None, + dataloader_pin_memory: bool = True, + callbacks: List[Callback] = [], + **generate_kwargs + )->None: + super().__init__() + self.strategy = strategy + self.detached_replay_buffer = detached_replay_buffer + self.experience_batch_size = experience_batch_size + self.max_epochs = max_epochs + self.tokenizer = tokenizer + self.generate_kwargs = generate_kwargs + self.dataloader_pin_memory = dataloader_pin_memory + self.callbacks = callbacks + + @abstractmethod + def training_step(self, experience: Experience) -> Dict[str, Any]: + pass + + def _learn(self): + pbar = tqdm(range(self.max_epochs), desc='Train epoch', disable=not is_rank_0()) + for _ in pbar: + experience = self.detached_replay_buffer.sample() + metrics = self.training_step(experience) + pbar.set_postfix(metrics) + + def fit(self, pronpts, num_episodes: int = 50000, max_timesteps: int = 500 * 5000) -> None: + self._on_fit_start() + for episode in range(num_episodes): + self._on_episode_start(episode) + for timestep in tqdm(range(max_timesteps), + desc=f'Episode [{episode+1}/{num_episodes}]', + disable=not is_rank_0()): + self._learn() + self._on_episode_end(episode) + self._on_fit_end() + + def get_buffer_length(self): + # called by ExperienceMakerHolder + return self.detached_replay_buffer.get_length() + + def buffer_append(self, experience: Experience): + # called by ExperienceMakerHolder + self.detached_replay_buffer.append(experience) + + def _on_fit_start(self) -> None: + for callback in self.callbacks: + callback.on_fit_start() + + def _on_fit_end(self) -> None: + for callback in self.callbacks: + callback.on_fit_end() + + def _on_episode_start(self, episode: int) -> None: + for callback in self.callbacks: + callback.on_episode_start(episode) + + def _on_episode_end(self, episode: int) -> None: + for callback in self.callbacks: + callback.on_episode_end(episode) \ No newline at end of file diff --git a/applications/ChatGPT/chatgpt/trainer/detached_ppo.py b/applications/ChatGPT/chatgpt/trainer/detached_ppo.py new file mode 100644 index 000000000000..3155f218ccbf --- /dev/null +++ b/applications/ChatGPT/chatgpt/trainer/detached_ppo.py @@ -0,0 +1,108 @@ +from typing import Any, Callable, Dict, List, Optional + +import torch.nn as nn +from chatgpt.experience_maker import Experience, NaiveExperienceMaker +from chatgpt.models.base import Actor, Critic +from chatgpt.models.generation_utils import update_model_kwargs_fn +from chatgpt.models.loss import PolicyLoss, ValueLoss +from chatgpt.replay_buffer import DetachedReplayBuffer +from torch.optim import Optimizer + +from .detached_base import DetachedTrainer +from .callbacks 
import Callback
+from .strategies import Strategy
+
+import ray
+
+@ray.remote
+class DetachedPPOTrainer(DetachedTrainer):
+    '''
+    Detached Trainer for PPO algorithm
+    Args:
+        strategy (Strategy): the strategy to use for training
+        actor (Actor): the actor model in ppo algorithm
+        critic (Critic): the critic model in ppo algorithm
+        actor_optim (Optimizer): the optimizer to use for actor model
+        critic_optim (Optimizer): the optimizer to use for critic model
+        train_batch_size (int, defaults to 8): the batch size to use for training
+        buffer_limit (int, defaults to 0): the max_size limitation of replay buffer
+        buffer_cpu_offload (bool, defaults to True): whether to offload replay buffer to cpu
+        eps_clip (float, defaults to 0.2): the clip coefficient of policy loss
+        value_clip (float, defaults to 0.4): the clip coefficient of value loss
+        experience_batch_size (int, defaults to 8): the batch size to use for experience generation
+        max_epochs (int, defaults to 1): the number of epochs of training process
+        dataloader_pin_memory (bool, defaults to True): whether to pin memory for data loader
+        callbacks (List[Callback], defaults to []): the callbacks to call during training process
+        generate_kwargs (dict, optional): the kwargs to use while model generating
+    '''
+
+    def __int__(self,
+                strategy: Strategy,
+                actor: Actor,
+                critic: Critic,
+                actor_optim: Optimizer,
+                critic_optim: Optimizer,
+                train_batch_size: int = 8,
+                buffer_limit: int = 0,
+                buffer_cpu_offload: bool = True,
+                eps_clip: float = 0.2,
+                value_clip: float = 0.4,
+                experience_batch_size: int = 8,
+                max_epochs: int = 1,
+                tokenizer: Optional[Callable[[Any], dict]] = None,
+                dataloader_pin_memory: bool = True,
+                callbacks: List[Callback] = [],
+                **generate_kwargs) -> None:
+        detached_replay_buffer = DetachedReplayBuffer(train_batch_size, limit=buffer_limit, cpu_offload=buffer_cpu_offload)
+        generate_kwargs = _set_default_generate_kwargs(strategy, generate_kwargs, actor)
+        super().__init__(strategy, detached_replay_buffer, experience_batch_size, max_epochs, tokenizer,
+                         dataloader_pin_memory, callbacks, **generate_kwargs)
+        self.actor = actor
+        self.critic = critic
+
+        self.actor_loss_fn = PolicyLoss(eps_clip)
+        self.critic_loss_fn = ValueLoss(value_clip)
+
+        self.actor_optim = actor_optim
+        self.critic_optim = critic_optim
+
+    def training_step(self, experience: Experience) -> Dict[str, float]:
+        self.actor.train()
+        self.critic.train()
+
+        num_actions = experience.action_mask.size(1)
+        action_log_probs = self.actor(experience.sequences, num_actions, attention_mask=experience.attention_mask)
+        actor_loss = self.actor_loss_fn(action_log_probs,
+                                        experience.action_log_probs,
+                                        experience.advantages,
+                                        action_mask=experience.action_mask)
+        self.strategy.backward(actor_loss, self.actor, self.actor_optim)
+        self.strategy.optimizer_step(self.actor_optim)
+        self.actor_optim.zero_grad()
+
+        values = self.critic(experience.sequences,
+                             action_mask=experience.action_mask,
+                             attention_mask=experience.attention_mask)
+        critic_loss = self.critic_loss_fn(values,
+                                          experience.values,
+                                          experience.reward,
+                                          action_mask=experience.action_mask)
+
+        self.strategy.backward(critic_loss, self.critic, self.critic_optim)
+        self.strategy.optimizer_step(self.critic_optim)
+        self.critic_optim.zero_grad()
+
+        return {'actor_loss': actor_loss.item(), 'critic_loss': critic_loss.item()}
+
+def _set_default_generate_kwargs(strategy: Strategy, generate_kwargs: dict, actor: Actor) -> None:
+    origin_model = strategy._unwrap_actor(actor)
+    new_kwargs = {**generate_kwargs}
+    # use huggingface models method directly
+    if 'prepare_inputs_fn' not in generate_kwargs and hasattr(origin_model, 'prepare_inputs_for_generation'):
+        new_kwargs['prepare_inputs_fn'] = origin_model.prepare_inputs_for_generation
+
+    if 'update_model_kwargs_fn' not in generate_kwargs:
+        new_kwargs['update_model_kwargs_fn'] = update_model_kwargs_fn
+
+    return new_kwargs
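For reference, the objective that training_step optimizes through PolicyLoss is standard clipped PPO. A self-contained sketch, assuming the loss follows the usual ratio-clipping form (tensor names are illustrative, not the library's API):

    import torch

    def clipped_policy_loss(log_probs, old_log_probs, advantages, action_mask, eps_clip=0.2):
        # probability ratio between the current and the behaviour policy, per action token
        ratio = (log_probs - old_log_probs).exp()
        surr1 = ratio * advantages
        surr2 = ratio.clamp(1 - eps_clip, 1 + eps_clip) * advantages
        # maximize the clipped surrogate, averaged over non-padding action positions
        loss = -torch.min(surr1, surr2) * action_mask
        return loss.sum() / action_mask.sum()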
From b707ba28fc3776d2e4d469f1c877b0bf4ea43dcd Mon Sep 17 00:00:00 2001
From: csric
Date: Mon, 20 Mar 2023 17:48:45 +0800
Subject: [PATCH 05/36] update detached trainer. no maker update function

---
 .../chatgpt/experience_maker/detached.py      | 15 +++++++++----
 .../chatgpt/experience_maker/strategy/base.py |  0
 .../ChatGPT/chatgpt/trainer/detached_base.py  | 22 +++++++++++++++++--
 .../ChatGPT/chatgpt/trainer/detached_ppo.py   | 18 +++++++++++++--
 4 files changed, 47 insertions(+), 8 deletions(-)
 create mode 100644 applications/ChatGPT/chatgpt/experience_maker/strategy/base.py

diff --git a/applications/ChatGPT/chatgpt/experience_maker/detached.py b/applications/ChatGPT/chatgpt/experience_maker/detached.py
index 62bbf664b09c..0f6f2e536940 100644
--- a/applications/ChatGPT/chatgpt/experience_maker/detached.py
+++ b/applications/ChatGPT/chatgpt/experience_maker/detached.py
@@ -5,6 +5,7 @@
 import ray
 from torch import Tensor
 
+@ray.remote
 class ExperienceMakerHolder:
     '''
     Args:
@@ -49,10 +50,16 @@ def make_and_send(self, inputs):
                     chosen_trainer = target_trainer
         chosen_trainer.buffer_append.remote(experience)
 
-    def update_experience_maker(self):
+    def update_experience_maker(self, new_actor, new_critic):
         # TODO: parameter update
         '''
+        pseudo:
         self.experience_maker.actor.update()
         self.experience_maker.critic.update()
         '''
-        pass
\ No newline at end of file
+        # TODO: reduce malloc
+        with torch.no_grad():
+            self.experience_maker.actor = new_actor
+            self.experience_maker.critic = new_critic
+        pass
+        
\ No newline at end of file
diff --git a/applications/ChatGPT/chatgpt/experience_maker/strategy/base.py b/applications/ChatGPT/chatgpt/experience_maker/strategy/base.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/applications/ChatGPT/chatgpt/trainer/detached_base.py b/applications/ChatGPT/chatgpt/trainer/detached_base.py
index e81cded01fc1..a6666c347e88 100644
--- a/applications/ChatGPT/chatgpt/trainer/detached_base.py
+++ b/applications/ChatGPT/chatgpt/trainer/detached_base.py
@@ -34,6 +34,7 @@ class DetachedTrainer(ABC):
     '''
 
     def __init__(self,
+                 experience_maker_holder_name_list: List[str],
                  strategy: Strategy,# TODO: DetachedStrategy
                  detached_replay_buffer: DetachedReplayBuffer,
                  experience_batch_size: int = 8,
@@ -53,6 +54,19 @@ def __init__(self,
         self.dataloader_pin_memory = dataloader_pin_memory
         self.callbacks = callbacks
 
+        self.target_holder_name_list = experience_maker_holder_name_list
+        self.target_holder_list = []
+
+    def update_target_holder_list(self, experience_maker_holder_name_list):
+        self.target_holder_name_list = experience_maker_holder_name_list
+        self.target_holder_list = []
+        for name in self.target_holder_name_list:
+            self.target_holder_list.append(ray.get_actor(name))
+
+    @abstractmethod
+    def update_remote_makers(self):
+        pass
+
     @abstractmethod
     def training_step(self, experience: Experience) -> Dict[str, Any]:
         pass
@@ -64,14 +78,18 @@ def _learn(self):
             metrics = self.training_step(experience)
             pbar.set_postfix(metrics)
 
-    def fit(self, 
pronpts, num_episodes: int = 50000, max_timesteps: int = 500 * 5000) -> None: + def fit(self, num_episodes: int = 50000, max_timesteps: int = 500, update_timesteps: int = 5000) -> None: self._on_fit_start() for episode in range(num_episodes): self._on_episode_start(episode) for timestep in tqdm(range(max_timesteps), desc=f'Episode [{episode+1}/{num_episodes}]', disable=not is_rank_0()): - self._learn() + for _ in update_timesteps: + self._learn() + # assume those remote holders are working + # self.update_remote_makers() + self._on_episode_end(episode) self._on_fit_end() diff --git a/applications/ChatGPT/chatgpt/trainer/detached_ppo.py b/applications/ChatGPT/chatgpt/trainer/detached_ppo.py index 3155f218ccbf..252a05063972 100644 --- a/applications/ChatGPT/chatgpt/trainer/detached_ppo.py +++ b/applications/ChatGPT/chatgpt/trainer/detached_ppo.py @@ -1,5 +1,6 @@ from typing import Any, Callable, Dict, List, Optional +import torch import torch.nn as nn from chatgpt.experience_maker import Experience, NaiveExperienceMaker from chatgpt.models.base import Actor, Critic @@ -12,6 +13,8 @@ from .callbacks import Callback from .strategies import Strategy +from .utils import is_rank_0 + import ray @ray.remote @@ -37,7 +40,8 @@ class DetachedPPOTrainer(DetachedTrainer): generate_kwargs (dict, optional): the kwargs to use while model generating ''' - def __int__(self, + def __int__(self, + experience_maker_holder_name_list: List[str], strategy: Strategy, actor: Actor, critic: Critic, @@ -56,7 +60,7 @@ def __int__(self, **generate_kwargs) -> None: detached_replay_buffer = DetachedReplayBuffer(train_batch_size, limit=buffer_limit, cpu_offload=buffer_cpu_offload) generate_kwargs = _set_default_generate_kwargs(strategy, generate_kwargs, actor) - super().__init__(strategy, detached_replay_buffer, experience_batch_size, max_epochs, tokenizer, + super().__init__(experience_maker_holder_name_list, strategy, detached_replay_buffer, experience_batch_size, max_epochs, tokenizer, dataloader_pin_memory, callbacks, **generate_kwargs) self.actor = actor self.critic = critic @@ -67,6 +71,16 @@ def __int__(self, self.actor_optim = actor_optim self.critic_optim = critic_optim + def update_remote_makers(self): + # TODO: balance duties + if is_rank_0(): + self.update_target_holder_list(self.target_holder_name_list) + for target_holder in self.target_holder_list: + # TODO: reduce malloc + with torch.no_grad(): + target_holder.update_experience_maker.remote(self.actor, self.critic) + + def training_step(self, experience: Experience) -> Dict[str, float]: self.actor.train() self.critic.train() From 1311924c880c48093188b7003da8570191cf9a06 Mon Sep 17 00:00:00 2001 From: csric Date: Tue, 21 Mar 2023 14:27:17 +0800 Subject: [PATCH 06/36] facing init problem --- .../chatgpt/experience_maker/__init__.py | 3 +- .../chatgpt/experience_maker/detached.py | 27 +++- .../ChatGPT/chatgpt/trainer/__init__.py | 2 + .../ChatGPT/chatgpt/trainer/detached_base.py | 17 +- .../ChatGPT/chatgpt/trainer/detached_ppo.py | 14 +- applications/ChatGPT/examples/1m1t.py | 147 ++++++++++++++++++ applications/ChatGPT/examples/1m1t.sh | 19 +++ 7 files changed, 215 insertions(+), 14 deletions(-) create mode 100644 applications/ChatGPT/examples/1m1t.sh diff --git a/applications/ChatGPT/chatgpt/experience_maker/__init__.py b/applications/ChatGPT/chatgpt/experience_maker/__init__.py index 39ca7576b227..e3f43a20f72a 100644 --- a/applications/ChatGPT/chatgpt/experience_maker/__init__.py +++ b/applications/ChatGPT/chatgpt/experience_maker/__init__.py @@ -1,4 +1,5 
@@ from .base import Experience, ExperienceMaker from .naive import NaiveExperienceMaker +from .detached import ExperienceMakerHolder -__all__ = ['Experience', 'ExperienceMaker', 'NaiveExperienceMaker'] +__all__ = ['Experience', 'ExperienceMaker', 'NaiveExperienceMaker', 'ExperienceMakerHolder'] diff --git a/applications/ChatGPT/chatgpt/experience_maker/detached.py b/applications/ChatGPT/chatgpt/experience_maker/detached.py index 0f6f2e536940..38f697b55a30 100644 --- a/applications/ChatGPT/chatgpt/experience_maker/detached.py +++ b/applications/ChatGPT/chatgpt/experience_maker/detached.py @@ -4,17 +4,27 @@ from ..replay_buffer.detached import DetachedReplayBuffer import ray from torch import Tensor +import torch.nn as nn +from chatgpt.models.base import Actor +from chatgpt.trainer.strategies.sampler import DistributedSampler @ray.remote class ExperienceMakerHolder: ''' Args: detached_trainer_name_list: str list to get ray actor handles - experience_maker: experience maker + [others]: ExperienceMaker init ''' - def __init__(self, detached_trainer_name_list: List[str], experience_maker : ExperienceMaker): - self.experience_maker = experience_maker + def __init__(self, + detached_trainer_name_list: List[str], + actor: Actor, + critic: nn.Module, + reward_model: nn.Module, + initial_model: Actor, + kl_coef: float = 0.1,): + + self.experience_maker = ExperienceMaker(actor, critic, reward_model, initial_model,kl_coef) self.target_trainer_list = [] for name in detached_trainer_name_list: self.target_trainer_list.append(ray.get_actor(name)) @@ -40,7 +50,7 @@ def make_and_send(self, inputs): min_length = None while chosen_trainer is None: for target_trainer in self.target_trainer_list: - temp_length = ray.get(target_trainer.get_buffer_length.remote()) + temp_length = ray.get(target_trainer.buffer_get_length.remote()) if min_length is None: min_length = temp_length chosen_trainer = target_trainer @@ -50,6 +60,15 @@ def make_and_send(self, inputs): chosen_trainer = target_trainer chosen_trainer.buffer_append.remote(experience) + def workingloop(self, sampler: DistributedSampler, tokenizer: Optional[Callable[[Any], dict]] = None, times=5000 * 50000): + for _ in range(times): + rand_prompts = sampler + if tokenizer is not None: + inputs = tokenizer(rand_prompts) + else: + inputs = rand_prompts + self.make_and_send(inputs) + def update_experience_maker(self, new_actor, new_critic): # TODO: parameter update ''' diff --git a/applications/ChatGPT/chatgpt/trainer/__init__.py b/applications/ChatGPT/chatgpt/trainer/__init__.py index 65601778c46e..c7e309b94fc1 100644 --- a/applications/ChatGPT/chatgpt/trainer/__init__.py +++ b/applications/ChatGPT/chatgpt/trainer/__init__.py @@ -1,6 +1,8 @@ from .base import Trainer from .ppo import PPOTrainer from .rm import RewardModelTrainer +from .detached_ppo import DetachedPPOTrainer +from .detached_base import DetachedTrainer __all__ = ['Trainer', 'PPOTrainer', 'RewardModelTrainer', 'DetachedTrainer', 'DetachedPPOTrainer',] diff --git a/applications/ChatGPT/chatgpt/trainer/detached_base.py b/applications/ChatGPT/chatgpt/trainer/detached_base.py index a6666c347e88..fed687b057c4 100644 --- a/applications/ChatGPT/chatgpt/trainer/detached_base.py +++ b/applications/ChatGPT/chatgpt/trainer/detached_base.py @@ -2,9 +2,11 @@ from typing import Any, Callable, Dict, List, Optional, Union import torch +from torch import Tensor +import torch.nn as nn +from torch.optim import Optimizer from chatgpt.experience_maker import Experience from chatgpt.replay_buffer import 
DetachedReplayBuffer
-from torch import Tensor
 from tqdm import tqdm
 
 from .callbacks import Callback
@@ -27,7 +29,6 @@ class DetachedTrainer(ABC):
         detached_replay_buffer_ref (ObjectRef[DetachedReplayBuffer]): the replay buffer to use for training
         experience_batch_size (int, defaults to 8): the batch size to use for experience generation
         max_epochs (int, defaults to 1): the number of epochs of training process
-        tokenizer (Callable, optional): the tokenizer to use for tokenizing the input
         data_loader_pin_memory (bool, defaults to True): whether to pin memory for data loader
         callbacks (List[Callback], defaults to []): the callbacks to call during training process
         generate_kwargs (dict, optional): the kwargs to use while model generating
@@ -36,10 +37,9 @@ class DetachedTrainer(ABC):
     def __init__(self,
                  experience_maker_holder_name_list: List[str],
                  strategy: Strategy,# TODO: DetachedStrategy
-                 detached_replay_buffer: DetachedReplayBuffer,
+                 detached_replay_buffer: DetachedReplayBuffer = None,
                  experience_batch_size: int = 8,
                  max_epochs: int = 1,
-                 tokenizer: Optional[Callable[[Any], dict]] = None,
                  dataloader_pin_memory: bool = True,
                  callbacks: List[Callback] = [],
                  **generate_kwargs
@@ -49,7 +49,6 @@ def __init__(self,
         self.detached_replay_buffer = detached_replay_buffer
         self.experience_batch_size = experience_batch_size
         self.max_epochs = max_epochs
-        self.tokenizer = tokenizer
         self.generate_kwargs = generate_kwargs
         self.dataloader_pin_memory = dataloader_pin_memory
         self.callbacks = callbacks
@@ -93,7 +92,7 @@ def fit(self, num_episodes: int = 50000, max_timesteps: int = 500, update_timest
         self._on_episode_end(episode)
         self._on_fit_end()
 
-    def get_buffer_length(self):
+    def buffer_get_length(self):
         # called by ExperienceMakerHolder
         return self.detached_replay_buffer.get_length()
 
@@ -101,6 +100,12 @@ def buffer_append(self, experience: Experience):
         # called by ExperienceMakerHolder
         self.detached_replay_buffer.append(experience)
 
+    def strategy_save_model(self, model: nn.Module, path: str, only_rank0: bool = False) -> None:
+        self.strategy.save_model(model, path, only_rank0)
+
+    def strategy_save_optimizer(self, optimizer: Optimizer, path: str, only_rank0: bool = False) -> None:
+        self.strategy.save_optimizer(optimizer, path, only_rank0)
+
     def _on_fit_start(self) -> None:
         for callback in self.callbacks:
             callback.on_fit_start()
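The two pass-through helpers added above let a driver checkpoint without holding a strategy object of its own; the save executes inside the trainer actor. A hedged usage sketch (the actor name and paths are illustrative, and model/optim are handles the driver already owns):

    import ray

    trainer = ray.get_actor("trainer1")  # assumed registered actor name
    # both calls run in the trainer process; ray.get blocks until they finish
    ray.get(trainer.strategy_save_model.remote(model, "actor_checkpoint.pt", only_rank0=True))
    ray.get(trainer.strategy_save_optimizer.remote(optim, "actor_optim.pt", only_rank0=False))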
diff --git a/applications/ChatGPT/chatgpt/trainer/detached_ppo.py b/applications/ChatGPT/chatgpt/trainer/detached_ppo.py
index 252a05063972..b4d854339f69 100644
--- a/applications/ChatGPT/chatgpt/trainer/detached_ppo.py
+++ b/applications/ChatGPT/chatgpt/trainer/detached_ppo.py
@@ -54,14 +54,19 @@ def __int__(self,
                 value_clip: float = 0.4,
                 experience_batch_size: int = 8,
                 max_epochs: int = 1,
-                tokenizer: Optional[Callable[[Any], dict]] = None,
                 dataloader_pin_memory: bool = True,
                 callbacks: List[Callback] = [],
                 **generate_kwargs) -> None:
         detached_replay_buffer = DetachedReplayBuffer(train_batch_size, limit=buffer_limit, cpu_offload=buffer_cpu_offload)
         generate_kwargs = _set_default_generate_kwargs(strategy, generate_kwargs, actor)
-        super().__init__(experience_maker_holder_name_list, strategy, detached_replay_buffer, experience_batch_size, max_epochs, tokenizer,
-                         dataloader_pin_memory, callbacks, **generate_kwargs)
+        super().__init__(experience_maker_holder_name_list = experience_maker_holder_name_list,
+                         strategy = strategy,
+                         detached_replay_buffer = detached_replay_buffer,
+                         experience_batch_size = experience_batch_size,
+                         max_epochs = max_epochs,
+                         dataloader_pin_memory = dataloader_pin_memory,
+                         callbacks = callbacks,
+                         **generate_kwargs)
         self.actor = actor
         self.critic = critic
 
@@ -108,6 +113,9 @@ def training_step(self, experience: Experience) -> Dict[str, float]:
         self.critic_optim.zero_grad()
 
         return {'actor_loss': actor_loss.item(), 'critic_loss': critic_loss.item()}
+
+    def get_models(self):
+        return self.actor, self.critic, self.actor_optim, self.critic_optim
 
 def _set_default_generate_kwargs(strategy: Strategy, generate_kwargs: dict, actor: Actor) -> None:
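One caveat with get_models: a remote call returns an ObjectRef rather than the tuple itself, so a caller has to ray.get it before unpacking. A minimal sketch (actor name assumed):

    import ray

    trainer = ray.get_actor("trainer1")
    models_ref = trainer.get_models.remote()   # ObjectRef, not yet a tuple
    actor, critic, actor_optim, critic_optim = ray.get(models_ref)

The driver script below unpacks the ObjectRef directly; without ray.get (or declaring the method with num_returns=4) that line will fail at runtime.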
diff --git a/applications/ChatGPT/examples/1m1t.py b/applications/ChatGPT/examples/1m1t.py
index e69de29bb2d1..51c256d13480 100644
--- a/applications/ChatGPT/examples/1m1t.py
+++ b/applications/ChatGPT/examples/1m1t.py
@@ -0,0 +1,147 @@
+import argparse
+from copy import deepcopy
+
+import pandas as pd
+import torch
+from chatgpt.models.base import RewardModel
+from chatgpt.models.bloom import BLOOMActor, BLOOMCritic
+from chatgpt.models.gpt import GPTActor, GPTCritic
+from chatgpt.models.opt import OPTActor, OPTCritic
+from chatgpt.trainer import PPOTrainer, DetachedPPOTrainer
+from chatgpt.trainer.strategies import ColossalAIStrategy, DDPStrategy, NaiveStrategy
+from chatgpt.experience_maker import NaiveExperienceMaker, ExperienceMakerHolder
+from torch.optim import Adam
+from transformers import AutoTokenizer, BloomTokenizerFast
+from transformers.models.gpt2.tokenization_gpt2 import GPT2Tokenizer
+
+from colossalai.nn.optimizer import HybridAdam
+
+import ray
+
+# TODO: update maker actor/critic
+
+def main(args):
+    # configure strategy
+    if args.strategy == 'naive':
+        strategy = NaiveStrategy()
+    elif args.strategy == 'ddp':
+        strategy = DDPStrategy()
+    elif args.strategy == 'colossalai_gemini':
+        strategy = ColossalAIStrategy(stage=3, placement_policy='cuda', initial_scale=2**5)
+    elif args.strategy == 'colossalai_zero2':
+        strategy = ColossalAIStrategy(stage=2, placement_policy='cuda')
+    else:
+        raise ValueError(f'Unsupported strategy "{args.strategy}"')
+
+    # configure model / optimizer
+    with strategy.model_init_context():
+        if args.model == 'gpt2':
+            actor = GPTActor(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device())
+            critic = GPTCritic(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device())
+        elif args.model == 'bloom':
+            actor = BLOOMActor(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device())
+            critic = BLOOMCritic(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device())
+        elif args.model == 'opt':
+            actor = OPTActor(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device())
+            critic = OPTCritic(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device())
+        else:
+            raise ValueError(f'Unsupported model "{args.model}"')
+
+        initial_model = deepcopy(actor)
+        reward_model = RewardModel(deepcopy(critic.model), deepcopy(critic.value_head)).to(torch.cuda.current_device())
+
+    if args.strategy.startswith('colossalai'):
+        actor_optim = HybridAdam(actor.parameters(), lr=5e-6)
+        critic_optim = HybridAdam(critic.parameters(), lr=5e-6)
+    else:
+        actor_optim = Adam(actor.parameters(), lr=5e-6)
+        critic_optim = Adam(critic.parameters(), lr=5e-6)
+
+    (actor, actor_optim), (critic, critic_optim), reward_model, initial_model = strategy.prepare(
+        (actor, actor_optim), (critic, critic_optim), reward_model, initial_model)
+
+    actor_maker = deepcopy(actor)
+    critic_maker = deepcopy(critic)
+
+    # configure tokenizer
+    if args.model == 'gpt2':
+        tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
+        tokenizer.pad_token = tokenizer.eos_token
+    elif args.model == 'bloom':
+        tokenizer = BloomTokenizerFast.from_pretrained(args.pretrain)
+        tokenizer.pad_token = tokenizer.eos_token
+    elif args.model == 'opt':
+        tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m")
+    else:
+        raise ValueError(f'Unsupported model "{args.model}"')
+
+    def tokenize_fn(texts):
+        # MUST padding to max length to ensure inputs of all ranks have the same length
+        # Different length may lead to hang when using gemini, as different generation steps
+        batch = tokenizer(texts, return_tensors='pt', max_length=96, padding='max_length', truncation=True)
+        return {k: v.cuda() for k, v in batch.items()}
+
+    # configure sampler
+    dataset = pd.read_csv(args.prompt_path)['prompt']
+    sampler = strategy.setup_sampler(dataset)
+
+    # configure Ray Actor
+    # The models should be initialized on the trainer side; passing them over directly does not work.
+    # Keep the maker and the trainer as fully isolated from each other as possible.
+    trainer = DetachedPPOTrainer.options(name="trainer1", num_gpus=1).remote(
+        experience_maker_holder_name_list= ["maker1"],
+        strategy = strategy,
+        actor = actor,
+        critic = critic,
+        actor_optim = actor_optim,
+        critic_optim = critic_optim,
+        experience_batch_size = args.experience_batch_size,
+        max_epochs = args.max_epochs,
+        train_batch_size = args.train_batch_size,
+    )
+
+
+    experience_holder = ExperienceMakerHolder.options(name="maker1", num_gpus=1).remote(
+        ["trainer1"],
+        actor_maker,
+        critic_maker,
+        reward_model,
+        initial_model)
+
+
+
+    trainer.fit.remote(num_episodes=args.num_episodes, max_timesteps=args.max_timesteps,update_timesteps=args.update_timesteps)
+
+    experience_holder.workingloop.remote(sampler, tokenize_fn, times=args.num_episodes * args.max_timesteps * args.update_timesteps+3)
+
+    trainer_actor, trainer_critic, trainer_actor_optim, trainer_critic_optim = trainer.get_models.remote()
+
+    # save model checkpoint after fitting
+    trainer.strategy_save_model.remote(trainer_actor, args.save_path, only_rank0=True)
+    # save optimizer checkpoint on all ranks
+    if args.need_optim_ckpt:
+        trainer.strategy_save_optimizer.remote(trainer_actor_optim,
+                                               'actor_optim_checkpoint_prompts_%d.pt' % (torch.cuda.current_device()),
+                                               only_rank0=False)
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('prompt_path')
+    parser.add_argument('--strategy',
+                        choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'],
+                        default='naive')
+    parser.add_argument('--model', default='gpt2', choices=['gpt2', 'bloom', 'opt'])
+    parser.add_argument('--pretrain', type=str, default=None)
+    parser.add_argument('--save_path', type=str, default='actor_checkpoint_prompts.pt')
+    parser.add_argument('--need_optim_ckpt', type=bool, default=False)
+    parser.add_argument('--num_episodes', type=int, default=10)
+    parser.add_argument('--max_timesteps', type=int, default=10)
+    parser.add_argument('--update_timesteps', type=int, default=10)
+    parser.add_argument('--max_epochs', type=int, default=5)
+    parser.add_argument('--train_batch_size', type=int, default=8)
+    parser.add_argument('--experience_batch_size', type=int, default=8)
+    parser.add_argument('--lora_rank', type=int, default=0, help="low-rank adaptation matrices rank")
+    args = parser.parse_args()
+    ray.init()
+    main(args)
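tokenize_fn above pads every batch to a fixed 96 tokens so that all ranks perform the same number of generation steps. With a GPT-2-style tokenizer the shape contract can be checked like this (a sketch; the shapes, not the exact ids, are the point):

    from transformers import GPT2Tokenizer

    tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
    tokenizer.pad_token = tokenizer.eos_token
    batch = tokenizer(["hello world"], return_tensors='pt',
                      max_length=96, padding='max_length', truncation=True)
    assert batch['input_ids'].shape == (1, 96)       # always (batch, max_length)
    assert batch['attention_mask'].shape == (1, 96)  # zeros mark padding positions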
diff --git a/applications/ChatGPT/examples/1m1t.sh b/applications/ChatGPT/examples/1m1t.sh
new file mode 100644
index 000000000000..ea9b2102956c
--- /dev/null
+++ b/applications/ChatGPT/examples/1m1t.sh
@@ -0,0 +1,19 @@
+set_n_least_used_CUDA_VISIBLE_DEVICES() {
+    local n=${1:-"9999"}
+    echo "GPU Memory Usage:"
+    local FIRST_N_GPU_IDS=$(nvidia-smi --query-gpu=memory.used --format=csv \
+        | tail -n +2 \
+        | nl -v 0 \
+        | tee /dev/tty \
+        | sort -g -k 2 \
+        | awk '{print $1}' \
+        | head -n $n)
+    export CUDA_VISIBLE_DEVICES=$(echo $FIRST_N_GPU_IDS | sed 's/ /,/g')
+    echo "Now CUDA_VISIBLE_DEVICES is set to:"
+    echo "CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES"
+}
+
+set_n_least_used_CUDA_VISIBLE_DEVICES 2
+
+torchrun --standalone 1m1t.py "/home/lccsr/HPC-AI/ColossalAI/applications/ChatGPT/examples/awesome-chatgpt-prompts/prompts.csv" \
+    --strategy colossalai_zero2 --lora_rank 2
\ No newline at end of file
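The cross-registration in 1m1t.py (the trainer is constructed knowing the name "maker1", the maker knowing "trainer1") works because Ray resolves named actors lazily through ray.get_actor. The core pattern, reduced to a toy example:

    import ray

    @ray.remote
    class Pong:
        def ping(self):
            return "pong"

    ray.init()
    pong = Pong.options(name="pong1").remote()   # register under a well-known name
    handle = ray.get_actor("pong1")              # later lookup, possibly from another actor
    assert ray.get(handle.ping.remote()) == "pong"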
From 29976fa19796903c99772b75b55621a932b24796 Mon Sep 17 00:00:00 2001
From: csric
Date: Tue, 21 Mar 2023 18:13:21 +0800
Subject: [PATCH 07/36] 1 maker 1 trainer detached run. but no model update

---
 .../chatgpt/experience_maker/detached.py      | 21 ++++++--
 .../ChatGPT/chatgpt/replay_buffer/detached.py |  6 +--
 .../ChatGPT/chatgpt/trainer/detached_base.py  | 24 ++++++---
 .../ChatGPT/chatgpt/trainer/detached_ppo.py   | 39 +++++++++------
 applications/ChatGPT/examples/1m1t.py         | 49 +++++++++++++------
 applications/ChatGPT/examples/1m1t.sh         |  2 +-
 6 files changed, 97 insertions(+), 44 deletions(-)

diff --git a/applications/ChatGPT/chatgpt/experience_maker/detached.py b/applications/ChatGPT/chatgpt/experience_maker/detached.py
index 38f697b55a30..50d56348dda2 100644
--- a/applications/ChatGPT/chatgpt/experience_maker/detached.py
+++ b/applications/ChatGPT/chatgpt/experience_maker/detached.py
@@ -13,7 +13,12 @@ class ExperienceMakerHolder:
     '''
     Args:
         detached_trainer_name_list: str list to get ray actor handles
-        [others]: ExperienceMaker init
+        actor: \
+        critic: \
+        reward_model: \
+        initial_model: \
+        kl_coef: NaiveExperienceMaker init
+        experience_batch_size: batch size of generated experience
     '''
 
     def __init__(self,
@@ -22,10 +27,14 @@ def __init__(self,
                  critic: nn.Module,
                  reward_model: nn.Module,
                  initial_model: Actor,
-                 kl_coef: float = 0.1,):
+                 kl_coef: float = 0.1,
+                 experience_batch_size: int = 8,
+                 **generate_kwargs):
 
         self.experience_maker = NaiveExperienceMaker(actor, critic, reward_model, initial_model,kl_coef)
         self.target_trainer_list = []
+        self.experience_batch_size = experience_batch_size
+        self.generate_kwargs = generate_kwargs
         for name in detached_trainer_name_list:
             self.target_trainer_list.append(ray.get_actor(name))
 
@@ -48,6 +57,8 @@ def make_and_send(self, inputs):
         # choose a trainer that has the least experience batch in its detached_replay_buffer
         chosen_trainer = None
         min_length = None
+        if 'debug' in self.generate_kwargs and self.generate_kwargs['debug'] == True:
+            print("[maker] choosing target")
         while chosen_trainer is None:
             for target_trainer in self.target_trainer_list:
                 temp_length = ray.get(target_trainer.buffer_get_length.remote())
@@ -58,11 +69,13 @@ def make_and_send(self, inputs):
                     if temp_length < min_length:
                         min_length = temp_length
                         chosen_trainer = target_trainer
+        if 'debug' in self.generate_kwargs and self.generate_kwargs['debug'] == True:
+            print("[maker] sending")
         chosen_trainer.buffer_append.remote(experience)
 
     def workingloop(self, sampler: DistributedSampler, tokenizer: Optional[Callable[[Any], dict]] = None, times=5000 * 50000):
         for _ in range(times):
-            rand_prompts = sampler
+            rand_prompts = sampler.sample(self.experience_batch_size)
             if tokenizer is not None:
                 inputs = tokenizer(rand_prompts)
             else:
                 inputs = rand_prompts
             self.make_and_send(inputs)
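make_and_send polls every trainer's buffer length one round-trip at a time and busy-waits until a target emerges. The same selection can be written with a single batched ray.get, which overlaps the remote calls instead of serializing them; a hypothetical helper, not part of the patch:

    import ray

    def pick_least_loaded(trainers):
        # fire all buffer_get_length calls in parallel, then take the minimum
        lengths = ray.get([t.buffer_get_length.remote() for t in trainers])
        return trainers[lengths.index(min(lengths))]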
diff --git a/applications/ChatGPT/chatgpt/replay_buffer/detached.py b/applications/ChatGPT/chatgpt/replay_buffer/detached.py
index 0ebbd3e13c01..ef5b0f2235f5 100644
--- a/applications/ChatGPT/chatgpt/replay_buffer/detached.py
+++ b/applications/ChatGPT/chatgpt/replay_buffer/detached.py
@@ -10,7 +10,7 @@
 from threading import Lock
 import copy
 
-class DetachedReplayBuffer(ReplayBuffer):
+class DetachedReplayBuffer:
     '''
     Detached replay buffer. Share Experience across workers on the same node.
     Therefore a trainer node is expected to have only one instance.
@@ -24,12 +24,11 @@ class DetachedReplayBuffer:
     '''
 
     def __init__(self, sample_batch_size: int, tp_world_size: int = 1, limit : int = 0, cpu_offload: bool = True) -> None:
-        super().__init__(sample_batch_size, limit)
         self.cpu_offload = cpu_offload
         self.sample_batch_size = sample_batch_size
         self.limit = limit
         self.items = Queue(self.limit)
-        self.batch_collector : List[BufferItem] = None
+        self.batch_collector : List[BufferItem] = []
 
     '''
     Workers in the same tp group share this buffer and need same sample for one step.
diff --git a/applications/ChatGPT/chatgpt/trainer/detached_base.py b/applications/ChatGPT/chatgpt/trainer/detached_base.py
index fed687b057c4..9c584ab2baa4 100644
--- a/applications/ChatGPT/chatgpt/trainer/detached_base.py
+++ b/applications/ChatGPT/chatgpt/trainer/detached_base.py
@@ -36,23 +36,23 @@ class DetachedTrainer(ABC):
 
     def __init__(self,
                  experience_maker_holder_name_list: List[str],
-                 strategy: Strategy,# TODO: DetachedStrategy
-                 detached_replay_buffer: DetachedReplayBuffer = None,
+                 strategy: Strategy, # TODO: DetachedStrategy
+                 train_batch_size: int = 8,
+                 buffer_limit: int = 0,
+                 buffer_cpu_offload: bool = True,
                  experience_batch_size: int = 8,
                  max_epochs: int = 1,
                  dataloader_pin_memory: bool = True,
                  callbacks: List[Callback] = [],
-                 **generate_kwargs
-                 )->None:
+                 **generate_kwargs)->None:
         super().__init__()
         self.strategy = strategy
-        self.detached_replay_buffer = detached_replay_buffer
+        self.detached_replay_buffer = DetachedReplayBuffer(train_batch_size, limit=buffer_limit, cpu_offload=buffer_cpu_offload)
         self.experience_batch_size = experience_batch_size
         self.max_epochs = max_epochs
         self.dataloader_pin_memory = dataloader_pin_memory
         self.callbacks = callbacks
-
+        self.generate_kwargs = generate_kwargs
         self.target_holder_name_list = experience_maker_holder_name_list
         self.target_holder_list = []
 
@@ -73,7 +73,11 @@ def training_step(self, experience: Experience) -> Dict[str, Any]:
     def _learn(self):
         pbar = tqdm(range(self.max_epochs), desc='Train epoch', disable=not is_rank_0())
         for _ in pbar:
+            if 'debug' in self.generate_kwargs and self.generate_kwargs['debug'] == True:
+                print("[trainer] sampling exp")
             experience = self.detached_replay_buffer.sample()
+            if 'debug' in self.generate_kwargs and self.generate_kwargs['debug'] == True:
+                print("[trainer] training step")
             metrics = self.training_step(experience)
             pbar.set_postfix(metrics)
 
@@ -84,7 +88,7 @@ def fit(self, num_episodes: int = 50000, max_timesteps: int = 500, update_timest
             for timestep in tqdm(range(max_timesteps),
                                  desc=f'Episode [{episode+1}/{num_episodes}]',
                                  disable=not is_rank_0()):
-                for _ in update_timesteps:
+                for _ in range(update_timesteps):
                     self._learn()
                 # assume those remote holders are working
                 # self.update_remote_makers()
 
@@ -96,10 +100,14 @@ def fit(self, num_episodes: int = 50000, max_timesteps: int = 500, update_timest
     def buffer_get_length(self):
         # called by ExperienceMakerHolder
+        if 'debug' in self.generate_kwargs and self.generate_kwargs['debug'] == True:
+            print("[trainer] telling length")
         return self.detached_replay_buffer.get_length()
 
     def buffer_append(self, experience: Experience):
         # called by ExperienceMakerHolder
+        if 'debug' in self.generate_kwargs and self.generate_kwargs['debug'] == True:
+            print("[trainer] receiving exp")
         self.detached_replay_buffer.append(experience)
diff --git a/applications/ChatGPT/chatgpt/trainer/detached_ppo.py b/applications/ChatGPT/chatgpt/trainer/detached_ppo.py
index b4d854339f69..1c51fc65d127 100644
--- a/applications/ChatGPT/chatgpt/trainer/detached_ppo.py
+++ b/applications/ChatGPT/chatgpt/trainer/detached_ppo.py
@@ -1,4 +1,5 @@
 from typing import Any, Callable, Dict, List, Optional
+import time
 
 import torch
 import torch.nn as nn
@@ -17,6 +18,7 @@
 
 import ray
 
+
 @ray.remote
 class DetachedPPOTrainer(DetachedTrainer):
     '''
@@ -40,7 +42,7 @@ class DetachedPPOTrainer(DetachedTrainer):
         generate_kwargs (dict, optional): the kwargs to use while model generating
     '''
 
-    def __int__(self,
+    def __init__(self,
                 experience_maker_holder_name_list: List[str],
                 strategy: Strategy,
                 actor: Actor,
@@ -57,25 +59,27 @@ def __init__(self,
                 dataloader_pin_memory: bool = True,
                 callbacks: List[Callback] = [],
                 **generate_kwargs) -> None:
-        detached_replay_buffer = DetachedReplayBuffer(train_batch_size, limit=buffer_limit, cpu_offload=buffer_cpu_offload)
+        self.fully_initialized = False
         generate_kwargs = _set_default_generate_kwargs(strategy, generate_kwargs, actor)
         self.actor = actor
         self.critic = critic
         self.actor_loss_fn = PolicyLoss(eps_clip)
         self.critic_loss_fn = ValueLoss(value_clip)
         self.actor_optim = actor_optim
         self.critic_optim = critic_optim
-        super().__init__(experience_maker_holder_name_list = experience_maker_holder_name_list,
-                         strategy = strategy,
-                         detached_replay_buffer = detached_replay_buffer,
-                         experience_batch_size = experience_batch_size,
-                         max_epochs = max_epochs,
-                         dataloader_pin_memory = dataloader_pin_memory,
-                         callbacks = callbacks,
-                         **generate_kwargs)
+        super().__init__(experience_maker_holder_name_list,
+                         strategy = strategy,
+                         train_batch_size=train_batch_size,
+                         buffer_limit=buffer_limit,
+                         buffer_cpu_offload=buffer_cpu_offload,
+                         experience_batch_size = experience_batch_size,
+                         max_epochs = max_epochs,
+                         dataloader_pin_memory = dataloader_pin_memory,
+                         callbacks = callbacks,
+                         generate_kwargs=generate_kwargs)
+
+        self.fully_initialized = True
 
     def update_remote_makers(self):
         # TODO: balance duties
@@ -85,11 +89,18 @@ def update_remote_makers(self):
                 with torch.no_grad():
                     target_holder.update_experience_maker.remote(self.actor, self.critic)
 
+    def ready(self):
+        # indicate that self is fully initialized
+        while not hasattr(self, "fully_initialized") or self.fully_initialized == False:
+            time.sleep(1.0)
+        return True
 
     def training_step(self, experience: Experience) -> Dict[str, float]:
         self.actor.train()
         self.critic.train()
 
+        experience.to_device(torch.cuda.current_device())
+
         num_actions = experience.action_mask.size(1)
         action_log_probs = self.actor(experience.sequences, num_actions, attention_mask=experience.attention_mask)
         actor_loss = self.actor_loss_fn(action_log_probs,
@@ -113,7 +124,7 @@ def training_step(self, experience: Experience) -> Dict[str, float]:
         self.critic_optim.zero_grad()
 
         return {'actor_loss': actor_loss.item(), 'critic_loss': critic_loss.item()}
-
+    
     def get_models(self):
         return self.actor, self.critic, self.actor_optim, self.critic_optim
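ready() gives the driver an explicit initialization barrier for the concurrent (max_concurrency=3) trainer actor: the call spins until the fully_initialized flag set at the end of __init__ is visible. The driver-side handshake, reduced to its core (names assumed):

    import ray

    trainer = ray.get_actor("trainer1")
    ray.get(trainer.ready.remote())   # returns only once fully_initialized is True
    done = trainer.fit.remote(num_episodes=10, max_timesteps=10, update_timesteps=10)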
self.actor_loss_fn(action_log_probs, @@ -113,7 +124,7 @@ def training_step(self, experience: Experience) -> Dict[str, float]: self.critic_optim.zero_grad() return {'actor_loss': actor_loss.item(), 'critic_loss': critic_loss.item()} - + def get_models(self): return self.actor, self.critic, self.actor_optim, self.critic_optim diff --git a/applications/ChatGPT/examples/1m1t.py b/applications/ChatGPT/examples/1m1t.py index 51c256d13480..1920f1607302 100644 --- a/applications/ChatGPT/examples/1m1t.py +++ b/applications/ChatGPT/examples/1m1t.py @@ -86,41 +86,60 @@ def tokenize_fn(texts): sampler = strategy.setup_sampler(dataset) # configure Ray Actor - # the models should be initialized on the trainer side; passing them over directly does not work - # keep the maker and the trainer as fully isolated from each other as possible - trainer = DetachedPPOTrainer.options(name="trainer1", num_gpus=1).remote( + trainer_ref = DetachedPPOTrainer.options(name="trainer1", num_gpus=1, max_concurrency=3).remote( experience_maker_holder_name_list= ["maker1"], strategy = strategy, actor = actor, critic = critic, actor_optim = actor_optim, critic_optim = critic_optim, + train_batch_size=args.train_batch_size, + buffer_limit = 16, experience_batch_size = args.experience_batch_size, max_epoch = args.max_epochs, - train_batch_size = args.train_batch_size, + #kwargs: + max_length=128, + do_sample=True, + temperature=1.0, + top_k=50, + pad_token_id=tokenizer.pad_token_id, + eos_token_id=tokenizer.eos_token_id, + debug=args.debug, ) - - experience_holder = ExperienceMakerHolder.options(name="maker1", num_gpus=1).remote( + experience_holder_ref = ExperienceMakerHolder.options(name="maker1", num_gpus=1, max_concurrency=2).remote( ["trainer1"], actor_maker, critic_maker, reward_model, - initial_model) - - + initial_model, + experience_batch_size=args.experience_batch_size, + #kwargs: + max_length=128, + do_sample=True, + temperature=1.0, + top_k=50, + pad_token_id=tokenizer.pad_token_id, + eos_token_id=tokenizer.eos_token_id, + debug=args.debug, + ) - trainer.fit.remote(num_episodes=args.num_episodes, max_timesteps=args.max_timesteps,update_timesteps=args.update_timesteps) + print("waiting for trainer...") + ray.get(trainer_ref.ready.remote()) + print("...ready") + + trainer_done_ref = trainer_ref.fit.remote(num_episodes=args.num_episodes, max_timesteps=args.max_timesteps, update_timesteps=args.update_timesteps) - experience_holder.workingloop.remote(sampler, tokenize_fn, times=args.num_episodes * args.max_timesteps * args.update_timesteps+3) + maker_done_ref = experience_holder_ref.workingloop.remote(sampler, tokenize_fn, times=args.num_episodes * args.max_timesteps * args.update_timesteps + 3) - trainer_actor, trainer_critic, trainer_actor_optim, trainer_critic_optim = trainer.get_models.remote() + ray.get([trainer_done_ref, maker_done_ref]) + trainer_actor, trainer_critic, trainer_actor_optim, trainer_critic_optim = trainer_ref.get_models.remote() # save model checkpoint after fitting - trainer.strategy_save_model.remote(trainer_actor, args.save_path, only_rank0=True) + trainer_ref.strategy_save_model.remote(trainer_actor, args.save_path, only_rank0=True) # save optimizer checkpoint on all ranks if args.need_optim_ckpt: - trainer.strategy_save_optimizer.remote(trainer_actor_optim, + trainer_ref.strategy_save_optimizer.remote(trainer_actor_optim, 'actor_optim_checkpoint_prompts_%d.pt' % (torch.cuda.current_device()), only_rank0=False) @@ -142,6 +161,8 @@ def tokenize_fn(texts): parser.add_argument('--train_batch_size', type=int, default=8) parser.add_argument('--experience_batch_size', type=int, default=8)
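# note: train_batch_size is the batch the trainer samples from its replay buffer per optimization step, while experience_batch_size is the number of prompts the maker turns into experience per generation round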
parser.add_argument('--lora_rank', type=int, default=0, help="low-rank adaptation matrices rank") + + parser.add_argument('--debug', action='store_true') args = parser.parse_args() ray.init() main(args) diff --git a/applications/ChatGPT/examples/1m1t.sh b/applications/ChatGPT/examples/1m1t.sh index ea9b2102956c..f7cae2844020 100644 --- a/applications/ChatGPT/examples/1m1t.sh +++ b/applications/ChatGPT/examples/1m1t.sh @@ -16,4 +16,4 @@ set_n_least_used_CUDA_VISIBLE_DEVICES() { set_n_least_used_CUDA_VISIBLE_DEVICES 2 torchrun --standalone 1m1t.py "/home/lccsr/HPC-AI/ColossalAI/applications/ChatGPT/examples/awesome-chatgpt-prompts/prompts.csv" \ - --strategy colossalai_zero2 --lora_rank 2 \ No newline at end of file + --strategy naive --lora_rank 2 \ No newline at end of file From 523e2090ee6e725e2f05a111256db7905bea77c7 Mon Sep 17 00:00:00 2001 From: csric Date: Wed, 22 Mar 2023 14:15:05 +0800 Subject: [PATCH 08/36] facing cuda problem --- .../ChatGPT/chatgpt/experience_maker/detached.py | 4 ++-- applications/ChatGPT/chatgpt/replay_buffer/detached.py | 2 +- applications/ChatGPT/chatgpt/trainer/detached_ppo.py | 2 +- applications/ChatGPT/examples/1m1t.py | 10 +++++----- applications/ChatGPT/examples/1m1t.sh | 6 +++++- 5 files changed, 14 insertions(+), 10 deletions(-) diff --git a/applications/ChatGPT/chatgpt/experience_maker/detached.py b/applications/ChatGPT/chatgpt/experience_maker/detached.py index 50d56348dda2..537e19ef93de 100644 --- a/applications/ChatGPT/chatgpt/experience_maker/detached.py +++ b/applications/ChatGPT/chatgpt/experience_maker/detached.py @@ -5,7 +5,7 @@ import ray from torch import Tensor import torch.nn as nn -from chatgpt.models.base import Actor +from chatgpt.models.base import Actor, Critic from chatgpt.trainer.strategies.sampler import DistributedSampler @ray.remote @@ -24,7 +24,7 @@ class ExperienceMakerHolder: def __init__(self, detached_trainer_name_list: List[str], actor: Actor, - critic: nn.Module, + critic: Critic, reward_model: nn.Module, initial_model: Actor, kl_coef: float = 0.1, diff --git a/applications/ChatGPT/chatgpt/replay_buffer/detached.py b/applications/ChatGPT/chatgpt/replay_buffer/detached.py index ef5b0f2235f5..59909379f4a4 100644 --- a/applications/ChatGPT/chatgpt/replay_buffer/detached.py +++ b/applications/ChatGPT/chatgpt/replay_buffer/detached.py @@ -61,7 +61,7 @@ def clear(self) -> None: self.items = Queue(self.limit) self.worker_state = [False] * self.tp_world_size self.batch_collector = [] - + @torch.no_grad() def sample(self, worker_rank = 0, to_device = "cpu") -> Experience: self.worker_state_lock.acquire() diff --git a/applications/ChatGPT/chatgpt/trainer/detached_ppo.py b/applications/ChatGPT/chatgpt/trainer/detached_ppo.py index 1c51fc65d127..7aeba029c17f 100644 --- a/applications/ChatGPT/chatgpt/trainer/detached_ppo.py +++ b/applications/ChatGPT/chatgpt/trainer/detached_ppo.py @@ -126,7 +126,7 @@ def training_step(self, experience: Experience) -> Dict[str, float]: return {'actor_loss': actor_loss.item(), 'critic_loss': critic_loss.item()} def get_models(self): - return self.actor, self.critic, self.actor_optim, self.critic_optim + return (self.actor, self.critic, self.actor_optim, self.critic_optim) def _set_default_generate_kwargs(strategy: Strategy, generate_kwargs: dict, actor: Actor) -> None: origin_model = strategy._unwrap_actor(actor) diff --git a/applications/ChatGPT/examples/1m1t.py b/applications/ChatGPT/examples/1m1t.py index 1920f1607302..7ce47a1ccee7 100644 --- a/applications/ChatGPT/examples/1m1t.py +++ 
b/applications/ChatGPT/examples/1m1t.py @@ -133,7 +133,7 @@ def tokenize_fn(texts): maker_done_ref = experience_holder_ref.workingloop.remote(sampler, tokenize_fn, times=args.num_episodes * args.max_timesteps * args.update_timesteps + 3) ray.get([trainer_done_ref, maker_done_ref]) - trainer_actor, trainer_critic, trainer_actor_optim, trainer_critic_optim = trainer_ref.get_models.remote() + (trainer_actor, trainer_critic, trainer_actor_optim, trainer_critic_optim) = trainer_ref.get_models.remote() # save model checkpoint after fitting trainer_ref.strategy_save_model.remote(trainer_actor, args.save_path, only_rank0=True) @@ -154,15 +154,15 @@ def tokenize_fn(texts): parser.add_argument('--pretrain', type=str, default=None) parser.add_argument('--save_path', type=str, default='actor_checkpoint_prompts.pt') parser.add_argument('--need_optim_ckpt', type=bool, default=False) - parser.add_argument('--num_episodes', type=int, default=10) - parser.add_argument('--max_timesteps', type=int, default=10) - parser.add_argument('--update_timesteps', type=int, default=10) + parser.add_argument('--num_episodes', type=int, default=1) + parser.add_argument('--max_timesteps', type=int, default=1) + parser.add_argument('--update_timesteps', type=int, default=1) parser.add_argument('--max_epochs', type=int, default=5) parser.add_argument('--train_batch_size', type=int, default=8) parser.add_argument('--experience_batch_size', type=int, default=8) parser.add_argument('--lora_rank', type=int, default=0, help="low-rank adaptation matrices rank") parser.add_argument('--debug', action='store_true') - args = parser.parse_args() + args = parser.parse_args() ray.init() main(args) diff --git a/applications/ChatGPT/examples/1m1t.sh b/applications/ChatGPT/examples/1m1t.sh index f7cae2844020..72ff0c300161 100644 --- a/applications/ChatGPT/examples/1m1t.sh +++ b/applications/ChatGPT/examples/1m1t.sh @@ -16,4 +16,8 @@ set_n_least_used_CUDA_VISIBLE_DEVICES() { set_n_least_used_CUDA_VISIBLE_DEVICES 2 torchrun --standalone 1m1t.py "/home/lccsr/HPC-AI/ColossalAI/applications/ChatGPT/examples/awesome-chatgpt-prompts/prompts.csv" \ - --strategy naive --lora_rank 2 \ No newline at end of file + --strategy naive --lora_rank 2 + +# ray job submit --working-dir /home/lccsr/HPC-AI/ColossalAI/applications/ChatGPT/examples -- python 1m1t.py \ +# "/home/lccsr/HPC-AI/ColossalAI/applications/ChatGPT/examples/awesome-chatgpt-prompts/prompts.csv" \ +# --strategy naive --lora_rank 2 \ No newline at end of file From 45361c2e24b9c1e5257eb7f403849df418a22e3e Mon Sep 17 00:00:00 2001 From: csric Date: Wed, 22 Mar 2023 16:41:29 +0800 Subject: [PATCH 09/36] fix save functions --- .../chatgpt/experience_maker/detached.py | 31 +++-- .../ChatGPT/chatgpt/trainer/detached_base.py | 6 - .../ChatGPT/chatgpt/trainer/detached_ppo.py | 115 +++++++++++------- applications/ChatGPT/chatgpt/trainer/utils.py | 21 +++- applications/ChatGPT/examples/1m1t.py | 105 ++++++---------- applications/ChatGPT/examples/1m1t.sh | 2 +- 6 files changed, 148 insertions(+), 132 deletions(-) diff --git a/applications/ChatGPT/chatgpt/experience_maker/detached.py b/applications/ChatGPT/chatgpt/experience_maker/detached.py index 537e19ef93de..0a2a419db930 100644 --- a/applications/ChatGPT/chatgpt/experience_maker/detached.py +++ b/applications/ChatGPT/chatgpt/experience_maker/detached.py @@ -5,32 +5,41 @@ import ray from torch import Tensor import torch.nn as nn -from chatgpt.models.base import Actor, Critic +from chatgpt.models.base import Actor, Critic, RewardModel from 
chatgpt.trainer.strategies.sampler import DistributedSampler +from chatgpt.trainer.strategies import Strategy +from chatgpt.trainer.utils import is_rank_0, get_cuda_actor_critic_from_args +from copy import deepcopy @ray.remote class ExperienceMakerHolder: ''' Args: detached_trainer_name_list: str list to get ray actor handles - actor: \ - critic: \ - reward_model: \ - initial_model: \ + model: for actor / critic / initial / reward init + pretrained: for actor / critic / initial / reward init + lora_rank: for actor / critic / initial / reward init kl_coef: NaiveExperienceMaker init experience_batch_size: batch size of generated experience ''' - def __init__(self, + def __init__(self, detached_trainer_name_list: List[str], - actor: Actor, - critic: Critic, - reward_model: nn.Module, - initial_model: Actor, + strategy: Strategy, + model: str, + pretrained: str = None, + lora_rank: int = 0, kl_coef: float = 0.1, experience_batch_size:int = 8, **generate_kwargs): - + + with strategy.model_init_context(): + actor, critic = get_cuda_actor_critic_from_args(model, pretrained, lora_rank) + initial_model = deepcopy(actor) + reward_model = RewardModel(deepcopy(critic.model), deepcopy(critic.value_head)).to(torch.cuda.current_device()) + + actor, critic, reward_model, initial_model= \ + strategy.prepare(actor, critic, reward_model, initial_model) self.experience_maker = NaiveExperienceMaker(actor, critic, reward_model, initial_model,kl_coef) self.target_trainer_list = [] self.experience_batch_size = experience_batch_size diff --git a/applications/ChatGPT/chatgpt/trainer/detached_base.py b/applications/ChatGPT/chatgpt/trainer/detached_base.py index 9c584ab2baa4..a10fd341205f 100644 --- a/applications/ChatGPT/chatgpt/trainer/detached_base.py +++ b/applications/ChatGPT/chatgpt/trainer/detached_base.py @@ -108,12 +108,6 @@ def buffer_append(self, experience: Experience): print("[trainer] receiving exp") self.detached_replay_buffer.append(experience) - def strategy_save_model(self, model: nn.Module, path: str, only_rank0: bool = False) -> None: - self.strategy.save_model(model, path, only_rank0) - - def strategy_save_potimizer(self, optimizer: Optimizer, path: str, only_rank0: bool = False) -> None: - self.strategy.save_optimizer(optimizer, path, only_rank0) - def _on_fit_start(self) -> None: for callback in self.callbacks: callback.on_fit_start() diff --git a/applications/ChatGPT/chatgpt/trainer/detached_ppo.py b/applications/ChatGPT/chatgpt/trainer/detached_ppo.py index 7aeba029c17f..5373bd4dcd06 100644 --- a/applications/ChatGPT/chatgpt/trainer/detached_ppo.py +++ b/applications/ChatGPT/chatgpt/trainer/detached_ppo.py @@ -3,18 +3,22 @@ import torch import torch.nn as nn +from torch.optim import Optimizer +from torch.optim import Adam + from chatgpt.experience_maker import Experience, NaiveExperienceMaker from chatgpt.models.base import Actor, Critic from chatgpt.models.generation_utils import update_model_kwargs_fn from chatgpt.models.loss import PolicyLoss, ValueLoss from chatgpt.replay_buffer import DetachedReplayBuffer -from torch.optim import Optimizer +from chatgpt.trainer.strategies import ColossalAIStrategy, DDPStrategy, NaiveStrategy + +from colossalai.nn.optimizer import HybridAdam from .detached_base import DetachedTrainer from .callbacks import Callback from .strategies import Strategy - -from .utils import is_rank_0 +from .utils import is_rank_0, get_cuda_actor_critic_from_args import ray @@ -25,10 +29,9 @@ class DetachedPPOTrainer(DetachedTrainer): Detached Trainer for PPO algorithm Args: 
strategy (Strategy): the strategy to use for training - actor (Actor): the actor model in ppo algorithm - critic (Critic): the critic model in ppo algorithm - actor_optim (Optimizer): the optimizer to use for actor model - critic_optim (Optimizer): the optimizer to use for critic model + model (str) : for actor / critic init + pretrained (str) : for actor / critic init + lora_rank (int) : for actor / critic init train_batch_size (int, defaults to 8): the batch size to use for training buffer_limit (int, defaults to 0): the max_size limitation of replay buffer @@ -41,43 +44,52 @@ class DetachedPPOTrainer(DetachedTrainer): callbacks (List[Callback], defaults to []): the callbacks to call during training process generate_kwargs (dict, optional): the kwargs to use while model generating ''' - - def __init__(self, - experience_maker_holder_name_list: List[str], - strategy: Strategy, - actor: Actor, - critic: Critic, - actor_optim: Optimizer, - critic_optim: Optimizer, - train_batch_size: int = 8, - buffer_limit: int = 0, - buffer_cpu_offload: bool = True, - eps_clip: float = 0.2, - value_clip: float = 0.4, - experience_batch_size: int = 8, - max_epochs: int = 1, - dataloader_pin_memory: bool = True, - callbacks: List[Callback] = [], - **generate_kwargs) -> None: + + def __init__(self, + experience_maker_holder_name_list: List[str], + strategy: Strategy, + model: str, + pretrained: str = None, + lora_rank: int = 0, + train_batch_size: int = 8, + buffer_limit: int = 0, + buffer_cpu_offload: bool = True, + eps_clip: float = 0.2, + value_clip: float = 0.4, + experience_batch_size: int = 8, + max_epochs: int = 1, + dataloader_pin_memory: bool = True, + callbacks: List[Callback] = [], + **generate_kwargs) -> None: self.fully_initialized = False - generate_kwargs = _set_default_generate_kwargs(strategy, generate_kwargs, actor) - self.actor = actor - self.critic = critic + + self.strategy = strategy + # configure models, loss and optimizers + with self.strategy.model_init_context(): + self.actor, self.critic = get_cuda_actor_critic_from_args(model, pretrained, lora_rank) self.actor_loss_fn = PolicyLoss(eps_clip) self.critic_loss_fn = ValueLoss(value_clip) - self.actor_optim = actor_optim - self.critic_optim = critic_optim - super().__init__(experience_maker_holder_name_list, - strategy = strategy, + if isinstance(self.strategy, ColossalAIStrategy): + self.actor_optim = HybridAdam(self.actor.parameters(), lr=5e-6) + self.critic_optim = HybridAdam(self.critic.parameters(), lr=5e-6) + else: + self.actor_optim = Adam(self.actor.parameters(), lr=5e-6) + self.critic_optim = Adam(self.critic.parameters(), lr=5e-6) + (self.actor, self.actor_optim), (self.critic, self.critic_optim) = \ + self.strategy.prepare((self.actor, self.actor_optim), (self.critic, self.critic_optim)) + + generate_kwargs = _set_default_generate_kwargs(strategy, generate_kwargs, self.actor) + + super().__init__(experience_maker_holder_name_list, + strategy=strategy, train_batch_size=train_batch_size, buffer_limit=buffer_limit, buffer_cpu_offload=buffer_cpu_offload, - experience_batch_size = experience_batch_size, - max_epochs = max_epochs, - dataloader_pin_memory = dataloader_pin_memory, - callbacks = callbacks, + experience_batch_size=experience_batch_size, + max_epochs=max_epochs, + dataloader_pin_memory=dataloader_pin_memory, + callbacks=callbacks, generate_kwargs=generate_kwargs) - self.fully_initialized = True def update_remote_makers(self): @@ -98,9 +110,9 @@ 
def ready(self): def training_step(self, experience: Experience) -> Dict[str, float]: self.actor.train() self.critic.train() - + experience.to_device(torch.cuda.current_device()) - + num_actions = experience.action_mask.size(1) action_log_probs = self.actor(experience.sequences, num_actions, attention_mask=experience.attention_mask) actor_loss = self.actor_loss_fn(action_log_probs, @@ -110,7 +122,7 @@ def training_step(self, experience: Experience) -> Dict[str, float]: self.strategy.backward(actor_loss, self.actor, self.actor_optim) self.strategy.optimizer_step(self.actor_optim) self.actor_optim.zero_grad() - + values = self.critic(experience.sequences, action_mask=experience.action_mask, attention_mask=experience.attention_mask) @@ -118,15 +130,30 @@ def training_step(self, experience: Experience) -> Dict[str, float]: experience.values, experience.reward, action_mask=experience.action_mask) - + self.strategy.backward(critic_loss, self.critic, self.critic_optim) self.strategy.optimizer_step(self.critic_optim) self.critic_optim.zero_grad() - + return {'actor_loss': actor_loss.item(), 'critic_loss': critic_loss.item()} - def get_models(self): - return (self.actor, self.critic, self.actor_optim, self.critic_optim) + def strategy_save_actor(self, path: str, only_rank0: bool = False) -> None: + self.strategy.save_model(self.actor, path, only_rank0) + + def strategy_save_critic(self, path: str, only_rank0: bool = False) -> None: + self.strategy.save_model(self.critic, path, only_rank0) + + def strategy_save_actor_optim(self, path: str, only_rank0: bool = False) -> None: + self.strategy.save_optimizer(self.actor_optim, path, only_rank0) + + def strategy_save_critic_optim(self, path: str, only_rank0: bool = False) -> None: + self.strategy.save_optimizer(self.critic_optim, path, only_rank0) + + def get_actor(self): + return self.actor + + def get_critic(self): + return self.critic def _set_default_generate_kwargs(strategy: Strategy, generate_kwargs: dict, actor: Actor) -> None: origin_model = strategy._unwrap_actor(actor) diff --git a/applications/ChatGPT/chatgpt/trainer/utils.py b/applications/ChatGPT/chatgpt/trainer/utils.py index 6c9f7f085f8c..bda3292bc146 100644 --- a/applications/ChatGPT/chatgpt/trainer/utils.py +++ b/applications/ChatGPT/chatgpt/trainer/utils.py @@ -1,5 +1,22 @@ import torch.distributed as dist - - +from chatgpt.models.bloom import BLOOMActor, BLOOMCritic +from chatgpt.models.gpt import GPTActor, GPTCritic +from chatgpt.models.opt import OPTActor, OPTCritic +import torch def is_rank_0() -> bool: return not dist.is_initialized() or dist.get_rank() == 0 + + +def get_cuda_actor_critic_from_args(model:str, pretrained: str = None, lora_rank=0): + if model == 'gpt2': + actor = GPTActor(pretrained=pretrained, lora_rank=lora_rank).to(torch.cuda.current_device()) + critic = GPTCritic(pretrained=pretrained, lora_rank=lora_rank).to(torch.cuda.current_device()) + elif model == 'bloom': + actor = BLOOMActor(pretrained=pretrained, lora_rank=lora_rank).to(torch.cuda.current_device()) + critic = BLOOMCritic(pretrained=pretrained, lora_rank=lora_rank).to(torch.cuda.current_device()) + elif model == 'opt': + actor = OPTActor(pretrained=pretrained, lora_rank=lora_rank).to(torch.cuda.current_device()) + critic = OPTCritic(pretrained=pretrained, lora_rank=lora_rank).to(torch.cuda.current_device()) + else: + raise ValueError(f'Unsupported model "{model}"') + return actor, critic \ No newline at end of file diff --git a/applications/ChatGPT/examples/1m1t.py b/applications/ChatGPT/examples/1m1t.py 
index 7ce47a1ccee7..5c205ba59f70 100644 --- a/applications/ChatGPT/examples/1m1t.py +++ b/applications/ChatGPT/examples/1m1t.py @@ -20,6 +20,7 @@ # TODO: update maker actor/critic + def main(args): # configure strategy if args.strategy == 'naive': @@ -33,36 +34,6 @@ def main(args): else: raise ValueError(f'Unsupported strategy "{args.strategy}"') - # configure model / optimizer - with strategy.model_init_context(): - if args.model == 'gpt2': - actor = GPTActor(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device()) - critic = GPTCritic(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device()) - elif args.model == 'bloom': - actor = BLOOMActor(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device()) - critic = BLOOMCritic(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device()) - elif args.model == 'opt': - actor = OPTActor(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device()) - critic = OPTCritic(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device()) - else: - raise ValueError(f'Unsupported model "{args.model}"') - - initial_model = deepcopy(actor) - reward_model = RewardModel(deepcopy(critic.model), deepcopy(critic.value_head)).to(torch.cuda.current_device()) - - if args.strategy.startswith('colossalai'): - actor_optim = HybridAdam(actor.parameters(), lr=5e-6) - critic_optim = HybridAdam(critic.parameters(), lr=5e-6) - else: - actor_optim = Adam(actor.parameters(), lr=5e-6) - critic_optim = Adam(critic.parameters(), lr=5e-6) - - (actor, actor_optim), (critic, critic_optim), reward_model, initial_model = strategy.prepare( - (actor, actor_optim), (critic, critic_optim), reward_model, initial_model) - - actor_maker = deepcopy(actor) - critic_maker = deepcopy(critic) - # configure tokenizer if args.model == 'gpt2': tokenizer = GPT2Tokenizer.from_pretrained('gpt2') @@ -75,28 +46,17 @@ def main(args): else: raise ValueError(f'Unsupported model "{args.model}"') - def tokenize_fn(texts): - # MUST padding to max length to ensure inputs of all ranks have the same length - # Different length may lead to hang when using gemini, as different generation steps - batch = tokenizer(texts, return_tensors='pt', max_length=96, padding='max_length', truncation=True) - return {k: v.cuda() for k, v in batch.items()} - - # configure sampler - dataset = pd.read_csv(args.prompt_path)['prompt'] - sampler = strategy.setup_sampler(dataset) - - # configure Ray Actor + # configure Trainer trainer_ref = DetachedPPOTrainer.options(name="trainer1", num_gpus=1, max_concurrency=3).remote( - experience_maker_holder_name_list= ["maker1"], - strategy = strategy, - actor = actor, - critic = critic, - actor_optim = actor_optim, - critic_optim = critic_optim, + experience_maker_holder_name_list=["maker1"], + strategy=strategy, + model=args.model, + pretrained=args.pretrain, + lora_rank=args.lora_rank, train_batch_size=args.train_batch_size, - buffer_limit = 16, - experience_batch_size = args.experience_batch_size, - max_epoch = args.max_epochs, + buffer_limit=16, + experience_batch_size=args.experience_batch_size, + max_epoch=args.max_epochs, #kwargs: max_length=128, do_sample=True, @@ -105,14 +65,22 @@ def tokenize_fn(texts): pad_token_id=tokenizer.pad_token_id, eos_token_id=tokenizer.eos_token_id, debug=args.debug, - ) - + ) + + def tokenize_fn(texts): + # MUST padding to max length to ensure inputs of all ranks have the same length + # Different length may lead 
to hang when using gemini, as different generation steps + batch = tokenizer(texts, return_tensors='pt', max_length=96, padding='max_length', truncation=True) + return {k: v.cuda() for k, v in batch.items()} + + # configure Experience Maker experience_holder_ref = ExperienceMakerHolder.options(name="maker1", num_gpus=1, max_concurrency=2).remote( - ["trainer1"], - actor_maker, - critic_maker, - reward_model, - initial_model, + detached_trainer_name_list=["trainer1"], + strategy=strategy, + model=args.model, + pretrained=args.pretrain, + lora_rank=args.lora_rank, + kl_coef=0.1, experience_batch_size=args.experience_batch_size, #kwargs: max_length=128, @@ -122,26 +90,27 @@ def tokenize_fn(texts): pad_token_id=tokenizer.pad_token_id, eos_token_id=tokenizer.eos_token_id, debug=args.debug, - ) - + ) + + # configure sampler + dataset = pd.read_csv(args.prompt_path)['prompt'] + sampler = strategy.setup_sampler(dataset) + print("waiting for trainer...") ray.get(trainer_ref.ready.remote()) print("...ready") trainer_done_ref = trainer_ref.fit.remote(num_episodes=args.num_episodes, max_timesteps=args.max_timesteps, update_timesteps=args.update_timesteps) - maker_done_ref = experience_holder_ref.workingloop.remote(sampler, tokenize_fn, times=args.num_episodes * args.max_timesteps * args.update_timesteps + 3) ray.get([trainer_done_ref, maker_done_ref]) - (trainer_actor, trainer_critic, trainer_actor_optim, trainer_critic_optim) = trainer_ref.get_models.remote() - + # save model checkpoint after fitting - trainer_ref.strategy_save_model.remote(trainer_actor, args.save_path, only_rank0=True) + trainer_ref.strategy_save_actor.remote(args.save_path, only_rank0=True) # save optimizer checkpoint on all ranks if args.need_optim_ckpt: - trainer_ref.strategy_save_optimizer.remote(trainer_actor_optim, - 'actor_optim_checkpoint_prompts_%d.pt' % (torch.cuda.current_device()), - only_rank0=False) + trainer_ref.strategy_save_actor_optim.remote('actor_optim_checkpoint_prompts_%d.pt' % (torch.cuda.current_device()), + only_rank0=False) if __name__ == '__main__': @@ -161,8 +130,8 @@ def tokenize_fn(texts): parser.add_argument('--train_batch_size', type=int, default=8) parser.add_argument('--experience_batch_size', type=int, default=8) parser.add_argument('--lora_rank', type=int, default=0, help="low-rank adaptation matrices rank") - + parser.add_argument('--debug', action='store_true') - args = parser.parse_args() + args = parser.parse_args() ray.init() main(args) diff --git a/applications/ChatGPT/examples/1m1t.sh b/applications/ChatGPT/examples/1m1t.sh index 72ff0c300161..8c1806094545 100644 --- a/applications/ChatGPT/examples/1m1t.sh +++ b/applications/ChatGPT/examples/1m1t.sh @@ -15,7 +15,7 @@ set_n_least_used_CUDA_VISIBLE_DEVICES() { set_n_least_used_CUDA_VISIBLE_DEVICES 2 -torchrun --standalone 1m1t.py "/home/lccsr/HPC-AI/ColossalAI/applications/ChatGPT/examples/awesome-chatgpt-prompts/prompts.csv" \ +torchrun --standalone 1m1t.py "./awesome-chatgpt-prompts/prompts.csv" \ --strategy naive --lora_rank 2 # ray job submit --working-dir /home/lccsr/HPC-AI/ColossalAI/applications/ChatGPT/examples -- python 1m1t.py \ From 42aa4c70a63c77dbd749be76ca15b615f014e21f Mon Sep 17 00:00:00 2001 From: csric Date: Wed, 22 Mar 2023 18:11:34 +0800 Subject: [PATCH 10/36] verified maker update --- .../chatgpt/experience_maker/detached.py | 17 ++++++++++++++--- .../ChatGPT/chatgpt/trainer/detached_base.py | 7 +++---- .../ChatGPT/chatgpt/trainer/detached_ppo.py | 3 ++- applications/ChatGPT/examples/1m1t.py | 8 ++++---- 
applications/ChatGPT/examples/1m1t.sh | 6 +----- 5 files changed, 24 insertions(+), 17 deletions(-) diff --git a/applications/ChatGPT/chatgpt/experience_maker/detached.py b/applications/ChatGPT/chatgpt/experience_maker/detached.py index 0a2a419db930..d4f0d0309e2a 100644 --- a/applications/ChatGPT/chatgpt/experience_maker/detached.py +++ b/applications/ChatGPT/chatgpt/experience_maker/detached.py @@ -10,6 +10,7 @@ from chatgpt.trainer.strategies import Strategy from chatgpt.trainer.utils import is_rank_0, get_cuda_actor_critic_from_args from copy import deepcopy +from threading import Lock @ray.remote class ExperienceMakerHolder: @@ -46,7 +47,10 @@ def __init__(self, self.generate_kwargs = generate_kwargs for name in detached_trainer_name_list: self.target_trainer_list.append(ray.get_actor(name)) - + + self.model_visit_lock = Lock() + + # copy from ../trainer/base.py def _make_experience(self, inputs: Union[Tensor, Dict[str, Tensor]]) -> Experience: if isinstance(inputs, Tensor): @@ -89,9 +93,11 @@ def workingloop(self, sampler: DistributedSampler, tokenizer: Optional[Callable[ inputs = tokenizer(rand_prompts) else: inputs = rand_prompts + self.model_visit_lock.acquire() self.make_and_send(inputs) - - def update_experience_maker(self, new_actor, new_critic): + self.model_visit_lock.release() + + def update_experience_maker(self, new_actor: Actor, new_critic: Critic): # TODO: parameter update ''' pseudo: @@ -99,8 +105,13 @@ def update_experience_maker(self, new_actor, new_critic): self.experience_maker.critic.update() ''' # TODO: reduce malloc + self.model_visit_lock.acquire() with torch.no_grad(): + print("*******UPDATE*******") + # backup = deepcopy(self.experience_maker.critic) self.experience_maker.actor = new_actor self.experience_maker.critic = new_critic + # print(sum((x - y).abs().sum() for x,y in zip(backup.state_dict().values(), self.experience_maker.critic.state_dict().values()))) + self.model_visit_lock.release() pass \ No newline at end of file diff --git a/applications/ChatGPT/chatgpt/trainer/detached_base.py b/applications/ChatGPT/chatgpt/trainer/detached_base.py index a10fd341205f..e935ba6f50fc 100644 --- a/applications/ChatGPT/chatgpt/trainer/detached_base.py +++ b/applications/ChatGPT/chatgpt/trainer/detached_base.py @@ -41,7 +41,7 @@ def __init__(self, buffer_limit: int = 0, buffer_cpu_offload: bool = True, experience_batch_size: int = 8, - max_epochs: int = 1, + max_epochs: int = 10, dataloader_pin_memory: bool = True, callbacks: List[Callback] = [], **generate_kwargs)->None: @@ -88,10 +88,9 @@ def fit(self, num_episodes: int = 50000, max_timesteps: int = 500, update_timest for timestep in tqdm(range(max_timesteps), desc=f'Episode [{episode+1}/{num_episodes}]', disable=not is_rank_0()): - for _ in range(update_timesteps): - self._learn() + self._learn() # assume those remote holders are working - # self.update_remote_makers() + self.update_remote_makers() self._on_episode_end(episode) self._on_fit_end() diff --git a/applications/ChatGPT/chatgpt/trainer/detached_ppo.py b/applications/ChatGPT/chatgpt/trainer/detached_ppo.py index 5373bd4dcd06..47192f18f338 100644 --- a/applications/ChatGPT/chatgpt/trainer/detached_ppo.py +++ b/applications/ChatGPT/chatgpt/trainer/detached_ppo.py @@ -57,7 +57,7 @@ def __init__(self, eps_clip: float = 0.2, value_clip: float = 0.4, experience_batch_size: int = 8, - max_epochs: int = 1, + max_epochs: int = 10, dataloader_pin_memory: bool = True, callbacks: List[Callback] = [], **generate_kwargs) -> None: @@ -98,6 +98,7 @@ def 
update_remote_makers(self): self.update_target_holder_list(self.target_holder_name_list) for target_holder in self.target_holder_list: # TODO: reduce malloc + with torch.no_grad(): target_holder.update_experience_maker.remote(self.actor, self.critic) diff --git a/applications/ChatGPT/examples/1m1t.py b/applications/ChatGPT/examples/1m1t.py index 5c205ba59f70..8c7d5d3ece18 100644 --- a/applications/ChatGPT/examples/1m1t.py +++ b/applications/ChatGPT/examples/1m1t.py @@ -56,7 +56,7 @@ def main(args): train_batch_size=args.train_batch_size, buffer_limit=16, experience_batch_size=args.experience_batch_size, - max_epoch=args.max_epochs, + max_epochs=args.max_epochs, #kwargs: max_length=128, do_sample=True, @@ -123,9 +123,9 @@ def tokenize_fn(texts): parser.add_argument('--pretrain', type=str, default=None) parser.add_argument('--save_path', type=str, default='actor_checkpoint_prompts.pt') parser.add_argument('--need_optim_ckpt', type=bool, default=False) - parser.add_argument('--num_episodes', type=int, default=1) - parser.add_argument('--max_timesteps', type=int, default=1) - parser.add_argument('--update_timesteps', type=int, default=1) + parser.add_argument('--num_episodes', type=int, default=10) + parser.add_argument('--max_timesteps', type=int, default=10) + parser.add_argument('--update_timesteps', type=int, default=10) parser.add_argument('--max_epochs', type=int, default=5) parser.add_argument('--train_batch_size', type=int, default=8) parser.add_argument('--experience_batch_size', type=int, default=8) diff --git a/applications/ChatGPT/examples/1m1t.sh b/applications/ChatGPT/examples/1m1t.sh index 8c1806094545..cc5ee095da6d 100644 --- a/applications/ChatGPT/examples/1m1t.sh +++ b/applications/ChatGPT/examples/1m1t.sh @@ -13,11 +13,7 @@ set_n_least_used_CUDA_VISIBLE_DEVICES() { echo "CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES" } -set_n_least_used_CUDA_VISIBLE_DEVICES 2 +set_n_least_used_CUDA_VISIBLE_DEVICES 3 torchrun --standalone 1m1t.py "./awesome-chatgpt-prompts/prompts.csv" \ --strategy naive --lora_rank 2 - -# ray job submit --working-dir /home/lccsr/HPC-AI/ColossalAI/applications/ChatGPT/examples -- python 1m1t.py \ -# "/home/lccsr/HPC-AI/ColossalAI/applications/ChatGPT/examples/awesome-chatgpt-prompts/prompts.csv" \ -# --strategy naive --lora_rank 2 \ No newline at end of file From 26d82b586215314a484c39f62e52ed8d2bce65af Mon Sep 17 00:00:00 2001 From: csric Date: Thu, 23 Mar 2023 10:52:22 +0800 Subject: [PATCH 11/36] nothing --- applications/ChatGPT/examples/1m1t.py | 4 ---- applications/ChatGPT/examples/1m1t.sh | 2 +- applications/ChatGPT/examples/train_prompts.sh | 7 +++++-- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/applications/ChatGPT/examples/1m1t.py b/applications/ChatGPT/examples/1m1t.py index 8c7d5d3ece18..5246212bb87f 100644 --- a/applications/ChatGPT/examples/1m1t.py +++ b/applications/ChatGPT/examples/1m1t.py @@ -3,10 +3,6 @@ import pandas as pd import torch -from chatgpt.models.base import RewardModel -from chatgpt.models.bloom import BLOOMActor, BLOOMCritic -from chatgpt.models.gpt import GPTActor, GPTCritic -from chatgpt.models.opt import OPTActor, OPTCritic from chatgpt.trainer import PPOTrainer, DetachedPPOTrainer from chatgpt.trainer.strategies import ColossalAIStrategy, DDPStrategy, NaiveStrategy from chatgpt.experience_maker import NaiveExperienceMaker, ExperienceMakerHolder diff --git a/applications/ChatGPT/examples/1m1t.sh b/applications/ChatGPT/examples/1m1t.sh index cc5ee095da6d..e06a955ffb7a 100644 --- 
a/applications/ChatGPT/examples/1m1t.sh +++ b/applications/ChatGPT/examples/1m1t.sh @@ -16,4 +16,4 @@ set_n_least_used_CUDA_VISIBLE_DEVICES() { set_n_least_used_CUDA_VISIBLE_DEVICES 3 torchrun --standalone 1m1t.py "./awesome-chatgpt-prompts/prompts.csv" \ - --strategy naive --lora_rank 2 + --strategy naive --lora_rank 2 --pretrain "bigscience/bloom-560m" --model 'bloom' diff --git a/applications/ChatGPT/examples/train_prompts.sh b/applications/ChatGPT/examples/train_prompts.sh index dcb0aa7f07ec..954097bbf813 100755 --- a/applications/ChatGPT/examples/train_prompts.sh +++ b/applications/ChatGPT/examples/train_prompts.sh @@ -17,5 +17,8 @@ set_n_least_used_CUDA_VISIBLE_DEVICES 2 # torchrun --standalone --nproc_per_node=2 train_prompts.py prompts.csv --strategy colossalai_zero2 -torchrun --standalone --nproc_per_node=2 train_prompts.py "/home/lccsr/HPC-AI/ColossalAI/applications/ChatGPT/examples/awesome-chatgpt-prompts/prompts.csv" \ - --strategy colossalai_zero2 --lora_rank 2 +torchrun --standalone --nproc_per_node=2 train_prompts.py "./awesome-chatgpt-prompts/prompts.csv" \ + --strategy colossalai_zero2 --lora_rank 2 --pretrain "bigscience/bloom-560m" --model 'bloom' + +# torchrun --standalone --nproc_per_node=2 train_prompts.py "./awesome-chatgpt-prompts/prompts.csv" \ # --strategy colossalai_zero2 --lora_rank 2 \ No newline at end of file From 517ff22d63f73df76b130e8d9a0f436f959289ad Mon Sep 17 00:00:00 2001 From: csric Date: Thu, 23 Mar 2023 11:10:24 +0800 Subject: [PATCH 12/36] add ignore --- applications/ChatGPT/.gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/applications/ChatGPT/.gitignore b/applications/ChatGPT/.gitignore index 40f3f6debeee..2c576f093e75 100644 --- a/applications/ChatGPT/.gitignore +++ b/applications/ChatGPT/.gitignore @@ -144,3 +144,5 @@ docs/.build # ignore version.py generated by setup.py colossalai/version.py + +examples/awesome-chatgpt-prompts/ \ No newline at end of file From b91348da4293ae615057a53022ad1bfeec8f05df Mon Sep 17 00:00:00 2001 From: csric Date: Fri, 24 Mar 2023 13:33:06 +0800 Subject: [PATCH 13/36] analyze loss issue --- .../chatgpt/experience_maker/detached.py | 26 +++++++++---------- .../ChatGPT/chatgpt/trainer/detached_base.py | 10 +++---- .../ChatGPT/chatgpt/trainer/detached_ppo.py | 26 +++++++++++-------- applications/ChatGPT/chatgpt/trainer/ppo.py | 2 ++ applications/ChatGPT/examples/1m1t.py | 4 +-- applications/ChatGPT/examples/1m1t.sh | 2 +- .../ChatGPT/examples/train_prompts.py | 1 + .../ChatGPT/examples/train_prompts.sh | 6 ++--- 8 files changed, 41 insertions(+), 36 deletions(-) diff --git a/applications/ChatGPT/chatgpt/experience_maker/detached.py b/applications/ChatGPT/chatgpt/experience_maker/detached.py index d4f0d0309e2a..f1ebad69f455 100644 --- a/applications/ChatGPT/chatgpt/experience_maker/detached.py +++ b/applications/ChatGPT/chatgpt/experience_maker/detached.py @@ -38,18 +38,20 @@ def __init__(self, actor, critic = get_cuda_actor_critic_from_args(model, pretrained, lora_rank) initial_model = deepcopy(actor) reward_model = RewardModel(deepcopy(critic.model), deepcopy(critic.value_head)).to(torch.cuda.current_device()) - + actor, critic, reward_model, initial_model= \ strategy.prepare(actor, critic, reward_model, initial_model) + self.experience_maker = NaiveExperienceMaker(actor, critic, reward_model, initial_model,kl_coef) self.target_trainer_list = [] self.experience_batch_size = experience_batch_size self.generate_kwargs = generate_kwargs for name in detached_trainer_name_list: 
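# ray.get_actor(name) fetches the handle of the trainer actor registered under that name via .options(name=...); this assumes every trainer in the list was created and named before this holder starts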
self.target_trainer_list.append(ray.get_actor(name)) - + self.model_visit_lock = Lock() - + self.strategy = strategy + self.strategy.save_model(self.experience_maker.actor, 'maker_actor_test.pt') # copy from ../trainer/base.py def _make_experience(self, inputs: Union[Tensor, Dict[str, Tensor]]) -> Experience: @@ -71,7 +73,7 @@ def make_and_send(self, inputs): chosen_trainer = None min_length = None if 'debug' in self.generate_kwargs and self.generate_kwargs['debug'] == True: - print("[maker] choosing tartget") + print("[maker] choosing target trainer") while chosen_trainer is None: for target_trainer in self.target_trainer_list: temp_length = ray.get(target_trainer.buffer_get_length.remote()) @@ -83,7 +85,7 @@ def make_and_send(self, inputs): min_length = temp_length chosen_trainer = target_trainer if 'debug' in self.generate_kwargs and self.generate_kwargs['debug'] == True: - print("[maker] sending") + print("[maker] sending exp") chosen_trainer.buffer_append.remote(experience) def workingloop(self, sampler: DistributedSampler, tokenizer: Optional[Callable[[Any], dict]] = None, times=5000 * 50000): @@ -93,25 +95,23 @@ def workingloop(self, sampler: DistributedSampler, tokenizer: Optional[Callable[ inputs = tokenizer(rand_prompts) else: inputs = rand_prompts + self.model_visit_lock.acquire() self.make_and_send(inputs) self.model_visit_lock.release() def update_experience_maker(self, new_actor: Actor, new_critic: Critic): ''' called by trainer ''' # TODO: reduce malloc self.model_visit_lock.acquire() with torch.no_grad(): - print("*******UPDATE*******") - # backup = deepcopy(self.experience_maker.critic) + if 'debug' in self.generate_kwargs and self.generate_kwargs['debug'] == True: + print("[maker] UPDATE ") self.experience_maker.actor = new_actor self.experience_maker.critic = new_critic - # print(sum((x - y).abs().sum() for x,y in zip(backup.state_dict().values(), self.experience_maker.critic.state_dict().values()))) + # print(sum((x - y).abs().sum() for x,y in zip(self.experience_maker.reward_model.state_dict().values(), self.experience_maker.critic.state_dict().values()))) + # print(sum((x - y).abs().sum() for x,y in zip(self.experience_maker.initial_model.state_dict().values(), self.experience_maker.actor.state_dict().values()))) self.model_visit_lock.release() pass - \ No newline at end of file diff --git a/applications/ChatGPT/chatgpt/trainer/detached_base.py b/applications/ChatGPT/chatgpt/trainer/detached_base.py index e935ba6f50fc..9d8034ea7e29 100644 --- a/applications/ChatGPT/chatgpt/trainer/detached_base.py +++ b/applications/ChatGPT/chatgpt/trainer/detached_base.py @@ -14,7 +14,7 @@ from .utils import is_rank_0 import ray - +import sys # @ray.remote class DetachedTrainer(ABC): @@ -63,7 +63,7 @@ def update_target_holder_list(self, experience_maker_holder_name_list): self.target_holder_list.append(ray.get_actor(name)) @abstractmethod - def update_remote_makers(self): + def _update_remote_makers(self): pass @abstractmethod @@ -85,13 +85,11 @@ def fit(self, num_episodes: int = 50000, max_timesteps: int = 500, update_timest self._on_fit_start() for episode in range(num_episodes): self._on_episode_start(episode) - for timestep in tqdm(range(max_timesteps), + for timestep in tqdm(range(max_timesteps//update_timesteps), desc=f'Episode [{episode+1}/{num_episodes}]', disable=not is_rank_0()): self._learn() - # assume those remote holders are working - 
self.update_remote_makers() - + self._update_remote_makers() self._on_episode_end(episode) self._on_fit_end() diff --git a/applications/ChatGPT/chatgpt/trainer/detached_ppo.py b/applications/ChatGPT/chatgpt/trainer/detached_ppo.py index 47192f18f338..fb86e41988ca 100644 --- a/applications/ChatGPT/chatgpt/trainer/detached_ppo.py +++ b/applications/ChatGPT/chatgpt/trainer/detached_ppo.py @@ -21,7 +21,7 @@ from .utils import is_rank_0, get_cuda_actor_critic_from_args import ray - +import copy @ray.remote class DetachedPPOTrainer(DetachedTrainer): @@ -77,9 +77,11 @@ def __init__(self, self.critic_optim = Adam(self.critic.parameters(), lr=5e-6) (self.actor, self.actor_optim), (self.critic, self.critic_optim) = \ self.strategy.prepare((self.actor, self.actor_optim), (self.critic, self.critic_optim)) - + self.strategy.save_model(self.actor, "trainer_actor_test.pt") + # self.initial_model = copy.deepcopy(self.actor) + # self.reward_model = copy.deepcopy(self.critic) generate_kwargs = _set_default_generate_kwargs(strategy, generate_kwargs, self.actor) - + super().__init__(experience_maker_holder_name_list, strategy=strategy, train_batch_size=train_batch_size, @@ -89,18 +91,17 @@ def __init__(self, max_epochs=max_epochs, dataloader_pin_memory=dataloader_pin_memory, callbacks=callbacks, - generate_kwargs=generate_kwargs) + **generate_kwargs) self.fully_initialized = True - def update_remote_makers(self): + def _update_remote_makers(self): # TODO: balance duties if is_rank_0(): self.update_target_holder_list(self.target_holder_name_list) - for target_holder in self.target_holder_list: - # TODO: reduce malloc - - with torch.no_grad(): - target_holder.update_experience_maker.remote(self.actor, self.critic) + for target_holder in self.target_holder_list: + # TODO: reduce malloc + with torch.no_grad(): + ray.get(target_holder.update_experience_maker.remote(self.actor, self.critic)) def ready(self): # indicate that self is fully initialized @@ -113,7 +114,6 @@ def training_step(self, experience: Experience) -> Dict[str, float]: self.critic.train() experience.to_device(torch.cuda.current_device()) - num_actions = experience.action_mask.size(1) action_log_probs = self.actor(experience.sequences, num_actions, attention_mask=experience.attention_mask) actor_loss = self.actor_loss_fn(action_log_probs, @@ -136,6 +136,9 @@ def training_step(self, experience: Experience) -> Dict[str, float]: self.strategy.optimizer_step(self.critic_optim) self.critic_optim.zero_grad() + # print(sum(((x - y).abs().sum() for x, y in zip(self.initial_model.state_dict().values(),self.actor.state_dict().values())))) + # print(sum(((x - y).abs().sum() for x, y in zip(self.reward_model.state_dict().values(), self.critic.state_dict().values())))) + return {'actor_loss': actor_loss.item(), 'critic_loss': critic_loss.item()} def strategy_save_actor(self, path: str, only_rank0: bool = False) -> None: @@ -167,3 +170,4 @@ def _set_default_generate_kwargs(strategy: Strategy, generate_kwargs: dict, acto new_kwargs['update_model_kwargs_fn'] = update_model_kwargs_fn return new_kwargs + \ No newline at end of file diff --git a/applications/ChatGPT/chatgpt/trainer/ppo.py b/applications/ChatGPT/chatgpt/trainer/ppo.py index dacab4784039..2dfd9f739bac 100644 --- a/applications/ChatGPT/chatgpt/trainer/ppo.py +++ b/applications/ChatGPT/chatgpt/trainer/ppo.py @@ -100,6 +100,8 @@ def training_step(self, experience: Experience) -> Dict[str, float]: self.strategy.optimizer_step(self.critic_optim) self.critic_optim.zero_grad() + # print(sum(((x-y).abs().sum() 
for x, y in zip(self.experience_maker.initial_model.state_dict().values(),self.actor.state_dict().values())))) + # print(sum(((x - y).abs().sum() for x, y in zip(self.experience_maker.reward_model.state_dict().values(), self.critic.state_dict().values())))) return {'actor_loss': actor_loss.item(), 'critic_loss': critic_loss.item()} diff --git a/applications/ChatGPT/examples/1m1t.py b/applications/ChatGPT/examples/1m1t.py index 5246212bb87f..5c2ff15c7551 100644 --- a/applications/ChatGPT/examples/1m1t.py +++ b/applications/ChatGPT/examples/1m1t.py @@ -97,8 +97,8 @@ def tokenize_fn(texts): print("...ready") trainer_done_ref = trainer_ref.fit.remote(num_episodes=args.num_episodes, max_timesteps=args.max_timesteps, update_timesteps=args.update_timesteps) - maker_done_ref = experience_holder_ref.workingloop.remote(sampler, tokenize_fn, times=args.num_episodes * args.max_timesteps * args.update_timesteps + 3) - + # maker_done_ref = experience_holder_ref.workingloop.remote(sampler, tokenize_fn, times=args.num_episodes * args.max_timesteps * args.update_timesteps + 3) + maker_done_ref = experience_holder_ref.workingloop.remote(sampler, tokenize_fn, times=1) ray.get([trainer_done_ref, maker_done_ref]) # save model checkpoint after fitting diff --git a/applications/ChatGPT/examples/1m1t.sh b/applications/ChatGPT/examples/1m1t.sh index e06a955ffb7a..659cc7f7c6ab 100644 --- a/applications/ChatGPT/examples/1m1t.sh +++ b/applications/ChatGPT/examples/1m1t.sh @@ -16,4 +16,4 @@ set_n_least_used_CUDA_VISIBLE_DEVICES() { set_n_least_used_CUDA_VISIBLE_DEVICES 3 torchrun --standalone 1m1t.py "./awesome-chatgpt-prompts/prompts.csv" \ - --strategy naive --lora_rank 2 --pretrain "bigscience/bloom-560m" --model 'bloom' + --strategy naive --lora_rank 2 --pretrain "facebook/opt-350m" --model 'opt' # --debug diff --git a/applications/ChatGPT/examples/train_prompts.py b/applications/ChatGPT/examples/train_prompts.py index 8f48a11c33e8..028896b24fad 100644 --- a/applications/ChatGPT/examples/train_prompts.py +++ b/applications/ChatGPT/examples/train_prompts.py @@ -89,6 +89,7 @@ def tokenize_fn(texts): max_epochs=args.max_epochs, train_batch_size=args.train_batch_size, experience_batch_size=args.experience_batch_size, + sample_replay_buffer = True, tokenizer=tokenize_fn, max_length=128, do_sample=True, diff --git a/applications/ChatGPT/examples/train_prompts.sh b/applications/ChatGPT/examples/train_prompts.sh index 954097bbf813..44bc0b516717 100755 --- a/applications/ChatGPT/examples/train_prompts.sh +++ b/applications/ChatGPT/examples/train_prompts.sh @@ -13,12 +13,12 @@ set_n_least_used_CUDA_VISIBLE_DEVICES() { echo "CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES" } -set_n_least_used_CUDA_VISIBLE_DEVICES 2 +set_n_least_used_CUDA_VISIBLE_DEVICES 1 # torchrun --standalone --nproc_per_node=2 train_prompts.py prompts.csv --strategy colossalai_zero2 -torchrun --standalone --nproc_per_node=2 train_prompts.py "./awesome-chatgpt-prompts/prompts.csv" \ - --strategy colossalai_zero2 --lora_rank 2 --pretrain "bigscience/bloom-560m" --model 'bloom' +torchrun --standalone --nproc_per_node=1 train_prompts.py "./awesome-chatgpt-prompts/prompts.csv" \ + --strategy naive --lora_rank 2 --pretrain "facebook/opt-350m" --model 'opt' # torchrun --standalone --nproc_per_node=2 train_prompts.py "./awesome-chatgpt-prompts/prompts.csv" \ # --strategy colossalai_zero2 --lora_rank 2 \ No newline at end of file From ebd2be94b880502b71438332f27bdf42042fbef4 Mon Sep 17 00:00:00 2001 From: csric Date: Fri, 24 Mar 2023 16:56:45 +0800 Subject: [PATCH 
14/36] remove some debug codes --- .../chatgpt/experience_maker/detached.py | 2 -- .../ChatGPT/chatgpt/trainer/detached_base.py | 8 +++-- .../ChatGPT/chatgpt/trainer/detached_ppo.py | 6 ---- applications/ChatGPT/chatgpt/trainer/ppo.py | 2 -- applications/ChatGPT/examples/1m1t.py | 1 - applications/ChatGPT/examples/1m1t.sh | 34 +++++++++---------- 6 files changed, 22 insertions(+), 31 deletions(-) diff --git a/applications/ChatGPT/chatgpt/experience_maker/detached.py b/applications/ChatGPT/chatgpt/experience_maker/detached.py index cb95bf4bba85..29453376f52f 100644 --- a/applications/ChatGPT/chatgpt/experience_maker/detached.py +++ b/applications/ChatGPT/chatgpt/experience_maker/detached.py @@ -128,6 +128,4 @@ def update_experience_maker(self, new_actor: Actor, new_critic: Critic): print("[maker] UPDATE ") self.experience_maker.actor = new_actor self.experience_maker.critic = new_critic - # print(sum((x - y).abs().sum() for x,y in zip(self.experience_maker.reward_model.state_dict().values(), self.experience_maker.critic.state_dict().values()))) - # print(sum((x - y).abs().sum() for x,y in zip(self.experience_maker.initial_model.state_dict().values(), self.experience_maker.actor.state_dict().values()))) self.model_visit_lock.release() diff --git a/applications/ChatGPT/chatgpt/trainer/detached_base.py b/applications/ChatGPT/chatgpt/trainer/detached_base.py index 9d8034ea7e29..c1faffa8a61f 100644 --- a/applications/ChatGPT/chatgpt/trainer/detached_base.py +++ b/applications/ChatGPT/chatgpt/trainer/detached_base.py @@ -17,6 +17,8 @@ import sys # @ray.remote + + class DetachedTrainer(ABC): ''' Base class for detached rlhf trainers. @@ -44,7 +46,7 @@ def __init__(self, max_epochs: int = 10, dataloader_pin_memory: bool = True, callbacks: List[Callback] = [], - **generate_kwargs)->None: + **generate_kwargs) -> None: super().__init__() self.strategy = strategy self.detached_replay_buffer = DetachedReplayBuffer(train_batch_size, limit=buffer_limit, cpu_offload=buffer_cpu_offload) @@ -85,7 +87,7 @@ def fit(self, num_episodes: int = 50000, max_timesteps: int = 500, update_timest self._on_fit_start() for episode in range(num_episodes): self._on_episode_start(episode) - for timestep in tqdm(range(max_timesteps//update_timesteps), + for timestep in tqdm(range(max_timesteps // update_timesteps), desc=f'Episode [{episode+1}/{num_episodes}]', disable=not is_rank_0()): self._learn() @@ -119,4 +121,4 @@ def _on_episode_start(self, episode: int) -> None: def _on_episode_end(self, episode: int) -> None: for callback in self.callbacks: - callback.on_episode_end(episode) \ No newline at end of file + callback.on_episode_end(episode) diff --git a/applications/ChatGPT/chatgpt/trainer/detached_ppo.py b/applications/ChatGPT/chatgpt/trainer/detached_ppo.py index 728f6e99da1a..4f5558d81993 100644 --- a/applications/ChatGPT/chatgpt/trainer/detached_ppo.py +++ b/applications/ChatGPT/chatgpt/trainer/detached_ppo.py @@ -75,8 +75,6 @@ def __init__(self, self.critic_optim = Adam(self.critic.parameters(), lr=5e-6) (self.actor, self.actor_optim), (self.critic, self.critic_optim) = \ self.strategy.prepare((self.actor, self.actor_optim), (self.critic, self.critic_optim)) - # self.initial_model = copy.deepcopy(self.actor) - # self.reward_model = copy.deepcopy(self.critic) generate_kwargs = _set_default_generate_kwargs(strategy, generate_kwargs, self.actor) super().__init__(experience_maker_holder_name_list, @@ -134,10 +132,6 @@ def training_step(self, experience: Experience) -> Dict[str, float]: self.strategy.backward(critic_loss, 
self.critic, self.critic_optim) self.strategy.optimizer_step(self.critic_optim) self.critic_optim.zero_grad() - - # print(sum(((x - y).abs().sum() for x, y in zip(self.initial_model.state_dict().values(),self.actor.state_dict().values())))) - # print(sum(((x - y).abs().sum() for x, y in zip(self.reward_model.state_dict().values(), self.critic.state_dict().values())))) - return {'actor_loss': actor_loss.item(), 'critic_loss': critic_loss.item()} def strategy_save_actor(self, path: str, only_rank0: bool = False) -> None: diff --git a/applications/ChatGPT/chatgpt/trainer/ppo.py b/applications/ChatGPT/chatgpt/trainer/ppo.py index 2dfd9f739bac..dacab4784039 100644 --- a/applications/ChatGPT/chatgpt/trainer/ppo.py +++ b/applications/ChatGPT/chatgpt/trainer/ppo.py @@ -100,8 +100,6 @@ def training_step(self, experience: Experience) -> Dict[str, float]: self.strategy.optimizer_step(self.critic_optim) self.critic_optim.zero_grad() - # print(sum(((x-y).abs().sum() for x, y in zip(self.experience_maker.initial_model.state_dict().values(),self.actor.state_dict().values())))) - # print(sum(((x - y).abs().sum() for x, y in zip(self.experience_maker.reward_model.state_dict().values(), self.critic.state_dict().values())))) return {'actor_loss': actor_loss.item(), 'critic_loss': critic_loss.item()} diff --git a/applications/ChatGPT/examples/1m1t.py b/applications/ChatGPT/examples/1m1t.py index 739e5a73299c..d9f586599798 100644 --- a/applications/ChatGPT/examples/1m1t.py +++ b/applications/ChatGPT/examples/1m1t.py @@ -92,7 +92,6 @@ def tokenize_fn(texts): trainer_done_ref = trainer_ref.fit.remote(num_episodes=args.num_episodes, max_timesteps=args.max_timesteps, update_timesteps=args.update_timesteps) maker_done_ref = experience_holder_ref.workingloop.remote(sampler, tokenize_fn, times=args.num_episodes * args.max_timesteps * args.update_timesteps + 3) - # maker_done_ref = experience_holder_ref.workingloop.remote(sampler, tokenize_fn, times=1) ray.get([trainer_done_ref, maker_done_ref]) diff --git a/applications/ChatGPT/examples/1m1t.sh b/applications/ChatGPT/examples/1m1t.sh index cdb5f5175cd5..dc96007fbefa 100644 --- a/applications/ChatGPT/examples/1m1t.sh +++ b/applications/ChatGPT/examples/1m1t.sh @@ -1,21 +1,21 @@ -# set_n_least_used_CUDA_VISIBLE_DEVICES() { -# local n=${1:-"9999"} -# echo "GPU Memory Usage:" -# local FIRST_N_GPU_IDS=$(nvidia-smi --query-gpu=memory.used --format=csv \ -# | tail -n +2 \ -# | nl -v 0 \ -# | tee /dev/tty \ -# | sort -g -k 2 \ -# | awk '{print $1}' \ -# | head -n $n) -# export CUDA_VISIBLE_DEVICES=$(echo $FIRST_N_GPU_IDS | sed 's/ /,/g') -# echo "Now CUDA_VISIBLE_DEVICES is set to:" -# echo "CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES" -# } -# -# set_n_least_used_CUDA_VISIBLE_DEVICES 1 +set_n_least_used_CUDA_VISIBLE_DEVICES() { + local n=${1:-"9999"} + echo "GPU Memory Usage:" + local FIRST_N_GPU_IDS=$(nvidia-smi --query-gpu=memory.used --format=csv \ + | tail -n +2 \ + | nl -v 0 \ + | tee /dev/tty \ + | sort -g -k 2 \ + | awk '{print $1}' \ + | head -n $n) + export CUDA_VISIBLE_DEVICES=$(echo $FIRST_N_GPU_IDS | sed 's/ /,/g') + echo "Now CUDA_VISIBLE_DEVICES is set to:" + echo "CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES" +} -export CUDA_VISIBLE_DEVICES="0,1,2,3" +set_n_least_used_CUDA_VISIBLE_DEVICES 2 + +# export CUDA_VISIBLE_DEVICES="0,1,2,3" torchrun --standalone 1m1t.py "./awesome-chatgpt-prompts/prompts.csv" \ --strategy naive --lora_rank 2 --pretrain "facebook/opt-350m" --model 'opt' # --debug From 650ec5ba112b79b9f5113b785bdd7621223f56d3 Mon Sep 17 00:00:00 2001 From: 
csric Date: Mon, 27 Mar 2023 11:40:59 +0800 Subject: [PATCH 15/36] facing 2m1t stuck issue --- .../chatgpt/experience_maker/detached.py | 2 + .../ChatGPT/chatgpt/trainer/detached_base.py | 4 +- .../ChatGPT/chatgpt/trainer/detached_ppo.py | 5 +- applications/ChatGPT/examples/2m1t.py | 142 ++++++++++++++++++ applications/ChatGPT/examples/2m1t.sh | 21 +++ 5 files changed, 172 insertions(+), 2 deletions(-) create mode 100644 applications/ChatGPT/examples/2m1t.py create mode 100644 applications/ChatGPT/examples/2m1t.sh diff --git a/applications/ChatGPT/chatgpt/experience_maker/detached.py b/applications/ChatGPT/chatgpt/experience_maker/detached.py index 29453376f52f..2bb33e9f2d73 100644 --- a/applications/ChatGPT/chatgpt/experience_maker/detached.py +++ b/applications/ChatGPT/chatgpt/experience_maker/detached.py @@ -42,6 +42,8 @@ def __init__(self, self.experience_maker = NaiveExperienceMaker(actor, critic, reward_model, initial_model, self.kl_coef) self.model_visit_lock = Lock() self.fully_initialized = False + if 'debug' in self.generate_kwargs and self.generate_kwargs['debug'] == True: + print('[maker] Waiting for INIT') def _get_ready(self): while not self.fully_initialized: diff --git a/applications/ChatGPT/chatgpt/trainer/detached_base.py b/applications/ChatGPT/chatgpt/trainer/detached_base.py index c1faffa8a61f..6cddc4017a11 100644 --- a/applications/ChatGPT/chatgpt/trainer/detached_base.py +++ b/applications/ChatGPT/chatgpt/trainer/detached_base.py @@ -95,16 +95,18 @@ def fit(self, num_episodes: int = 50000, max_timesteps: int = 500, update_timest self._on_episode_end(episode) self._on_fit_end() + @ray.method(concurrency_group="io") def buffer_get_length(self): # called by ExperienceMakerHolder if 'debug' in self.generate_kwargs and self.generate_kwargs['debug'] == True: print("[trainer] telling length") return self.detached_replay_buffer.get_length() + @ray.method(concurrency_group="io") def buffer_append(self, experience: Experience): # called by ExperienceMakerHolder if 'debug' in self.generate_kwargs and self.generate_kwargs['debug'] == True: - print("[trainer] receiving exp") + print(f"[trainer] receiving exp. 
Current buffer length: {self.detached_replay_buffer.get_length()}") self.detached_replay_buffer.append(experience) def _on_fit_start(self) -> None: diff --git a/applications/ChatGPT/chatgpt/trainer/detached_ppo.py b/applications/ChatGPT/chatgpt/trainer/detached_ppo.py index 4f5558d81993..3605e8a17be8 100644 --- a/applications/ChatGPT/chatgpt/trainer/detached_ppo.py +++ b/applications/ChatGPT/chatgpt/trainer/detached_ppo.py @@ -23,7 +23,7 @@ import ray import copy -@ray.remote +@ray.remote(concurrency_groups={"io": 4, "compute": 2}) class DetachedPPOTrainer(DetachedTrainer): ''' Detached Trainer for PPO algorithm @@ -88,6 +88,7 @@ def __init__(self, callbacks=callbacks, **generate_kwargs) + @ray.method(concurrency_group="io") def _update_remote_makers(self): # TODO: balance duties if is_rank_0(): @@ -97,6 +98,7 @@ def _update_remote_makers(self): with torch.no_grad(): ray.get(target_holder.update_experience_maker.remote(self.actor, self.critic)) + @ray.method(concurrency_group="io") def initialize_remote_makers(self): # TODO: balance duties if is_rank_0(): @@ -106,6 +108,7 @@ def initialize_remote_makers(self): with torch.no_grad(): ray.get(target_holder.initialize_experience_maker.remote(self.actor, self.critic)) + @ray.method(concurrency_group="compute") def training_step(self, experience: Experience) -> Dict[str, float]: self.actor.train() self.critic.train() diff --git a/applications/ChatGPT/examples/2m1t.py b/applications/ChatGPT/examples/2m1t.py new file mode 100644 index 000000000000..8da6445f7d8c --- /dev/null +++ b/applications/ChatGPT/examples/2m1t.py @@ -0,0 +1,142 @@ +import argparse +from copy import deepcopy + +import pandas as pd +import torch +from chatgpt.trainer import PPOTrainer, DetachedPPOTrainer +from chatgpt.trainer.strategies import ColossalAIStrategy, DDPStrategy, NaiveStrategy +from chatgpt.experience_maker import NaiveExperienceMaker, ExperienceMakerHolder +from torch.optim import Adam +from transformers import AutoTokenizer, BloomTokenizerFast +from transformers.models.gpt2.tokenization_gpt2 import GPT2Tokenizer + +from colossalai.nn.optimizer import HybridAdam + +import ray + + +def main(args): + # configure strategy + if args.strategy == 'naive': + strategy = NaiveStrategy() + elif args.strategy == 'ddp': + strategy = DDPStrategy() + elif args.strategy == 'colossalai_gemini': + strategy = ColossalAIStrategy(stage=3, placement_policy='cuda', initial_scale=2**5) + elif args.strategy == 'colossalai_zero2': + strategy = ColossalAIStrategy(stage=2, placement_policy='cuda') + else: + raise ValueError(f'Unsupported strategy "{args.strategy}"') + + # configure tokenizer + if args.model == 'gpt2': + tokenizer = GPT2Tokenizer.from_pretrained('gpt2') + tokenizer.pad_token = tokenizer.eos_token + elif args.model == 'bloom': + tokenizer = BloomTokenizerFast.from_pretrained(args.pretrain) + tokenizer.pad_token = tokenizer.eos_token + elif args.model == 'opt': + tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m") + else: + raise ValueError(f'Unsupported model "{args.model}"') + + # configure Trainer + trainer_ref = DetachedPPOTrainer.options(name="trainer1", num_gpus=1, max_concurrency=32).remote( + experience_maker_holder_name_list=["maker1", "maker2"], + strategy=strategy, + model=args.model, + pretrained=args.pretrain, + lora_rank=args.lora_rank, + train_batch_size=args.train_batch_size, + buffer_limit=16, + experience_batch_size=args.experience_batch_size, + max_epochs=args.max_epochs, + #kwargs: + max_length=128, + do_sample=True, + temperature=1.0, + 
top_k=50, + pad_token_id=tokenizer.pad_token_id, + eos_token_id=tokenizer.eos_token_id, + debug=args.debug, + ) + + # configure Experience Maker + experience_holder_1_ref = ExperienceMakerHolder.options(name="maker1", num_gpus=1, max_concurrency=2).remote( + detached_trainer_name_list=["trainer1"], + strategy=strategy, + experience_batch_size=args.experience_batch_size, + kl_coef=0.1, + #kwargs: + max_length=128, + do_sample=True, + temperature=1.0, + top_k=50, + pad_token_id=tokenizer.pad_token_id, + eos_token_id=tokenizer.eos_token_id, + debug=args.debug, + ) + + experience_holder_2_ref = ExperienceMakerHolder.options(name="maker2", num_gpus=1, max_concurrency=2).remote( + detached_trainer_name_list=["trainer1"], + strategy=strategy, + experience_batch_size=args.experience_batch_size, + kl_coef=0.1, + #kwargs: + max_length=128, + do_sample=True, + temperature=1.0, + top_k=50, + pad_token_id=tokenizer.pad_token_id, + eos_token_id=tokenizer.eos_token_id, + debug=args.debug, + ) + + # trainer send its actor and critic to experience holders. + ray.get(trainer_ref.initialize_remote_makers.remote()) + + # configure sampler + dataset = pd.read_csv(args.prompt_path)['prompt'] + sampler = strategy.setup_sampler(dataset) + + def tokenize_fn(texts): + # MUST padding to max length to ensure inputs of all ranks have the same length + # Different length may lead to hang when using gemini, as different generation steps + batch = tokenizer(texts, return_tensors='pt', max_length=96, padding='max_length', truncation=True) + return {k: v.cuda() for k, v in batch.items()} + + trainer_done_ref = trainer_ref.fit.remote(num_episodes=args.num_episodes, max_timesteps=args.max_timesteps, update_timesteps=args.update_timesteps) + maker_1_done_ref = experience_holder_1_ref.workingloop.remote(sampler, tokenize_fn, times=args.num_episodes * args.max_timesteps * args.update_timesteps // 2+ 3) + maker_2_done_ref = experience_holder_2_ref.workingloop.remote(sampler, tokenize_fn, times=args.num_episodes * args.max_timesteps * args.update_timesteps // 2+ 3) + + ray.get([trainer_done_ref, maker_1_done_ref, maker_2_done_ref]) + + # save model checkpoint after fitting + trainer_ref.strategy_save_actor.remote(args.save_path, only_rank0=True) + # save optimizer checkpoint on all ranks + if args.need_optim_ckpt: + trainer_ref.strategy_save_actor_optim.remote('actor_optim_checkpoint_prompts_%d.pt' % (torch.cuda.current_device()), + only_rank0=False) + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('prompt_path') + parser.add_argument('--strategy', + choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'], + default='naive') + parser.add_argument('--model', default='gpt2', choices=['gpt2', 'bloom', 'opt']) + parser.add_argument('--pretrain', type=str, default=None) + parser.add_argument('--save_path', type=str, default='actor_checkpoint_prompts.pt') + parser.add_argument('--need_optim_ckpt', type=bool, default=False) + parser.add_argument('--num_episodes', type=int, default=10) + parser.add_argument('--max_timesteps', type=int, default=10) + parser.add_argument('--update_timesteps', type=int, default=10) + parser.add_argument('--max_epochs', type=int, default=5) + parser.add_argument('--train_batch_size', type=int, default=8) + parser.add_argument('--experience_batch_size', type=int, default=8) + parser.add_argument('--lora_rank', type=int, default=0, help="low-rank adaptation matrices rank") + + parser.add_argument('--debug', action='store_true') + args = parser.parse_args() + 
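The wiring above relies entirely on Ray named actors: the trainer registers itself as "trainer1", each holder registers as "maker1"/"maker2", and each side resolves the other with ray.get_actor at construction time, so no handle ever has to be passed through a constructor. A minimal, self-contained sketch of that pattern follows; Trainer and Maker here are toy stand-ins, not the real chatgpt classes:

    import ray

    @ray.remote
    class Trainer:
        def __init__(self):
            self.buffer = []

        def buffer_append(self, exp):
            self.buffer.append(exp)

        def buffer_get_length(self):
            return len(self.buffer)

    @ray.remote
    class Maker:
        def __init__(self, trainer_names):
            # resolve peers by name, as ExperienceMakerHolder does
            self.trainers = [ray.get_actor(n) for n in trainer_names]

        def send(self, exp):
            # pick the least-loaded buffer, mirroring the selection loop above
            lengths = ray.get([t.buffer_get_length.remote() for t in self.trainers])
            target = self.trainers[lengths.index(min(lengths))]
            ray.get(target.buffer_append.remote(exp))

    ray.init()
    trainer = Trainer.options(name="trainer1").remote()
    ray.get(trainer.buffer_get_length.remote())   # make sure the name is registered
    maker = Maker.options(name="maker1").remote(["trainer1"])
    ray.get(maker.send.remote({"sequences": 1}))
    assert ray.get(trainer.buffer_get_length.remote()) == 1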
ray.init() + main(args) diff --git a/applications/ChatGPT/examples/2m1t.sh b/applications/ChatGPT/examples/2m1t.sh new file mode 100644 index 000000000000..614336604d43 --- /dev/null +++ b/applications/ChatGPT/examples/2m1t.sh @@ -0,0 +1,21 @@ +# set_n_least_used_CUDA_VISIBLE_DEVICES() { +# local n=${1:-"9999"} +# echo "GPU Memory Usage:" +# local FIRST_N_GPU_IDS=$(nvidia-smi --query-gpu=memory.used --format=csv \ +# | tail -n +2 \ +# | nl -v 0 \ +# | tee /dev/tty \ +# | sort -g -k 2 \ +# | awk '{print $1}' \ +# | head -n $n) +# export CUDA_VISIBLE_DEVICES=$(echo $FIRST_N_GPU_IDS | sed 's/ /,/g') +# echo "Now CUDA_VISIBLE_DEVICES is set to:" +# echo "CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES" +# } +# +# set_n_least_used_CUDA_VISIBLE_DEVICES 2 + +export CUDA_VISIBLE_DEVICES="0,1,2" + +torchrun --standalone 2m1t.py "./awesome-chatgpt-prompts/prompts.csv" \ + --strategy naive --lora_rank 2 --pretrain "facebook/opt-350m" --model 'opt' --debug From f791fb7f934b0b9daa0740bb3d7b8006bff92718 Mon Sep 17 00:00:00 2001 From: csric Date: Mon, 27 Mar 2023 15:21:05 +0800 Subject: [PATCH 16/36] 2m1t verified --- .../chatgpt/experience_maker/detached.py | 35 +++++++++-------- .../ChatGPT/chatgpt/replay_buffer/detached.py | 10 ++--- .../ChatGPT/chatgpt/trainer/detached_base.py | 15 +++++--- .../ChatGPT/chatgpt/trainer/detached_ppo.py | 6 +-- applications/ChatGPT/examples/1m1t.py | 5 ++- applications/ChatGPT/examples/1m1t.sh | 6 ++- applications/ChatGPT/examples/2m1t.py | 7 ++-- applications/ChatGPT/examples/2m1t.sh | 38 ++++++++++--------- 8 files changed, 68 insertions(+), 54 deletions(-) diff --git a/applications/ChatGPT/chatgpt/experience_maker/detached.py b/applications/ChatGPT/chatgpt/experience_maker/detached.py index 2bb33e9f2d73..0ff35f885791 100644 --- a/applications/ChatGPT/chatgpt/experience_maker/detached.py +++ b/applications/ChatGPT/chatgpt/experience_maker/detached.py @@ -13,8 +13,7 @@ from threading import Lock import time - -@ray.remote +@ray.remote(concurrency_groups={"experience_io": 1, "model_io": 1, "compute": 1}) class ExperienceMakerHolder: ''' Args: @@ -40,7 +39,7 @@ def __init__(self, # Need a trainer to give an actor and a critic via initialize_experience_maker(...) 
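Splitting the holder into experience_io / model_io / compute concurrency groups gives each class of call its own thread budget, so a long-running generation cannot starve the cheap bookkeeping RPCs. A minimal sketch of the mechanism, with a toy actor in place of the real holder:

    import time
    import ray

    @ray.remote(concurrency_groups={"io": 1, "compute": 1})
    class HolderLike:

        @ray.method(concurrency_group="compute")
        def generate(self):
            time.sleep(5.0)        # stands in for model.generate(...)
            return "experience"

        @ray.method(concurrency_group="io")
        def status(self):
            return "alive"         # answered by the io thread right away

    ray.init()
    h = HolderLike.remote()
    slow = h.generate.remote()
    print(ray.get(h.status.remote()))   # returns without waiting 5 s
    ray.get(slow)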
actor, critic, reward_model, initial_model = None, None, None, None self.experience_maker = NaiveExperienceMaker(actor, critic, reward_model, initial_model, self.kl_coef) - self.model_visit_lock = Lock() + self._model_visit_lock = Lock() self.fully_initialized = False if 'debug' in self.generate_kwargs and self.generate_kwargs['debug'] == True: print('[maker] Waiting for INIT') @@ -49,8 +48,15 @@ def _get_ready(self): while not self.fully_initialized: time.sleep(1.0) + def update_target_trainer_list(self, detached_trainer_name_list): + self.target_trainer_list = [] + for name in detached_trainer_name_list: + self.target_trainer_list.append(ray.get_actor(name)) + # copy from ../trainer/base.py + @ray.method(concurrency_group="compute") def _make_experience(self, inputs: Union[Tensor, Dict[str, Tensor]]) -> Experience: + self._get_ready() if isinstance(inputs, Tensor): return self.experience_maker.make_experience(inputs, **self.generate_kwargs) elif isinstance(inputs, dict): @@ -58,14 +64,8 @@ def _make_experience(self, inputs: Union[Tensor, Dict[str, Tensor]]) -> Experien else: raise ValueError(f'Unsupported input type "{type(inputs)}"') - def update_target_trainer_list(self, detached_trainer_name_list): - self.target_trainer_list = [] - for name in detached_trainer_name_list: - self.target_trainer_list.append(ray.get_actor(name)) - - def make_and_send(self, inputs): - self._get_ready() - experience = self._make_experience(inputs) + @ray.method(concurrency_group="experience_io") + def _send_experience(self, experience): # choose a trainer that has the least experience batch in its detached_replay_buffer chosen_trainer = None min_length = None @@ -93,10 +93,12 @@ def workingloop(self, sampler: DistributedSampler, tokenizer: Optional[Callable[ inputs = tokenizer(rand_prompts) else: inputs = rand_prompts - self.model_visit_lock.acquire() - self.make_and_send(inputs) - self.model_visit_lock.release() + self._model_visit_lock.acquire() + experience = self._make_experience(inputs=inputs) + self._model_visit_lock.release() + self._send_experience(experience=experience) + @ray.method(concurrency_group="model_io") def initialize_experience_maker(self, init_actor: Actor, init_critic: Critic): ''' called by trainer. Only once. 
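With max_concurrency > 1, the rollout thread and the weight-update thread can touch self.experience_maker at the same time, which is what _model_visit_lock guards against: workingloop holds it while generating, and update_experience_maker holds it while swapping the actor/critic in. The same pattern in miniature, with plain threads and no Ray, purely illustrative:

    import time
    from threading import Lock, Thread

    class HolderLike:
        def __init__(self):
            self._model_visit_lock = Lock()
            self.weights = 0

        def make_experience(self):
            with self._model_visit_lock:   # rollout sees one consistent version
                snapshot = self.weights
                time.sleep(0.1)            # stands in for generation
                return snapshot

        def update(self, new_weights):
            with self._model_visit_lock:   # blocks until the rollout finishes
                self.weights = new_weights

    h = HolderLike()
    t = Thread(target=h.update, args=(1,))
    t.start()
    print(h.make_experience())             # prints 0 or 1, never a torn state
    t.join()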
@@ -119,15 +121,16 @@ def initialize_experience_maker(self, init_actor: Actor, init_critic: Critic): self.experience_maker.reward_model = reward_model self.fully_initialized = True + @ray.method(concurrency_group="model_io") def update_experience_maker(self, new_actor: Actor, new_critic: Critic): ''' called by trainer ''' # TODO: reduce malloc - self.model_visit_lock.acquire() + self._model_visit_lock.acquire() with torch.no_grad(): if 'debug' in self.generate_kwargs and self.generate_kwargs['debug'] == True: print("[maker] UPDATE ") self.experience_maker.actor = new_actor self.experience_maker.critic = new_critic - self.model_visit_lock.release() + self._model_visit_lock.release() diff --git a/applications/ChatGPT/chatgpt/replay_buffer/detached.py b/applications/ChatGPT/chatgpt/replay_buffer/detached.py index 59909379f4a4..ca0161c17dd6 100644 --- a/applications/ChatGPT/chatgpt/replay_buffer/detached.py +++ b/applications/ChatGPT/chatgpt/replay_buffer/detached.py @@ -38,7 +38,7 @@ def __init__(self, sample_batch_size: int, tp_world_size: int = 1, limit : int = self.tp_world_size = tp_world_size self.worker_state = [False] * self.tp_world_size self.held_sample = None - self.worker_state_lock = Lock() + self._worker_state_lock = Lock() @torch.no_grad() def append(self, experience: Experience) -> None: @@ -64,19 +64,19 @@ def clear(self) -> None: @torch.no_grad() def sample(self, worker_rank = 0, to_device = "cpu") -> Experience: - self.worker_state_lock.acquire() + self._worker_state_lock.acquire() if not any(self.worker_state): self.held_sample = self._sample_and_erase() self.worker_state[worker_rank] = True - self.worker_state_lock.release() + self._worker_state_lock.release() ret = copy.deepcopy(self.held_sample) ret.to_device(to_device) - self.worker_state_lock.acquire() + self._worker_state_lock.acquire() if all(self.worker_state): self.worker_state = [False] * self.tp_world_size - self.worker_state_lock.release() + self._worker_state_lock.release() return ret @torch.no_grad() diff --git a/applications/ChatGPT/chatgpt/trainer/detached_base.py b/applications/ChatGPT/chatgpt/trainer/detached_base.py index 6cddc4017a11..5330aa74a32c 100644 --- a/applications/ChatGPT/chatgpt/trainer/detached_base.py +++ b/applications/ChatGPT/chatgpt/trainer/detached_base.py @@ -15,6 +15,7 @@ import ray import sys +import time # @ray.remote @@ -43,7 +44,7 @@ def __init__(self, buffer_limit: int = 0, buffer_cpu_offload: bool = True, experience_batch_size: int = 8, - max_epochs: int = 10, + max_epochs: int = 1, dataloader_pin_memory: bool = True, callbacks: List[Callback] = [], **generate_kwargs) -> None: @@ -77,7 +78,7 @@ def _learn(self): for _ in pbar: if 'debug' in self.generate_kwargs and self.generate_kwargs['debug'] == True: print("[trainer] sampling exp") - experience = self.detached_replay_buffer.sample() + experience = self._buffer_sample() if 'debug' in self.generate_kwargs and self.generate_kwargs['debug'] == True: print("[trainer] training step") metrics = self.training_step(experience) @@ -93,21 +94,25 @@ def fit(self, num_episodes: int = 50000, max_timesteps: int = 500, update_timest self._learn() self._update_remote_makers() self._on_episode_end(episode) - self._on_fit_end() + self._on_fit_end() - @ray.method(concurrency_group="io") + @ray.method(concurrency_group="experience_io") def buffer_get_length(self): # called by ExperienceMakerHolder if 'debug' in self.generate_kwargs and self.generate_kwargs['debug'] == True: print("[trainer] telling length") return 
self.detached_replay_buffer.get_length() - @ray.method(concurrency_group="io") + @ray.method(concurrency_group="experience_io") def buffer_append(self, experience: Experience): # called by ExperienceMakerHolder if 'debug' in self.generate_kwargs and self.generate_kwargs['debug'] == True: print(f"[trainer] receiving exp. Current buffer length: {self.detached_replay_buffer.get_length()}") self.detached_replay_buffer.append(experience) + + @ray.method(concurrency_group="experience_io") + def _buffer_sample(self): + return self.detached_replay_buffer.sample() def _on_fit_start(self) -> None: for callback in self.callbacks: diff --git a/applications/ChatGPT/chatgpt/trainer/detached_ppo.py b/applications/ChatGPT/chatgpt/trainer/detached_ppo.py index 3605e8a17be8..d8c370058de3 100644 --- a/applications/ChatGPT/chatgpt/trainer/detached_ppo.py +++ b/applications/ChatGPT/chatgpt/trainer/detached_ppo.py @@ -23,7 +23,7 @@ import ray import copy -@ray.remote(concurrency_groups={"io": 4, "compute": 2}) +@ray.remote(concurrency_groups={"experience_io": 3, "model_io": 1, "compute": 1}) class DetachedPPOTrainer(DetachedTrainer): ''' Detached Trainer for PPO algorithm @@ -88,7 +88,7 @@ def __init__(self, callbacks=callbacks, **generate_kwargs) - @ray.method(concurrency_group="io") + @ray.method(concurrency_group="model_io") def _update_remote_makers(self): # TODO: balance duties if is_rank_0(): @@ -98,7 +98,7 @@ def _update_remote_makers(self): with torch.no_grad(): ray.get(target_holder.update_experience_maker.remote(self.actor, self.critic)) - @ray.method(concurrency_group="io") + @ray.method(concurrency_group="model_io") def initialize_remote_makers(self): # TODO: balance duties if is_rank_0(): diff --git a/applications/ChatGPT/examples/1m1t.py b/applications/ChatGPT/examples/1m1t.py index d9f586599798..4727493dabdf 100644 --- a/applications/ChatGPT/examples/1m1t.py +++ b/applications/ChatGPT/examples/1m1t.py @@ -41,7 +41,7 @@ def main(args): raise ValueError(f'Unsupported model "{args.model}"') # configure Trainer - trainer_ref = DetachedPPOTrainer.options(name="trainer1", num_gpus=1, max_concurrency=3).remote( + trainer_ref = DetachedPPOTrainer.options(name="trainer1", num_gpus=1, max_concurrency=2).remote( experience_maker_holder_name_list=["maker1"], strategy=strategy, model=args.model, @@ -91,7 +91,8 @@ def tokenize_fn(texts): return {k: v.cuda() for k, v in batch.items()} trainer_done_ref = trainer_ref.fit.remote(num_episodes=args.num_episodes, max_timesteps=args.max_timesteps, update_timesteps=args.update_timesteps) - maker_done_ref = experience_holder_ref.workingloop.remote(sampler, tokenize_fn, times=args.num_episodes * args.max_timesteps * args.update_timesteps + 3) + num_exp_per_maker = args.num_episodes * args.max_timesteps // args.update_timesteps * args.max_epochs + 3 # +3 for fault tolerance + maker_done_ref = experience_holder_ref.workingloop.remote(sampler, tokenize_fn, times=num_exp_per_maker) ray.get([trainer_done_ref, maker_done_ref]) diff --git a/applications/ChatGPT/examples/1m1t.sh b/applications/ChatGPT/examples/1m1t.sh index dc96007fbefa..439af3640561 100644 --- a/applications/ChatGPT/examples/1m1t.sh +++ b/applications/ChatGPT/examples/1m1t.sh @@ -17,5 +17,7 @@ set_n_least_used_CUDA_VISIBLE_DEVICES 2 # export CUDA_VISIBLE_DEVICES="0,1,2,3" -torchrun --standalone 1m1t.py "./awesome-chatgpt-prompts/prompts.csv" \ - --strategy naive --lora_rank 2 --pretrain "facebook/opt-350m" --model 'opt' # --debug +torchrun --standalone 2m1t.py "./awesome-chatgpt-prompts/prompts.csv" \ + 
--strategy naive --lora_rank 2 --pretrain "facebook/opt-350m" --model 'opt' \ + --num_episodes 10 --max_timesteps 10 --update_timesteps 10 \ + --max_epochs 10 # --debug diff --git a/applications/ChatGPT/examples/2m1t.py b/applications/ChatGPT/examples/2m1t.py index 8da6445f7d8c..6c54a95f1b2f 100644 --- a/applications/ChatGPT/examples/2m1t.py +++ b/applications/ChatGPT/examples/2m1t.py @@ -41,7 +41,7 @@ def main(args): raise ValueError(f'Unsupported model "{args.model}"') # configure Trainer - trainer_ref = DetachedPPOTrainer.options(name="trainer1", num_gpus=1, max_concurrency=32).remote( + trainer_ref = DetachedPPOTrainer.options(name="trainer1", num_gpus=1, max_concurrency=2).remote( experience_maker_holder_name_list=["maker1", "maker2"], strategy=strategy, model=args.model, @@ -106,8 +106,9 @@ def tokenize_fn(texts): return {k: v.cuda() for k, v in batch.items()} trainer_done_ref = trainer_ref.fit.remote(num_episodes=args.num_episodes, max_timesteps=args.max_timesteps, update_timesteps=args.update_timesteps) - maker_1_done_ref = experience_holder_1_ref.workingloop.remote(sampler, tokenize_fn, times=args.num_episodes * args.max_timesteps * args.update_timesteps // 2+ 3) - maker_2_done_ref = experience_holder_2_ref.workingloop.remote(sampler, tokenize_fn, times=args.num_episodes * args.max_timesteps * args.update_timesteps // 2+ 3) + num_exp_per_maker = args.num_episodes * args.max_timesteps // args.update_timesteps * args.max_epochs // 2 + 3 # +3 for fault tolerance + maker_1_done_ref = experience_holder_1_ref.workingloop.remote(sampler, tokenize_fn, times=num_exp_per_maker) + maker_2_done_ref = experience_holder_2_ref.workingloop.remote(sampler, tokenize_fn, times=num_exp_per_maker) ray.get([trainer_done_ref, maker_1_done_ref, maker_2_done_ref]) diff --git a/applications/ChatGPT/examples/2m1t.sh b/applications/ChatGPT/examples/2m1t.sh index 614336604d43..d95d4677da92 100644 --- a/applications/ChatGPT/examples/2m1t.sh +++ b/applications/ChatGPT/examples/2m1t.sh @@ -1,21 +1,23 @@ -# set_n_least_used_CUDA_VISIBLE_DEVICES() { -# local n=${1:-"9999"} -# echo "GPU Memory Usage:" -# local FIRST_N_GPU_IDS=$(nvidia-smi --query-gpu=memory.used --format=csv \ -# | tail -n +2 \ -# | nl -v 0 \ -# | tee /dev/tty \ -# | sort -g -k 2 \ -# | awk '{print $1}' \ -# | head -n $n) -# export CUDA_VISIBLE_DEVICES=$(echo $FIRST_N_GPU_IDS | sed 's/ /,/g') -# echo "Now CUDA_VISIBLE_DEVICES is set to:" -# echo "CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES" -# } -# -# set_n_least_used_CUDA_VISIBLE_DEVICES 2 +set_n_least_used_CUDA_VISIBLE_DEVICES() { + local n=${1:-"9999"} + echo "GPU Memory Usage:" + local FIRST_N_GPU_IDS=$(nvidia-smi --query-gpu=memory.used --format=csv \ + | tail -n +2 \ + | nl -v 0 \ + | tee /dev/tty \ + | sort -g -k 2 \ + | awk '{print $1}' \ + | head -n $n) + export CUDA_VISIBLE_DEVICES=$(echo $FIRST_N_GPU_IDS | sed 's/ /,/g') + echo "Now CUDA_VISIBLE_DEVICES is set to:" + echo "CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES" +} -export CUDA_VISIBLE_DEVICES="0,1,2" +set_n_least_used_CUDA_VISIBLE_DEVICES 3 + +# export CUDA_VISIBLE_DEVICES="0,1,2,3" torchrun --standalone 2m1t.py "./awesome-chatgpt-prompts/prompts.csv" \ - --strategy naive --lora_rank 2 --pretrain "facebook/opt-350m" --model 'opt' --debug + --strategy naive --lora_rank 2 --pretrain "facebook/opt-350m" --model 'opt' \ + --num_episodes 10 --max_timesteps 10 --update_timesteps 10 \ + --max_epochs 10 # --debug From f4687247588122287d0c430a3a66cbb3afd2b4b8 Mon Sep 17 00:00:00 2001 From: csric Date: Mon, 27 Mar 2023 15:36:10 +0800 Subject: 
[PATCH 17/36] do not use torchrun --- applications/ChatGPT/examples/1m1t.sh | 4 ++-- applications/ChatGPT/examples/2m1t.sh | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/applications/ChatGPT/examples/1m1t.sh b/applications/ChatGPT/examples/1m1t.sh index 439af3640561..36258f6da795 100644 --- a/applications/ChatGPT/examples/1m1t.sh +++ b/applications/ChatGPT/examples/1m1t.sh @@ -17,7 +17,7 @@ set_n_least_used_CUDA_VISIBLE_DEVICES 2 # export CUDA_VISIBLE_DEVICES="0,1,2,3" -torchrun --standalone 2m1t.py "./awesome-chatgpt-prompts/prompts.csv" \ +python 1m1t.py "./awesome-chatgpt-prompts/prompts.csv" \ --strategy naive --lora_rank 2 --pretrain "facebook/opt-350m" --model 'opt' \ --num_episodes 10 --max_timesteps 10 --update_timesteps 10 \ - --max_epochs 10 # --debug + --max_epochs 10 # --debug \ No newline at end of file diff --git a/applications/ChatGPT/examples/2m1t.sh b/applications/ChatGPT/examples/2m1t.sh index d95d4677da92..f85597c4b87f 100644 --- a/applications/ChatGPT/examples/2m1t.sh +++ b/applications/ChatGPT/examples/2m1t.sh @@ -17,7 +17,7 @@ set_n_least_used_CUDA_VISIBLE_DEVICES 3 # export CUDA_VISIBLE_DEVICES="0,1,2,3" -torchrun --standalone 2m1t.py "./awesome-chatgpt-prompts/prompts.csv" \ +python 2m1t.py "./awesome-chatgpt-prompts/prompts.csv" \ --strategy naive --lora_rank 2 --pretrain "facebook/opt-350m" --model 'opt' \ --num_episodes 10 --max_timesteps 10 --update_timesteps 10 \ --max_epochs 10 # --debug From 12b94f753fafec514447a0020c3d38572cbf1b11 Mon Sep 17 00:00:00 2001 From: csric Date: Mon, 27 Mar 2023 16:52:11 +0800 Subject: [PATCH 18/36] working on 2m2t --- applications/ChatGPT/examples/2m2t.py | 45 +++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 applications/ChatGPT/examples/2m2t.py diff --git a/applications/ChatGPT/examples/2m2t.py b/applications/ChatGPT/examples/2m2t.py new file mode 100644 index 000000000000..7e38229f3de3 --- /dev/null +++ b/applications/ChatGPT/examples/2m2t.py @@ -0,0 +1,45 @@ +import argparse +from copy import deepcopy + +import pandas as pd +import torch +from chatgpt.trainer import PPOTrainer, DetachedPPOTrainer +from chatgpt.trainer.strategies import ColossalAIStrategy, DDPStrategy, NaiveStrategy +from chatgpt.experience_maker import NaiveExperienceMaker, ExperienceMakerHolder +from torch.optim import Adam +from transformers import AutoTokenizer, BloomTokenizerFast +from transformers.models.gpt2.tokenization_gpt2 import GPT2Tokenizer + +from colossalai.nn.optimizer import HybridAdam + +import ray +import os + +def launch_trainer(args, env_info): + # manually set environ + os.environ["RANK"] = env_info.rank + os.environ["LOCAL_RANK"] = env_info.local_rank + os.environ["WORLD_SIZE"] = env_info.world_size + os.environ["MASTER_ADDR"] = env_info.master_addr + os.environ["MASTER_PORT"] = env_info.master_port + + + + # configure Trainer strategy + if args.strategy == 'naive': + trainer_strategy = NaiveStrategy() + elif args.strategy == 'ddp': + trainer_strategy = DDPStrategy() + elif args.strategy == 'colossalai_gemini': + trainer_strategy = ColossalAIStrategy(stage=3, placement_policy='cuda', initial_scale=2**5) + elif args.strategy == 'colossalai_zero2': + trainer_strategy = ColossalAIStrategy(stage=2, placement_policy='cuda') + else: + raise ValueError(f'Unsupported strategy "{args.strategy}"') + + +def main(args): + + + # configure Maker strategy + maker_strategy = NaiveStrategy() \ No newline at end of file From 05df7d780fea26cc8edcfa73d313c271c133144c Mon Sep 17 00:00:00 2001 From: csric 
Date: Mon, 27 Mar 2023 18:46:37 +0800 Subject: [PATCH 19/36] working on 2m2t --- .../ChatGPT/chatgpt/trainer/detached_base.py | 5 - applications/ChatGPT/examples/2m2t.py | 209 ++++++++++++++++-- applications/ChatGPT/examples/2m2t.sh | 0 3 files changed, 192 insertions(+), 22 deletions(-) create mode 100644 applications/ChatGPT/examples/2m2t.sh diff --git a/applications/ChatGPT/chatgpt/trainer/detached_base.py b/applications/ChatGPT/chatgpt/trainer/detached_base.py index 5330aa74a32c..6935a7f3a1cf 100644 --- a/applications/ChatGPT/chatgpt/trainer/detached_base.py +++ b/applications/ChatGPT/chatgpt/trainer/detached_base.py @@ -14,11 +14,6 @@ from .utils import is_rank_0 import ray -import sys -import time - -# @ray.remote - class DetachedTrainer(ABC): ''' diff --git a/applications/ChatGPT/examples/2m2t.py b/applications/ChatGPT/examples/2m2t.py index 7e38229f3de3..322e60837fd3 100644 --- a/applications/ChatGPT/examples/2m2t.py +++ b/applications/ChatGPT/examples/2m2t.py @@ -13,33 +13,208 @@ from colossalai.nn.optimizer import HybridAdam import ray + import os +import socket + +def get_free_port(): + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.bind(('', 0)) + return s.getsockname()[1] + +def get_local_ip(): + with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s: + s.connect(('8.8.8.8', 80)) + return s.getsockname()[0] def launch_trainer(args, env_info): - # manually set environ - os.environ["RANK"] = env_info.rank - os.environ["LOCAL_RANK"] = env_info.local_rank - os.environ["WORLD_SIZE"] = env_info.world_size - os.environ["MASTER_ADDR"] = env_info.master_addr - os.environ["MASTER_PORT"] = env_info.master_port + # manually set environs + os.environ["RANK"] = env_info['rank'] + os.environ["LOCAL_RANK"] = env_info['local_rank'] + os.environ["WORLD_SIZE"] = env_info['world_size'] + os.environ['MASTER_PORT'] = get_free_port() + os.environ['MASTER_ADDR'] = get_local_ip() + rank = int(os.environ['RANK']) + + # configure Trainer strategy + # ! Supposed DDP ! 
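Two details in the env bootstrap above are worth pinning down. os.environ only accepts strings, so the port from get_free_port() needs a str() around it, and every rank in a group has to receive the same MASTER_ADDR/MASTER_PORT pair; letting each process probe for its own free port breaks the rendezvous, which is why a later patch in this series moves the port choice into main() and fans it out. A sketch of the intended hand-off, assuming the standard env:// rendezvous:

    import os
    import socket
    import torch.distributed as dist

    def get_free_port() -> int:
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            s.bind(("", 0))
            return s.getsockname()[1]

    def init_from_env(rank: int, world_size: int, addr: str, port: int) -> None:
        os.environ["MASTER_ADDR"] = addr
        os.environ["MASTER_PORT"] = str(port)        # str(), never an int
        os.environ["RANK"] = str(rank)
        os.environ["WORLD_SIZE"] = str(world_size)
        dist.init_process_group(backend="gloo", init_method="env://")

    # the parent picks (addr, port) ONCE and passes the same pair to every rank
    port = get_free_port()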
+ if args.trainer_strategy == 'naive': + strategy = NaiveStrategy() + elif args.trainer_strategy == 'ddp': + strategy = DDPStrategy() + elif args.trainer_strategy == 'colossalai_gemini': + strategy = ColossalAIStrategy(stage=3, placement_policy='cuda', initial_scale=2**5) + elif args.trainer_strategy == 'colossalai_zero2': + strategy = ColossalAIStrategy(stage=2, placement_policy='cuda') + else: + raise ValueError(f'Unsupported strategy "{args.trainer_strategy}"') + + # configure tokenizer + if args.model == 'gpt2': + tokenizer = GPT2Tokenizer.from_pretrained('gpt2') + tokenizer.pad_token = tokenizer.eos_token + elif args.model == 'bloom': + tokenizer = BloomTokenizerFast.from_pretrained(args.pretrain) + tokenizer.pad_token = tokenizer.eos_token + elif args.model == 'opt': + tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m") + else: + raise ValueError(f'Unsupported model "{args.model}"') + # configure Trainer + if rank == 0: + name = "trainer1" + elif rank == 1: + name = "trainer2" + trainer_ref = DetachedPPOTrainer.options(name=name, num_gpus=1, max_concurrency=2).remote( + experience_maker_holder_name_list=["maker1", "maker2"], + strategy=strategy, + model=args.model, + pretrained=args.pretrain, + lora_rank=args.lora_rank, + train_batch_size=args.train_batch_size, + buffer_limit=16, + experience_batch_size=args.experience_batch_size, + max_epochs=args.max_epochs, + #kwargs: + max_length=128, + do_sample=True, + temperature=1.0, + top_k=50, + pad_token_id=tokenizer.pad_token_id, + eos_token_id=tokenizer.eos_token_id, + debug=args.debug, + ) + trainer_done_ref = trainer_ref.fit.remote(num_episodes=args.num_episodes, max_timesteps=args.max_timesteps, update_timesteps=args.update_timesteps) + ray.get(trainer_done_ref) + + # save model checkpoint after fitting + trainer_ref.strategy_save_actor.remote(args.save_path, only_rank0=True) + # save optimizer checkpoint on all ranks + if args.need_optim_ckpt: + trainer_ref.strategy_save_actor_optim.remote('actor_optim_checkpoint_prompts_%d.pt' % (torch.cuda.current_device()), + only_rank0=False) + +def launch_maker(args, env_info): + os.environ["RANK"] = env_info['rank'] + os.environ["LOCAL_RANK"] = env_info['local_rank'] + os.environ["WORLD_SIZE"] = env_info['world_size'] + os.environ['MASTER_PORT'] = get_free_port() + os.environ['MASTER_ADDR'] = get_local_ip() + rank = int(os.environ['RANK']) # configure Trainer strategy - if args.strategy == 'naive': - trainer_strategy = NaiveStrategy() - elif args.strategy == 'ddp': - trainer_strategy = DDPStrategy() - elif args.strategy == 'colossalai_gemini': - trainer_strategy = ColossalAIStrategy(stage=3, placement_policy='cuda', initial_scale=2**5) - elif args.strategy == 'colossalai_zero2': - trainer_strategy = ColossalAIStrategy(stage=2, placement_policy='cuda') + # ! Supposed naive ! 
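One Ray detail to keep in mind around the checkpointing calls above: .remote() returns immediately, so strategy_save_actor.remote(...) is fire-and-forget, and a driver process that exits right afterwards can tear the actor down mid-write. Blocking on the returned ObjectRef makes completion explicit. A self-contained illustration with a toy actor and a hypothetical path:

    import ray

    @ray.remote
    class Saver:
        def save(self, path: str) -> str:
            with open(path, "w") as f:   # stands in for a checkpoint write
                f.write("checkpoint")
            return path

    ray.init()
    saver = Saver.remote()
    ref = saver.save.remote("/tmp/ckpt.txt")
    # without this ray.get, a driver exiting now may kill the actor
    # before the file is fully written
    print(ray.get(ref))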
+ if args.maker_strategy == 'naive': + strategy = NaiveStrategy() + elif args.maker_strategy == 'ddp': + strategy = DDPStrategy() + elif args.maker_strategy == 'colossalai_gemini': + strategy = ColossalAIStrategy(stage=3, placement_policy='cuda', initial_scale=2**5) + elif args.maker_strategy == 'colossalai_zero2': + strategy = ColossalAIStrategy(stage=2, placement_policy='cuda') + else: + raise ValueError(f'Unsupported strategy "{args.maker_strategy}"') + + # configure tokenizer + if args.model == 'gpt2': + tokenizer = GPT2Tokenizer.from_pretrained('gpt2') + tokenizer.pad_token = tokenizer.eos_token + elif args.model == 'bloom': + tokenizer = BloomTokenizerFast.from_pretrained(args.pretrain) + tokenizer.pad_token = tokenizer.eos_token + elif args.model == 'opt': + tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m") else: - raise ValueError(f'Unsupported strategy "{args.strategy}"') + raise ValueError(f'Unsupported model "{args.model}"') + + # configure Experience Maker + if rank == 0: + name = "maker1" + elif rank == 1: + name = "maker2" + experience_holder_ref = ExperienceMakerHolder.options(name=name, num_gpus=1, max_concurrency=2).remote( + detached_trainer_name_list=["trainer1"], + strategy=strategy, + experience_batch_size=args.experience_batch_size, + kl_coef=0.1, + #kwargs: + max_length=128, + do_sample=True, + temperature=1.0, + top_k=50, + pad_token_id=tokenizer.pad_token_id, + eos_token_id=tokenizer.eos_token_id, + debug=args.debug, + ) + # configure sampler + dataset = pd.read_csv(args.prompt_path)['prompt'] + sampler = strategy.setup_sampler(dataset) + def tokenize_fn(texts): + # MUST padding to max length to ensure inputs of all ranks have the same length + # Different length may lead to hang when using gemini, as different generation steps + batch = tokenizer(texts, return_tensors='pt', max_length=96, padding='max_length', truncation=True) + return {k: v.cuda() for k, v in batch.items()} + num_exp_per_maker = args.num_episodes * args.max_timesteps // args.update_timesteps * args.max_epochs + 3 # +3 for fault tolerance + maker_done_ref = experience_holder_ref.workingloop.remote(sampler, tokenize_fn, times=num_exp_per_maker) + ray.get(maker_done_ref) + def main(args): + # trainer_env_info + env_info_trainer_1 = {'local_rank' : 0, + 'rank' : 0, + 'world_size' : 2} + env_info_trainer_2 = {'local_rank' : 0, + 'rank' : 1, + 'world_size' : 2} + # maker_env_info + env_info_maker_1 = {'local_rank' : 0, + 'rank' : 0, + 'world_size' : 2} + env_info_maker_2 = {'local_rank' : 0, + 'rank' : 1, + 'world_size' : 2} + + process_trainer_1 = torch.multiprocessing.Process(target=launch_trainer, args=(args, env_info_trainer_1)) + process_trainer_2 = torch.multiprocessing.Process(target=launch_trainer, args=(args, env_info_trainer_2)) + process_maker_1 = torch.multiprocessing.Process(target=launch_maker, args=(args, env_info_maker_1)) + process_maker_2 = torch.multiprocessing.Process(target=launch_maker, args=(args, env_info_maker_2)) + process_trainer_1.start() + process_trainer_2.start() + process_maker_1.start() + process_maker_2.start() + + process_trainer_1.join() + process_trainer_2.join() + process_maker_1.join() + process_maker_2.join() + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('prompt_path') + parser.add_argument('--trainer_strategy', + choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'], + default='naive') + parser.add_argument('--maker_strategy', + choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'], 
+ default='naive') + parser.add_argument('--model', default='gpt2', choices=['gpt2', 'bloom', 'opt']) + parser.add_argument('--pretrain', type=str, default=None) + parser.add_argument('--save_path', type=str, default='actor_checkpoint_prompts.pt') + parser.add_argument('--need_optim_ckpt', type=bool, default=False) + parser.add_argument('--num_episodes', type=int, default=10) + parser.add_argument('--max_timesteps', type=int, default=10) + parser.add_argument('--update_timesteps', type=int, default=10) + parser.add_argument('--max_epochs', type=int, default=5) + parser.add_argument('--train_batch_size', type=int, default=8) + parser.add_argument('--experience_batch_size', type=int, default=8) + parser.add_argument('--lora_rank', type=int, default=0, help="low-rank adaptation matrices rank") - # configure Maker strategy - maker_strategy = NaiveStrategy() \ No newline at end of file + parser.add_argument('--debug', action='store_true') + args = parser.parse_args() + ray.init() + main(args) diff --git a/applications/ChatGPT/examples/2m2t.sh b/applications/ChatGPT/examples/2m2t.sh new file mode 100644 index 000000000000..e69de29bb2d1 From 07736974b2335d2c39a1945271a0cde45f3d1e23 Mon Sep 17 00:00:00 2001 From: csric Date: Tue, 28 Mar 2023 16:40:09 +0800 Subject: [PATCH 20/36] initialize strategy in ray actor env --- .../chatgpt/experience_maker/detached.py | 33 +++--- .../ChatGPT/chatgpt/trainer/detached_base.py | 5 +- .../ChatGPT/chatgpt/trainer/detached_ppo.py | 11 +- applications/ChatGPT/chatgpt/trainer/utils.py | 16 ++- applications/ChatGPT/examples/1m1t.py | 33 ++---- applications/ChatGPT/examples/1m1t.sh | 9 +- applications/ChatGPT/examples/2m1t.py | 32 ++--- applications/ChatGPT/examples/2m1t.sh | 5 +- applications/ChatGPT/examples/2m2t.py | 109 +++++++++++------- applications/ChatGPT/examples/2m2t.sh | 24 ++++ 10 files changed, 161 insertions(+), 116 deletions(-) diff --git a/applications/ChatGPT/chatgpt/experience_maker/detached.py b/applications/ChatGPT/chatgpt/experience_maker/detached.py index 0ff35f885791..cd529d4039e8 100644 --- a/applications/ChatGPT/chatgpt/experience_maker/detached.py +++ b/applications/ChatGPT/chatgpt/experience_maker/detached.py @@ -8,10 +8,12 @@ from chatgpt.models.base import Actor, Critic, RewardModel from chatgpt.trainer.strategies.sampler import DistributedSampler from chatgpt.trainer.strategies import Strategy -from chatgpt.trainer.utils import is_rank_0, get_cuda_actor_critic_from_args +from chatgpt.trainer.utils import is_rank_0, get_strategy_from_args from copy import deepcopy from threading import Lock import time +import os + @ray.remote(concurrency_groups={"experience_io": 1, "model_io": 1, "compute": 1}) class ExperienceMakerHolder: @@ -25,14 +27,14 @@ class ExperienceMakerHolder: def __init__(self, detached_trainer_name_list: List[str], - strategy: Strategy, + strategy: str, experience_batch_size: int = 8, kl_coef: float = 0.1, **generate_kwargs): self.target_trainer_list = [] for name in detached_trainer_name_list: - self.target_trainer_list.append(ray.get_actor(name)) - self.strategy = strategy + self.target_trainer_list.append(ray.get_actor(name, namespace=os.environ["RAY_NAMESPACE"])) + self.strategy = get_strategy_from_args(strategy) self.experience_batch_size = experience_batch_size self.kl_coef = kl_coef self.generate_kwargs = generate_kwargs @@ -85,8 +87,9 @@ def _send_experience(self, experience): print("[maker] sending exp") chosen_trainer.buffer_append.remote(experience) - def workingloop(self, sampler: DistributedSampler, tokenizer: 
Optional[Callable[[Any], dict]] = None, times=5000 * 50000): + def workingloop(self, dataset, tokenizer: Optional[Callable[[Any], dict]] = None, times=5000 * 50000): self._get_ready() + sampler = self.strategy.setup_sampler(dataset) for _ in range(times): rand_prompts = sampler.sample(self.experience_batch_size) if tokenizer is not None: @@ -108,17 +111,19 @@ def initialize_experience_maker(self, init_actor: Actor, init_critic: Critic): return if 'debug' in self.generate_kwargs and self.generate_kwargs['debug'] == True: print('[maker] INIT') - actor = init_actor - critic = init_critic + with torch.no_grad(): + actor = init_actor + critic = init_critic - with self.strategy.model_init_context(): - initial_model = deepcopy(actor) - reward_model = RewardModel(deepcopy(critic.model), deepcopy(critic.value_head)).to(torch.cuda.current_device()) + with self.strategy.model_init_context(): + initial_model = deepcopy(actor) + reward_model = RewardModel(deepcopy(critic.model), + deepcopy(critic.value_head)).to(torch.cuda.current_device()) - self.experience_maker.actor = actor - self.experience_maker.critic = critic - self.experience_maker.initial_model = initial_model - self.experience_maker.reward_model = reward_model + self.experience_maker.actor = actor + self.experience_maker.critic = critic + self.experience_maker.initial_model = initial_model + self.experience_maker.reward_model = reward_model self.fully_initialized = True @ray.method(concurrency_group="model_io") diff --git a/applications/ChatGPT/chatgpt/trainer/detached_base.py b/applications/ChatGPT/chatgpt/trainer/detached_base.py index 6935a7f3a1cf..3575e60ba264 100644 --- a/applications/ChatGPT/chatgpt/trainer/detached_base.py +++ b/applications/ChatGPT/chatgpt/trainer/detached_base.py @@ -14,6 +14,7 @@ from .utils import is_rank_0 import ray +import os class DetachedTrainer(ABC): ''' @@ -34,7 +35,6 @@ class DetachedTrainer(ABC): def __init__(self, experience_maker_holder_name_list: List[str], - strategy: Strategy, # TODO: DetachedStrategy train_batch_size: int = 8, buffer_limit: int = 0, buffer_cpu_offload: bool = True, @@ -44,7 +44,6 @@ def __init__(self, callbacks: List[Callback] = [], **generate_kwargs) -> None: super().__init__() - self.strategy = strategy self.detached_replay_buffer = DetachedReplayBuffer(train_batch_size, limit=buffer_limit, cpu_offload=buffer_cpu_offload) self.experience_batch_size = experience_batch_size self.max_epochs = max_epochs @@ -58,7 +57,7 @@ def update_target_holder_list(self, experience_maker_holder_name_list): self.target_holder_name_list = experience_maker_holder_name_list self.target_holder_list = [] for name in self.target_holder_name_list: - self.target_holder_list.append(ray.get_actor(name)) + self.target_holder_list.append(ray.get_actor(name, namespace=os.environ["RAY_NAMESPACE"])) @abstractmethod def _update_remote_makers(self): diff --git a/applications/ChatGPT/chatgpt/trainer/detached_ppo.py b/applications/ChatGPT/chatgpt/trainer/detached_ppo.py index d8c370058de3..b338fc93881e 100644 --- a/applications/ChatGPT/chatgpt/trainer/detached_ppo.py +++ b/applications/ChatGPT/chatgpt/trainer/detached_ppo.py @@ -18,7 +18,7 @@ from .detached_base import DetachedTrainer from .callbacks import Callback from .strategies import Strategy -from .utils import is_rank_0, get_cuda_actor_critic_from_args +from .utils import is_rank_0, get_cuda_actor_critic_from_args, get_strategy_from_args import ray import copy @@ -47,7 +47,7 @@ class DetachedPPOTrainer(DetachedTrainer): def __init__(self, 
experience_maker_holder_name_list: List[str], - strategy: Strategy, + strategy: str, model: str, pretrained: str = None, lora_rank: int = 0, @@ -61,10 +61,12 @@ def __init__(self, dataloader_pin_memory: bool = True, callbacks: List[Callback] = [], **generate_kwargs) -> None: - self.strategy = strategy + # configure strategy + self.strategy = get_strategy_from_args(strategy) # configure models, loss and optimizers with self.strategy.model_init_context(): self.actor, self.critic = get_cuda_actor_critic_from_args(model, pretrained, lora_rank) + self.actor_loss_fn = PolicyLoss(eps_clip) self.critic_loss_fn = ValueLoss(value_clip) if isinstance(self.strategy, ColossalAIStrategy): @@ -75,10 +77,9 @@ def __init__(self, self.critic_optim = Adam(self.critic.parameters(), lr=5e-6) (self.actor, self.actor_optim), (self.critic, self.critic_optim) = \ self.strategy.prepare((self.actor, self.actor_optim), (self.critic, self.critic_optim)) - generate_kwargs = _set_default_generate_kwargs(strategy, generate_kwargs, self.actor) + generate_kwargs = _set_default_generate_kwargs(self.strategy, generate_kwargs, self.actor) super().__init__(experience_maker_holder_name_list, - strategy=strategy, train_batch_size=train_batch_size, buffer_limit=buffer_limit, buffer_cpu_offload=buffer_cpu_offload, diff --git a/applications/ChatGPT/chatgpt/trainer/utils.py b/applications/ChatGPT/chatgpt/trainer/utils.py index bda3292bc146..7bc304dedae2 100644 --- a/applications/ChatGPT/chatgpt/trainer/utils.py +++ b/applications/ChatGPT/chatgpt/trainer/utils.py @@ -2,6 +2,7 @@ from chatgpt.models.bloom import BLOOMActor, BLOOMCritic from chatgpt.models.gpt import GPTActor, GPTCritic from chatgpt.models.opt import OPTActor, OPTCritic +from chatgpt.trainer.strategies import ColossalAIStrategy, DDPStrategy, NaiveStrategy import torch def is_rank_0() -> bool: return not dist.is_initialized() or dist.get_rank() == 0 @@ -19,4 +20,17 @@ def get_cuda_actor_critic_from_args(model:str, pretrained: str = None, lora_rank critic = OPTCritic(pretrained=pretrained, lora_rank=lora_rank).to(torch.cuda.current_device()) else: raise ValueError(f'Unsupported model "{model}"') - return actor, critic \ No newline at end of file + return actor, critic + +def get_strategy_from_args(strategy:str): + if strategy == 'naive': + strategy_ = NaiveStrategy() + elif strategy == 'ddp': + strategy_ = DDPStrategy() + elif strategy == 'colossalai_gemini': + strategy_ = ColossalAIStrategy(stage=3, placement_policy='cuda', initial_scale=2**5) + elif strategy == 'colossalai_zero2': + strategy_ = ColossalAIStrategy(stage=2, placement_policy='cuda') + else: + raise ValueError(f'Unsupported strategy "{strategy}"') + return strategy_ \ No newline at end of file diff --git a/applications/ChatGPT/examples/1m1t.py b/applications/ChatGPT/examples/1m1t.py index 4727493dabdf..624bbe02bf97 100644 --- a/applications/ChatGPT/examples/1m1t.py +++ b/applications/ChatGPT/examples/1m1t.py @@ -13,21 +13,9 @@ from colossalai.nn.optimizer import HybridAdam import ray - +import os def main(args): - # configure strategy - if args.strategy == 'naive': - strategy = NaiveStrategy() - elif args.strategy == 'ddp': - strategy = DDPStrategy() - elif args.strategy == 'colossalai_gemini': - strategy = ColossalAIStrategy(stage=3, placement_policy='cuda', initial_scale=2**5) - elif args.strategy == 'colossalai_zero2': - strategy = ColossalAIStrategy(stage=2, placement_policy='cuda') - else: - raise ValueError(f'Unsupported strategy "{args.strategy}"') - # configure tokenizer if args.model == 'gpt2': 
tokenizer = GPT2Tokenizer.from_pretrained('gpt2') @@ -43,7 +31,7 @@ def main(args): # configure Trainer trainer_ref = DetachedPPOTrainer.options(name="trainer1", num_gpus=1, max_concurrency=2).remote( experience_maker_holder_name_list=["maker1"], - strategy=strategy, + strategy=args.trainer_strategy, model=args.model, pretrained=args.pretrain, lora_rank=args.lora_rank, @@ -64,7 +52,7 @@ def main(args): # configure Experience Maker experience_holder_ref = ExperienceMakerHolder.options(name="maker1", num_gpus=1, max_concurrency=2).remote( detached_trainer_name_list=["trainer1"], - strategy=strategy, + strategy=args.maker_strategy, experience_batch_size=args.experience_batch_size, kl_coef=0.1, #kwargs: @@ -77,12 +65,11 @@ def main(args): debug=args.debug, ) - # trainer send its actor and critic to experience holder. + # trainer send its actor and critic to experience holders. ray.get(trainer_ref.initialize_remote_makers.remote()) # configure sampler dataset = pd.read_csv(args.prompt_path)['prompt'] - sampler = strategy.setup_sampler(dataset) def tokenize_fn(texts): # MUST padding to max length to ensure inputs of all ranks have the same length @@ -92,8 +79,8 @@ def tokenize_fn(texts): trainer_done_ref = trainer_ref.fit.remote(num_episodes=args.num_episodes, max_timesteps=args.max_timesteps, update_timesteps=args.update_timesteps) num_exp_per_maker = args.num_episodes * args.max_timesteps // args.update_timesteps * args.max_epochs + 3 # +3 for fault tolerance - maker_done_ref = experience_holder_ref.workingloop.remote(sampler, tokenize_fn, times=num_exp_per_maker) - + maker_done_ref = experience_holder_ref.workingloop.remote(dataset, tokenize_fn, times=num_exp_per_maker) + ray.get([trainer_done_ref, maker_done_ref]) # save model checkpoint after fitting @@ -103,11 +90,13 @@ def tokenize_fn(texts): trainer_ref.strategy_save_actor_optim.remote('actor_optim_checkpoint_prompts_%d.pt' % (torch.cuda.current_device()), only_rank0=False) - if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('prompt_path') - parser.add_argument('--strategy', + parser.add_argument('--trainer_strategy', + choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'], + default='naive') + parser.add_argument('--maker_strategy', choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'], default='naive') parser.add_argument('--model', default='gpt2', choices=['gpt2', 'bloom', 'opt']) @@ -124,5 +113,5 @@ def tokenize_fn(texts): parser.add_argument('--debug', action='store_true') args = parser.parse_args() - ray.init() + ray.init(namespace=os.environ["RAY_NAMESPACE"]) main(args) diff --git a/applications/ChatGPT/examples/1m1t.sh b/applications/ChatGPT/examples/1m1t.sh index 36258f6da795..927ab5a1c0ac 100644 --- a/applications/ChatGPT/examples/1m1t.sh +++ b/applications/ChatGPT/examples/1m1t.sh @@ -13,11 +13,12 @@ set_n_least_used_CUDA_VISIBLE_DEVICES() { echo "CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES" } -set_n_least_used_CUDA_VISIBLE_DEVICES 2 +set_n_least_used_CUDA_VISIBLE_DEVICES 3 -# export CUDA_VISIBLE_DEVICES="0,1,2,3" +export CUDA_VISIBLE_DEVICES="0,1,2,3" +export RAY_NAMESPACE="lccsr" python 1m1t.py "./awesome-chatgpt-prompts/prompts.csv" \ - --strategy naive --lora_rank 2 --pretrain "facebook/opt-350m" --model 'opt' \ + --trainer_strategy naive --maker_strategy naive --lora_rank 2 --pretrain "facebook/opt-350m" --model 'opt' \ --num_episodes 10 --max_timesteps 10 --update_timesteps 10 \ - --max_epochs 10 # --debug \ No newline at end of file + --max_epochs 10 # --debug diff 
--git a/applications/ChatGPT/examples/2m1t.py b/applications/ChatGPT/examples/2m1t.py index 6c54a95f1b2f..1682325699d4 100644 --- a/applications/ChatGPT/examples/2m1t.py +++ b/applications/ChatGPT/examples/2m1t.py @@ -13,21 +13,9 @@ from colossalai.nn.optimizer import HybridAdam import ray - +import os def main(args): - # configure strategy - if args.strategy == 'naive': - strategy = NaiveStrategy() - elif args.strategy == 'ddp': - strategy = DDPStrategy() - elif args.strategy == 'colossalai_gemini': - strategy = ColossalAIStrategy(stage=3, placement_policy='cuda', initial_scale=2**5) - elif args.strategy == 'colossalai_zero2': - strategy = ColossalAIStrategy(stage=2, placement_policy='cuda') - else: - raise ValueError(f'Unsupported strategy "{args.strategy}"') - # configure tokenizer if args.model == 'gpt2': tokenizer = GPT2Tokenizer.from_pretrained('gpt2') @@ -43,7 +31,7 @@ def main(args): # configure Trainer trainer_ref = DetachedPPOTrainer.options(name="trainer1", num_gpus=1, max_concurrency=2).remote( experience_maker_holder_name_list=["maker1", "maker2"], - strategy=strategy, + strategy=args.trainer_strategy, model=args.model, pretrained=args.pretrain, lora_rank=args.lora_rank, @@ -64,7 +52,7 @@ def main(args): # configure Experience Maker experience_holder_1_ref = ExperienceMakerHolder.options(name="maker1", num_gpus=1, max_concurrency=2).remote( detached_trainer_name_list=["trainer1"], - strategy=strategy, + strategy=args.maker_strategy, experience_batch_size=args.experience_batch_size, kl_coef=0.1, #kwargs: @@ -79,7 +67,7 @@ def main(args): experience_holder_2_ref = ExperienceMakerHolder.options(name="maker2", num_gpus=1, max_concurrency=2).remote( detached_trainer_name_list=["trainer1"], - strategy=strategy, + strategy=args.maker_strategy, experience_batch_size=args.experience_batch_size, kl_coef=0.1, #kwargs: @@ -97,7 +85,6 @@ def main(args): # configure sampler dataset = pd.read_csv(args.prompt_path)['prompt'] - sampler = strategy.setup_sampler(dataset) def tokenize_fn(texts): # MUST padding to max length to ensure inputs of all ranks have the same length @@ -107,8 +94,8 @@ def tokenize_fn(texts): trainer_done_ref = trainer_ref.fit.remote(num_episodes=args.num_episodes, max_timesteps=args.max_timesteps, update_timesteps=args.update_timesteps) num_exp_per_maker = args.num_episodes * args.max_timesteps // args.update_timesteps * args.max_epochs // 2 + 3 # +3 for fault tolerance - maker_1_done_ref = experience_holder_1_ref.workingloop.remote(sampler, tokenize_fn, times=num_exp_per_maker) - maker_2_done_ref = experience_holder_2_ref.workingloop.remote(sampler, tokenize_fn, times=num_exp_per_maker) + maker_1_done_ref = experience_holder_1_ref.workingloop.remote(dataset, tokenize_fn, times=num_exp_per_maker) + maker_2_done_ref = experience_holder_2_ref.workingloop.remote(dataset, tokenize_fn, times=num_exp_per_maker) ray.get([trainer_done_ref, maker_1_done_ref, maker_2_done_ref]) @@ -122,7 +109,10 @@ def tokenize_fn(texts): if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('prompt_path') - parser.add_argument('--strategy', + parser.add_argument('--trainer_strategy', + choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'], + default='naive') + parser.add_argument('--maker_strategy', choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'], default='naive') parser.add_argument('--model', default='gpt2', choices=['gpt2', 'bloom', 'opt']) @@ -139,5 +129,5 @@ def tokenize_fn(texts): parser.add_argument('--debug', action='store_true') 
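Passing the strategy to each actor as a plain string and rebuilding it inside the actor with get_strategy_from_args is the point of this patch: a constructed strategy carries process-local state (communicator handles, device context) that does not survive pickling into another worker, while a string always does. The shape of the pattern, with a stand-in strategy class:

    import ray

    class NaiveStrategyLike:
        """Stand-in for the real strategies, which hold process-local state."""
        def __init__(self):
            self.local_handle = object()   # e.g. comm groups, CUDA context

    def build_strategy(name: str):
        # mirrors get_strategy_from_args: picklable string in, fresh object out
        if name == "naive":
            return NaiveStrategyLike()
        raise ValueError(f'Unsupported strategy "{name}"')

    @ray.remote
    class TrainerLike:
        def __init__(self, strategy: str):
            # constructed inside the actor process, never shipped across
            self.strategy = build_strategy(strategy)

    ray.init()
    trainer = TrainerLike.remote("naive")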
args = parser.parse_args() - ray.init() + ray.init(namespace=os.environ["RAY_NAMESPACE"]) main(args) diff --git a/applications/ChatGPT/examples/2m1t.sh b/applications/ChatGPT/examples/2m1t.sh index f85597c4b87f..f98b75dab12a 100644 --- a/applications/ChatGPT/examples/2m1t.sh +++ b/applications/ChatGPT/examples/2m1t.sh @@ -15,9 +15,10 @@ set_n_least_used_CUDA_VISIBLE_DEVICES() { set_n_least_used_CUDA_VISIBLE_DEVICES 3 -# export CUDA_VISIBLE_DEVICES="0,1,2,3" +export CUDA_VISIBLE_DEVICES="0,1,2,3" +export RAY_NAMESPACE="lccsr" python 2m1t.py "./awesome-chatgpt-prompts/prompts.csv" \ - --strategy naive --lora_rank 2 --pretrain "facebook/opt-350m" --model 'opt' \ + --trainer_strategy naive --maker_strategy naive --lora_rank 2 --pretrain "facebook/opt-350m" --model 'opt' \ --num_episodes 10 --max_timesteps 10 --update_timesteps 10 \ --max_epochs 10 # --debug diff --git a/applications/ChatGPT/examples/2m2t.py b/applications/ChatGPT/examples/2m2t.py index 322e60837fd3..12114f2b99b5 100644 --- a/applications/ChatGPT/examples/2m2t.py +++ b/applications/ChatGPT/examples/2m2t.py @@ -16,28 +16,33 @@ import os import socket +import multiprocessing + def get_free_port(): with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: s.bind(('', 0)) return s.getsockname()[1] + def get_local_ip(): with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s: s.connect(('8.8.8.8', 80)) return s.getsockname()[0] + def launch_trainer(args, env_info): + ray.init() # manually set environs os.environ["RANK"] = env_info['rank'] os.environ["LOCAL_RANK"] = env_info['local_rank'] os.environ["WORLD_SIZE"] = env_info['world_size'] - os.environ['MASTER_PORT'] = get_free_port() - os.environ['MASTER_ADDR'] = get_local_ip() + os.environ['MASTER_PORT'] = env_info['master_port'] + os.environ['MASTER_ADDR'] = env_info['master_addr'] rank = int(os.environ['RANK']) # configure Trainer strategy - # ! Supposed DDP ! + # ! Supposed to be DDP ! if args.trainer_strategy == 'naive': strategy = NaiveStrategy() elif args.trainer_strategy == 'ddp': @@ -48,7 +53,7 @@ def launch_trainer(args, env_info): strategy = ColossalAIStrategy(stage=2, placement_policy='cuda') else: raise ValueError(f'Unsupported strategy "{args.trainer_strategy}"') - + # configure tokenizer if args.model == 'gpt2': tokenizer = GPT2Tokenizer.from_pretrained('gpt2') @@ -60,13 +65,13 @@ def launch_trainer(args, env_info): tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m") else: raise ValueError(f'Unsupported model "{args.model}"') - + # configure Trainer if rank == 0: name = "trainer1" elif rank == 1: name = "trainer2" - trainer_ref = DetachedPPOTrainer.options(name=name, num_gpus=1, max_concurrency=2).remote( + trainer_ref = DetachedPPOTrainer.options(name=name, namespace=os.environ["RAY_NAMESPACE"], num_gpus=1, max_concurrency=2).remote( experience_maker_holder_name_list=["maker1", "maker2"], strategy=strategy, model=args.model, @@ -85,10 +90,13 @@ def launch_trainer(args, env_info): eos_token_id=tokenizer.eos_token_id, debug=args.debug, ) - + + # trainer send its actor and critic to experience holder. 
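The RAY_NAMESPACE plumbing matters because each spawned role process calls ray.init() on its own: two separate Ray jobs can only see each other's named actors when they share an explicit namespace. In miniature, with a toy actor:

    import ray

    ray.init(namespace="lccsr")          # the value the shell scripts export

    @ray.remote
    class Ping:
        def ping(self) -> str:
            return "pong"

    registry = Ping.options(name="trainer1").remote()
    ray.get(registry.ping.remote())      # ensure the name is registered

    # any other process that did ray.init(namespace="lccsr") can now do:
    handle = ray.get_actor("trainer1", namespace="lccsr")
    print(ray.get(handle.ping.remote()))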
+ ray.get(trainer_ref.initialize_remote_makers.remote()) + trainer_done_ref = trainer_ref.fit.remote(num_episodes=args.num_episodes, max_timesteps=args.max_timesteps, update_timesteps=args.update_timesteps) ray.get(trainer_done_ref) - + # save model checkpoint after fitting trainer_ref.strategy_save_actor.remote(args.save_path, only_rank0=True) # save optimizer checkpoint on all ranks @@ -96,16 +104,19 @@ def launch_trainer(args, env_info): trainer_ref.strategy_save_actor_optim.remote('actor_optim_checkpoint_prompts_%d.pt' % (torch.cuda.current_device()), only_rank0=False) + + def launch_maker(args, env_info): + ray.init() os.environ["RANK"] = env_info['rank'] os.environ["LOCAL_RANK"] = env_info['local_rank'] os.environ["WORLD_SIZE"] = env_info['world_size'] - os.environ['MASTER_PORT'] = get_free_port() - os.environ['MASTER_ADDR'] = get_local_ip() + os.environ['MASTER_PORT'] = env_info['master_port'] + os.environ['MASTER_ADDR'] = env_info['master_addr'] rank = int(os.environ['RANK']) - + # configure Trainer strategy - # ! Supposed naive ! + # ! Supposed to be DDP ! if args.maker_strategy == 'naive': strategy = NaiveStrategy() elif args.maker_strategy == 'ddp': @@ -134,8 +145,8 @@ def launch_maker(args, env_info): name = "maker1" elif rank == 1: name = "maker2" - experience_holder_ref = ExperienceMakerHolder.options(name=name, num_gpus=1, max_concurrency=2).remote( - detached_trainer_name_list=["trainer1"], + experience_holder_ref = ExperienceMakerHolder.options(name=name, namespace=os.environ["RAY_NAMESPACE"], num_gpus=1, max_concurrency=2).remote( + detached_trainer_name_list=["trainer1", "trainer2"], strategy=strategy, experience_batch_size=args.experience_batch_size, kl_coef=0.1, @@ -151,47 +162,58 @@ def launch_maker(args, env_info): # configure sampler dataset = pd.read_csv(args.prompt_path)['prompt'] sampler = strategy.setup_sampler(dataset) + def tokenize_fn(texts): # MUST padding to max length to ensure inputs of all ranks have the same length # Different length may lead to hang when using gemini, as different generation steps batch = tokenizer(texts, return_tensors='pt', max_length=96, padding='max_length', truncation=True) return {k: v.cuda() for k, v in batch.items()} - - num_exp_per_maker = args.num_episodes * args.max_timesteps // args.update_timesteps * args.max_epochs + 3 # +3 for fault tolerance + + num_exp_per_maker = args.num_episodes * args.max_timesteps // args.update_timesteps * args.max_epochs + 3 # +3 for fault tolerance maker_done_ref = experience_holder_ref.workingloop.remote(sampler, tokenize_fn, times=num_exp_per_maker) - + ray.get(maker_done_ref) +def spawn_fn(rank, args, env_info_list): + if rank == 0 or rank == 1: + launch_trainer(args, env_info_list[rank]) + elif rank == 2 or rank == 3: + launch_maker(args, env_info_list[rank]) + + def main(args): + master_addr = str(get_local_ip()) # trainer_env_info - env_info_trainer_1 = {'local_rank' : 0, - 'rank' : 0, - 'world_size' : 2} - env_info_trainer_2 = {'local_rank' : 0, - 'rank' : 1, - 'world_size' : 2} + trainer_port = str(get_free_port()) + env_info_trainer_1 = {'local_rank' : '0', + 'rank' : '0', + 'world_size' : '2', + 'master_port' : trainer_port, + 'master_addr' : master_addr} + env_info_trainer_2 = {'local_rank' : '0', + 'rank' : '1', + 'world_size' : '2', + 'master_port' : trainer_port, + 'master_addr' : master_addr} # maker_env_info - env_info_maker_1 = {'local_rank' : 0, - 'rank' : 0, - 'world_size' : 2} - env_info_maker_2 = {'local_rank' : 0, - 'rank' : 1, - 'world_size' : 2} - - process_trainer_1 = 
torch.multiprocessing.Process(target=launch_trainer, args=(args, env_info_trainer_1)) - process_trainer_2 = torch.multiprocessing.Process(target=launch_trainer, args=(args, env_info_trainer_2)) - process_maker_1 = torch.multiprocessing.Process(target=launch_maker, args=(args, env_info_maker_1)) - process_maker_2 = torch.multiprocessing.Process(target=launch_maker, args=(args, env_info_maker_2)) + maker_port = str(get_free_port()) + env_info_maker_1 = {'local_rank' : '0', + 'rank' : '0', + 'world_size' : '2', + 'master_port' : maker_port, + 'master_addr' : master_addr} + env_info_maker_2 = {'local_rank' : '0', + 'rank' : '1', + 'world_size' : '2', + 'master_port': maker_port, + 'master_addr' : master_addr} - process_trainer_1.start() - process_trainer_2.start() - process_maker_1.start() - process_maker_2.start() - - process_trainer_1.join() - process_trainer_2.join() - process_maker_1.join() - process_maker_2.join() + torch.multiprocessing.spawn(spawn_fn, args=(args, [env_info_trainer_1, + env_info_trainer_2, + env_info_maker_1, + env_info_maker_2]), + nprocs=4) + if __name__ == '__main__': parser = argparse.ArgumentParser() @@ -216,5 +238,4 @@ def main(args): parser.add_argument('--debug', action='store_true') args = parser.parse_args() - ray.init() main(args) diff --git a/applications/ChatGPT/examples/2m2t.sh b/applications/ChatGPT/examples/2m2t.sh index e69de29bb2d1..b1fb23e58757 100644 --- a/applications/ChatGPT/examples/2m2t.sh +++ b/applications/ChatGPT/examples/2m2t.sh @@ -0,0 +1,24 @@ +set_n_least_used_CUDA_VISIBLE_DEVICES() { + local n=${1:-"9999"} + echo "GPU Memory Usage:" + local FIRST_N_GPU_IDS=$(nvidia-smi --query-gpu=memory.used --format=csv \ + | tail -n +2 \ + | nl -v 0 \ + | tee /dev/tty \ + | sort -g -k 2 \ + | awk '{print $1}' \ + | head -n $n) + export CUDA_VISIBLE_DEVICES=$(echo $FIRST_N_GPU_IDS | sed 's/ /,/g') + echo "Now CUDA_VISIBLE_DEVICES is set to:" + echo "CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES" +} + +set_n_least_used_CUDA_VISIBLE_DEVICES 2 + +export CUDA_VISIBLE_DEVICES="0,1,2,3" +export RAY_NAMESPACE="lccsr" + +python 2m2t.py "./awesome-chatgpt-prompts/prompts.csv" \ + --maker_strategy naive --trainer_strategy ddp --lora_rank 2 \ + --num_episodes 10 --max_timesteps 10 --update_timesteps 10 \ + --max_epochs 10 --debug \ No newline at end of file From 9451a54b8b95d9ea4b8b40c536deaff2de8c2936 Mon Sep 17 00:00:00 2001 From: csric Date: Tue, 28 Mar 2023 17:10:46 +0800 Subject: [PATCH 21/36] facing actor's init order issue --- .../chatgpt/experience_maker/detached.py | 6 +- .../ChatGPT/chatgpt/trainer/detached_ppo.py | 6 +- applications/ChatGPT/chatgpt/trainer/utils.py | 12 +++- applications/ChatGPT/examples/2m2t.py | 55 +++---------------- 4 files changed, 28 insertions(+), 51 deletions(-) diff --git a/applications/ChatGPT/chatgpt/experience_maker/detached.py b/applications/ChatGPT/chatgpt/experience_maker/detached.py index cd529d4039e8..3052009aa6a7 100644 --- a/applications/ChatGPT/chatgpt/experience_maker/detached.py +++ b/applications/ChatGPT/chatgpt/experience_maker/detached.py @@ -8,7 +8,7 @@ from chatgpt.models.base import Actor, Critic, RewardModel from chatgpt.trainer.strategies.sampler import DistributedSampler from chatgpt.trainer.strategies import Strategy -from chatgpt.trainer.utils import is_rank_0, get_strategy_from_args +from chatgpt.trainer.utils import is_rank_0, get_strategy_from_args, set_dist_env from copy import deepcopy from threading import Lock import time @@ -28,9 +28,13 @@ class ExperienceMakerHolder: def __init__(self, 
detached_trainer_name_list: List[str], strategy: str, + env_info: Dict[str, str] = None, experience_batch_size: int = 8, kl_coef: float = 0.1, **generate_kwargs): + # set environment variables + if env_info: + set_dist_env(env_info=env_info) self.target_trainer_list = [] for name in detached_trainer_name_list: self.target_trainer_list.append(ray.get_actor(name, namespace=os.environ["RAY_NAMESPACE"])) diff --git a/applications/ChatGPT/chatgpt/trainer/detached_ppo.py b/applications/ChatGPT/chatgpt/trainer/detached_ppo.py index b338fc93881e..6b6395367bf9 100644 --- a/applications/ChatGPT/chatgpt/trainer/detached_ppo.py +++ b/applications/ChatGPT/chatgpt/trainer/detached_ppo.py @@ -18,7 +18,7 @@ from .detached_base import DetachedTrainer from .callbacks import Callback from .strategies import Strategy -from .utils import is_rank_0, get_cuda_actor_critic_from_args, get_strategy_from_args +from .utils import is_rank_0, get_cuda_actor_critic_from_args, get_strategy_from_args, set_dist_env import ray import copy @@ -49,6 +49,7 @@ def __init__(self, experience_maker_holder_name_list: List[str], strategy: str, model: str, + env_info: Dict[str, str] = None, pretrained: str = None, lora_rank: int = 0, train_batch_size: int = 8, @@ -61,6 +62,9 @@ def __init__(self, dataloader_pin_memory: bool = True, callbacks: List[Callback] = [], **generate_kwargs) -> None: + # set environment variables + if env_info: + set_dist_env(env_info=env_info) # configure strategy self.strategy = get_strategy_from_args(strategy) # configure models, loss and optimizers diff --git a/applications/ChatGPT/chatgpt/trainer/utils.py b/applications/ChatGPT/chatgpt/trainer/utils.py index 7bc304dedae2..1d379540f0c5 100644 --- a/applications/ChatGPT/chatgpt/trainer/utils.py +++ b/applications/ChatGPT/chatgpt/trainer/utils.py @@ -1,9 +1,12 @@ import torch.distributed as dist +from typing import Any, Callable, Dict, List, Optional from chatgpt.models.bloom import BLOOMActor, BLOOMCritic from chatgpt.models.gpt import GPTActor, GPTCritic from chatgpt.models.opt import OPTActor, OPTCritic from chatgpt.trainer.strategies import ColossalAIStrategy, DDPStrategy, NaiveStrategy import torch +import os + def is_rank_0() -> bool: return not dist.is_initialized() or dist.get_rank() == 0 @@ -33,4 +36,11 @@ def get_strategy_from_args(strategy:str): strategy_ = ColossalAIStrategy(stage=2, placement_policy='cuda') else: raise ValueError(f'Unsupported strategy "{strategy}"') - return strategy_ \ No newline at end of file + return strategy_ + +def set_dist_env(env_info: Dict[str, str]): + os.environ["RANK"] = env_info['rank'] + os.environ["LOCAL_RANK"] = env_info['local_rank'] + os.environ["WORLD_SIZE"] = env_info['world_size'] + os.environ['MASTER_PORT'] = env_info['master_port'] + os.environ['MASTER_ADDR'] = env_info['master_addr'] \ No newline at end of file diff --git a/applications/ChatGPT/examples/2m2t.py b/applications/ChatGPT/examples/2m2t.py index 12114f2b99b5..984538b25b18 100644 --- a/applications/ChatGPT/examples/2m2t.py +++ b/applications/ChatGPT/examples/2m2t.py @@ -33,27 +33,7 @@ def get_local_ip(): def launch_trainer(args, env_info): ray.init() - # manually set environs - os.environ["RANK"] = env_info['rank'] - os.environ["LOCAL_RANK"] = env_info['local_rank'] - os.environ["WORLD_SIZE"] = env_info['world_size'] - os.environ['MASTER_PORT'] = env_info['master_port'] - os.environ['MASTER_ADDR'] = env_info['master_addr'] - rank = int(os.environ['RANK']) - - # configure Trainer strategy - # ! Supposed to be DDP ! 
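
The block being removed here is the point of this commit: environment setup moves out of the launcher functions and into the Ray actors themselves, via the new `env_info` constructor argument and the `set_dist_env` helper added to `chatgpt/trainer/utils.py` above. Each Ray actor is a separate OS process, so the variables that `torch.distributed` reads at rendezvous time must be set inside the actor process before the strategy builds its process group; exporting them in the driver would have no effect. A condensed, runnable sketch of the mechanism, using only standard `torch.distributed` APIs (the single-rank values and the `gloo` backend are assumptions chosen so the example needs no GPU):

    import os
    import torch.distributed as dist

    def set_dist_env(env_info):
        # mirrors chatgpt.trainer.utils.set_dist_env: stage the rendezvous variables
        os.environ["RANK"] = env_info['rank']
        os.environ["LOCAL_RANK"] = env_info['local_rank']
        os.environ["WORLD_SIZE"] = env_info['world_size']
        os.environ['MASTER_PORT'] = env_info['master_port']
        os.environ['MASTER_ADDR'] = env_info['master_addr']

    # inside the actor process, before the strategy sets up its process group:
    set_dist_env({'local_rank': '0', 'rank': '0', 'world_size': '1',
                  'master_port': '29500', 'master_addr': '127.0.0.1'})
    dist.init_process_group(backend='gloo', init_method='env://')  # env:// reads the variables above
    assert dist.get_rank() == 0 and dist.get_world_size() == 1
    dist.destroy_process_group()
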
- if args.trainer_strategy == 'naive': - strategy = NaiveStrategy() - elif args.trainer_strategy == 'ddp': - strategy = DDPStrategy() - elif args.trainer_strategy == 'colossalai_gemini': - strategy = ColossalAIStrategy(stage=3, placement_policy='cuda', initial_scale=2**5) - elif args.trainer_strategy == 'colossalai_zero2': - strategy = ColossalAIStrategy(stage=2, placement_policy='cuda') - else: - raise ValueError(f'Unsupported strategy "{args.trainer_strategy}"') - + rank = int(env_info['rank']) # configure tokenizer if args.model == 'gpt2': tokenizer = GPT2Tokenizer.from_pretrained('gpt2') @@ -73,8 +53,9 @@ def launch_trainer(args, env_info): name = "trainer2" trainer_ref = DetachedPPOTrainer.options(name=name, namespace=os.environ["RAY_NAMESPACE"], num_gpus=1, max_concurrency=2).remote( experience_maker_holder_name_list=["maker1", "maker2"], - strategy=strategy, + strategy=args.trainer_strategy, model=args.model, + env_info=env_info, pretrained=args.pretrain, lora_rank=args.lora_rank, train_batch_size=args.train_batch_size, @@ -104,30 +85,9 @@ def launch_trainer(args, env_info): trainer_ref.strategy_save_actor_optim.remote('actor_optim_checkpoint_prompts_%d.pt' % (torch.cuda.current_device()), only_rank0=False) - - def launch_maker(args, env_info): ray.init() - os.environ["RANK"] = env_info['rank'] - os.environ["LOCAL_RANK"] = env_info['local_rank'] - os.environ["WORLD_SIZE"] = env_info['world_size'] - os.environ['MASTER_PORT'] = env_info['master_port'] - os.environ['MASTER_ADDR'] = env_info['master_addr'] - rank = int(os.environ['RANK']) - - # configure Trainer strategy - # ! Supposed to be DDP ! - if args.maker_strategy == 'naive': - strategy = NaiveStrategy() - elif args.maker_strategy == 'ddp': - strategy = DDPStrategy() - elif args.maker_strategy == 'colossalai_gemini': - strategy = ColossalAIStrategy(stage=3, placement_policy='cuda', initial_scale=2**5) - elif args.maker_strategy == 'colossalai_zero2': - strategy = ColossalAIStrategy(stage=2, placement_policy='cuda') - else: - raise ValueError(f'Unsupported strategy "{args.maker_strategy}"') - + rank = int(env_info['rank']) # configure tokenizer if args.model == 'gpt2': tokenizer = GPT2Tokenizer.from_pretrained('gpt2') @@ -139,7 +99,6 @@ def launch_maker(args, env_info): tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m") else: raise ValueError(f'Unsupported model "{args.model}"') - # configure Experience Maker if rank == 0: name = "maker1" @@ -147,7 +106,8 @@ def launch_maker(args, env_info): name = "maker2" experience_holder_ref = ExperienceMakerHolder.options(name=name, namespace=os.environ["RAY_NAMESPACE"], num_gpus=1, max_concurrency=2).remote( detached_trainer_name_list=["trainer1", "trainer2"], - strategy=strategy, + strategy=args.maker_strategy, + env_info=env_info, experience_batch_size=args.experience_batch_size, kl_coef=0.1, #kwargs: @@ -161,7 +121,6 @@ def launch_maker(args, env_info): ) # configure sampler dataset = pd.read_csv(args.prompt_path)['prompt'] - sampler = strategy.setup_sampler(dataset) def tokenize_fn(texts): # MUST padding to max length to ensure inputs of all ranks have the same length @@ -170,7 +129,7 @@ def tokenize_fn(texts): return {k: v.cuda() for k, v in batch.items()} num_exp_per_maker = args.num_episodes * args.max_timesteps // args.update_timesteps * args.max_epochs + 3 # +3 for fault tolerance - maker_done_ref = experience_holder_ref.workingloop.remote(sampler, tokenize_fn, times=num_exp_per_maker) + maker_done_ref = experience_holder_ref.workingloop.remote(dataset, tokenize_fn, 
times=num_exp_per_maker) ray.get(maker_done_ref) From 96265180067febb93ba9aec4283a615451c65561 Mon Sep 17 00:00:00 2001 From: csric Date: Tue, 28 Mar 2023 18:02:59 +0800 Subject: [PATCH 22/36] facing ddp model update issue (need unwarp ddp) --- .../chatgpt/experience_maker/detached.py | 2 +- applications/ChatGPT/examples/2m2t.py | 176 +++++++++--------- applications/ChatGPT/examples/2m2t.sh | 2 +- 3 files changed, 93 insertions(+), 87 deletions(-) diff --git a/applications/ChatGPT/chatgpt/experience_maker/detached.py b/applications/ChatGPT/chatgpt/experience_maker/detached.py index 3052009aa6a7..435a10d9ce51 100644 --- a/applications/ChatGPT/chatgpt/experience_maker/detached.py +++ b/applications/ChatGPT/chatgpt/experience_maker/detached.py @@ -88,7 +88,7 @@ def _send_experience(self, experience): min_length = temp_length chosen_trainer = target_trainer if 'debug' in self.generate_kwargs and self.generate_kwargs['debug'] == True: - print("[maker] sending exp") + print(f"[maker] sending exp to {chosen_trainer}") chosen_trainer.buffer_append.remote(experience) def workingloop(self, dataset, tokenizer: Optional[Callable[[Any], dict]] = None, times=5000 * 50000): diff --git a/applications/ChatGPT/examples/2m2t.py b/applications/ChatGPT/examples/2m2t.py index 984538b25b18..d50a21099d65 100644 --- a/applications/ChatGPT/examples/2m2t.py +++ b/applications/ChatGPT/examples/2m2t.py @@ -30,10 +30,37 @@ def get_local_ip(): s.connect(('8.8.8.8', 80)) return s.getsockname()[0] - -def launch_trainer(args, env_info): +def main(args): + master_addr = str(get_local_ip()) + # trainer_env_info + trainer_port = str(get_free_port()) + env_info_trainer_1 = {'local_rank' : '0', + 'rank' : '0', + 'world_size' : '2', + 'master_port' : trainer_port, + 'master_addr' : master_addr} + env_info_trainer_2 = {'local_rank' : '0', + 'rank' : '1', + 'world_size' : '2', + 'master_port' : trainer_port, + 'master_addr' : master_addr} + # maker_env_info + maker_port = str(get_free_port()) + env_info_maker_1 = {'local_rank' : '0', + 'rank' : '0', + 'world_size' : '2', + 'master_port' : maker_port, + 'master_addr' : master_addr} + env_info_maker_2 = {'local_rank' : '0', + 'rank' : '1', + 'world_size' : '2', + 'master_port': maker_port, + 'master_addr' : master_addr} + print([env_info_trainer_1, + env_info_trainer_2, + env_info_maker_1, + env_info_maker_2]) ray.init() - rank = int(env_info['rank']) # configure tokenizer if args.model == 'gpt2': tokenizer = GPT2Tokenizer.from_pretrained('gpt2') @@ -45,17 +72,13 @@ def launch_trainer(args, env_info): tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m") else: raise ValueError(f'Unsupported model "{args.model}"') - + # configure Trainer - if rank == 0: - name = "trainer1" - elif rank == 1: - name = "trainer2" - trainer_ref = DetachedPPOTrainer.options(name=name, namespace=os.environ["RAY_NAMESPACE"], num_gpus=1, max_concurrency=2).remote( + trainer_1_ref = DetachedPPOTrainer.options(name="trainer1", namespace=os.environ["RAY_NAMESPACE"], num_gpus=1, max_concurrency=2).remote( experience_maker_holder_name_list=["maker1", "maker2"], strategy=args.trainer_strategy, model=args.model, - env_info=env_info, + env_info=env_info_trainer_1, pretrained=args.pretrain, lora_rank=args.lora_rank, train_batch_size=args.train_batch_size, @@ -72,42 +95,32 @@ def launch_trainer(args, env_info): debug=args.debug, ) - # trainer send its actor and critic to experience holder. 
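
Note the `max_concurrency=2` on every `.options(...)` call in this rewrite; that detail is load-bearing. A Ray actor executes one method at a time by default, so a trainer blocked inside its fit loop could never service the makers' `buffer_append` calls, and the pipeline would deadlock. Allowing a second concurrent thread lets experience arrive while training runs. A runnable toy reproduction of the problem being avoided (the `Worker` actor is a hypothetical stand-in, not part of the chatgpt package):

    import time
    import ray

    ray.init()

    @ray.remote(max_concurrency=2)  # with the default of 1, loop() would starve push()
    class Worker:
        def __init__(self):
            self.inbox = []

        def loop(self):
            # long-running method, standing in for DetachedTrainer.fit
            while len(self.inbox) < 3:
                time.sleep(0.05)
            return len(self.inbox)

        def push(self, item):
            # standing in for buffer_append, called from another process
            self.inbox.append(item)

    w = Worker.remote()
    done = w.loop.remote()
    for i in range(3):
        w.push.remote(i)
    print(ray.get(done))  # prints 3; with max_concurrency=1 this would hang forever
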
- ray.get(trainer_ref.initialize_remote_makers.remote()) - - trainer_done_ref = trainer_ref.fit.remote(num_episodes=args.num_episodes, max_timesteps=args.max_timesteps, update_timesteps=args.update_timesteps) - ray.get(trainer_done_ref) - - # save model checkpoint after fitting - trainer_ref.strategy_save_actor.remote(args.save_path, only_rank0=True) - # save optimizer checkpoint on all ranks - if args.need_optim_ckpt: - trainer_ref.strategy_save_actor_optim.remote('actor_optim_checkpoint_prompts_%d.pt' % (torch.cuda.current_device()), - only_rank0=False) + trainer_2_ref = DetachedPPOTrainer.options(name="trainer2", namespace=os.environ["RAY_NAMESPACE"], num_gpus=1, max_concurrency=2).remote( + experience_maker_holder_name_list=["maker1", "maker2"], + strategy=args.trainer_strategy, + model=args.model, + env_info=env_info_trainer_2, + pretrained=args.pretrain, + lora_rank=args.lora_rank, + train_batch_size=args.train_batch_size, + buffer_limit=16, + experience_batch_size=args.experience_batch_size, + max_epochs=args.max_epochs, + #kwargs: + max_length=128, + do_sample=True, + temperature=1.0, + top_k=50, + pad_token_id=tokenizer.pad_token_id, + eos_token_id=tokenizer.eos_token_id, + debug=args.debug, + ) -def launch_maker(args, env_info): - ray.init() - rank = int(env_info['rank']) - # configure tokenizer - if args.model == 'gpt2': - tokenizer = GPT2Tokenizer.from_pretrained('gpt2') - tokenizer.pad_token = tokenizer.eos_token - elif args.model == 'bloom': - tokenizer = BloomTokenizerFast.from_pretrained(args.pretrain) - tokenizer.pad_token = tokenizer.eos_token - elif args.model == 'opt': - tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m") - else: - raise ValueError(f'Unsupported model "{args.model}"') # configure Experience Maker - if rank == 0: - name = "maker1" - elif rank == 1: - name = "maker2" - experience_holder_ref = ExperienceMakerHolder.options(name=name, namespace=os.environ["RAY_NAMESPACE"], num_gpus=1, max_concurrency=2).remote( + experience_holder_1_ref = ExperienceMakerHolder.options(name="maker1", namespace=os.environ["RAY_NAMESPACE"], num_gpus=1, max_concurrency=2).remote( detached_trainer_name_list=["trainer1", "trainer2"], strategy=args.maker_strategy, - env_info=env_info, + env_info=env_info_maker_1, experience_batch_size=args.experience_batch_size, kl_coef=0.1, #kwargs: @@ -119,59 +132,52 @@ def launch_maker(args, env_info): eos_token_id=tokenizer.eos_token_id, debug=args.debug, ) + + experience_holder_2_ref = ExperienceMakerHolder.options(name="maker2", namespace=os.environ["RAY_NAMESPACE"], num_gpus=1, max_concurrency=2).remote( + detached_trainer_name_list=["trainer1", "trainer2"], + strategy=args.maker_strategy, + env_info=env_info_maker_2, + experience_batch_size=args.experience_batch_size, + kl_coef=0.1, + #kwargs: + max_length=128, + do_sample=True, + temperature=1.0, + top_k=50, + pad_token_id=tokenizer.pad_token_id, + eos_token_id=tokenizer.eos_token_id, + debug=args.debug, + ) + + # trainer send its actor and critic to experience holders. 
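
The comment above captures the direction of weight synchronization in this design: the trainer owns the learned actor and critic and pushes copies out to every maker, once at startup through `initialize_remote_makers` and again after each learning phase through `_update_remote_makers`; the `TODO: balance duty` just below refers to the fact that only trainer1 currently performs the initial push. Stripped of strategies and LoRA, the handshake looks roughly like the following sketch (both classes are illustrative stand-ins, and a plain `state_dict` is shipped here where the real code sends whole model objects):

    import ray
    import torch.nn as nn

    ray.init()

    @ray.remote
    class Maker:  # stand-in for ExperienceMakerHolder
        def __init__(self):
            self.policy = nn.Linear(8, 8)

        def update_model(self, state_dict):
            self.policy.load_state_dict(state_dict)

    @ray.remote
    class Trainer:  # stand-in for DetachedPPOTrainer
        def __init__(self, makers):
            self.policy = nn.Linear(8, 8)
            self.makers = makers

        def sync_to_makers(self):
            # CPU tensors serialize cheaply through Ray's object store
            sd = {k: v.cpu() for k, v in self.policy.state_dict().items()}
            ray.get([m.update_model.remote(sd) for m in self.makers])

    makers = [Maker.remote() for _ in range(2)]
    trainer = Trainer.remote(makers)
    ray.get(trainer.sync_to_makers.remote())
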
+ # TODO: balance duty + ray.get(trainer_1_ref.initialize_remote_makers.remote()) + # configure sampler dataset = pd.read_csv(args.prompt_path)['prompt'] - + def tokenize_fn(texts): # MUST padding to max length to ensure inputs of all ranks have the same length # Different length may lead to hang when using gemini, as different generation steps batch = tokenizer(texts, return_tensors='pt', max_length=96, padding='max_length', truncation=True) return {k: v.cuda() for k, v in batch.items()} + trainer_1_done_ref = trainer_1_ref.fit.remote(num_episodes=args.num_episodes, max_timesteps=args.max_timesteps, update_timesteps=args.update_timesteps) + trainer_2_done_ref = trainer_2_ref.fit.remote(num_episodes=args.num_episodes, max_timesteps=args.max_timesteps, update_timesteps=args.update_timesteps) num_exp_per_maker = args.num_episodes * args.max_timesteps // args.update_timesteps * args.max_epochs + 3 # +3 for fault tolerance - maker_done_ref = experience_holder_ref.workingloop.remote(dataset, tokenize_fn, times=num_exp_per_maker) - - ray.get(maker_done_ref) - -def spawn_fn(rank, args, env_info_list): - if rank == 0 or rank == 1: - launch_trainer(args, env_info_list[rank]) - elif rank == 2 or rank == 3: - launch_maker(args, env_info_list[rank]) + maker_1_done_ref = experience_holder_1_ref.workingloop.remote(dataset, tokenize_fn, times=num_exp_per_maker) + maker_2_done_ref = experience_holder_2_ref.workingloop.remote(dataset, tokenize_fn, times=num_exp_per_maker) - -def main(args): - master_addr = str(get_local_ip()) - # trainer_env_info - trainer_port = str(get_free_port()) - env_info_trainer_1 = {'local_rank' : '0', - 'rank' : '0', - 'world_size' : '2', - 'master_port' : trainer_port, - 'master_addr' : master_addr} - env_info_trainer_2 = {'local_rank' : '0', - 'rank' : '1', - 'world_size' : '2', - 'master_port' : trainer_port, - 'master_addr' : master_addr} - # maker_env_info - maker_port = str(get_free_port()) - env_info_maker_1 = {'local_rank' : '0', - 'rank' : '0', - 'world_size' : '2', - 'master_port' : maker_port, - 'master_addr' : master_addr} - env_info_maker_2 = {'local_rank' : '0', - 'rank' : '1', - 'world_size' : '2', - 'master_port': maker_port, - 'master_addr' : master_addr} - - torch.multiprocessing.spawn(spawn_fn, args=(args, [env_info_trainer_1, - env_info_trainer_2, - env_info_maker_1, - env_info_maker_2]), - nprocs=4) + ray.get([trainer_1_done_ref, trainer_2_done_ref, maker_1_done_ref, maker_2_done_ref]) + # save model checkpoint after fitting + trainer_1_ref.strategy_save_actor.remote(args.save_path, only_rank0=True) + trainer_2_ref.strategy_save_actor.remote(args.save_path, only_rank0=True) + # save optimizer checkpoint on all ranks + if args.need_optim_ckpt: + trainer_1_ref.strategy_save_actor_optim.remote('actor_optim_checkpoint_prompts_%d.pt' % (torch.cuda.current_device()), + only_rank0=False) + trainer_2_ref.strategy_save_actor_optim.remote('actor_optim_checkpoint_prompts_%d.pt' % (torch.cuda.current_device()), + only_rank0=False) if __name__ == '__main__': diff --git a/applications/ChatGPT/examples/2m2t.sh b/applications/ChatGPT/examples/2m2t.sh index b1fb23e58757..c9099245a6d1 100644 --- a/applications/ChatGPT/examples/2m2t.sh +++ b/applications/ChatGPT/examples/2m2t.sh @@ -19,6 +19,6 @@ export CUDA_VISIBLE_DEVICES="0,1,2,3" export RAY_NAMESPACE="lccsr" python 2m2t.py "./awesome-chatgpt-prompts/prompts.csv" \ - --maker_strategy naive --trainer_strategy ddp --lora_rank 2 \ + --maker_strategy naive --trainer_strategy naive --lora_rank 2 \ --num_episodes 10 --max_timesteps 
10 --update_timesteps 10 \ --max_epochs 10 --debug \ No newline at end of file From d6370324cb26d70b4e309f9a724e64afb82861af Mon Sep 17 00:00:00 2001 From: csric Date: Wed, 29 Mar 2023 10:55:11 +0800 Subject: [PATCH 23/36] unwrap ddp actor --- .../chatgpt/experience_maker/detached.py | 5 ++-- .../ChatGPT/chatgpt/trainer/detached_ppo.py | 26 +++++++++++++------ applications/ChatGPT/examples/2m2t.sh | 2 +- 3 files changed, 21 insertions(+), 12 deletions(-) diff --git a/applications/ChatGPT/chatgpt/experience_maker/detached.py b/applications/ChatGPT/chatgpt/experience_maker/detached.py index 435a10d9ce51..4eff4e34a3f8 100644 --- a/applications/ChatGPT/chatgpt/experience_maker/detached.py +++ b/applications/ChatGPT/chatgpt/experience_maker/detached.py @@ -116,10 +116,9 @@ def initialize_experience_maker(self, init_actor: Actor, init_critic: Critic): if 'debug' in self.generate_kwargs and self.generate_kwargs['debug'] == True: print('[maker] INIT') with torch.no_grad(): - actor = init_actor - critic = init_critic - with self.strategy.model_init_context(): + actor = init_actor + critic = init_critic initial_model = deepcopy(actor) reward_model = RewardModel(deepcopy(critic.model), deepcopy(critic.value_head)).to(torch.cuda.current_device()) diff --git a/applications/ChatGPT/chatgpt/trainer/detached_ppo.py b/applications/ChatGPT/chatgpt/trainer/detached_ppo.py index 6b6395367bf9..9adb3ff788e5 100644 --- a/applications/ChatGPT/chatgpt/trainer/detached_ppo.py +++ b/applications/ChatGPT/chatgpt/trainer/detached_ppo.py @@ -23,7 +23,7 @@ import ray import copy -@ray.remote(concurrency_groups={"experience_io": 3, "model_io": 1, "compute": 1}) +@ray.remote(concurrency_groups={"experience_io": 4, "model_io": 1, "compute": 2}) class DetachedPPOTrainer(DetachedTrainer): ''' Detached Trainer for PPO algorithm @@ -101,7 +101,7 @@ def _update_remote_makers(self): for target_holder in self.target_holder_list: # TODO: reduce malloc with torch.no_grad(): - ray.get(target_holder.update_experience_maker.remote(self.actor, self.critic)) + ray.get(target_holder.update_experience_maker.remote(self._get_unwrapped_actor(), self._get_unwrapped_critic())) @ray.method(concurrency_group="model_io") def initialize_remote_makers(self): @@ -111,7 +111,7 @@ def initialize_remote_makers(self): for target_holder in self.target_holder_list: # TODO: reduce malloc with torch.no_grad(): - ray.get(target_holder.initialize_experience_maker.remote(self.actor, self.critic)) + ray.get(target_holder.initialize_experience_maker.remote(self._get_unwrapped_actor(), self._get_unwrapped_critic())) @ray.method(concurrency_group="compute") def training_step(self, experience: Experience) -> Dict[str, float]: @@ -154,11 +154,21 @@ def strategy_save_actor_optim(self, path: str, only_rank0: bool = False) -> None def strategy_save_critic_optim(self, path: str, only_rank0: bool = False) -> None: self.strategy.save_optimizer(self.critic_optim, path, only_rank0) - def get_actor(self): - return self.actor - - def get_critic(self): - return self.critic + def _get_unwrapped_actor(self): + if False: + pass + elif isinstance(self.strategy, DDPStrategy): + return Actor(self.strategy._unwrap_actor(self.actor)) + elif isinstance(self.strategy, NaiveStrategy): + return self.actor + + def _get_unwrapped_critic(self): + if False: + pass + elif isinstance(self.strategy, DDPStrategy): + return self.critic.module + elif isinstance(self.strategy, NaiveStrategy): + return self.critic def _set_default_generate_kwargs(strategy: Strategy, generate_kwargs: dict, actor: 
Actor) -> None:
    origin_model = strategy._unwrap_actor(actor)
diff --git a/applications/ChatGPT/examples/2m2t.sh b/applications/ChatGPT/examples/2m2t.sh
index c9099245a6d1..b1fb23e58757 100644
--- a/applications/ChatGPT/examples/2m2t.sh
+++ b/applications/ChatGPT/examples/2m2t.sh
@@ -19,6 +19,6 @@ export CUDA_VISIBLE_DEVICES="0,1,2,3"
 export RAY_NAMESPACE="lccsr"
 
 python 2m2t.py "./awesome-chatgpt-prompts/prompts.csv" \
-    --maker_strategy naive --trainer_strategy naive --lora_rank 2 \
+    --maker_strategy naive --trainer_strategy ddp --lora_rank 2 \
     --num_episodes 10 --max_timesteps 10 --update_timesteps 10 \
     --max_epochs 10 --debug
\ No newline at end of file

From 7dadc800a152afd439e13d5b4bb1e88e341330ac Mon Sep 17 00:00:00 2001
From: csric
Date: Wed, 29 Mar 2023 15:25:36 +0800
Subject: [PATCH 24/36] checking 1m2t stuck problem

---
 .../chatgpt/experience_maker/detached.py      |   4 +-
 .../ChatGPT/chatgpt/replay_buffer/detached.py |   4 +-
 .../ChatGPT/chatgpt/trainer/detached_base.py  |  10 +-
 applications/ChatGPT/chatgpt/trainer/utils.py |   9 +-
 applications/ChatGPT/examples/1m2t.py         | 183 ++++++++++++++++++
 applications/ChatGPT/examples/1m2t.sh         |  24 +++
 6 files changed, 223 insertions(+), 11 deletions(-)
 create mode 100644 applications/ChatGPT/examples/1m2t.py
 create mode 100644 applications/ChatGPT/examples/1m2t.sh

diff --git a/applications/ChatGPT/chatgpt/experience_maker/detached.py b/applications/ChatGPT/chatgpt/experience_maker/detached.py
index 4eff4e34a3f8..0abfea7e85ce 100644
--- a/applications/ChatGPT/chatgpt/experience_maker/detached.py
+++ b/applications/ChatGPT/chatgpt/experience_maker/detached.py
@@ -19,7 +19,7 @@ class ExperienceMakerHolder:
     '''
     Args:
-        detached_trainer_name_list: str list to get ray actor handleskkk
+        detached_trainer_name_list: str list to get ray actor handles
         strategy:
         experience_batch_size: batch size of generated experience
         kl_coef: the coefficient of kl divergence loss
@@ -120,7 +120,7 @@ def initialize_experience_maker(self, init_actor: Actor, init_critic: Critic):
             actor = init_actor
             critic = init_critic
             initial_model = deepcopy(actor)
-            reward_model = RewardModel(deepcopy(critic.model),
+            reward_model = RewardModel(deepcopy(critic.model),
                                        deepcopy(critic.value_head)).to(torch.cuda.current_device())
 
         self.experience_maker.actor = actor
diff --git a/applications/ChatGPT/chatgpt/replay_buffer/detached.py b/applications/ChatGPT/chatgpt/replay_buffer/detached.py
index 75f5659d1d5e..bcda100496ff 100644
--- a/applications/ChatGPT/chatgpt/replay_buffer/detached.py
+++ b/applications/ChatGPT/chatgpt/replay_buffer/detached.py
@@ -27,7 +27,7 @@ def __init__(self, sample_batch_size: int, tp_world_size: int = 1, limit : int = 0, cpu_offload: bool = True) -> None:
         self.cpu_offload = cpu_offload
         self.sample_batch_size = sample_batch_size
         self.limit = limit
-        self.items = Queue(self.limit)
+        self.items = Queue(self.limit, actor_options={"num_cpus":1})
         self.batch_collector : List[BufferItem] = []
 
     '''
@@ -52,7 +52,7 @@ def append(self, experience: Experience) -> None:
         while len(self.batch_collector) >= self.sample_batch_size:
             items = self.batch_collector[:self.sample_batch_size]
             experience = make_experience_batch(items)
-            self.items.put(experience)
+            self.items.put(experience, block=False)
             self.batch_collector = self.batch_collector[self.sample_batch_size:]
 
     def clear(self) -> None:
diff --git a/applications/ChatGPT/chatgpt/trainer/detached_base.py b/applications/ChatGPT/chatgpt/trainer/detached_base.py
index 3575e60ba264..b7fb3bb24e70 100644
--- a/applications/ChatGPT/chatgpt/trainer/detached_base.py
+++ 
b/applications/ChatGPT/chatgpt/trainer/detached_base.py @@ -16,6 +16,7 @@ import ray import os + class DetachedTrainer(ABC): ''' Base class for detached rlhf trainers. @@ -88,22 +89,23 @@ def fit(self, num_episodes: int = 50000, max_timesteps: int = 500, update_timest self._learn() self._update_remote_makers() self._on_episode_end(episode) - self._on_fit_end() + self._on_fit_end() @ray.method(concurrency_group="experience_io") def buffer_get_length(self): # called by ExperienceMakerHolder if 'debug' in self.generate_kwargs and self.generate_kwargs['debug'] == True: - print("[trainer] telling length") + print("[trainer] telling length") return self.detached_replay_buffer.get_length() @ray.method(concurrency_group="experience_io") def buffer_append(self, experience: Experience): # called by ExperienceMakerHolder if 'debug' in self.generate_kwargs and self.generate_kwargs['debug'] == True: - print(f"[trainer] receiving exp. Current buffer length: {self.detached_replay_buffer.get_length()}") + # print(f"[trainer] receiving exp. Current buffer length: {self.detached_replay_buffer.get_length()}") + print(f"[trainer] receiving exp.") self.detached_replay_buffer.append(experience) - + @ray.method(concurrency_group="experience_io") def _buffer_sample(self): return self.detached_replay_buffer.sample() diff --git a/applications/ChatGPT/chatgpt/trainer/utils.py b/applications/ChatGPT/chatgpt/trainer/utils.py index 1d379540f0c5..cdda39c29f46 100644 --- a/applications/ChatGPT/chatgpt/trainer/utils.py +++ b/applications/ChatGPT/chatgpt/trainer/utils.py @@ -7,11 +7,12 @@ import torch import os + def is_rank_0() -> bool: return not dist.is_initialized() or dist.get_rank() == 0 -def get_cuda_actor_critic_from_args(model:str, pretrained: str = None, lora_rank=0): +def get_cuda_actor_critic_from_args(model: str, pretrained: str = None, lora_rank=0): if model == 'gpt2': actor = GPTActor(pretrained=pretrained, lora_rank=lora_rank).to(torch.cuda.current_device()) critic = GPTCritic(pretrained=pretrained, lora_rank=lora_rank).to(torch.cuda.current_device()) @@ -25,7 +26,8 @@ def get_cuda_actor_critic_from_args(model:str, pretrained: str = None, lora_rank raise ValueError(f'Unsupported model "{model}"') return actor, critic -def get_strategy_from_args(strategy:str): + +def get_strategy_from_args(strategy: str): if strategy == 'naive': strategy_ = NaiveStrategy() elif strategy == 'ddp': @@ -38,9 +40,10 @@ def get_strategy_from_args(strategy:str): raise ValueError(f'Unsupported strategy "{strategy}"') return strategy_ + def set_dist_env(env_info: Dict[str, str]): os.environ["RANK"] = env_info['rank'] os.environ["LOCAL_RANK"] = env_info['local_rank'] os.environ["WORLD_SIZE"] = env_info['world_size'] os.environ['MASTER_PORT'] = env_info['master_port'] - os.environ['MASTER_ADDR'] = env_info['master_addr'] \ No newline at end of file + os.environ['MASTER_ADDR'] = env_info['master_addr'] diff --git a/applications/ChatGPT/examples/1m2t.py b/applications/ChatGPT/examples/1m2t.py new file mode 100644 index 000000000000..e26688067bb6 --- /dev/null +++ b/applications/ChatGPT/examples/1m2t.py @@ -0,0 +1,183 @@ +import argparse +from copy import deepcopy + +import pandas as pd +import torch +from chatgpt.trainer import PPOTrainer, DetachedPPOTrainer +from chatgpt.trainer.strategies import ColossalAIStrategy, DDPStrategy, NaiveStrategy +from chatgpt.experience_maker import NaiveExperienceMaker, ExperienceMakerHolder +from torch.optim import Adam +from transformers import AutoTokenizer, BloomTokenizerFast +from 
transformers.models.gpt2.tokenization_gpt2 import GPT2Tokenizer + +from colossalai.nn.optimizer import HybridAdam + +import ray + +import os +import socket +import multiprocessing + + +def get_free_port(): + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.bind(('', 0)) + return s.getsockname()[1] + + +def get_local_ip(): + with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s: + s.connect(('8.8.8.8', 80)) + return s.getsockname()[0] + +def main(args): + master_addr = str(get_local_ip()) + # trainer_env_info + trainer_port = str(get_free_port()) + env_info_trainer_1 = {'local_rank' : '0', + 'rank' : '0', + 'world_size' : '2', + 'master_port' : trainer_port, + 'master_addr' : master_addr} + env_info_trainer_2 = {'local_rank' : '0', + 'rank' : '1', + 'world_size' : '2', + 'master_port' : trainer_port, + 'master_addr' : master_addr} + # maker_env_info + maker_port = str(get_free_port()) + env_info_maker_1 = {'local_rank' : '0', + 'rank' : '0', + 'world_size' : '2', + 'master_port' : maker_port, + 'master_addr' : master_addr} + print([env_info_trainer_1, + env_info_trainer_2, + env_info_maker_1]) + ray.init(dashboard_port = 1145) + # configure tokenizer + if args.model == 'gpt2': + tokenizer = GPT2Tokenizer.from_pretrained('gpt2') + tokenizer.pad_token = tokenizer.eos_token + elif args.model == 'bloom': + tokenizer = BloomTokenizerFast.from_pretrained(args.pretrain) + tokenizer.pad_token = tokenizer.eos_token + elif args.model == 'opt': + tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m") + else: + raise ValueError(f'Unsupported model "{args.model}"') + + # configure Trainer + trainer_1_ref = DetachedPPOTrainer.options(name="trainer1", namespace=os.environ["RAY_NAMESPACE"], num_gpus=1, max_concurrency=2).remote( + experience_maker_holder_name_list=["maker1"], + strategy=args.trainer_strategy, + model=args.model, + env_info=env_info_trainer_1, + pretrained=args.pretrain, + lora_rank=args.lora_rank, + train_batch_size=args.train_batch_size, + buffer_limit=16, + experience_batch_size=args.experience_batch_size, + max_epochs=args.max_epochs, + #kwargs: + max_length=128, + do_sample=True, + temperature=1.0, + top_k=50, + pad_token_id=tokenizer.pad_token_id, + eos_token_id=tokenizer.eos_token_id, + debug=args.debug, + ) + + trainer_2_ref = DetachedPPOTrainer.options(name="trainer2", namespace=os.environ["RAY_NAMESPACE"], num_gpus=1, max_concurrency=2).remote( + experience_maker_holder_name_list=["maker1"], + strategy=args.trainer_strategy, + model=args.model, + env_info=env_info_trainer_2, + pretrained=args.pretrain, + lora_rank=args.lora_rank, + train_batch_size=args.train_batch_size, + buffer_limit=16, + experience_batch_size=args.experience_batch_size, + max_epochs=args.max_epochs, + #kwargs: + max_length=128, + do_sample=True, + temperature=1.0, + top_k=50, + pad_token_id=tokenizer.pad_token_id, + eos_token_id=tokenizer.eos_token_id, + debug=args.debug, + ) + + # configure Experience Maker + experience_holder_1_ref = ExperienceMakerHolder.options(name="maker1", namespace=os.environ["RAY_NAMESPACE"], num_gpus=1, max_concurrency=2).remote( + detached_trainer_name_list=["trainer1", "trainer2"], + strategy=args.maker_strategy, + env_info=env_info_maker_1, + experience_batch_size=args.experience_batch_size, + kl_coef=0.1, + #kwargs: + max_length=128, + do_sample=True, + temperature=1.0, + top_k=50, + pad_token_id=tokenizer.pad_token_id, + eos_token_id=tokenizer.eos_token_id, + debug=args.debug, + ) + + # trainer send its actor and critic to experience holders. 
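
One maker feeds two trainers in this script, so the maker's production budget must cover both consumers: each trainer drains one experience batch per training step, and runs `num_episodes * max_timesteps // update_timesteps` learning phases of `max_epochs` steps each. That is the origin of the `* 2 + 3` in the `num_exp_per_maker` formula a few lines below. A worked check with the values `1m2t.sh` passes (`--num_episodes 10 --max_timesteps 10 --update_timesteps 10 --max_epochs 10`):

    num_episodes, max_timesteps, update_timesteps, max_epochs = 10, 10, 10, 10

    per_trainer = num_episodes * max_timesteps // update_timesteps * max_epochs  # 100 batches
    num_exp_per_maker = per_trainer * 2 + 3  # two consumers, plus 3 spare for fault tolerance
    print(per_trainer, num_exp_per_maker)  # 100 203
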
+ # TODO: balance duty + ray.get(trainer_1_ref.initialize_remote_makers.remote()) + + # configure sampler + dataset = pd.read_csv(args.prompt_path)['prompt'] + + def tokenize_fn(texts): + # MUST padding to max length to ensure inputs of all ranks have the same length + # Different length may lead to hang when using gemini, as different generation steps + batch = tokenizer(texts, return_tensors='pt', max_length=96, padding='max_length', truncation=True) + return {k: v.cuda() for k, v in batch.items()} + + trainer_1_done_ref = trainer_1_ref.fit.remote(num_episodes=args.num_episodes, max_timesteps=args.max_timesteps, update_timesteps=args.update_timesteps) + trainer_2_done_ref = trainer_2_ref.fit.remote(num_episodes=args.num_episodes, max_timesteps=args.max_timesteps, update_timesteps=args.update_timesteps) + num_exp_per_maker = args.num_episodes * args.max_timesteps // args.update_timesteps * args.max_epochs * 2 + 3 # +3 for fault tolerance + maker_1_done_ref = experience_holder_1_ref.workingloop.remote(dataset, tokenize_fn, times=num_exp_per_maker) + + ray.get([trainer_1_done_ref, trainer_2_done_ref, maker_1_done_ref]) + # save model checkpoint after fitting + trainer_1_ref.strategy_save_actor.remote(args.save_path, only_rank0=True) + trainer_2_ref.strategy_save_actor.remote(args.save_path, only_rank0=True) + # save optimizer checkpoint on all ranks + if args.need_optim_ckpt: + trainer_1_ref.strategy_save_actor_optim.remote('actor_optim_checkpoint_prompts_%d.pt' % (torch.cuda.current_device()), + only_rank0=False) + trainer_2_ref.strategy_save_actor_optim.remote('actor_optim_checkpoint_prompts_%d.pt' % (torch.cuda.current_device()), + only_rank0=False) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('prompt_path') + parser.add_argument('--trainer_strategy', + choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'], + default='naive') + parser.add_argument('--maker_strategy', + choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'], + default='naive') + parser.add_argument('--model', default='gpt2', choices=['gpt2', 'bloom', 'opt']) + parser.add_argument('--pretrain', type=str, default=None) + parser.add_argument('--save_path', type=str, default='actor_checkpoint_prompts.pt') + parser.add_argument('--need_optim_ckpt', type=bool, default=False) + parser.add_argument('--num_episodes', type=int, default=10) + parser.add_argument('--max_timesteps', type=int, default=10) + parser.add_argument('--update_timesteps', type=int, default=10) + parser.add_argument('--max_epochs', type=int, default=5) + parser.add_argument('--train_batch_size', type=int, default=8) + parser.add_argument('--experience_batch_size', type=int, default=8) + parser.add_argument('--lora_rank', type=int, default=0, help="low-rank adaptation matrices rank") + + parser.add_argument('--debug', action='store_true') + args = parser.parse_args() + main(args) diff --git a/applications/ChatGPT/examples/1m2t.sh b/applications/ChatGPT/examples/1m2t.sh new file mode 100644 index 000000000000..3f7074844ff5 --- /dev/null +++ b/applications/ChatGPT/examples/1m2t.sh @@ -0,0 +1,24 @@ +set_n_least_used_CUDA_VISIBLE_DEVICES() { + local n=${1:-"9999"} + echo "GPU Memory Usage:" + local FIRST_N_GPU_IDS=$(nvidia-smi --query-gpu=memory.used --format=csv \ + | tail -n +2 \ + | nl -v 0 \ + | tee /dev/tty \ + | sort -g -k 2 \ + | awk '{print $1}' \ + | head -n $n) + export CUDA_VISIBLE_DEVICES=$(echo $FIRST_N_GPU_IDS | sed 's/ /,/g') + echo "Now CUDA_VISIBLE_DEVICES is set to:" + echo 
"CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES" +} + +set_n_least_used_CUDA_VISIBLE_DEVICES 2 + +export CUDA_VISIBLE_DEVICES="0,1,2,3" +export RAY_NAMESPACE="lccsr" + +python 1m2t.py "./awesome-chatgpt-prompts/prompts.csv" --model gpt2 \ + --maker_strategy naive --trainer_strategy ddp --lora_rank 2 \ + --num_episodes 10 --max_timesteps 10 --update_timesteps 10 \ + --max_epochs 10 --debug \ No newline at end of file From 09f611dc9a58380f66e99569d64581946a02006d Mon Sep 17 00:00:00 2001 From: csric Date: Wed, 29 Mar 2023 17:09:09 +0800 Subject: [PATCH 25/36] nothing --- .../ChatGPT/chatgpt/replay_buffer/detached.py | 23 ++++++++------ .../ChatGPT/chatgpt/trainer/detached_base.py | 12 ++++--- .../ChatGPT/chatgpt/trainer/detached_ppo.py | 2 +- applications/ChatGPT/examples/1m1t.py | 31 +++++++++++++++++++ applications/ChatGPT/examples/1m1t.sh | 2 +- applications/ChatGPT/examples/1m2t.py | 4 +-- 6 files changed, 55 insertions(+), 19 deletions(-) diff --git a/applications/ChatGPT/chatgpt/replay_buffer/detached.py b/applications/ChatGPT/chatgpt/replay_buffer/detached.py index 75f5659d1d5e..bcda100496ff 100644 --- a/applications/ChatGPT/chatgpt/replay_buffer/detached.py +++ b/applications/ChatGPT/chatgpt/replay_buffer/detached.py @@ -52,7 +52,8 @@ def append(self, experience: Experience) -> None: while len(self.batch_collector) >= self.sample_batch_size: items = self.batch_collector[:self.sample_batch_size] experience = make_experience_batch(items) - self.items.put(experience, block=False) + self.items.put(experience, block=True) + print(" queue exp in") self.batch_collector = self.batch_collector[self.sample_batch_size:] def clear(self) -> None: @@ -68,20 +69,22 @@ def sample(self, worker_rank = 0, to_device = "cpu") -> Experience: if not any(self.worker_state): self.held_sample = self._sample_and_erase() self.worker_state[worker_rank] = True - self._worker_state_lock.release() - - ret = copy.deepcopy(self.held_sample) - ret.to_device(to_device) - - self._worker_state_lock.acquire() if all(self.worker_state): self.worker_state = [False] * self.tp_world_size + ret = self.held_sample + else: + ret = copy.deepcopy(self.held_sample) self._worker_state_lock.release() + ret.to_device(to_device) return ret - + @torch.no_grad() def _sample_and_erase(self) -> Experience: - return self.items.get(block=True) + ret = self.items.get(block=True) + print(" queue exp out") + return ret def get_length(self) -> int: - return self.items.qsize() \ No newline at end of file + ret = self.items.qsize() + print(" queue return length") + return ret \ No newline at end of file diff --git a/applications/ChatGPT/chatgpt/trainer/detached_base.py b/applications/ChatGPT/chatgpt/trainer/detached_base.py index b7fb3bb24e70..68e625db68e1 100644 --- a/applications/ChatGPT/chatgpt/trainer/detached_base.py +++ b/applications/ChatGPT/chatgpt/trainer/detached_base.py @@ -77,6 +77,8 @@ def _learn(self): if 'debug' in self.generate_kwargs and self.generate_kwargs['debug'] == True: print("[trainer] training step") metrics = self.training_step(experience) + if 'debug' in self.generate_kwargs and self.generate_kwargs['debug'] == True: + print("[trainer] step over") pbar.set_postfix(metrics) def fit(self, num_episodes: int = 50000, max_timesteps: int = 500, update_timesteps: int = 5000) -> None: @@ -91,22 +93,22 @@ def fit(self, num_episodes: int = 50000, max_timesteps: int = 500, update_timest self._on_episode_end(episode) self._on_fit_end() - @ray.method(concurrency_group="experience_io") + @ray.method(concurrency_group="buffer_length") def 
buffer_get_length(self): # called by ExperienceMakerHolder if 'debug' in self.generate_kwargs and self.generate_kwargs['debug'] == True: - print("[trainer] telling length") + print("[trainer] telling length") return self.detached_replay_buffer.get_length() - @ray.method(concurrency_group="experience_io") + @ray.method(concurrency_group="buffer_append") def buffer_append(self, experience: Experience): # called by ExperienceMakerHolder if 'debug' in self.generate_kwargs and self.generate_kwargs['debug'] == True: # print(f"[trainer] receiving exp. Current buffer length: {self.detached_replay_buffer.get_length()}") - print(f"[trainer] receiving exp.") + print(f"[trainer] receiving exp.") self.detached_replay_buffer.append(experience) - @ray.method(concurrency_group="experience_io") + @ray.method(concurrency_group="buffer_sample") def _buffer_sample(self): return self.detached_replay_buffer.sample() diff --git a/applications/ChatGPT/chatgpt/trainer/detached_ppo.py b/applications/ChatGPT/chatgpt/trainer/detached_ppo.py index 9adb3ff788e5..6ceba983b2da 100644 --- a/applications/ChatGPT/chatgpt/trainer/detached_ppo.py +++ b/applications/ChatGPT/chatgpt/trainer/detached_ppo.py @@ -23,7 +23,7 @@ import ray import copy -@ray.remote(concurrency_groups={"experience_io": 4, "model_io": 1, "compute": 2}) +@ray.remote(concurrency_groups={"buffer_length": 1, "buffer_append":1, "buffer_sample":1,"model_io": 1, "compute": 1}) class DetachedPPOTrainer(DetachedTrainer): ''' Detached Trainer for PPO algorithm diff --git a/applications/ChatGPT/examples/1m1t.py b/applications/ChatGPT/examples/1m1t.py index 624bbe02bf97..c8f636782b73 100644 --- a/applications/ChatGPT/examples/1m1t.py +++ b/applications/ChatGPT/examples/1m1t.py @@ -14,8 +14,37 @@ import ray import os +import socket +def get_free_port(): + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.bind(('', 0)) + return s.getsockname()[1] + + +def get_local_ip(): + with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s: + s.connect(('8.8.8.8', 80)) + return s.getsockname()[0] + def main(args): + master_addr = str(get_local_ip()) + # trainer_env_info + trainer_port = str(get_free_port()) + env_info_trainer = {'local_rank' : '0', + 'rank' : '0', + 'world_size' : '1', + 'master_port' : trainer_port, + 'master_addr' : master_addr} + + # maker_env_info + maker_port = str(get_free_port()) + env_info_maker = {'local_rank' : '0', + 'rank' : '0', + 'world_size' : '1', + 'master_port' : maker_port, + 'master_addr' : master_addr} + # configure tokenizer if args.model == 'gpt2': tokenizer = GPT2Tokenizer.from_pretrained('gpt2') @@ -33,6 +62,7 @@ def main(args): experience_maker_holder_name_list=["maker1"], strategy=args.trainer_strategy, model=args.model, + env_info = env_info_trainer, pretrained=args.pretrain, lora_rank=args.lora_rank, train_batch_size=args.train_batch_size, @@ -53,6 +83,7 @@ def main(args): experience_holder_ref = ExperienceMakerHolder.options(name="maker1", num_gpus=1, max_concurrency=2).remote( detached_trainer_name_list=["trainer1"], strategy=args.maker_strategy, + env_info = env_info_maker, experience_batch_size=args.experience_batch_size, kl_coef=0.1, #kwargs: diff --git a/applications/ChatGPT/examples/1m1t.sh b/applications/ChatGPT/examples/1m1t.sh index 927ab5a1c0ac..0f31c2647344 100644 --- a/applications/ChatGPT/examples/1m1t.sh +++ b/applications/ChatGPT/examples/1m1t.sh @@ -19,6 +19,6 @@ export CUDA_VISIBLE_DEVICES="0,1,2,3" export RAY_NAMESPACE="lccsr" python 1m1t.py "./awesome-chatgpt-prompts/prompts.csv" \ - 
--trainer_strategy naive --maker_strategy naive --lora_rank 2 --pretrain "facebook/opt-350m" --model 'opt' \
+    --trainer_strategy ddp --maker_strategy naive --lora_rank 2 --pretrain "facebook/opt-350m" --model 'opt' \
     --num_episodes 10 --max_timesteps 10 --update_timesteps 10 \
     --max_epochs 10 # --debug
diff --git a/applications/ChatGPT/examples/1m2t.py b/applications/ChatGPT/examples/1m2t.py
index e26688067bb6..dd4aad1f51e9 100644
--- a/applications/ChatGPT/examples/1m2t.py
+++ b/applications/ChatGPT/examples/1m2t.py
@@ -66,7 +66,7 @@ def main(args):
         tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m")
     else:
         raise ValueError(f'Unsupported model "{args.model}"')
-    
+
     # configure Trainer
     trainer_1_ref = DetachedPPOTrainer.options(name="trainer1", namespace=os.environ["RAY_NAMESPACE"], num_gpus=1, max_concurrency=2).remote(
         experience_maker_holder_name_list=["maker1"],
         strategy=args.trainer_strategy,
@@ -107,7 +107,7 @@ def main(args):
         top_k=50,
         pad_token_id=tokenizer.pad_token_id,
         eos_token_id=tokenizer.eos_token_id,
-        debug=args.debug,
+        debug= False#args.debug,
     )

From 459639c330227e79f95c54c32824749df7668d1b Mon Sep 17 00:00:00 2001
From: csric
Date: Wed, 29 Mar 2023 17:49:28 +0800
Subject: [PATCH 26/36] set timeout for trainer choosing. It solves the stuck problem!

---
 .../chatgpt/experience_maker/detached.py        | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/applications/ChatGPT/chatgpt/experience_maker/detached.py b/applications/ChatGPT/chatgpt/experience_maker/detached.py
index 0abfea7e85ce..aaa35dc07fc1 100644
--- a/applications/ChatGPT/chatgpt/experience_maker/detached.py
+++ b/applications/ChatGPT/chatgpt/experience_maker/detached.py
@@ -3,6 +3,7 @@
 from .naive import NaiveExperienceMaker, Experience, ExperienceMaker
 from ..replay_buffer.detached import DetachedReplayBuffer
 import ray
+from ray.exceptions import GetTimeoutError
 from torch import Tensor
 import torch.nn as nn
 from chatgpt.models.base import Actor, Critic, RewardModel
@@ -79,14 +80,18 @@ def _send_experience(self, experience):
             print("[maker] choosing tartget trainer")
         while chosen_trainer is None:
             for target_trainer in self.target_trainer_list:
-                temp_length = ray.get(target_trainer.buffer_get_length.remote())
-                if min_length is None:
-                    min_length = temp_length
-                    chosen_trainer = target_trainer
-                else:
-                    if temp_length < min_length:
+                try:
+                    temp_length = ray.get(target_trainer.buffer_get_length.remote(), timeout=1.0)
+                    if min_length is None:
                         min_length = temp_length
                         chosen_trainer = target_trainer
+                    else:
+                        if temp_length < min_length:
+                            min_length = temp_length
+                            chosen_trainer = target_trainer
+                except GetTimeoutError:
+                    pass
+
         if 'debug' in self.generate_kwargs and self.generate_kwargs['debug'] == True:
             print(f"[maker] sending exp to {chosen_trainer}")
         chosen_trainer.buffer_append.remote(experience)

From 65363e159c80201c0277e725504bc31ec16e9da2 Mon Sep 17 00:00:00 2001
From: csric
Date: Wed, 29 Mar 2023 17:56:50 +0800
Subject: [PATCH 27/36] delete some debug output

---
 applications/ChatGPT/chatgpt/experience_maker/detached.py | 2 +-
 applications/ChatGPT/chatgpt/replay_buffer/detached.py    | 3 ---
 applications/ChatGPT/examples/1m2t.py                     | 2 +-
 applications/ChatGPT/examples/1m2t.sh                     | 2 +-
 applications/ChatGPT/examples/2m2t.sh                     | 2 +-
 5 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/applications/ChatGPT/chatgpt/experience_maker/detached.py b/applications/ChatGPT/chatgpt/experience_maker/detached.py
index aaa35dc07fc1..3cfb5d9d54f2 100644
--- 
a/applications/ChatGPT/chatgpt/experience_maker/detached.py +++ b/applications/ChatGPT/chatgpt/experience_maker/detached.py @@ -81,7 +81,7 @@ def _send_experience(self, experience): while chosen_trainer is None: for target_trainer in self.target_trainer_list: try: - temp_length = ray.get(target_trainer.buffer_get_length.remote(), timeout=1.0) + temp_length = ray.get(target_trainer.buffer_get_length.remote(), timeout=0.1) if min_length is None: min_length = temp_length chosen_trainer = target_trainer diff --git a/applications/ChatGPT/chatgpt/replay_buffer/detached.py b/applications/ChatGPT/chatgpt/replay_buffer/detached.py index bcda100496ff..c21c8b4b38f8 100644 --- a/applications/ChatGPT/chatgpt/replay_buffer/detached.py +++ b/applications/ChatGPT/chatgpt/replay_buffer/detached.py @@ -53,7 +53,6 @@ def append(self, experience: Experience) -> None: items = self.batch_collector[:self.sample_batch_size] experience = make_experience_batch(items) self.items.put(experience, block=True) - print(" queue exp in") self.batch_collector = self.batch_collector[self.sample_batch_size:] def clear(self) -> None: @@ -81,10 +80,8 @@ def sample(self, worker_rank = 0, to_device = "cpu") -> Experience: @torch.no_grad() def _sample_and_erase(self) -> Experience: ret = self.items.get(block=True) - print(" queue exp out") return ret def get_length(self) -> int: ret = self.items.qsize() - print(" queue return length") return ret \ No newline at end of file diff --git a/applications/ChatGPT/examples/1m2t.py b/applications/ChatGPT/examples/1m2t.py index dd4aad1f51e9..8a0083c15b3d 100644 --- a/applications/ChatGPT/examples/1m2t.py +++ b/applications/ChatGPT/examples/1m2t.py @@ -107,7 +107,7 @@ def main(args): top_k=50, pad_token_id=tokenizer.pad_token_id, eos_token_id=tokenizer.eos_token_id, - debug= False#args.debug, + debug= args.debug, ) # configure Experience Maker diff --git a/applications/ChatGPT/examples/1m2t.sh b/applications/ChatGPT/examples/1m2t.sh index 3f7074844ff5..0a7c66636d27 100644 --- a/applications/ChatGPT/examples/1m2t.sh +++ b/applications/ChatGPT/examples/1m2t.sh @@ -21,4 +21,4 @@ export RAY_NAMESPACE="lccsr" python 1m2t.py "./awesome-chatgpt-prompts/prompts.csv" --model gpt2 \ --maker_strategy naive --trainer_strategy ddp --lora_rank 2 \ --num_episodes 10 --max_timesteps 10 --update_timesteps 10 \ - --max_epochs 10 --debug \ No newline at end of file + --max_epochs 10 #--debug \ No newline at end of file diff --git a/applications/ChatGPT/examples/2m2t.sh b/applications/ChatGPT/examples/2m2t.sh index b1fb23e58757..9c49d97845c5 100644 --- a/applications/ChatGPT/examples/2m2t.sh +++ b/applications/ChatGPT/examples/2m2t.sh @@ -21,4 +21,4 @@ export RAY_NAMESPACE="lccsr" python 2m2t.py "./awesome-chatgpt-prompts/prompts.csv" \ --maker_strategy naive --trainer_strategy ddp --lora_rank 2 \ --num_episodes 10 --max_timesteps 10 --update_timesteps 10 \ - --max_epochs 10 --debug \ No newline at end of file + --max_epochs 10 # --debug \ No newline at end of file From 2f8036b30c2d1035850ce0736949953b0c1cb826 Mon Sep 17 00:00:00 2001 From: csric Date: Thu, 30 Mar 2023 14:29:55 +0800 Subject: [PATCH 28/36] rename to sync with upstream --- applications/{ChatGPT => Chat}/.gitignore | 0 applications/{ChatGPT => Chat}/LICENSE | 0 applications/{ChatGPT => Chat}/README.md | 0 .../{ChatGPT => Chat}/benchmarks/README.md | 0 .../benchmarks/benchmark_gpt_dummy.py | 0 .../benchmarks/benchmark_gpt_dummy.sh | 0 .../benchmarks/benchmark_opt_lora_dummy.py | 0 .../{ChatGPT => Chat}/chatgpt/__init__.py | 0 
.../chatgpt/dataset/__init__.py | 0 .../chatgpt/dataset/reward_dataset.py | 0 .../chatgpt/dataset/sft_dataset.py | 0 .../{ChatGPT => Chat}/chatgpt/dataset/utils.py | 0 .../chatgpt/experience_maker/__init__.py | 0 .../chatgpt/experience_maker/base.py | 0 .../chatgpt/experience_maker/detached.py | 17 +++++++++++++++++ .../chatgpt/experience_maker/naive.py | 0 .../chatgpt/experience_maker/strategy/base.py | 0 .../chatgpt/models/__init__.py | 0 .../chatgpt/models/base/__init__.py | 0 .../chatgpt/models/base/actor.py | 0 .../chatgpt/models/base/critic.py | 0 .../{ChatGPT => Chat}/chatgpt/models/base/lm.py | 0 .../chatgpt/models/base/reward_model.py | 0 .../chatgpt/models/bloom/__init__.py | 0 .../chatgpt/models/bloom/bloom_actor.py | 0 .../chatgpt/models/bloom/bloom_critic.py | 0 .../chatgpt/models/bloom/bloom_lm.py | 0 .../chatgpt/models/bloom/bloom_rm.py | 0 .../chatgpt/models/deberta/__init__.py | 0 .../chatgpt/models/deberta/deberta_critic.py | 0 .../chatgpt/models/deberta/deberta_rm.py | 0 .../chatgpt/models/generation.py | 0 .../chatgpt/models/generation_utils.py | 0 .../chatgpt/models/gpt/__init__.py | 0 .../chatgpt/models/gpt/gpt_actor.py | 0 .../chatgpt/models/gpt/gpt_critic.py | 0 .../chatgpt/models/gpt/gpt_lm.py | 0 .../chatgpt/models/gpt/gpt_rm.py | 0 .../chatgpt/models/llama/__init__.py | 0 .../chatgpt/models/llama/llama_actor.py | 0 .../chatgpt/models/llama/llama_critic.py | 0 .../chatgpt/models/llama/llama_lm.py | 0 .../chatgpt/models/llama/llama_rm.py | 0 .../{ChatGPT => Chat}/chatgpt/models/lora.py | 0 .../{ChatGPT => Chat}/chatgpt/models/loss.py | 0 .../chatgpt/models/opt/__init__.py | 0 .../chatgpt/models/opt/opt_actor.py | 0 .../chatgpt/models/opt/opt_critic.py | 0 .../chatgpt/models/opt/opt_lm.py | 0 .../chatgpt/models/opt/opt_rm.py | 0 .../{ChatGPT => Chat}/chatgpt/models/utils.py | 0 .../chatgpt/replay_buffer/__init__.py | 0 .../chatgpt/replay_buffer/base.py | 0 .../chatgpt/replay_buffer/detached.py | 0 .../chatgpt/replay_buffer/naive.py | 0 .../chatgpt/replay_buffer/utils.py | 0 .../chatgpt/trainer/__init__.py | 0 .../{ChatGPT => Chat}/chatgpt/trainer/base.py | 0 .../chatgpt/trainer/callbacks/__init__.py | 0 .../chatgpt/trainer/callbacks/base.py | 0 .../trainer/callbacks/performance_evaluator.py | 0 .../trainer/callbacks/save_checkpoint.py | 0 .../chatgpt/trainer/detached_base.py | 0 .../chatgpt/trainer/detached_ppo.py | 0 .../{ChatGPT => Chat}/chatgpt/trainer/ppo.py | 0 .../{ChatGPT => Chat}/chatgpt/trainer/rm.py | 0 .../{ChatGPT => Chat}/chatgpt/trainer/sft.py | 0 .../chatgpt/trainer/strategies/__init__.py | 0 .../chatgpt/trainer/strategies/base.py | 0 .../chatgpt/trainer/strategies/colossalai.py | 0 .../chatgpt/trainer/strategies/ddp.py | 0 .../chatgpt/trainer/strategies/naive.py | 0 .../chatgpt/trainer/strategies/sampler.py | 0 .../{ChatGPT => Chat}/chatgpt/trainer/utils.py | 0 .../{ChatGPT => Chat}/chatgpt/utils/__init__.py | 0 .../chatgpt/utils/tokenizer_utils.py | 0 applications/{ChatGPT => Chat}/examples/1m1t.py | 0 applications/{ChatGPT => Chat}/examples/1m1t.sh | 0 applications/{ChatGPT => Chat}/examples/1m2t.py | 0 applications/{ChatGPT => Chat}/examples/1m2t.sh | 0 applications/{ChatGPT => Chat}/examples/2m1t.py | 0 applications/{ChatGPT => Chat}/examples/2m1t.sh | 0 applications/{ChatGPT => Chat}/examples/2m2t.py | 0 applications/{ChatGPT => Chat}/examples/2m2t.sh | 2 +- .../{ChatGPT => Chat}/examples/README.md | 0 .../{ChatGPT => Chat}/examples/inference.py | 0 .../{ChatGPT => Chat}/examples/requirements.txt | 0 .../{ChatGPT => Chat}/examples/test_ci.sh | 0 
.../{ChatGPT => Chat}/examples/train_dummy.py | 0 .../{ChatGPT => Chat}/examples/train_dummy.sh | 0 .../{ChatGPT => Chat}/examples/train_prompts.py | 0 .../{ChatGPT => Chat}/examples/train_prompts.sh | 0 .../examples/train_reward_model.py | 0 .../{ChatGPT => Chat}/examples/train_rm.sh | 0 .../{ChatGPT => Chat}/examples/train_sft.py | 0 .../{ChatGPT => Chat}/examples/train_sft.sh | 0 applications/{ChatGPT => Chat}/pytest.ini | 0 .../{ChatGPT => Chat}/requirements-test.txt | 0 applications/{ChatGPT => Chat}/requirements.txt | 0 applications/{ChatGPT => Chat}/setup.py | 0 .../{ChatGPT => Chat}/tests/__init__.py | 0 .../{ChatGPT => Chat}/tests/test_checkpoint.py | 0 .../{ChatGPT => Chat}/tests/test_data.py | 0 applications/{ChatGPT => Chat}/version.txt | 0 104 files changed, 18 insertions(+), 1 deletion(-) rename applications/{ChatGPT => Chat}/.gitignore (100%) rename applications/{ChatGPT => Chat}/LICENSE (100%) rename applications/{ChatGPT => Chat}/README.md (100%) rename applications/{ChatGPT => Chat}/benchmarks/README.md (100%) rename applications/{ChatGPT => Chat}/benchmarks/benchmark_gpt_dummy.py (100%) rename applications/{ChatGPT => Chat}/benchmarks/benchmark_gpt_dummy.sh (100%) rename applications/{ChatGPT => Chat}/benchmarks/benchmark_opt_lora_dummy.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/__init__.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/dataset/__init__.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/dataset/reward_dataset.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/dataset/sft_dataset.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/dataset/utils.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/experience_maker/__init__.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/experience_maker/base.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/experience_maker/detached.py (90%) rename applications/{ChatGPT => Chat}/chatgpt/experience_maker/naive.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/experience_maker/strategy/base.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/models/__init__.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/models/base/__init__.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/models/base/actor.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/models/base/critic.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/models/base/lm.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/models/base/reward_model.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/models/bloom/__init__.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/models/bloom/bloom_actor.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/models/bloom/bloom_critic.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/models/bloom/bloom_lm.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/models/bloom/bloom_rm.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/models/deberta/__init__.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/models/deberta/deberta_critic.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/models/deberta/deberta_rm.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/models/generation.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/models/generation_utils.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/models/gpt/__init__.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/models/gpt/gpt_actor.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/models/gpt/gpt_critic.py (100%) rename 
applications/{ChatGPT => Chat}/chatgpt/models/gpt/gpt_lm.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/models/gpt/gpt_rm.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/models/llama/__init__.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/models/llama/llama_actor.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/models/llama/llama_critic.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/models/llama/llama_lm.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/models/llama/llama_rm.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/models/lora.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/models/loss.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/models/opt/__init__.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/models/opt/opt_actor.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/models/opt/opt_critic.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/models/opt/opt_lm.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/models/opt/opt_rm.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/models/utils.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/replay_buffer/__init__.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/replay_buffer/base.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/replay_buffer/detached.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/replay_buffer/naive.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/replay_buffer/utils.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/trainer/__init__.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/trainer/base.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/trainer/callbacks/__init__.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/trainer/callbacks/base.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/trainer/callbacks/performance_evaluator.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/trainer/callbacks/save_checkpoint.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/trainer/detached_base.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/trainer/detached_ppo.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/trainer/ppo.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/trainer/rm.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/trainer/sft.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/trainer/strategies/__init__.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/trainer/strategies/base.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/trainer/strategies/colossalai.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/trainer/strategies/ddp.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/trainer/strategies/naive.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/trainer/strategies/sampler.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/trainer/utils.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/utils/__init__.py (100%) rename applications/{ChatGPT => Chat}/chatgpt/utils/tokenizer_utils.py (100%) rename applications/{ChatGPT => Chat}/examples/1m1t.py (100%) rename applications/{ChatGPT => Chat}/examples/1m1t.sh (100%) rename applications/{ChatGPT => Chat}/examples/1m2t.py (100%) rename applications/{ChatGPT => Chat}/examples/1m2t.sh (100%) rename applications/{ChatGPT => Chat}/examples/2m1t.py (100%) rename applications/{ChatGPT => Chat}/examples/2m1t.sh (100%) rename applications/{ChatGPT => Chat}/examples/2m2t.py (100%) rename applications/{ChatGPT => 
Chat}/examples/2m2t.sh (92%) rename applications/{ChatGPT => Chat}/examples/README.md (100%) rename applications/{ChatGPT => Chat}/examples/inference.py (100%) rename applications/{ChatGPT => Chat}/examples/requirements.txt (100%) rename applications/{ChatGPT => Chat}/examples/test_ci.sh (100%) rename applications/{ChatGPT => Chat}/examples/train_dummy.py (100%) rename applications/{ChatGPT => Chat}/examples/train_dummy.sh (100%) rename applications/{ChatGPT => Chat}/examples/train_prompts.py (100%) rename applications/{ChatGPT => Chat}/examples/train_prompts.sh (100%) rename applications/{ChatGPT => Chat}/examples/train_reward_model.py (100%) rename applications/{ChatGPT => Chat}/examples/train_rm.sh (100%) rename applications/{ChatGPT => Chat}/examples/train_sft.py (100%) rename applications/{ChatGPT => Chat}/examples/train_sft.sh (100%) rename applications/{ChatGPT => Chat}/pytest.ini (100%) rename applications/{ChatGPT => Chat}/requirements-test.txt (100%) rename applications/{ChatGPT => Chat}/requirements.txt (100%) rename applications/{ChatGPT => Chat}/setup.py (100%) rename applications/{ChatGPT => Chat}/tests/__init__.py (100%) rename applications/{ChatGPT => Chat}/tests/test_checkpoint.py (100%) rename applications/{ChatGPT => Chat}/tests/test_data.py (100%) rename applications/{ChatGPT => Chat}/version.txt (100%) diff --git a/applications/ChatGPT/.gitignore b/applications/Chat/.gitignore similarity index 100% rename from applications/ChatGPT/.gitignore rename to applications/Chat/.gitignore diff --git a/applications/ChatGPT/LICENSE b/applications/Chat/LICENSE similarity index 100% rename from applications/ChatGPT/LICENSE rename to applications/Chat/LICENSE diff --git a/applications/ChatGPT/README.md b/applications/Chat/README.md similarity index 100% rename from applications/ChatGPT/README.md rename to applications/Chat/README.md diff --git a/applications/ChatGPT/benchmarks/README.md b/applications/Chat/benchmarks/README.md similarity index 100% rename from applications/ChatGPT/benchmarks/README.md rename to applications/Chat/benchmarks/README.md diff --git a/applications/ChatGPT/benchmarks/benchmark_gpt_dummy.py b/applications/Chat/benchmarks/benchmark_gpt_dummy.py similarity index 100% rename from applications/ChatGPT/benchmarks/benchmark_gpt_dummy.py rename to applications/Chat/benchmarks/benchmark_gpt_dummy.py diff --git a/applications/ChatGPT/benchmarks/benchmark_gpt_dummy.sh b/applications/Chat/benchmarks/benchmark_gpt_dummy.sh similarity index 100% rename from applications/ChatGPT/benchmarks/benchmark_gpt_dummy.sh rename to applications/Chat/benchmarks/benchmark_gpt_dummy.sh diff --git a/applications/ChatGPT/benchmarks/benchmark_opt_lora_dummy.py b/applications/Chat/benchmarks/benchmark_opt_lora_dummy.py similarity index 100% rename from applications/ChatGPT/benchmarks/benchmark_opt_lora_dummy.py rename to applications/Chat/benchmarks/benchmark_opt_lora_dummy.py diff --git a/applications/ChatGPT/chatgpt/__init__.py b/applications/Chat/chatgpt/__init__.py similarity index 100% rename from applications/ChatGPT/chatgpt/__init__.py rename to applications/Chat/chatgpt/__init__.py diff --git a/applications/ChatGPT/chatgpt/dataset/__init__.py b/applications/Chat/chatgpt/dataset/__init__.py similarity index 100% rename from applications/ChatGPT/chatgpt/dataset/__init__.py rename to applications/Chat/chatgpt/dataset/__init__.py diff --git a/applications/ChatGPT/chatgpt/dataset/reward_dataset.py b/applications/Chat/chatgpt/dataset/reward_dataset.py similarity index 100% rename from 
applications/ChatGPT/chatgpt/dataset/reward_dataset.py rename to applications/Chat/chatgpt/dataset/reward_dataset.py diff --git a/applications/ChatGPT/chatgpt/dataset/sft_dataset.py b/applications/Chat/chatgpt/dataset/sft_dataset.py similarity index 100% rename from applications/ChatGPT/chatgpt/dataset/sft_dataset.py rename to applications/Chat/chatgpt/dataset/sft_dataset.py diff --git a/applications/ChatGPT/chatgpt/dataset/utils.py b/applications/Chat/chatgpt/dataset/utils.py similarity index 100% rename from applications/ChatGPT/chatgpt/dataset/utils.py rename to applications/Chat/chatgpt/dataset/utils.py diff --git a/applications/ChatGPT/chatgpt/experience_maker/__init__.py b/applications/Chat/chatgpt/experience_maker/__init__.py similarity index 100% rename from applications/ChatGPT/chatgpt/experience_maker/__init__.py rename to applications/Chat/chatgpt/experience_maker/__init__.py diff --git a/applications/ChatGPT/chatgpt/experience_maker/base.py b/applications/Chat/chatgpt/experience_maker/base.py similarity index 100% rename from applications/ChatGPT/chatgpt/experience_maker/base.py rename to applications/Chat/chatgpt/experience_maker/base.py diff --git a/applications/ChatGPT/chatgpt/experience_maker/detached.py b/applications/Chat/chatgpt/experience_maker/detached.py similarity index 90% rename from applications/ChatGPT/chatgpt/experience_maker/detached.py rename to applications/Chat/chatgpt/experience_maker/detached.py index 3cfb5d9d54f2..29689a88a8b9 100644 --- a/applications/ChatGPT/chatgpt/experience_maker/detached.py +++ b/applications/Chat/chatgpt/experience_maker/detached.py @@ -99,6 +99,15 @@ def _send_experience(self, experience): def workingloop(self, dataset, tokenizer: Optional[Callable[[Any], dict]] = None, times=5000 * 50000): self._get_ready() sampler = self.strategy.setup_sampler(dataset) + prof = torch.profiler.profile( + schedule=torch.profiler.schedule(wait=1, warmup=1, active=18, repeat=1), + on_trace_ready=torch.profiler.tensorboard_trace_handler('./log/1m1t_maker'), + record_shapes=True, + profile_memory=True, + with_stack=True) + prof.start() + count_time = 0 + saved = False for _ in range(times): rand_prompts = sampler.sample(self.experience_batch_size) if tokenizer is not None: @@ -109,6 +118,14 @@ def workingloop(self, dataset, tokenizer: Optional[Callable[[Any], dict]] = None experience = self._make_experience(inputs=inputs) self._model_visit_lock.release() self._send_experience(experience=experience) + + if count_time >= 20 and not saved: + prof.stop() + saved = True + # step the profiler only until the 20-step trace has been saved + if not saved: + count_time += 1 + prof.step() @ray.method(concurrency_group="model_io") def initialize_experience_maker(self, init_actor: Actor, init_critic: Critic): diff --git a/applications/ChatGPT/chatgpt/experience_maker/naive.py b/applications/Chat/chatgpt/experience_maker/naive.py similarity index 100% rename from applications/ChatGPT/chatgpt/experience_maker/naive.py rename to applications/Chat/chatgpt/experience_maker/naive.py diff --git a/applications/ChatGPT/chatgpt/experience_maker/strategy/base.py b/applications/Chat/chatgpt/experience_maker/strategy/base.py similarity index 100% rename from applications/ChatGPT/chatgpt/experience_maker/strategy/base.py rename to applications/Chat/chatgpt/experience_maker/strategy/base.py diff --git a/applications/ChatGPT/chatgpt/models/__init__.py b/applications/Chat/chatgpt/models/__init__.py similarity index 100% rename from applications/ChatGPT/chatgpt/models/__init__.py rename to applications/Chat/chatgpt/models/__init__.py diff --git
a/applications/ChatGPT/chatgpt/models/base/__init__.py b/applications/Chat/chatgpt/models/base/__init__.py similarity index 100% rename from applications/ChatGPT/chatgpt/models/base/__init__.py rename to applications/Chat/chatgpt/models/base/__init__.py diff --git a/applications/ChatGPT/chatgpt/models/base/actor.py b/applications/Chat/chatgpt/models/base/actor.py similarity index 100% rename from applications/ChatGPT/chatgpt/models/base/actor.py rename to applications/Chat/chatgpt/models/base/actor.py diff --git a/applications/ChatGPT/chatgpt/models/base/critic.py b/applications/Chat/chatgpt/models/base/critic.py similarity index 100% rename from applications/ChatGPT/chatgpt/models/base/critic.py rename to applications/Chat/chatgpt/models/base/critic.py diff --git a/applications/ChatGPT/chatgpt/models/base/lm.py b/applications/Chat/chatgpt/models/base/lm.py similarity index 100% rename from applications/ChatGPT/chatgpt/models/base/lm.py rename to applications/Chat/chatgpt/models/base/lm.py diff --git a/applications/ChatGPT/chatgpt/models/base/reward_model.py b/applications/Chat/chatgpt/models/base/reward_model.py similarity index 100% rename from applications/ChatGPT/chatgpt/models/base/reward_model.py rename to applications/Chat/chatgpt/models/base/reward_model.py diff --git a/applications/ChatGPT/chatgpt/models/bloom/__init__.py b/applications/Chat/chatgpt/models/bloom/__init__.py similarity index 100% rename from applications/ChatGPT/chatgpt/models/bloom/__init__.py rename to applications/Chat/chatgpt/models/bloom/__init__.py diff --git a/applications/ChatGPT/chatgpt/models/bloom/bloom_actor.py b/applications/Chat/chatgpt/models/bloom/bloom_actor.py similarity index 100% rename from applications/ChatGPT/chatgpt/models/bloom/bloom_actor.py rename to applications/Chat/chatgpt/models/bloom/bloom_actor.py diff --git a/applications/ChatGPT/chatgpt/models/bloom/bloom_critic.py b/applications/Chat/chatgpt/models/bloom/bloom_critic.py similarity index 100% rename from applications/ChatGPT/chatgpt/models/bloom/bloom_critic.py rename to applications/Chat/chatgpt/models/bloom/bloom_critic.py diff --git a/applications/ChatGPT/chatgpt/models/bloom/bloom_lm.py b/applications/Chat/chatgpt/models/bloom/bloom_lm.py similarity index 100% rename from applications/ChatGPT/chatgpt/models/bloom/bloom_lm.py rename to applications/Chat/chatgpt/models/bloom/bloom_lm.py diff --git a/applications/ChatGPT/chatgpt/models/bloom/bloom_rm.py b/applications/Chat/chatgpt/models/bloom/bloom_rm.py similarity index 100% rename from applications/ChatGPT/chatgpt/models/bloom/bloom_rm.py rename to applications/Chat/chatgpt/models/bloom/bloom_rm.py diff --git a/applications/ChatGPT/chatgpt/models/deberta/__init__.py b/applications/Chat/chatgpt/models/deberta/__init__.py similarity index 100% rename from applications/ChatGPT/chatgpt/models/deberta/__init__.py rename to applications/Chat/chatgpt/models/deberta/__init__.py diff --git a/applications/ChatGPT/chatgpt/models/deberta/deberta_critic.py b/applications/Chat/chatgpt/models/deberta/deberta_critic.py similarity index 100% rename from applications/ChatGPT/chatgpt/models/deberta/deberta_critic.py rename to applications/Chat/chatgpt/models/deberta/deberta_critic.py diff --git a/applications/ChatGPT/chatgpt/models/deberta/deberta_rm.py b/applications/Chat/chatgpt/models/deberta/deberta_rm.py similarity index 100% rename from applications/ChatGPT/chatgpt/models/deberta/deberta_rm.py rename to applications/Chat/chatgpt/models/deberta/deberta_rm.py diff --git 
a/applications/ChatGPT/chatgpt/models/generation.py b/applications/Chat/chatgpt/models/generation.py similarity index 100% rename from applications/ChatGPT/chatgpt/models/generation.py rename to applications/Chat/chatgpt/models/generation.py diff --git a/applications/ChatGPT/chatgpt/models/generation_utils.py b/applications/Chat/chatgpt/models/generation_utils.py similarity index 100% rename from applications/ChatGPT/chatgpt/models/generation_utils.py rename to applications/Chat/chatgpt/models/generation_utils.py diff --git a/applications/ChatGPT/chatgpt/models/gpt/__init__.py b/applications/Chat/chatgpt/models/gpt/__init__.py similarity index 100% rename from applications/ChatGPT/chatgpt/models/gpt/__init__.py rename to applications/Chat/chatgpt/models/gpt/__init__.py diff --git a/applications/ChatGPT/chatgpt/models/gpt/gpt_actor.py b/applications/Chat/chatgpt/models/gpt/gpt_actor.py similarity index 100% rename from applications/ChatGPT/chatgpt/models/gpt/gpt_actor.py rename to applications/Chat/chatgpt/models/gpt/gpt_actor.py diff --git a/applications/ChatGPT/chatgpt/models/gpt/gpt_critic.py b/applications/Chat/chatgpt/models/gpt/gpt_critic.py similarity index 100% rename from applications/ChatGPT/chatgpt/models/gpt/gpt_critic.py rename to applications/Chat/chatgpt/models/gpt/gpt_critic.py diff --git a/applications/ChatGPT/chatgpt/models/gpt/gpt_lm.py b/applications/Chat/chatgpt/models/gpt/gpt_lm.py similarity index 100% rename from applications/ChatGPT/chatgpt/models/gpt/gpt_lm.py rename to applications/Chat/chatgpt/models/gpt/gpt_lm.py diff --git a/applications/ChatGPT/chatgpt/models/gpt/gpt_rm.py b/applications/Chat/chatgpt/models/gpt/gpt_rm.py similarity index 100% rename from applications/ChatGPT/chatgpt/models/gpt/gpt_rm.py rename to applications/Chat/chatgpt/models/gpt/gpt_rm.py diff --git a/applications/ChatGPT/chatgpt/models/llama/__init__.py b/applications/Chat/chatgpt/models/llama/__init__.py similarity index 100% rename from applications/ChatGPT/chatgpt/models/llama/__init__.py rename to applications/Chat/chatgpt/models/llama/__init__.py diff --git a/applications/ChatGPT/chatgpt/models/llama/llama_actor.py b/applications/Chat/chatgpt/models/llama/llama_actor.py similarity index 100% rename from applications/ChatGPT/chatgpt/models/llama/llama_actor.py rename to applications/Chat/chatgpt/models/llama/llama_actor.py diff --git a/applications/ChatGPT/chatgpt/models/llama/llama_critic.py b/applications/Chat/chatgpt/models/llama/llama_critic.py similarity index 100% rename from applications/ChatGPT/chatgpt/models/llama/llama_critic.py rename to applications/Chat/chatgpt/models/llama/llama_critic.py diff --git a/applications/ChatGPT/chatgpt/models/llama/llama_lm.py b/applications/Chat/chatgpt/models/llama/llama_lm.py similarity index 100% rename from applications/ChatGPT/chatgpt/models/llama/llama_lm.py rename to applications/Chat/chatgpt/models/llama/llama_lm.py diff --git a/applications/ChatGPT/chatgpt/models/llama/llama_rm.py b/applications/Chat/chatgpt/models/llama/llama_rm.py similarity index 100% rename from applications/ChatGPT/chatgpt/models/llama/llama_rm.py rename to applications/Chat/chatgpt/models/llama/llama_rm.py diff --git a/applications/ChatGPT/chatgpt/models/lora.py b/applications/Chat/chatgpt/models/lora.py similarity index 100% rename from applications/ChatGPT/chatgpt/models/lora.py rename to applications/Chat/chatgpt/models/lora.py diff --git a/applications/ChatGPT/chatgpt/models/loss.py b/applications/Chat/chatgpt/models/loss.py similarity index 100% rename 
from applications/ChatGPT/chatgpt/models/loss.py rename to applications/Chat/chatgpt/models/loss.py diff --git a/applications/ChatGPT/chatgpt/models/opt/__init__.py b/applications/Chat/chatgpt/models/opt/__init__.py similarity index 100% rename from applications/ChatGPT/chatgpt/models/opt/__init__.py rename to applications/Chat/chatgpt/models/opt/__init__.py diff --git a/applications/ChatGPT/chatgpt/models/opt/opt_actor.py b/applications/Chat/chatgpt/models/opt/opt_actor.py similarity index 100% rename from applications/ChatGPT/chatgpt/models/opt/opt_actor.py rename to applications/Chat/chatgpt/models/opt/opt_actor.py diff --git a/applications/ChatGPT/chatgpt/models/opt/opt_critic.py b/applications/Chat/chatgpt/models/opt/opt_critic.py similarity index 100% rename from applications/ChatGPT/chatgpt/models/opt/opt_critic.py rename to applications/Chat/chatgpt/models/opt/opt_critic.py diff --git a/applications/ChatGPT/chatgpt/models/opt/opt_lm.py b/applications/Chat/chatgpt/models/opt/opt_lm.py similarity index 100% rename from applications/ChatGPT/chatgpt/models/opt/opt_lm.py rename to applications/Chat/chatgpt/models/opt/opt_lm.py diff --git a/applications/ChatGPT/chatgpt/models/opt/opt_rm.py b/applications/Chat/chatgpt/models/opt/opt_rm.py similarity index 100% rename from applications/ChatGPT/chatgpt/models/opt/opt_rm.py rename to applications/Chat/chatgpt/models/opt/opt_rm.py diff --git a/applications/ChatGPT/chatgpt/models/utils.py b/applications/Chat/chatgpt/models/utils.py similarity index 100% rename from applications/ChatGPT/chatgpt/models/utils.py rename to applications/Chat/chatgpt/models/utils.py diff --git a/applications/ChatGPT/chatgpt/replay_buffer/__init__.py b/applications/Chat/chatgpt/replay_buffer/__init__.py similarity index 100% rename from applications/ChatGPT/chatgpt/replay_buffer/__init__.py rename to applications/Chat/chatgpt/replay_buffer/__init__.py diff --git a/applications/ChatGPT/chatgpt/replay_buffer/base.py b/applications/Chat/chatgpt/replay_buffer/base.py similarity index 100% rename from applications/ChatGPT/chatgpt/replay_buffer/base.py rename to applications/Chat/chatgpt/replay_buffer/base.py diff --git a/applications/ChatGPT/chatgpt/replay_buffer/detached.py b/applications/Chat/chatgpt/replay_buffer/detached.py similarity index 100% rename from applications/ChatGPT/chatgpt/replay_buffer/detached.py rename to applications/Chat/chatgpt/replay_buffer/detached.py diff --git a/applications/ChatGPT/chatgpt/replay_buffer/naive.py b/applications/Chat/chatgpt/replay_buffer/naive.py similarity index 100% rename from applications/ChatGPT/chatgpt/replay_buffer/naive.py rename to applications/Chat/chatgpt/replay_buffer/naive.py diff --git a/applications/ChatGPT/chatgpt/replay_buffer/utils.py b/applications/Chat/chatgpt/replay_buffer/utils.py similarity index 100% rename from applications/ChatGPT/chatgpt/replay_buffer/utils.py rename to applications/Chat/chatgpt/replay_buffer/utils.py diff --git a/applications/ChatGPT/chatgpt/trainer/__init__.py b/applications/Chat/chatgpt/trainer/__init__.py similarity index 100% rename from applications/ChatGPT/chatgpt/trainer/__init__.py rename to applications/Chat/chatgpt/trainer/__init__.py diff --git a/applications/ChatGPT/chatgpt/trainer/base.py b/applications/Chat/chatgpt/trainer/base.py similarity index 100% rename from applications/ChatGPT/chatgpt/trainer/base.py rename to applications/Chat/chatgpt/trainer/base.py diff --git a/applications/ChatGPT/chatgpt/trainer/callbacks/__init__.py 
b/applications/Chat/chatgpt/trainer/callbacks/__init__.py similarity index 100% rename from applications/ChatGPT/chatgpt/trainer/callbacks/__init__.py rename to applications/Chat/chatgpt/trainer/callbacks/__init__.py diff --git a/applications/ChatGPT/chatgpt/trainer/callbacks/base.py b/applications/Chat/chatgpt/trainer/callbacks/base.py similarity index 100% rename from applications/ChatGPT/chatgpt/trainer/callbacks/base.py rename to applications/Chat/chatgpt/trainer/callbacks/base.py diff --git a/applications/ChatGPT/chatgpt/trainer/callbacks/performance_evaluator.py b/applications/Chat/chatgpt/trainer/callbacks/performance_evaluator.py similarity index 100% rename from applications/ChatGPT/chatgpt/trainer/callbacks/performance_evaluator.py rename to applications/Chat/chatgpt/trainer/callbacks/performance_evaluator.py diff --git a/applications/ChatGPT/chatgpt/trainer/callbacks/save_checkpoint.py b/applications/Chat/chatgpt/trainer/callbacks/save_checkpoint.py similarity index 100% rename from applications/ChatGPT/chatgpt/trainer/callbacks/save_checkpoint.py rename to applications/Chat/chatgpt/trainer/callbacks/save_checkpoint.py diff --git a/applications/ChatGPT/chatgpt/trainer/detached_base.py b/applications/Chat/chatgpt/trainer/detached_base.py similarity index 100% rename from applications/ChatGPT/chatgpt/trainer/detached_base.py rename to applications/Chat/chatgpt/trainer/detached_base.py diff --git a/applications/ChatGPT/chatgpt/trainer/detached_ppo.py b/applications/Chat/chatgpt/trainer/detached_ppo.py similarity index 100% rename from applications/ChatGPT/chatgpt/trainer/detached_ppo.py rename to applications/Chat/chatgpt/trainer/detached_ppo.py diff --git a/applications/ChatGPT/chatgpt/trainer/ppo.py b/applications/Chat/chatgpt/trainer/ppo.py similarity index 100% rename from applications/ChatGPT/chatgpt/trainer/ppo.py rename to applications/Chat/chatgpt/trainer/ppo.py diff --git a/applications/ChatGPT/chatgpt/trainer/rm.py b/applications/Chat/chatgpt/trainer/rm.py similarity index 100% rename from applications/ChatGPT/chatgpt/trainer/rm.py rename to applications/Chat/chatgpt/trainer/rm.py diff --git a/applications/ChatGPT/chatgpt/trainer/sft.py b/applications/Chat/chatgpt/trainer/sft.py similarity index 100% rename from applications/ChatGPT/chatgpt/trainer/sft.py rename to applications/Chat/chatgpt/trainer/sft.py diff --git a/applications/ChatGPT/chatgpt/trainer/strategies/__init__.py b/applications/Chat/chatgpt/trainer/strategies/__init__.py similarity index 100% rename from applications/ChatGPT/chatgpt/trainer/strategies/__init__.py rename to applications/Chat/chatgpt/trainer/strategies/__init__.py diff --git a/applications/ChatGPT/chatgpt/trainer/strategies/base.py b/applications/Chat/chatgpt/trainer/strategies/base.py similarity index 100% rename from applications/ChatGPT/chatgpt/trainer/strategies/base.py rename to applications/Chat/chatgpt/trainer/strategies/base.py diff --git a/applications/ChatGPT/chatgpt/trainer/strategies/colossalai.py b/applications/Chat/chatgpt/trainer/strategies/colossalai.py similarity index 100% rename from applications/ChatGPT/chatgpt/trainer/strategies/colossalai.py rename to applications/Chat/chatgpt/trainer/strategies/colossalai.py diff --git a/applications/ChatGPT/chatgpt/trainer/strategies/ddp.py b/applications/Chat/chatgpt/trainer/strategies/ddp.py similarity index 100% rename from applications/ChatGPT/chatgpt/trainer/strategies/ddp.py rename to applications/Chat/chatgpt/trainer/strategies/ddp.py diff --git 
a/applications/ChatGPT/chatgpt/trainer/strategies/naive.py b/applications/Chat/chatgpt/trainer/strategies/naive.py similarity index 100% rename from applications/ChatGPT/chatgpt/trainer/strategies/naive.py rename to applications/Chat/chatgpt/trainer/strategies/naive.py diff --git a/applications/ChatGPT/chatgpt/trainer/strategies/sampler.py b/applications/Chat/chatgpt/trainer/strategies/sampler.py similarity index 100% rename from applications/ChatGPT/chatgpt/trainer/strategies/sampler.py rename to applications/Chat/chatgpt/trainer/strategies/sampler.py diff --git a/applications/ChatGPT/chatgpt/trainer/utils.py b/applications/Chat/chatgpt/trainer/utils.py similarity index 100% rename from applications/ChatGPT/chatgpt/trainer/utils.py rename to applications/Chat/chatgpt/trainer/utils.py diff --git a/applications/ChatGPT/chatgpt/utils/__init__.py b/applications/Chat/chatgpt/utils/__init__.py similarity index 100% rename from applications/ChatGPT/chatgpt/utils/__init__.py rename to applications/Chat/chatgpt/utils/__init__.py diff --git a/applications/ChatGPT/chatgpt/utils/tokenizer_utils.py b/applications/Chat/chatgpt/utils/tokenizer_utils.py similarity index 100% rename from applications/ChatGPT/chatgpt/utils/tokenizer_utils.py rename to applications/Chat/chatgpt/utils/tokenizer_utils.py diff --git a/applications/ChatGPT/examples/1m1t.py b/applications/Chat/examples/1m1t.py similarity index 100% rename from applications/ChatGPT/examples/1m1t.py rename to applications/Chat/examples/1m1t.py diff --git a/applications/ChatGPT/examples/1m1t.sh b/applications/Chat/examples/1m1t.sh similarity index 100% rename from applications/ChatGPT/examples/1m1t.sh rename to applications/Chat/examples/1m1t.sh diff --git a/applications/ChatGPT/examples/1m2t.py b/applications/Chat/examples/1m2t.py similarity index 100% rename from applications/ChatGPT/examples/1m2t.py rename to applications/Chat/examples/1m2t.py diff --git a/applications/ChatGPT/examples/1m2t.sh b/applications/Chat/examples/1m2t.sh similarity index 100% rename from applications/ChatGPT/examples/1m2t.sh rename to applications/Chat/examples/1m2t.sh diff --git a/applications/ChatGPT/examples/2m1t.py b/applications/Chat/examples/2m1t.py similarity index 100% rename from applications/ChatGPT/examples/2m1t.py rename to applications/Chat/examples/2m1t.py diff --git a/applications/ChatGPT/examples/2m1t.sh b/applications/Chat/examples/2m1t.sh similarity index 100% rename from applications/ChatGPT/examples/2m1t.sh rename to applications/Chat/examples/2m1t.sh diff --git a/applications/ChatGPT/examples/2m2t.py b/applications/Chat/examples/2m2t.py similarity index 100% rename from applications/ChatGPT/examples/2m2t.py rename to applications/Chat/examples/2m2t.py diff --git a/applications/ChatGPT/examples/2m2t.sh b/applications/Chat/examples/2m2t.sh similarity index 92% rename from applications/ChatGPT/examples/2m2t.sh rename to applications/Chat/examples/2m2t.sh index 9c49d97845c5..79088ad1439c 100644 --- a/applications/ChatGPT/examples/2m2t.sh +++ b/applications/Chat/examples/2m2t.sh @@ -20,5 +20,5 @@ export RAY_NAMESPACE="lccsr" python 2m2t.py "./awesome-chatgpt-prompts/prompts.csv" \ --maker_strategy naive --trainer_strategy ddp --lora_rank 2 \ - --num_episodes 10 --max_timesteps 10 --update_timesteps 10 \ + --num_episodes 2 --max_timesteps 10 --update_timesteps 10 \ --max_epochs 10 # --debug \ No newline at end of file diff --git a/applications/ChatGPT/examples/README.md b/applications/Chat/examples/README.md similarity index 100% rename from 
applications/ChatGPT/examples/README.md rename to applications/Chat/examples/README.md diff --git a/applications/ChatGPT/examples/inference.py b/applications/Chat/examples/inference.py similarity index 100% rename from applications/ChatGPT/examples/inference.py rename to applications/Chat/examples/inference.py diff --git a/applications/ChatGPT/examples/requirements.txt b/applications/Chat/examples/requirements.txt similarity index 100% rename from applications/ChatGPT/examples/requirements.txt rename to applications/Chat/examples/requirements.txt diff --git a/applications/ChatGPT/examples/test_ci.sh b/applications/Chat/examples/test_ci.sh similarity index 100% rename from applications/ChatGPT/examples/test_ci.sh rename to applications/Chat/examples/test_ci.sh diff --git a/applications/ChatGPT/examples/train_dummy.py b/applications/Chat/examples/train_dummy.py similarity index 100% rename from applications/ChatGPT/examples/train_dummy.py rename to applications/Chat/examples/train_dummy.py diff --git a/applications/ChatGPT/examples/train_dummy.sh b/applications/Chat/examples/train_dummy.sh similarity index 100% rename from applications/ChatGPT/examples/train_dummy.sh rename to applications/Chat/examples/train_dummy.sh diff --git a/applications/ChatGPT/examples/train_prompts.py b/applications/Chat/examples/train_prompts.py similarity index 100% rename from applications/ChatGPT/examples/train_prompts.py rename to applications/Chat/examples/train_prompts.py diff --git a/applications/ChatGPT/examples/train_prompts.sh b/applications/Chat/examples/train_prompts.sh similarity index 100% rename from applications/ChatGPT/examples/train_prompts.sh rename to applications/Chat/examples/train_prompts.sh diff --git a/applications/ChatGPT/examples/train_reward_model.py b/applications/Chat/examples/train_reward_model.py similarity index 100% rename from applications/ChatGPT/examples/train_reward_model.py rename to applications/Chat/examples/train_reward_model.py diff --git a/applications/ChatGPT/examples/train_rm.sh b/applications/Chat/examples/train_rm.sh similarity index 100% rename from applications/ChatGPT/examples/train_rm.sh rename to applications/Chat/examples/train_rm.sh diff --git a/applications/ChatGPT/examples/train_sft.py b/applications/Chat/examples/train_sft.py similarity index 100% rename from applications/ChatGPT/examples/train_sft.py rename to applications/Chat/examples/train_sft.py diff --git a/applications/ChatGPT/examples/train_sft.sh b/applications/Chat/examples/train_sft.sh similarity index 100% rename from applications/ChatGPT/examples/train_sft.sh rename to applications/Chat/examples/train_sft.sh diff --git a/applications/ChatGPT/pytest.ini b/applications/Chat/pytest.ini similarity index 100% rename from applications/ChatGPT/pytest.ini rename to applications/Chat/pytest.ini diff --git a/applications/ChatGPT/requirements-test.txt b/applications/Chat/requirements-test.txt similarity index 100% rename from applications/ChatGPT/requirements-test.txt rename to applications/Chat/requirements-test.txt diff --git a/applications/ChatGPT/requirements.txt b/applications/Chat/requirements.txt similarity index 100% rename from applications/ChatGPT/requirements.txt rename to applications/Chat/requirements.txt diff --git a/applications/ChatGPT/setup.py b/applications/Chat/setup.py similarity index 100% rename from applications/ChatGPT/setup.py rename to applications/Chat/setup.py diff --git a/applications/ChatGPT/tests/__init__.py b/applications/Chat/tests/__init__.py similarity index 100% rename 
from applications/ChatGPT/tests/__init__.py rename to applications/Chat/tests/__init__.py diff --git a/applications/ChatGPT/tests/test_checkpoint.py b/applications/Chat/tests/test_checkpoint.py similarity index 100% rename from applications/ChatGPT/tests/test_checkpoint.py rename to applications/Chat/tests/test_checkpoint.py diff --git a/applications/ChatGPT/tests/test_data.py b/applications/Chat/tests/test_data.py similarity index 100% rename from applications/ChatGPT/tests/test_data.py rename to applications/Chat/tests/test_data.py diff --git a/applications/ChatGPT/version.txt b/applications/Chat/version.txt similarity index 100% rename from applications/ChatGPT/version.txt rename to applications/Chat/version.txt From 7e5c8f271cb7286fdaa9dfe1786ea7abda692f96 Mon Sep 17 00:00:00 2001 From: csric Date: Thu, 30 Mar 2023 14:35:47 +0800 Subject: [PATCH 29/36] rename to sync with upstream --- applications/Chat/{chatgpt => coati}/__init__.py | 0 applications/Chat/{chatgpt => coati}/dataset/__init__.py | 0 applications/Chat/{chatgpt => coati}/dataset/reward_dataset.py | 0 applications/Chat/{chatgpt => coati}/dataset/sft_dataset.py | 0 applications/Chat/{chatgpt => coati}/dataset/utils.py | 0 applications/Chat/{chatgpt => coati}/experience_maker/__init__.py | 0 applications/Chat/{chatgpt => coati}/experience_maker/base.py | 0 applications/Chat/{chatgpt => coati}/experience_maker/detached.py | 0 applications/Chat/{chatgpt => coati}/experience_maker/naive.py | 0 .../Chat/{chatgpt => coati}/experience_maker/strategy/base.py | 0 applications/Chat/{chatgpt => coati}/models/__init__.py | 0 applications/Chat/{chatgpt => coati}/models/base/__init__.py | 0 applications/Chat/{chatgpt => coati}/models/base/actor.py | 0 applications/Chat/{chatgpt => coati}/models/base/critic.py | 0 applications/Chat/{chatgpt => coati}/models/base/lm.py | 0 applications/Chat/{chatgpt => coati}/models/base/reward_model.py | 0 applications/Chat/{chatgpt => coati}/models/bloom/__init__.py | 0 applications/Chat/{chatgpt => coati}/models/bloom/bloom_actor.py | 0 applications/Chat/{chatgpt => coati}/models/bloom/bloom_critic.py | 0 applications/Chat/{chatgpt => coati}/models/bloom/bloom_lm.py | 0 applications/Chat/{chatgpt => coati}/models/bloom/bloom_rm.py | 0 applications/Chat/{chatgpt => coati}/models/deberta/__init__.py | 0 .../Chat/{chatgpt => coati}/models/deberta/deberta_critic.py | 0 applications/Chat/{chatgpt => coati}/models/deberta/deberta_rm.py | 0 applications/Chat/{chatgpt => coati}/models/generation.py | 0 applications/Chat/{chatgpt => coati}/models/generation_utils.py | 0 applications/Chat/{chatgpt => coati}/models/gpt/__init__.py | 0 applications/Chat/{chatgpt => coati}/models/gpt/gpt_actor.py | 0 applications/Chat/{chatgpt => coati}/models/gpt/gpt_critic.py | 0 applications/Chat/{chatgpt => coati}/models/gpt/gpt_lm.py | 0 applications/Chat/{chatgpt => coati}/models/gpt/gpt_rm.py | 0 applications/Chat/{chatgpt => coati}/models/llama/__init__.py | 0 applications/Chat/{chatgpt => coati}/models/llama/llama_actor.py | 0 applications/Chat/{chatgpt => coati}/models/llama/llama_critic.py | 0 applications/Chat/{chatgpt => coati}/models/llama/llama_lm.py | 0 applications/Chat/{chatgpt => coati}/models/llama/llama_rm.py | 0 applications/Chat/{chatgpt => coati}/models/lora.py | 0 applications/Chat/{chatgpt => coati}/models/loss.py | 0 applications/Chat/{chatgpt => coati}/models/opt/__init__.py | 0 applications/Chat/{chatgpt => coati}/models/opt/opt_actor.py | 0 applications/Chat/{chatgpt => coati}/models/opt/opt_critic.py | 0 
applications/Chat/{chatgpt => coati}/models/opt/opt_lm.py | 0 applications/Chat/{chatgpt => coati}/models/opt/opt_rm.py | 0 applications/Chat/{chatgpt => coati}/models/utils.py | 0 applications/Chat/{chatgpt => coati}/replay_buffer/__init__.py | 0 applications/Chat/{chatgpt => coati}/replay_buffer/base.py | 0 applications/Chat/{chatgpt => coati}/replay_buffer/detached.py | 0 applications/Chat/{chatgpt => coati}/replay_buffer/naive.py | 0 applications/Chat/{chatgpt => coati}/replay_buffer/utils.py | 0 applications/Chat/{chatgpt => coati}/trainer/__init__.py | 0 applications/Chat/{chatgpt => coati}/trainer/base.py | 0 .../Chat/{chatgpt => coati}/trainer/callbacks/__init__.py | 0 applications/Chat/{chatgpt => coati}/trainer/callbacks/base.py | 0 .../{chatgpt => coati}/trainer/callbacks/performance_evaluator.py | 0 .../Chat/{chatgpt => coati}/trainer/callbacks/save_checkpoint.py | 0 applications/Chat/{chatgpt => coati}/trainer/detached_base.py | 0 applications/Chat/{chatgpt => coati}/trainer/detached_ppo.py | 0 applications/Chat/{chatgpt => coati}/trainer/ppo.py | 0 applications/Chat/{chatgpt => coati}/trainer/rm.py | 0 applications/Chat/{chatgpt => coati}/trainer/sft.py | 0 .../Chat/{chatgpt => coati}/trainer/strategies/__init__.py | 0 applications/Chat/{chatgpt => coati}/trainer/strategies/base.py | 0 .../Chat/{chatgpt => coati}/trainer/strategies/colossalai.py | 0 applications/Chat/{chatgpt => coati}/trainer/strategies/ddp.py | 0 applications/Chat/{chatgpt => coati}/trainer/strategies/naive.py | 0 .../Chat/{chatgpt => coati}/trainer/strategies/sampler.py | 0 applications/Chat/{chatgpt => coati}/trainer/utils.py | 0 applications/Chat/{chatgpt => coati}/utils/__init__.py | 0 applications/Chat/{chatgpt => coati}/utils/tokenizer_utils.py | 0 69 files changed, 0 insertions(+), 0 deletions(-) rename applications/Chat/{chatgpt => coati}/__init__.py (100%) rename applications/Chat/{chatgpt => coati}/dataset/__init__.py (100%) rename applications/Chat/{chatgpt => coati}/dataset/reward_dataset.py (100%) rename applications/Chat/{chatgpt => coati}/dataset/sft_dataset.py (100%) rename applications/Chat/{chatgpt => coati}/dataset/utils.py (100%) rename applications/Chat/{chatgpt => coati}/experience_maker/__init__.py (100%) rename applications/Chat/{chatgpt => coati}/experience_maker/base.py (100%) rename applications/Chat/{chatgpt => coati}/experience_maker/detached.py (100%) rename applications/Chat/{chatgpt => coati}/experience_maker/naive.py (100%) rename applications/Chat/{chatgpt => coati}/experience_maker/strategy/base.py (100%) rename applications/Chat/{chatgpt => coati}/models/__init__.py (100%) rename applications/Chat/{chatgpt => coati}/models/base/__init__.py (100%) rename applications/Chat/{chatgpt => coati}/models/base/actor.py (100%) rename applications/Chat/{chatgpt => coati}/models/base/critic.py (100%) rename applications/Chat/{chatgpt => coati}/models/base/lm.py (100%) rename applications/Chat/{chatgpt => coati}/models/base/reward_model.py (100%) rename applications/Chat/{chatgpt => coati}/models/bloom/__init__.py (100%) rename applications/Chat/{chatgpt => coati}/models/bloom/bloom_actor.py (100%) rename applications/Chat/{chatgpt => coati}/models/bloom/bloom_critic.py (100%) rename applications/Chat/{chatgpt => coati}/models/bloom/bloom_lm.py (100%) rename applications/Chat/{chatgpt => coati}/models/bloom/bloom_rm.py (100%) rename applications/Chat/{chatgpt => coati}/models/deberta/__init__.py (100%) rename applications/Chat/{chatgpt => coati}/models/deberta/deberta_critic.py (100%) 
rename applications/Chat/{chatgpt => coati}/models/deberta/deberta_rm.py (100%) rename applications/Chat/{chatgpt => coati}/models/generation.py (100%) rename applications/Chat/{chatgpt => coati}/models/generation_utils.py (100%) rename applications/Chat/{chatgpt => coati}/models/gpt/__init__.py (100%) rename applications/Chat/{chatgpt => coati}/models/gpt/gpt_actor.py (100%) rename applications/Chat/{chatgpt => coati}/models/gpt/gpt_critic.py (100%) rename applications/Chat/{chatgpt => coati}/models/gpt/gpt_lm.py (100%) rename applications/Chat/{chatgpt => coati}/models/gpt/gpt_rm.py (100%) rename applications/Chat/{chatgpt => coati}/models/llama/__init__.py (100%) rename applications/Chat/{chatgpt => coati}/models/llama/llama_actor.py (100%) rename applications/Chat/{chatgpt => coati}/models/llama/llama_critic.py (100%) rename applications/Chat/{chatgpt => coati}/models/llama/llama_lm.py (100%) rename applications/Chat/{chatgpt => coati}/models/llama/llama_rm.py (100%) rename applications/Chat/{chatgpt => coati}/models/lora.py (100%) rename applications/Chat/{chatgpt => coati}/models/loss.py (100%) rename applications/Chat/{chatgpt => coati}/models/opt/__init__.py (100%) rename applications/Chat/{chatgpt => coati}/models/opt/opt_actor.py (100%) rename applications/Chat/{chatgpt => coati}/models/opt/opt_critic.py (100%) rename applications/Chat/{chatgpt => coati}/models/opt/opt_lm.py (100%) rename applications/Chat/{chatgpt => coati}/models/opt/opt_rm.py (100%) rename applications/Chat/{chatgpt => coati}/models/utils.py (100%) rename applications/Chat/{chatgpt => coati}/replay_buffer/__init__.py (100%) rename applications/Chat/{chatgpt => coati}/replay_buffer/base.py (100%) rename applications/Chat/{chatgpt => coati}/replay_buffer/detached.py (100%) rename applications/Chat/{chatgpt => coati}/replay_buffer/naive.py (100%) rename applications/Chat/{chatgpt => coati}/replay_buffer/utils.py (100%) rename applications/Chat/{chatgpt => coati}/trainer/__init__.py (100%) rename applications/Chat/{chatgpt => coati}/trainer/base.py (100%) rename applications/Chat/{chatgpt => coati}/trainer/callbacks/__init__.py (100%) rename applications/Chat/{chatgpt => coati}/trainer/callbacks/base.py (100%) rename applications/Chat/{chatgpt => coati}/trainer/callbacks/performance_evaluator.py (100%) rename applications/Chat/{chatgpt => coati}/trainer/callbacks/save_checkpoint.py (100%) rename applications/Chat/{chatgpt => coati}/trainer/detached_base.py (100%) rename applications/Chat/{chatgpt => coati}/trainer/detached_ppo.py (100%) rename applications/Chat/{chatgpt => coati}/trainer/ppo.py (100%) rename applications/Chat/{chatgpt => coati}/trainer/rm.py (100%) rename applications/Chat/{chatgpt => coati}/trainer/sft.py (100%) rename applications/Chat/{chatgpt => coati}/trainer/strategies/__init__.py (100%) rename applications/Chat/{chatgpt => coati}/trainer/strategies/base.py (100%) rename applications/Chat/{chatgpt => coati}/trainer/strategies/colossalai.py (100%) rename applications/Chat/{chatgpt => coati}/trainer/strategies/ddp.py (100%) rename applications/Chat/{chatgpt => coati}/trainer/strategies/naive.py (100%) rename applications/Chat/{chatgpt => coati}/trainer/strategies/sampler.py (100%) rename applications/Chat/{chatgpt => coati}/trainer/utils.py (100%) rename applications/Chat/{chatgpt => coati}/utils/__init__.py (100%) rename applications/Chat/{chatgpt => coati}/utils/tokenizer_utils.py (100%) diff --git a/applications/Chat/chatgpt/__init__.py b/applications/Chat/coati/__init__.py similarity index 
100% rename from applications/Chat/chatgpt/__init__.py rename to applications/Chat/coati/__init__.py diff --git a/applications/Chat/chatgpt/dataset/__init__.py b/applications/Chat/coati/dataset/__init__.py similarity index 100% rename from applications/Chat/chatgpt/dataset/__init__.py rename to applications/Chat/coati/dataset/__init__.py diff --git a/applications/Chat/chatgpt/dataset/reward_dataset.py b/applications/Chat/coati/dataset/reward_dataset.py similarity index 100% rename from applications/Chat/chatgpt/dataset/reward_dataset.py rename to applications/Chat/coati/dataset/reward_dataset.py diff --git a/applications/Chat/chatgpt/dataset/sft_dataset.py b/applications/Chat/coati/dataset/sft_dataset.py similarity index 100% rename from applications/Chat/chatgpt/dataset/sft_dataset.py rename to applications/Chat/coati/dataset/sft_dataset.py diff --git a/applications/Chat/chatgpt/dataset/utils.py b/applications/Chat/coati/dataset/utils.py similarity index 100% rename from applications/Chat/chatgpt/dataset/utils.py rename to applications/Chat/coati/dataset/utils.py diff --git a/applications/Chat/chatgpt/experience_maker/__init__.py b/applications/Chat/coati/experience_maker/__init__.py similarity index 100% rename from applications/Chat/chatgpt/experience_maker/__init__.py rename to applications/Chat/coati/experience_maker/__init__.py diff --git a/applications/Chat/chatgpt/experience_maker/base.py b/applications/Chat/coati/experience_maker/base.py similarity index 100% rename from applications/Chat/chatgpt/experience_maker/base.py rename to applications/Chat/coati/experience_maker/base.py diff --git a/applications/Chat/chatgpt/experience_maker/detached.py b/applications/Chat/coati/experience_maker/detached.py similarity index 100% rename from applications/Chat/chatgpt/experience_maker/detached.py rename to applications/Chat/coati/experience_maker/detached.py diff --git a/applications/Chat/chatgpt/experience_maker/naive.py b/applications/Chat/coati/experience_maker/naive.py similarity index 100% rename from applications/Chat/chatgpt/experience_maker/naive.py rename to applications/Chat/coati/experience_maker/naive.py diff --git a/applications/Chat/chatgpt/experience_maker/strategy/base.py b/applications/Chat/coati/experience_maker/strategy/base.py similarity index 100% rename from applications/Chat/chatgpt/experience_maker/strategy/base.py rename to applications/Chat/coati/experience_maker/strategy/base.py diff --git a/applications/Chat/chatgpt/models/__init__.py b/applications/Chat/coati/models/__init__.py similarity index 100% rename from applications/Chat/chatgpt/models/__init__.py rename to applications/Chat/coati/models/__init__.py diff --git a/applications/Chat/chatgpt/models/base/__init__.py b/applications/Chat/coati/models/base/__init__.py similarity index 100% rename from applications/Chat/chatgpt/models/base/__init__.py rename to applications/Chat/coati/models/base/__init__.py diff --git a/applications/Chat/chatgpt/models/base/actor.py b/applications/Chat/coati/models/base/actor.py similarity index 100% rename from applications/Chat/chatgpt/models/base/actor.py rename to applications/Chat/coati/models/base/actor.py diff --git a/applications/Chat/chatgpt/models/base/critic.py b/applications/Chat/coati/models/base/critic.py similarity index 100% rename from applications/Chat/chatgpt/models/base/critic.py rename to applications/Chat/coati/models/base/critic.py diff --git a/applications/Chat/chatgpt/models/base/lm.py b/applications/Chat/coati/models/base/lm.py similarity index 100% 
rename from applications/Chat/chatgpt/models/base/lm.py rename to applications/Chat/coati/models/base/lm.py diff --git a/applications/Chat/chatgpt/models/base/reward_model.py b/applications/Chat/coati/models/base/reward_model.py similarity index 100% rename from applications/Chat/chatgpt/models/base/reward_model.py rename to applications/Chat/coati/models/base/reward_model.py diff --git a/applications/Chat/chatgpt/models/bloom/__init__.py b/applications/Chat/coati/models/bloom/__init__.py similarity index 100% rename from applications/Chat/chatgpt/models/bloom/__init__.py rename to applications/Chat/coati/models/bloom/__init__.py diff --git a/applications/Chat/chatgpt/models/bloom/bloom_actor.py b/applications/Chat/coati/models/bloom/bloom_actor.py similarity index 100% rename from applications/Chat/chatgpt/models/bloom/bloom_actor.py rename to applications/Chat/coati/models/bloom/bloom_actor.py diff --git a/applications/Chat/chatgpt/models/bloom/bloom_critic.py b/applications/Chat/coati/models/bloom/bloom_critic.py similarity index 100% rename from applications/Chat/chatgpt/models/bloom/bloom_critic.py rename to applications/Chat/coati/models/bloom/bloom_critic.py diff --git a/applications/Chat/chatgpt/models/bloom/bloom_lm.py b/applications/Chat/coati/models/bloom/bloom_lm.py similarity index 100% rename from applications/Chat/chatgpt/models/bloom/bloom_lm.py rename to applications/Chat/coati/models/bloom/bloom_lm.py diff --git a/applications/Chat/chatgpt/models/bloom/bloom_rm.py b/applications/Chat/coati/models/bloom/bloom_rm.py similarity index 100% rename from applications/Chat/chatgpt/models/bloom/bloom_rm.py rename to applications/Chat/coati/models/bloom/bloom_rm.py diff --git a/applications/Chat/chatgpt/models/deberta/__init__.py b/applications/Chat/coati/models/deberta/__init__.py similarity index 100% rename from applications/Chat/chatgpt/models/deberta/__init__.py rename to applications/Chat/coati/models/deberta/__init__.py diff --git a/applications/Chat/chatgpt/models/deberta/deberta_critic.py b/applications/Chat/coati/models/deberta/deberta_critic.py similarity index 100% rename from applications/Chat/chatgpt/models/deberta/deberta_critic.py rename to applications/Chat/coati/models/deberta/deberta_critic.py diff --git a/applications/Chat/chatgpt/models/deberta/deberta_rm.py b/applications/Chat/coati/models/deberta/deberta_rm.py similarity index 100% rename from applications/Chat/chatgpt/models/deberta/deberta_rm.py rename to applications/Chat/coati/models/deberta/deberta_rm.py diff --git a/applications/Chat/chatgpt/models/generation.py b/applications/Chat/coati/models/generation.py similarity index 100% rename from applications/Chat/chatgpt/models/generation.py rename to applications/Chat/coati/models/generation.py diff --git a/applications/Chat/chatgpt/models/generation_utils.py b/applications/Chat/coati/models/generation_utils.py similarity index 100% rename from applications/Chat/chatgpt/models/generation_utils.py rename to applications/Chat/coati/models/generation_utils.py diff --git a/applications/Chat/chatgpt/models/gpt/__init__.py b/applications/Chat/coati/models/gpt/__init__.py similarity index 100% rename from applications/Chat/chatgpt/models/gpt/__init__.py rename to applications/Chat/coati/models/gpt/__init__.py diff --git a/applications/Chat/chatgpt/models/gpt/gpt_actor.py b/applications/Chat/coati/models/gpt/gpt_actor.py similarity index 100% rename from applications/Chat/chatgpt/models/gpt/gpt_actor.py rename to applications/Chat/coati/models/gpt/gpt_actor.py 
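Note: the pure 100%-similarity renames in this patch, combined with the import rewrites in PATCH 30/36 below, amount to a git mv followed by a tree-wide substitution. A minimal shell sketch of that procedure, assuming GNU sed and a checkout at the repository root; the commands are illustrative only and are not part of the patch itself:

    # Move the package directory; git records every file as a 100% rename.
    git mv applications/Chat/chatgpt applications/Chat/coati
    # Rewrite 'chatgpt.' module references to 'coati.' in Python sources.
    # Caveat: this also rewrites matching text inside strings and comments.
    grep -rl --include='*.py' 'chatgpt' applications/Chat | xargs sed -i 's/\bchatgpt\./coati./g'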
diff --git a/applications/Chat/chatgpt/models/gpt/gpt_critic.py b/applications/Chat/coati/models/gpt/gpt_critic.py similarity index 100% rename from applications/Chat/chatgpt/models/gpt/gpt_critic.py rename to applications/Chat/coati/models/gpt/gpt_critic.py diff --git a/applications/Chat/chatgpt/models/gpt/gpt_lm.py b/applications/Chat/coati/models/gpt/gpt_lm.py similarity index 100% rename from applications/Chat/chatgpt/models/gpt/gpt_lm.py rename to applications/Chat/coati/models/gpt/gpt_lm.py diff --git a/applications/Chat/chatgpt/models/gpt/gpt_rm.py b/applications/Chat/coati/models/gpt/gpt_rm.py similarity index 100% rename from applications/Chat/chatgpt/models/gpt/gpt_rm.py rename to applications/Chat/coati/models/gpt/gpt_rm.py diff --git a/applications/Chat/chatgpt/models/llama/__init__.py b/applications/Chat/coati/models/llama/__init__.py similarity index 100% rename from applications/Chat/chatgpt/models/llama/__init__.py rename to applications/Chat/coati/models/llama/__init__.py diff --git a/applications/Chat/chatgpt/models/llama/llama_actor.py b/applications/Chat/coati/models/llama/llama_actor.py similarity index 100% rename from applications/Chat/chatgpt/models/llama/llama_actor.py rename to applications/Chat/coati/models/llama/llama_actor.py diff --git a/applications/Chat/chatgpt/models/llama/llama_critic.py b/applications/Chat/coati/models/llama/llama_critic.py similarity index 100% rename from applications/Chat/chatgpt/models/llama/llama_critic.py rename to applications/Chat/coati/models/llama/llama_critic.py diff --git a/applications/Chat/chatgpt/models/llama/llama_lm.py b/applications/Chat/coati/models/llama/llama_lm.py similarity index 100% rename from applications/Chat/chatgpt/models/llama/llama_lm.py rename to applications/Chat/coati/models/llama/llama_lm.py diff --git a/applications/Chat/chatgpt/models/llama/llama_rm.py b/applications/Chat/coati/models/llama/llama_rm.py similarity index 100% rename from applications/Chat/chatgpt/models/llama/llama_rm.py rename to applications/Chat/coati/models/llama/llama_rm.py diff --git a/applications/Chat/chatgpt/models/lora.py b/applications/Chat/coati/models/lora.py similarity index 100% rename from applications/Chat/chatgpt/models/lora.py rename to applications/Chat/coati/models/lora.py diff --git a/applications/Chat/chatgpt/models/loss.py b/applications/Chat/coati/models/loss.py similarity index 100% rename from applications/Chat/chatgpt/models/loss.py rename to applications/Chat/coati/models/loss.py diff --git a/applications/Chat/chatgpt/models/opt/__init__.py b/applications/Chat/coati/models/opt/__init__.py similarity index 100% rename from applications/Chat/chatgpt/models/opt/__init__.py rename to applications/Chat/coati/models/opt/__init__.py diff --git a/applications/Chat/chatgpt/models/opt/opt_actor.py b/applications/Chat/coati/models/opt/opt_actor.py similarity index 100% rename from applications/Chat/chatgpt/models/opt/opt_actor.py rename to applications/Chat/coati/models/opt/opt_actor.py diff --git a/applications/Chat/chatgpt/models/opt/opt_critic.py b/applications/Chat/coati/models/opt/opt_critic.py similarity index 100% rename from applications/Chat/chatgpt/models/opt/opt_critic.py rename to applications/Chat/coati/models/opt/opt_critic.py diff --git a/applications/Chat/chatgpt/models/opt/opt_lm.py b/applications/Chat/coati/models/opt/opt_lm.py similarity index 100% rename from applications/Chat/chatgpt/models/opt/opt_lm.py rename to applications/Chat/coati/models/opt/opt_lm.py diff --git 
a/applications/Chat/chatgpt/models/opt/opt_rm.py b/applications/Chat/coati/models/opt/opt_rm.py similarity index 100% rename from applications/Chat/chatgpt/models/opt/opt_rm.py rename to applications/Chat/coati/models/opt/opt_rm.py diff --git a/applications/Chat/chatgpt/models/utils.py b/applications/Chat/coati/models/utils.py similarity index 100% rename from applications/Chat/chatgpt/models/utils.py rename to applications/Chat/coati/models/utils.py diff --git a/applications/Chat/chatgpt/replay_buffer/__init__.py b/applications/Chat/coati/replay_buffer/__init__.py similarity index 100% rename from applications/Chat/chatgpt/replay_buffer/__init__.py rename to applications/Chat/coati/replay_buffer/__init__.py diff --git a/applications/Chat/chatgpt/replay_buffer/base.py b/applications/Chat/coati/replay_buffer/base.py similarity index 100% rename from applications/Chat/chatgpt/replay_buffer/base.py rename to applications/Chat/coati/replay_buffer/base.py diff --git a/applications/Chat/chatgpt/replay_buffer/detached.py b/applications/Chat/coati/replay_buffer/detached.py similarity index 100% rename from applications/Chat/chatgpt/replay_buffer/detached.py rename to applications/Chat/coati/replay_buffer/detached.py diff --git a/applications/Chat/chatgpt/replay_buffer/naive.py b/applications/Chat/coati/replay_buffer/naive.py similarity index 100% rename from applications/Chat/chatgpt/replay_buffer/naive.py rename to applications/Chat/coati/replay_buffer/naive.py diff --git a/applications/Chat/chatgpt/replay_buffer/utils.py b/applications/Chat/coati/replay_buffer/utils.py similarity index 100% rename from applications/Chat/chatgpt/replay_buffer/utils.py rename to applications/Chat/coati/replay_buffer/utils.py diff --git a/applications/Chat/chatgpt/trainer/__init__.py b/applications/Chat/coati/trainer/__init__.py similarity index 100% rename from applications/Chat/chatgpt/trainer/__init__.py rename to applications/Chat/coati/trainer/__init__.py diff --git a/applications/Chat/chatgpt/trainer/base.py b/applications/Chat/coati/trainer/base.py similarity index 100% rename from applications/Chat/chatgpt/trainer/base.py rename to applications/Chat/coati/trainer/base.py diff --git a/applications/Chat/chatgpt/trainer/callbacks/__init__.py b/applications/Chat/coati/trainer/callbacks/__init__.py similarity index 100% rename from applications/Chat/chatgpt/trainer/callbacks/__init__.py rename to applications/Chat/coati/trainer/callbacks/__init__.py diff --git a/applications/Chat/chatgpt/trainer/callbacks/base.py b/applications/Chat/coati/trainer/callbacks/base.py similarity index 100% rename from applications/Chat/chatgpt/trainer/callbacks/base.py rename to applications/Chat/coati/trainer/callbacks/base.py diff --git a/applications/Chat/chatgpt/trainer/callbacks/performance_evaluator.py b/applications/Chat/coati/trainer/callbacks/performance_evaluator.py similarity index 100% rename from applications/Chat/chatgpt/trainer/callbacks/performance_evaluator.py rename to applications/Chat/coati/trainer/callbacks/performance_evaluator.py diff --git a/applications/Chat/chatgpt/trainer/callbacks/save_checkpoint.py b/applications/Chat/coati/trainer/callbacks/save_checkpoint.py similarity index 100% rename from applications/Chat/chatgpt/trainer/callbacks/save_checkpoint.py rename to applications/Chat/coati/trainer/callbacks/save_checkpoint.py diff --git a/applications/Chat/chatgpt/trainer/detached_base.py b/applications/Chat/coati/trainer/detached_base.py similarity index 100% rename from 
applications/Chat/chatgpt/trainer/detached_base.py rename to applications/Chat/coati/trainer/detached_base.py diff --git a/applications/Chat/chatgpt/trainer/detached_ppo.py b/applications/Chat/coati/trainer/detached_ppo.py similarity index 100% rename from applications/Chat/chatgpt/trainer/detached_ppo.py rename to applications/Chat/coati/trainer/detached_ppo.py diff --git a/applications/Chat/chatgpt/trainer/ppo.py b/applications/Chat/coati/trainer/ppo.py similarity index 100% rename from applications/Chat/chatgpt/trainer/ppo.py rename to applications/Chat/coati/trainer/ppo.py diff --git a/applications/Chat/chatgpt/trainer/rm.py b/applications/Chat/coati/trainer/rm.py similarity index 100% rename from applications/Chat/chatgpt/trainer/rm.py rename to applications/Chat/coati/trainer/rm.py diff --git a/applications/Chat/chatgpt/trainer/sft.py b/applications/Chat/coati/trainer/sft.py similarity index 100% rename from applications/Chat/chatgpt/trainer/sft.py rename to applications/Chat/coati/trainer/sft.py diff --git a/applications/Chat/chatgpt/trainer/strategies/__init__.py b/applications/Chat/coati/trainer/strategies/__init__.py similarity index 100% rename from applications/Chat/chatgpt/trainer/strategies/__init__.py rename to applications/Chat/coati/trainer/strategies/__init__.py diff --git a/applications/Chat/chatgpt/trainer/strategies/base.py b/applications/Chat/coati/trainer/strategies/base.py similarity index 100% rename from applications/Chat/chatgpt/trainer/strategies/base.py rename to applications/Chat/coati/trainer/strategies/base.py diff --git a/applications/Chat/chatgpt/trainer/strategies/colossalai.py b/applications/Chat/coati/trainer/strategies/colossalai.py similarity index 100% rename from applications/Chat/chatgpt/trainer/strategies/colossalai.py rename to applications/Chat/coati/trainer/strategies/colossalai.py diff --git a/applications/Chat/chatgpt/trainer/strategies/ddp.py b/applications/Chat/coati/trainer/strategies/ddp.py similarity index 100% rename from applications/Chat/chatgpt/trainer/strategies/ddp.py rename to applications/Chat/coati/trainer/strategies/ddp.py diff --git a/applications/Chat/chatgpt/trainer/strategies/naive.py b/applications/Chat/coati/trainer/strategies/naive.py similarity index 100% rename from applications/Chat/chatgpt/trainer/strategies/naive.py rename to applications/Chat/coati/trainer/strategies/naive.py diff --git a/applications/Chat/chatgpt/trainer/strategies/sampler.py b/applications/Chat/coati/trainer/strategies/sampler.py similarity index 100% rename from applications/Chat/chatgpt/trainer/strategies/sampler.py rename to applications/Chat/coati/trainer/strategies/sampler.py diff --git a/applications/Chat/chatgpt/trainer/utils.py b/applications/Chat/coati/trainer/utils.py similarity index 100% rename from applications/Chat/chatgpt/trainer/utils.py rename to applications/Chat/coati/trainer/utils.py diff --git a/applications/Chat/chatgpt/utils/__init__.py b/applications/Chat/coati/utils/__init__.py similarity index 100% rename from applications/Chat/chatgpt/utils/__init__.py rename to applications/Chat/coati/utils/__init__.py diff --git a/applications/Chat/chatgpt/utils/tokenizer_utils.py b/applications/Chat/coati/utils/tokenizer_utils.py similarity index 100% rename from applications/Chat/chatgpt/utils/tokenizer_utils.py rename to applications/Chat/coati/utils/tokenizer_utils.py From c0649c326e60bcc6bbd7f7ad38e50a799bc0e672 Mon Sep 17 00:00:00 2001 From: csric Date: Thu, 30 Mar 2023 15:08:11 +0800 Subject: [PATCH 30/36] coati rename --- 
applications/Chat/coati/experience_maker/detached.py | 8 ++++---- applications/Chat/coati/replay_buffer/detached.py | 2 +- applications/Chat/coati/trainer/detached_base.py | 4 ++-- applications/Chat/coati/trainer/detached_ppo.py | 12 ++++++------ applications/Chat/examples/1m1t.py | 6 +++--- applications/Chat/examples/1m2t.py | 6 +++--- applications/Chat/examples/2m1t.py | 6 +++--- applications/Chat/examples/2m2t.py | 6 +++--- 8 files changed, 25 insertions(+), 25 deletions(-) diff --git a/applications/Chat/coati/experience_maker/detached.py b/applications/Chat/coati/experience_maker/detached.py index 29689a88a8b9..91c7718d601a 100644 --- a/applications/Chat/coati/experience_maker/detached.py +++ b/applications/Chat/coati/experience_maker/detached.py @@ -6,10 +6,10 @@ from ray.exceptions import GetTimeoutError from torch import Tensor import torch.nn as nn -from chatgpt.models.base import Actor, Critic, RewardModel -from chatgpt.trainer.strategies.sampler import DistributedSampler -from chatgpt.trainer.strategies import Strategy -from chatgpt.trainer.utils import is_rank_0, get_strategy_from_args, set_dist_env +from coati.models.base import Actor, Critic, RewardModel +from coati.trainer.strategies.sampler import DistributedSampler +from coati.trainer.strategies import Strategy +from coati.trainer.utils import is_rank_0, get_strategy_from_args, set_dist_env from copy import deepcopy from threading import Lock import time diff --git a/applications/Chat/coati/replay_buffer/detached.py b/applications/Chat/coati/replay_buffer/detached.py index c21c8b4b38f8..3f968af400f9 100644 --- a/applications/Chat/coati/replay_buffer/detached.py +++ b/applications/Chat/coati/replay_buffer/detached.py @@ -5,7 +5,7 @@ # from torch.multiprocessing import Queue from ray.util.queue import Queue import ray -from chatgpt.experience_maker.base import Experience +from coati.experience_maker.base import Experience from .utils import BufferItem, make_experience_batch, split_experience_batch from threading import Lock import copy diff --git a/applications/Chat/coati/trainer/detached_base.py b/applications/Chat/coati/trainer/detached_base.py index 68e625db68e1..d79af64885eb 100644 --- a/applications/Chat/coati/trainer/detached_base.py +++ b/applications/Chat/coati/trainer/detached_base.py @@ -5,8 +5,8 @@ from torch import Tensor import torch.nn as nn from torch.optim import Optimizer -from chatgpt.experience_maker import Experience -from chatgpt.replay_buffer import DetachedReplayBuffer +from coati.experience_maker import Experience +from coati.replay_buffer import DetachedReplayBuffer from tqdm import tqdm from .callbacks import Callback diff --git a/applications/Chat/coati/trainer/detached_ppo.py b/applications/Chat/coati/trainer/detached_ppo.py index 6ceba983b2da..1d2ba06a12a5 100644 --- a/applications/Chat/coati/trainer/detached_ppo.py +++ b/applications/Chat/coati/trainer/detached_ppo.py @@ -6,12 +6,12 @@ from torch.optim import Optimizer from torch.optim import Adam -from chatgpt.experience_maker import Experience, NaiveExperienceMaker -from chatgpt.models.base import Actor, Critic -from chatgpt.models.generation_utils import update_model_kwargs_fn -from chatgpt.models.loss import PolicyLoss, ValueLoss -from chatgpt.replay_buffer import DetachedReplayBuffer -from chatgpt.trainer.strategies import ColossalAIStrategy, DDPStrategy, NaiveStrategy +from coati.experience_maker import Experience, NaiveExperienceMaker +from coati.models.base import Actor, Critic +from coati.models.generation_utils import 
update_model_kwargs_fn +from coati.models.loss import PolicyLoss, ValueLoss +from coati.replay_buffer import DetachedReplayBuffer +from coati.trainer.strategies import ColossalAIStrategy, DDPStrategy, NaiveStrategy from colossalai.nn.optimizer import HybridAdam diff --git a/applications/Chat/examples/1m1t.py b/applications/Chat/examples/1m1t.py index c8f636782b73..1aac3fddaf02 100644 --- a/applications/Chat/examples/1m1t.py +++ b/applications/Chat/examples/1m1t.py @@ -3,9 +3,9 @@ import pandas as pd import torch -from chatgpt.trainer import PPOTrainer, DetachedPPOTrainer -from chatgpt.trainer.strategies import ColossalAIStrategy, DDPStrategy, NaiveStrategy -from chatgpt.experience_maker import NaiveExperienceMaker, ExperienceMakerHolder +from coati.trainer import PPOTrainer, DetachedPPOTrainer +from coati.trainer.strategies import ColossalAIStrategy, DDPStrategy, NaiveStrategy +from coati.experience_maker import NaiveExperienceMaker, ExperienceMakerHolder from torch.optim import Adam from transformers import AutoTokenizer, BloomTokenizerFast from transformers.models.gpt2.tokenization_gpt2 import GPT2Tokenizer diff --git a/applications/Chat/examples/1m2t.py b/applications/Chat/examples/1m2t.py index 8a0083c15b3d..043a90b553e3 100644 --- a/applications/Chat/examples/1m2t.py +++ b/applications/Chat/examples/1m2t.py @@ -3,9 +3,9 @@ import pandas as pd import torch -from chatgpt.trainer import PPOTrainer, DetachedPPOTrainer -from chatgpt.trainer.strategies import ColossalAIStrategy, DDPStrategy, NaiveStrategy -from chatgpt.experience_maker import NaiveExperienceMaker, ExperienceMakerHolder +from coati.trainer import PPOTrainer, DetachedPPOTrainer +from coati.trainer.strategies import ColossalAIStrategy, DDPStrategy, NaiveStrategy +from coati.experience_maker import NaiveExperienceMaker, ExperienceMakerHolder from torch.optim import Adam from transformers import AutoTokenizer, BloomTokenizerFast from transformers.models.gpt2.tokenization_gpt2 import GPT2Tokenizer diff --git a/applications/Chat/examples/2m1t.py b/applications/Chat/examples/2m1t.py index 1682325699d4..8eca3d2b122c 100644 --- a/applications/Chat/examples/2m1t.py +++ b/applications/Chat/examples/2m1t.py @@ -3,9 +3,9 @@ import pandas as pd import torch -from chatgpt.trainer import PPOTrainer, DetachedPPOTrainer -from chatgpt.trainer.strategies import ColossalAIStrategy, DDPStrategy, NaiveStrategy -from chatgpt.experience_maker import NaiveExperienceMaker, ExperienceMakerHolder +from coati.trainer import PPOTrainer, DetachedPPOTrainer +from coati.trainer.strategies import ColossalAIStrategy, DDPStrategy, NaiveStrategy +from coati.experience_maker import NaiveExperienceMaker, ExperienceMakerHolder from torch.optim import Adam from transformers import AutoTokenizer, BloomTokenizerFast from transformers.models.gpt2.tokenization_gpt2 import GPT2Tokenizer diff --git a/applications/Chat/examples/2m2t.py b/applications/Chat/examples/2m2t.py index d50a21099d65..2dc2a60d4710 100644 --- a/applications/Chat/examples/2m2t.py +++ b/applications/Chat/examples/2m2t.py @@ -3,9 +3,9 @@ import pandas as pd import torch -from chatgpt.trainer import PPOTrainer, DetachedPPOTrainer -from chatgpt.trainer.strategies import ColossalAIStrategy, DDPStrategy, NaiveStrategy -from chatgpt.experience_maker import NaiveExperienceMaker, ExperienceMakerHolder +from coati.trainer import PPOTrainer, DetachedPPOTrainer +from coati.trainer.strategies import ColossalAIStrategy, DDPStrategy, NaiveStrategy +from coati.experience_maker import NaiveExperienceMaker, 
ExperienceMakerHolder from torch.optim import Adam from transformers import AutoTokenizer, BloomTokenizerFast from transformers.models.gpt2.tokenization_gpt2 import GPT2Tokenizer From 3c6f68cede0e0703313cb916723c043da22c8831 Mon Sep 17 00:00:00 2001 From: csric Date: Fri, 31 Mar 2023 15:44:18 +0800 Subject: [PATCH 31/36] nothing --- .../Chat/coati/experience_maker/detached.py | 38 ++++++++----------- .../Chat/coati/trainer/detached_ppo.py | 16 ++++++-- applications/Chat/examples/2m2t.sh | 36 +++++++++--------- applications/Chat/examples/train_dummy.sh | 2 +- applications/Chat/examples/train_prompts.py | 8 ++-- applications/Chat/examples/train_prompts.sh | 8 ++-- 6 files changed, 55 insertions(+), 53 deletions(-) diff --git a/applications/Chat/coati/experience_maker/detached.py b/applications/Chat/coati/experience_maker/detached.py index 91c7718d601a..ff52157c6d35 100644 --- a/applications/Chat/coati/experience_maker/detached.py +++ b/applications/Chat/coati/experience_maker/detached.py @@ -39,6 +39,7 @@ def __init__(self, self.target_trainer_list = [] for name in detached_trainer_name_list: self.target_trainer_list.append(ray.get_actor(name, namespace=os.environ["RAY_NAMESPACE"])) + self.strategy_str = strategy self.strategy = get_strategy_from_args(strategy) self.experience_batch_size = experience_batch_size self.kl_coef = kl_coef @@ -99,15 +100,6 @@ def _send_experience(self, experience): def workingloop(self, dataset, tokenizer: Optional[Callable[[Any], dict]] = None, times=5000 * 50000): self._get_ready() sampler = self.strategy.setup_sampler(dataset) - prof = torch.profiler.profile( - schedule=torch.profiler.schedule(wait=1, warmup=1, active=18, repeat=1), - on_trace_ready=torch.profiler.tensorboard_trace_handler('./log/1m1t_maker'), - record_shapes=True, - profile_memory=True, - with_stack=True) - prof.start() - count_time = 0 - saved = False for _ in range(times): rand_prompts = sampler.sample(self.experience_batch_size) if tokenizer is not None: @@ -118,14 +110,6 @@ def workingloop(self, dataset, tokenizer: Optional[Callable[[Any], dict]] = None experience = self._make_experience(inputs=inputs) self._model_visit_lock.release() self._send_experience(experience=experience) - - if count_time >= 20: - prof.stop() - count_time = 0 - saved = True - if not saved: - count_time+=1 - prof.step() @ray.method(concurrency_group="model_io") def initialize_experience_maker(self, init_actor: Actor, init_critic: Critic): @@ -144,11 +128,16 @@ def initialize_experience_maker(self, init_actor: Actor, init_critic: Critic): initial_model = deepcopy(actor) reward_model = RewardModel(deepcopy(critic.model), deepcopy(critic.value_head)).to(torch.cuda.current_device()) + if self.strategy_str != 'colossalai_gemini': + actor.to(torch.float16).to(torch.cuda.current_device()) + critic.to(torch.float16).to(torch.cuda.current_device()) + initial_model.to(torch.float16).to(torch.cuda.current_device()) + reward_model.to(torch.float16).to(torch.cuda.current_device()) - self.experience_maker.actor = actor - self.experience_maker.critic = critic - self.experience_maker.initial_model = initial_model - self.experience_maker.reward_model = reward_model + self.experience_maker.actor = self.strategy.prepare(actor) + self.experience_maker.critic = self.strategy.prepare(critic) + self.experience_maker.initial_model = self.strategy.prepare(initial_model) + self.experience_maker.reward_model = self.strategy.prepare(reward_model) self.fully_initialized = True @ray.method(concurrency_group="model_io") @@ -161,6 +150,9 @@ def 
update_experience_maker(self, new_actor: Actor, new_critic: Critic): with torch.no_grad(): if 'debug' in self.generate_kwargs and self.generate_kwargs['debug'] == True: print("[maker] UPDATE ") - self.experience_maker.actor = new_actor - self.experience_maker.critic = new_critic + if self.strategy_str != 'colossalai_gemini': + new_actor.to(torch.float16).to(torch.cuda.current_device()) + new_critic.to(torch.float16).to(torch.cuda.current_device()) + self.experience_maker.actor = self.strategy.prepare(new_actor) + self.experience_maker.critic = self.strategy.prepare(new_critic) self._model_visit_lock.release() diff --git a/applications/Chat/coati/trainer/detached_ppo.py b/applications/Chat/coati/trainer/detached_ppo.py index 1d2ba06a12a5..10cccd03ec43 100644 --- a/applications/Chat/coati/trainer/detached_ppo.py +++ b/applications/Chat/coati/trainer/detached_ppo.py @@ -71,18 +71,24 @@ def __init__(self, with self.strategy.model_init_context(): self.actor, self.critic = get_cuda_actor_critic_from_args(model, pretrained, lora_rank) - self.actor_loss_fn = PolicyLoss(eps_clip) - self.critic_loss_fn = ValueLoss(value_clip) - if isinstance(self.strategy, ColossalAIStrategy): + if strategy != 'colossalai_gemini': + self.actor.to(torch.float16).to(torch.cuda.current_device()) + self.critic.to(torch.float16).to(torch.cuda.current_device()) + + if strategy.startswith('colossalai'): self.actor_optim = HybridAdam(self.actor.parameters(), lr=5e-6) self.critic_optim = HybridAdam(self.critic.parameters(), lr=5e-6) else: self.actor_optim = Adam(self.actor.parameters(), lr=5e-6) self.critic_optim = Adam(self.critic.parameters(), lr=5e-6) + (self.actor, self.actor_optim), (self.critic, self.critic_optim) = \ self.strategy.prepare((self.actor, self.actor_optim), (self.critic, self.critic_optim)) generate_kwargs = _set_default_generate_kwargs(self.strategy, generate_kwargs, self.actor) + self.actor_loss_fn = PolicyLoss(eps_clip) + self.critic_loss_fn = ValueLoss(value_clip) + super().__init__(experience_maker_holder_name_list, train_batch_size=train_batch_size, buffer_limit=buffer_limit, @@ -157,6 +163,8 @@ def strategy_save_critic_optim(self, path: str, only_rank0: bool = False) -> Non def _get_unwrapped_actor(self): if False: pass + elif isinstance(self.strategy, ColossalAIStrategy): + return Actor(self.strategy._unwrap_model(self.actor)) elif isinstance(self.strategy, DDPStrategy): return Actor(self.strategy._unwrap_actor(self.actor)) elif isinstance(self.strategy, NaiveStrategy): @@ -165,6 +173,8 @@ def _get_unwrapped_actor(self): def _get_unwrapped_critic(self): if False: pass + elif isinstance(self.strategy, ColossalAIStrategy): + return self.strategy._unwrap_model(self.critic) elif isinstance(self.strategy, DDPStrategy): return self.critic.module elif isinstance(self.strategy, NaiveStrategy): diff --git a/applications/Chat/examples/2m2t.sh b/applications/Chat/examples/2m2t.sh index 79088ad1439c..359c8f00eb09 100644 --- a/applications/Chat/examples/2m2t.sh +++ b/applications/Chat/examples/2m2t.sh @@ -1,24 +1,24 @@ -set_n_least_used_CUDA_VISIBLE_DEVICES() { - local n=${1:-"9999"} - echo "GPU Memory Usage:" - local FIRST_N_GPU_IDS=$(nvidia-smi --query-gpu=memory.used --format=csv \ - | tail -n +2 \ - | nl -v 0 \ - | tee /dev/tty \ - | sort -g -k 2 \ - | awk '{print $1}' \ - | head -n $n) - export CUDA_VISIBLE_DEVICES=$(echo $FIRST_N_GPU_IDS | sed 's/ /,/g') - echo "Now CUDA_VISIBLE_DEVICES is set to:" - echo "CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES" -} - -set_n_least_used_CUDA_VISIBLE_DEVICES 2 +# 
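For every strategy except colossalai_gemini (whose Gemini wrapper manages parameter placement and precision itself), the detached.py and detached_ppo.py hunks above converge on one preparation pattern for each model replica: cast to fp16, move to the current CUDA device, then wrap with strategy.prepare(). A minimal sketch of that pattern, assuming a coati Strategy object; the helper name is illustrative, not from the patch:

    import torch
    import torch.nn as nn

    def prepare_replica(model: nn.Module, strategy, strategy_str: str) -> nn.Module:
        # Half precision matters here because the maker holds four replicas at
        # once (actor, critic, initial_model, reward_model); gemini is skipped
        # since it handles precision and placement on its own.
        if strategy_str != 'colossalai_gemini':
            model = model.to(torch.float16).to(torch.cuda.current_device())
        return strategy.prepare(model)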
set_n_least_used_CUDA_VISIBLE_DEVICES() { +# local n=${1:-"9999"} +# echo "GPU Memory Usage:" +# local FIRST_N_GPU_IDS=$(nvidia-smi --query-gpu=memory.used --format=csv \ +# | tail -n +2 \ +# | nl -v 0 \ +# | tee /dev/tty \ +# | sort -g -k 2 \ +# | awk '{print $1}' \ +# | head -n $n) +# export CUDA_VISIBLE_DEVICES=$(echo $FIRST_N_GPU_IDS | sed 's/ /,/g') +# echo "Now CUDA_VISIBLE_DEVICES is set to:" +# echo "CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES" +# } +# +# set_n_least_used_CUDA_VISIBLE_DEVICES 2 export CUDA_VISIBLE_DEVICES="0,1,2,3" export RAY_NAMESPACE="lccsr" python 2m2t.py "./awesome-chatgpt-prompts/prompts.csv" \ - --maker_strategy naive --trainer_strategy ddp --lora_rank 2 \ - --num_episodes 2 --max_timesteps 10 --update_timesteps 10 \ + --maker_strategy naive --trainer_strategy colossalai_zero2 --lora_rank 2 \ + --num_episodes 10 --max_timesteps 10 --update_timesteps 10 \ --max_epochs 10 # --debug \ No newline at end of file diff --git a/applications/Chat/examples/train_dummy.sh b/applications/Chat/examples/train_dummy.sh index 595da573e2b1..f05249a38d21 100755 --- a/applications/Chat/examples/train_dummy.sh +++ b/applications/Chat/examples/train_dummy.sh @@ -14,5 +14,5 @@ set_n_least_used_CUDA_VISIBLE_DEVICES() { } set_n_least_used_CUDA_VISIBLE_DEVICES 2 - +export CUDA_VISIBLE_DEVICES="0,1,2,3" torchrun --standalone --nproc_per_node=2 train_dummy.py --strategy colossalai_zero2 diff --git a/applications/Chat/examples/train_prompts.py b/applications/Chat/examples/train_prompts.py index 6643796d7a8b..30f56babd64b 100644 --- a/applications/Chat/examples/train_prompts.py +++ b/applications/Chat/examples/train_prompts.py @@ -83,13 +83,13 @@ def main(args): raise ValueError(f'Unsupported actor model "{args.model}"') if rm_model_name == 'gpt2': - critic = GPTCritic(pretrained=args.rm_pretrain, lora_rank=args.lora_rank, use_action_mask=True) + critic = GPTCritic(pretrained=args.rm_pretrain, lora_rank=args.lora_rank)#, use_action_mask=True) elif rm_model_name == 'bloom': - critic = BLOOMCritic(pretrained=args.rm_pretrain, lora_rank=args.lora_rank, use_action_mask=True) + critic = BLOOMCritic(pretrained=args.rm_pretrain, lora_rank=args.lora_rank)#, use_action_mask=True) elif rm_model_name == 'opt': - critic = OPTCritic(pretrained=args.rm_pretrain, lora_rank=args.lora_rank, use_action_mask=True) + critic = OPTCritic(pretrained=args.rm_pretrain, lora_rank=args.lora_rank)#, use_action_mask=True) elif rm_model_name == 'llama': - critic = LlamaCritic(pretrained=args.rm_pretrain, lora_rank=args.lora_rank, use_action_mask=True) + critic = LlamaCritic(pretrained=args.rm_pretrain, lora_rank=args.lora_rank)#, use_action_mask=True) else: raise ValueError(f'Unsupported reward model "{rm_model_name}"') diff --git a/applications/Chat/examples/train_prompts.sh b/applications/Chat/examples/train_prompts.sh index 1dcd0412e93e..7e6619e76077 100755 --- a/applications/Chat/examples/train_prompts.sh +++ b/applications/Chat/examples/train_prompts.sh @@ -14,8 +14,8 @@ set_n_least_used_CUDA_VISIBLE_DEVICES() { } set_n_least_used_CUDA_VISIBLE_DEVICES 2 +export CUDA_VISIBLE_DEVICES="0,1,2,3" +# torchrun --standalone --nproc_per_node=2 train_prompts.py prompts.csv --strategy colossalai_zero2 -torchrun --standalone --nproc_per_node=2 train_prompts.py prompts.csv --strategy colossalai_zero2 - -# torchrun --standalone --nproc_per_node=2 train_prompts.py "./awesome-chatgpt-prompts/prompts.csv" \ -# --strategy colossalai_zero2 --lora_rank 2 \ No newline at end of file +torchrun --standalone --nproc_per_node=2 
train_prompts.py --prompt_path "./awesome-chatgpt-prompts/prompts.csv" \ + --strategy colossalai_zero2 --lora_rank 2 \ No newline at end of file From 35e46027cf2e3caf5422131c0e55d9cdbc8a7f7a Mon Sep 17 00:00:00 2001 From: csric Date: Mon, 3 Apr 2023 16:03:23 +0800 Subject: [PATCH 32/36] detach the replay buffer from the trainer and make it a Ray Actor. Two benefits: 1. support TP trainers. 2. asynchronous buffer operations --- applications/Chat/coati/replay_buffer/detached.py | 1 + applications/Chat/coati/trainer/detached_ppo.py | 7 +++++-- applications/Chat/examples/1m1t.sh | 4 ++-- applications/Chat/examples/2m2t.sh | 2 +- 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/applications/Chat/coati/replay_buffer/detached.py b/applications/Chat/coati/replay_buffer/detached.py index 3f968af400f9..0946406eb7b7 100644 --- a/applications/Chat/coati/replay_buffer/detached.py +++ b/applications/Chat/coati/replay_buffer/detached.py @@ -5,6 +5,7 @@ # from torch.multiprocessing import Queue from ray.util.queue import Queue import ray +import asyncio from coati.experience_maker.base import Experience from .utils import BufferItem, make_experience_batch, split_experience_batch from threading import Lock import copy diff --git a/applications/Chat/coati/trainer/detached_ppo.py b/applications/Chat/coati/trainer/detached_ppo.py index 10cccd03ec43..0cdb7333f282 100644 --- a/applications/Chat/coati/trainer/detached_ppo.py +++ b/applications/Chat/coati/trainer/detached_ppo.py @@ -164,7 +164,8 @@ def _get_unwrapped_actor(self): if False: pass elif isinstance(self.strategy, ColossalAIStrategy): - return Actor(self.strategy._unwrap_model(self.actor)) + ret = Actor(self.strategy._unwrap_model(self.actor)) + return ret elif isinstance(self.strategy, DDPStrategy): return Actor(self.strategy._unwrap_actor(self.actor)) elif isinstance(self.strategy, NaiveStrategy): @@ -174,12 +175,14 @@ def _get_unwrapped_critic(self): if False: pass elif isinstance(self.strategy, ColossalAIStrategy): - return self.strategy._unwrap_model(self.critic) + ret = self.strategy._unwrap_model(self.critic) + return ret elif isinstance(self.strategy, DDPStrategy): return self.critic.module elif isinstance(self.strategy, NaiveStrategy): return self.critic + def _set_default_generate_kwargs(strategy: Strategy, generate_kwargs: dict, actor: Actor) -> None: origin_model = strategy._unwrap_actor(actor) new_kwargs = {**generate_kwargs} diff --git a/applications/Chat/examples/1m1t.sh b/applications/Chat/examples/1m1t.sh index 0f31c2647344..091d69bfb923 100644 --- a/applications/Chat/examples/1m1t.sh +++ b/applications/Chat/examples/1m1t.sh @@ -19,6 +19,6 @@ export CUDA_VISIBLE_DEVICES="0,1,2,3" export RAY_NAMESPACE="lccsr" python 1m1t.py "./awesome-chatgpt-prompts/prompts.csv" \ - --trainer_strategy ddp --maker_strategy naive --lora_rank 2 --pretrain "facebook/opt-350m" --model 'opt' \ + --trainer_strategy colossalai_zero2 --maker_strategy naive --lora_rank 2 --pretrain "facebook/opt-350m" --model 'opt' \ --num_episodes 10 --max_timesteps 10 --update_timesteps 10 \ --max_epochs 10 --debug diff --git a/applications/Chat/examples/2m2t.sh b/applications/Chat/examples/2m2t.sh index 359c8f00eb09..d8d0993ee5a3 100644 --- a/applications/Chat/examples/2m2t.sh +++ b/applications/Chat/examples/2m2t.sh @@ -15,7 +15,7 @@ # # set_n_least_used_CUDA_VISIBLE_DEVICES 2 -export CUDA_VISIBLE_DEVICES="0,1,2,3" +# export CUDA_VISIBLE_DEVICES="0,1,2,3" export RAY_NAMESPACE="lccsr" python 2m2t.py "./awesome-chatgpt-prompts/prompts.csv" \
From 04069cd1cb7beac6cc632b31ff6eee1f7ac85926 Mon Sep 17 00:00:00 2001 From: csric Date: Tue, 4 Apr 2023 15:36:20 +0800 Subject: [PATCH 33/36] experience_maker_holder dispatches experience to trainers round-robin in _send_experience() instead of comparing buffer lengths. --- applications/Chat/coati/experience_maker/detached.py | 10 ++++++++++ applications/Chat/examples/2m2t.sh | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/applications/Chat/coati/experience_maker/detached.py b/applications/Chat/coati/experience_maker/detached.py index ff52157c6d35..2d9cdbc950e2 100644 --- a/applications/Chat/coati/experience_maker/detached.py +++ b/applications/Chat/coati/experience_maker/detached.py @@ -74,6 +74,7 @@ def _make_experience(self, inputs: Union[Tensor, Dict[str, Tensor]]) -> Experien @ray.method(concurrency_group="experience_io") def _send_experience(self, experience): + ''' # choose a trainer that has the least experience batch in its detached_replay_buffer chosen_trainer = None min_length = None @@ -96,6 +97,15 @@ def _send_experience(self, experience): if 'debug' in self.generate_kwargs and self.generate_kwargs['debug'] == True: print(f"[maker] sending exp to {chosen_trainer}") chosen_trainer.buffer_append.remote(experience) + ''' + # + if not hasattr(self, "_target_idx"): + self._target_idx = 0 + chosen_trainer = self.target_trainer_list[self._target_idx] + if 'debug' in self.generate_kwargs and self.generate_kwargs['debug'] == True: + print(f"[maker] sending exp to {chosen_trainer}") + chosen_trainer.buffer_append.remote(experience) + self._target_idx = (self._target_idx + 1) % len(self.target_trainer_list) def workingloop(self, dataset, tokenizer: Optional[Callable[[Any], dict]] = None, times=5000 * 50000): self._get_ready() diff --git a/applications/Chat/examples/2m2t.sh b/applications/Chat/examples/2m2t.sh index d8d0993ee5a3..c546950310ae 100644 --- a/applications/Chat/examples/2m2t.sh +++ b/applications/Chat/examples/2m2t.sh @@ -21,4 +21,4 @@ export RAY_NAMESPACE="lccsr" python 2m2t.py "./awesome-chatgpt-prompts/prompts.csv" \ --maker_strategy naive --trainer_strategy colossalai_zero2 --lora_rank 2 \ --num_episodes 10 --max_timesteps 10 --update_timesteps 10 \ - --max_epochs 10 # --debug \ No newline at end of file + --max_epochs 10 --debug \ No newline at end of file From 3a4d0e719524b9953e64ffe46c83274f22a1b0b6 Mon Sep 17 00:00:00 2001 From: csric Date: Thu, 13 Apr 2023 18:26:00 +0800 Subject: [PATCH 34/36] move code to ray subfolder --- .../Chat/coati/experience_maker/__init__.py | 3 +- .../Chat/{examples => coati/ray}/1m1t.py | 6 ++- .../Chat/{examples => coati/ray}/1m1t.sh | 7 ++- .../Chat/{examples => coati/ray}/1m2t.py | 9 ++-- .../Chat/{examples => coati/ray}/1m2t.sh | 5 +- .../Chat/{examples => coati/ray}/2m1t.py | 7 ++- .../Chat/{examples => coati/ray}/2m1t.sh | 5 +- .../Chat/{examples => coati/ray}/2m2t.py | 9 ++-- .../Chat/{examples => coati/ray}/2m2t.sh | 4 +- .../src/detached_replay_buffer.py} | 4 +- .../src/detached_trainer_base.py} | 16 ++----- .../src/detached_trainer_ppo.py} | 17 +++---- .../src/experience_maker_holder.py} | 10 ++-- applications/Chat/coati/ray/src/utils.py | 48 +++++++++++++++++++ .../Chat/coati/replay_buffer/__init__.py | 3 +- applications/Chat/coati/trainer/__init__.py | 5 +- 16 files changed, 96 insertions(+), 62 deletions(-) rename applications/Chat/{examples => coati/ray}/1m1t.py (96%) rename applications/Chat/{examples => coati/ray}/1m1t.sh (81%) rename applications/Chat/{examples => coati/ray}/1m2t.py (97%)
rename applications/Chat/{examples => coati/ray}/1m2t.sh (83%) rename applications/Chat/{examples => coati/ray}/2m1t.py (96%) rename applications/Chat/{examples => coati/ray}/2m1t.sh (85%) rename applications/Chat/{examples => coati/ray}/2m2t.py (97%) rename applications/Chat/{examples => coati/ray}/2m2t.sh (90%) rename applications/Chat/coati/{replay_buffer/detached.py => ray/src/detached_replay_buffer.py} (95%) rename applications/Chat/coati/{trainer/detached_base.py => ray/src/detached_trainer_base.py} (96%) rename applications/Chat/coati/{trainer/detached_ppo.py => ray/src/detached_trainer_ppo.py} (97%) rename applications/Chat/coati/{experience_maker/detached.py => ray/src/experience_maker_holder.py} (97%) create mode 100644 applications/Chat/coati/ray/src/utils.py diff --git a/applications/Chat/coati/experience_maker/__init__.py b/applications/Chat/coati/experience_maker/__init__.py index e3f43a20f72a..39ca7576b227 100644 --- a/applications/Chat/coati/experience_maker/__init__.py +++ b/applications/Chat/coati/experience_maker/__init__.py @@ -1,5 +1,4 @@ from .base import Experience, ExperienceMaker from .naive import NaiveExperienceMaker -from .detached import ExperienceMakerHolder -__all__ = ['Experience', 'ExperienceMaker', 'NaiveExperienceMaker', 'ExperienceMakerHolder'] +__all__ = ['Experience', 'ExperienceMaker', 'NaiveExperienceMaker'] diff --git a/applications/Chat/examples/1m1t.py b/applications/Chat/coati/ray/1m1t.py similarity index 96% rename from applications/Chat/examples/1m1t.py rename to applications/Chat/coati/ray/1m1t.py index 1aac3fddaf02..abfd4f3d46e7 100644 --- a/applications/Chat/examples/1m1t.py +++ b/applications/Chat/coati/ray/1m1t.py @@ -3,9 +3,11 @@ import pandas as pd import torch -from coati.trainer import PPOTrainer, DetachedPPOTrainer +from coati.trainer import PPOTrainer +from src.detached_trainer_ppo import DetachedPPOTrainer +from src.experience_maker_holder import ExperienceMakerHolder from coati.trainer.strategies import ColossalAIStrategy, DDPStrategy, NaiveStrategy -from coati.experience_maker import NaiveExperienceMaker, ExperienceMakerHolder +from coati.experience_maker import NaiveExperienceMaker from torch.optim import Adam from transformers import AutoTokenizer, BloomTokenizerFast from transformers.models.gpt2.tokenization_gpt2 import GPT2Tokenizer diff --git a/applications/Chat/examples/1m1t.sh b/applications/Chat/coati/ray/1m1t.sh similarity index 81% rename from applications/Chat/examples/1m1t.sh rename to applications/Chat/coati/ray/1m1t.sh index 091d69bfb923..f7c5054c800e 100644 --- a/applications/Chat/examples/1m1t.sh +++ b/applications/Chat/coati/ray/1m1t.sh @@ -13,12 +13,11 @@ set_n_least_used_CUDA_VISIBLE_DEVICES() { echo "CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES" } -set_n_least_used_CUDA_VISIBLE_DEVICES 3 +set_n_least_used_CUDA_VISIBLE_DEVICES 2 -export CUDA_VISIBLE_DEVICES="0,1,2,3" -export RAY_NAMESPACE="lccsr" +export RAY_NAMESPACE="admin" -python 1m1t.py "./awesome-chatgpt-prompts/prompts.csv" \ +python 1m1t.py "/path/to/prompts.csv" \ --trainer_strategy colossalai_zero2 --maker_strategy naive --lora_rank 2 --pretrain "facebook/opt-350m" --model 'opt' \ --num_episodes 10 --max_timesteps 10 --update_timesteps 10 \ --max_epochs 10 --debug diff --git a/applications/Chat/examples/1m2t.py b/applications/Chat/coati/ray/1m2t.py similarity index 97% rename from applications/Chat/examples/1m2t.py rename to applications/Chat/coati/ray/1m2t.py index 043a90b553e3..b4a2cc0b1025 100644 --- a/applications/Chat/examples/1m2t.py +++ 
b/applications/Chat/coati/ray/1m2t.py @@ -3,9 +3,11 @@ import pandas as pd import torch -from coati.trainer import PPOTrainer, DetachedPPOTrainer +from coati.trainer import PPOTrainer +from src.detached_trainer_ppo import DetachedPPOTrainer +from src.experience_maker_holder import ExperienceMakerHolder from coati.trainer.strategies import ColossalAIStrategy, DDPStrategy, NaiveStrategy -from coati.experience_maker import NaiveExperienceMaker, ExperienceMakerHolder +from coati.experience_maker import NaiveExperienceMaker from torch.optim import Adam from transformers import AutoTokenizer, BloomTokenizerFast from transformers.models.gpt2.tokenization_gpt2 import GPT2Tokenizer @@ -13,11 +15,8 @@ from colossalai.nn.optimizer import HybridAdam import ray - import os import socket -import multiprocessing - def get_free_port(): with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: diff --git a/applications/Chat/examples/1m2t.sh b/applications/Chat/coati/ray/1m2t.sh similarity index 83% rename from applications/Chat/examples/1m2t.sh rename to applications/Chat/coati/ray/1m2t.sh index 0a7c66636d27..669f4141026c 100644 --- a/applications/Chat/examples/1m2t.sh +++ b/applications/Chat/coati/ray/1m2t.sh @@ -15,10 +15,9 @@ set_n_least_used_CUDA_VISIBLE_DEVICES() { set_n_least_used_CUDA_VISIBLE_DEVICES 2 -export CUDA_VISIBLE_DEVICES="0,1,2,3" -export RAY_NAMESPACE="lccsr" +export RAY_NAMESPACE="admin" -python 1m2t.py "./awesome-chatgpt-prompts/prompts.csv" --model gpt2 \ +python 1m2t.py "/path/to/prompts.csv" --model gpt2 \ --maker_strategy naive --trainer_strategy ddp --lora_rank 2 \ --num_episodes 10 --max_timesteps 10 --update_timesteps 10 \ --max_epochs 10 #--debug \ No newline at end of file diff --git a/applications/Chat/examples/2m1t.py b/applications/Chat/coati/ray/2m1t.py similarity index 96% rename from applications/Chat/examples/2m1t.py rename to applications/Chat/coati/ray/2m1t.py index 8eca3d2b122c..67b196e3b214 100644 --- a/applications/Chat/examples/2m1t.py +++ b/applications/Chat/coati/ray/2m1t.py @@ -3,9 +3,11 @@ import pandas as pd import torch -from coati.trainer import PPOTrainer, DetachedPPOTrainer +from coati.trainer import PPOTrainer +from src.detached_trainer_ppo import DetachedPPOTrainer +from src.experience_maker_holder import ExperienceMakerHolder from coati.trainer.strategies import ColossalAIStrategy, DDPStrategy, NaiveStrategy -from coati.experience_maker import NaiveExperienceMaker, ExperienceMakerHolder +from coati.experience_maker import NaiveExperienceMaker from torch.optim import Adam from transformers import AutoTokenizer, BloomTokenizerFast from transformers.models.gpt2.tokenization_gpt2 import GPT2Tokenizer @@ -14,6 +16,7 @@ import ray import os +import socket def main(args): # configure tokenizer diff --git a/applications/Chat/examples/2m1t.sh b/applications/Chat/coati/ray/2m1t.sh similarity index 85% rename from applications/Chat/examples/2m1t.sh rename to applications/Chat/coati/ray/2m1t.sh index f98b75dab12a..a207d4118d60 100644 --- a/applications/Chat/examples/2m1t.sh +++ b/applications/Chat/coati/ray/2m1t.sh @@ -15,10 +15,9 @@ set_n_least_used_CUDA_VISIBLE_DEVICES() { set_n_least_used_CUDA_VISIBLE_DEVICES 3 -export CUDA_VISIBLE_DEVICES="0,1,2,3" -export RAY_NAMESPACE="lccsr" +export RAY_NAMESPACE="admin" -python 2m1t.py "./awesome-chatgpt-prompts/prompts.csv" \ +python 2m1t.py "/path/to/prompts.csv" \ --trainer_strategy naive --maker_strategy naive --lora_rank 2 --pretrain "facebook/opt-350m" --model 'opt' \ --num_episodes 10 --max_timesteps 10 
--update_timesteps 10 \ --max_epochs 10 # --debug diff --git a/applications/Chat/examples/2m2t.py b/applications/Chat/coati/ray/2m2t.py similarity index 97% rename from applications/Chat/examples/2m2t.py rename to applications/Chat/coati/ray/2m2t.py index 2dc2a60d4710..f4228d47bedd 100644 --- a/applications/Chat/examples/2m2t.py +++ b/applications/Chat/coati/ray/2m2t.py @@ -3,9 +3,11 @@ import pandas as pd import torch -from coati.trainer import PPOTrainer, DetachedPPOTrainer +from coati.trainer import PPOTrainer +from src.detached_trainer_ppo import DetachedPPOTrainer +from src.experience_maker_holder import ExperienceMakerHolder from coati.trainer.strategies import ColossalAIStrategy, DDPStrategy, NaiveStrategy -from coati.experience_maker import NaiveExperienceMaker, ExperienceMakerHolder +from coati.experience_maker import NaiveExperienceMaker from torch.optim import Adam from transformers import AutoTokenizer, BloomTokenizerFast from transformers.models.gpt2.tokenization_gpt2 import GPT2Tokenizer @@ -13,11 +15,8 @@ from colossalai.nn.optimizer import HybridAdam import ray - import os import socket -import multiprocessing - def get_free_port(): with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: diff --git a/applications/Chat/examples/2m2t.sh b/applications/Chat/coati/ray/2m2t.sh similarity index 90% rename from applications/Chat/examples/2m2t.sh rename to applications/Chat/coati/ray/2m2t.sh index c546950310ae..1dee6d65c8c3 100644 --- a/applications/Chat/examples/2m2t.sh +++ b/applications/Chat/coati/ray/2m2t.sh @@ -16,9 +16,9 @@ # set_n_least_used_CUDA_VISIBLE_DEVICES 2 # export CUDA_VISIBLE_DEVICES="0,1,2,3" -export RAY_NAMESPACE="lccsr" +export RAY_NAMESPACE="admin" -python 2m2t.py "./awesome-chatgpt-prompts/prompts.csv" \ +python 2m2t.py "path/to/prompts.csv" \ --maker_strategy naive --trainer_strategy colossalai_zero2 --lora_rank 2 \ --num_episodes 10 --max_timesteps 10 --update_timesteps 10 \ --max_epochs 10 --debug \ No newline at end of file diff --git a/applications/Chat/coati/replay_buffer/detached.py b/applications/Chat/coati/ray/src/detached_replay_buffer.py similarity index 95% rename from applications/Chat/coati/replay_buffer/detached.py rename to applications/Chat/coati/ray/src/detached_replay_buffer.py index 0946406eb7b7..855eee48c5a5 100644 --- a/applications/Chat/coati/replay_buffer/detached.py +++ b/applications/Chat/coati/ray/src/detached_replay_buffer.py @@ -1,13 +1,13 @@ import torch import random from typing import List, Any -from .base import ReplayBuffer # from torch.multiprocessing import Queue from ray.util.queue import Queue import ray import asyncio from coati.experience_maker.base import Experience -from .utils import BufferItem, make_experience_batch, split_experience_batch +from coati.replay_buffer.utils import BufferItem, make_experience_batch, split_experience_batch +from coati.replay_buffer import ReplayBuffer from threading import Lock import copy diff --git a/applications/Chat/coati/trainer/detached_base.py b/applications/Chat/coati/ray/src/detached_trainer_base.py similarity index 96% rename from applications/Chat/coati/trainer/detached_base.py rename to applications/Chat/coati/ray/src/detached_trainer_base.py index d79af64885eb..f1ed1ec71499 100644 --- a/applications/Chat/coati/trainer/detached_base.py +++ b/applications/Chat/coati/ray/src/detached_trainer_base.py @@ -1,21 +1,13 @@ from abc import ABC, abstractmethod from typing import Any, Callable, Dict, List, Optional, Union - -import torch -from torch import Tensor -import torch.nn as 
nn -from torch.optim import Optimizer -from coati.experience_maker import Experience -from coati.replay_buffer import DetachedReplayBuffer from tqdm import tqdm - -from .callbacks import Callback -from .strategies import Strategy -from .utils import is_rank_0 - +from coati.trainer.callbacks import Callback +from coati.experience_maker import Experience import ray import os +from .detached_replay_buffer import DetachedReplayBuffer +from .utils import is_rank_0 class DetachedTrainer(ABC): ''' diff --git a/applications/Chat/coati/trainer/detached_ppo.py b/applications/Chat/coati/ray/src/detached_trainer_ppo.py similarity index 97% rename from applications/Chat/coati/trainer/detached_ppo.py rename to applications/Chat/coati/ray/src/detached_trainer_ppo.py index 0cdb7333f282..90e5e437750a 100644 --- a/applications/Chat/coati/trainer/detached_ppo.py +++ b/applications/Chat/coati/ray/src/detached_trainer_ppo.py @@ -1,27 +1,22 @@ from typing import Any, Callable, Dict, List, Optional -import time - import torch -import torch.nn as nn -from torch.optim import Optimizer from torch.optim import Adam from coati.experience_maker import Experience, NaiveExperienceMaker from coati.models.base import Actor, Critic from coati.models.generation_utils import update_model_kwargs_fn from coati.models.loss import PolicyLoss, ValueLoss -from coati.replay_buffer import DetachedReplayBuffer -from coati.trainer.strategies import ColossalAIStrategy, DDPStrategy, NaiveStrategy +from coati.trainer.strategies import ColossalAIStrategy, DDPStrategy, NaiveStrategy, Strategy +from coati.trainer.callbacks import Callback from colossalai.nn.optimizer import HybridAdam -from .detached_base import DetachedTrainer -from .callbacks import Callback -from .strategies import Strategy +import ray + + from .utils import is_rank_0, get_cuda_actor_critic_from_args, get_strategy_from_args, set_dist_env +from .detached_trainer_base import DetachedTrainer -import ray -import copy @ray.remote(concurrency_groups={"buffer_length": 1, "buffer_append":1, "buffer_sample":1,"model_io": 1, "compute": 1}) class DetachedPPOTrainer(DetachedTrainer): diff --git a/applications/Chat/coati/experience_maker/detached.py b/applications/Chat/coati/ray/src/experience_maker_holder.py similarity index 97% rename from applications/Chat/coati/experience_maker/detached.py rename to applications/Chat/coati/ray/src/experience_maker_holder.py index 2d9cdbc950e2..696773e84cfb 100644 --- a/applications/Chat/coati/experience_maker/detached.py +++ b/applications/Chat/coati/ray/src/experience_maker_holder.py @@ -1,7 +1,5 @@ import torch from typing import Any, Callable, Dict, List, Optional, Union -from .naive import NaiveExperienceMaker, Experience, ExperienceMaker -from ..replay_buffer.detached import DetachedReplayBuffer import ray from ray.exceptions import GetTimeoutError from torch import Tensor @@ -9,13 +7,17 @@ from coati.models.base import Actor, Critic, RewardModel from coati.trainer.strategies.sampler import DistributedSampler from coati.trainer.strategies import Strategy -from coati.trainer.utils import is_rank_0, get_strategy_from_args, set_dist_env +from coati.experience_maker import NaiveExperienceMaker, Experience, ExperienceMaker + from copy import deepcopy from threading import Lock import time import os +from .utils import is_rank_0, get_strategy_from_args, set_dist_env + + @ray.remote(concurrency_groups={"experience_io": 1, "model_io": 1, "compute": 1}) class ExperienceMakerHolder: ''' @@ -75,6 +77,8 @@ def _make_experience(self, inputs: 
Union[Tensor, Dict[str, Tensor]]) -> Experien @ray.method(concurrency_group="experience_io") def _send_experience(self, experience): ''' + ignore it + # choose a trainer that has the least experience batch in its detached_replay_buffer chosen_trainer = None min_length = None diff --git a/applications/Chat/coati/ray/src/utils.py b/applications/Chat/coati/ray/src/utils.py new file mode 100644 index 000000000000..c750879b6d18 --- /dev/null +++ b/applications/Chat/coati/ray/src/utils.py @@ -0,0 +1,48 @@ +import torch.distributed as dist +from typing import Any, Callable, Dict, List, Optional +from coati.models.bloom import BLOOMActor, BLOOMCritic +from coati.models.gpt import GPTActor, GPTCritic +from coati.models.opt import OPTActor, OPTCritic +from coati.trainer.strategies import ColossalAIStrategy, DDPStrategy, NaiveStrategy +import torch +import os + +def is_rank_0() -> bool: + return not dist.is_initialized() or dist.get_rank() == 0 + + +def get_cuda_actor_critic_from_args(model: str, pretrained: str = None, lora_rank=0): + if model == 'gpt2': + actor = GPTActor(pretrained=pretrained, lora_rank=lora_rank).to(torch.cuda.current_device()) + critic = GPTCritic(pretrained=pretrained, lora_rank=lora_rank).to(torch.cuda.current_device()) + elif model == 'bloom': + actor = BLOOMActor(pretrained=pretrained, lora_rank=lora_rank).to(torch.cuda.current_device()) + critic = BLOOMCritic(pretrained=pretrained, lora_rank=lora_rank).to(torch.cuda.current_device()) + elif model == 'opt': + actor = OPTActor(pretrained=pretrained, lora_rank=lora_rank).to(torch.cuda.current_device()) + critic = OPTCritic(pretrained=pretrained, lora_rank=lora_rank).to(torch.cuda.current_device()) + else: + raise ValueError(f'Unsupported model "{model}"') + return actor, critic + + +def get_strategy_from_args(strategy: str): + if strategy == 'naive': + strategy_ = NaiveStrategy() + elif strategy == 'ddp': + strategy_ = DDPStrategy() + elif strategy == 'colossalai_gemini': + strategy_ = ColossalAIStrategy(stage=3, placement_policy='cuda', initial_scale=2**5) + elif strategy == 'colossalai_zero2': + strategy_ = ColossalAIStrategy(stage=2, placement_policy='cuda') + else: + raise ValueError(f'Unsupported strategy "{strategy}"') + return strategy_ + + +def set_dist_env(env_info: Dict[str, str]): + os.environ["RANK"] = env_info['rank'] + os.environ["LOCAL_RANK"] = env_info['local_rank'] + os.environ["WORLD_SIZE"] = env_info['world_size'] + os.environ['MASTER_PORT'] = env_info['master_port'] + os.environ['MASTER_ADDR'] = env_info['master_addr'] diff --git a/applications/Chat/coati/replay_buffer/__init__.py b/applications/Chat/coati/replay_buffer/__init__.py index 9815e4fddd11..1ebf60382913 100644 --- a/applications/Chat/coati/replay_buffer/__init__.py +++ b/applications/Chat/coati/replay_buffer/__init__.py @@ -1,5 +1,4 @@ from .base import ReplayBuffer from .naive import NaiveReplayBuffer -from .detached import DetachedReplayBuffer -__all__ = ['ReplayBuffer', 'NaiveReplayBuffer', 'DetachedReplayBuffer'] +__all__ = ['ReplayBuffer', 'NaiveReplayBuffer'] diff --git a/applications/Chat/coati/trainer/__init__.py b/applications/Chat/coati/trainer/__init__.py index f9f2727621ef..525b57bf21d3 100644 --- a/applications/Chat/coati/trainer/__init__.py +++ b/applications/Chat/coati/trainer/__init__.py @@ -1,9 +1,6 @@ from .base import Trainer from .ppo import PPOTrainer from .rm import RewardModelTrainer -from .detached_ppo import DetachedPPOTrainer -from .detached_base import DetachedTrainer from .sft import SFTTrainer -__all__ = ['Trainer', 
'PPOTrainer', 'RewardModelTrainer', 'SFTTrainer', - 'DetachedTrainer', 'DetachedPPOTrainer',] +__all__ = ['Trainer', 'PPOTrainer', 'RewardModelTrainer', 'SFTTrainer'] From 95a6c72e9cb40bb85a376786c85130e63f33d1d9 Mon Sep 17 00:00:00 2001 From: csric Date: Thu, 13 Apr 2023 19:30:00 +0800 Subject: [PATCH 35/36] working on pipeline inference --- .../coati/experience_maker/strategy/base.py | 0 .../Chat/coati/ray/src/pipeline_strategy.py | 102 ++++++++++++++++++ applications/Chat/coati/trainer/utils.py | 35 ------ 3 files changed, 102 insertions(+), 35 deletions(-) delete mode 100644 applications/Chat/coati/experience_maker/strategy/base.py create mode 100644 applications/Chat/coati/ray/src/pipeline_strategy.py diff --git a/applications/Chat/coati/experience_maker/strategy/base.py b/applications/Chat/coati/experience_maker/strategy/base.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/applications/Chat/coati/ray/src/pipeline_strategy.py b/applications/Chat/coati/ray/src/pipeline_strategy.py new file mode 100644 index 000000000000..1ad65aee0787 --- /dev/null +++ b/applications/Chat/coati/ray/src/pipeline_strategy.py @@ -0,0 +1,102 @@ +from coati.trainer.strategies import Strategy +from coati.trainer.strategies import NaiveStrategy +from coati.models.base import Actor, RewardModel, Critic + +import numpy as np +import torch +from torch._C._distributed_rpc import _is_current_rpc_agent_set + +import colossalai +from colossalai.pipeline.pipeline_process_group import ppg +from colossalai.pipeline.rpc._pipeline_schedule import OneFOneBPipelineEngine +from colossalai.fx import ColoTracer +from colossalai.fx.passes.adding_split_node_pass import balanced_split_pass, split_with_split_nodes_pass +from colossalai.pipeline.middleware.adaptor import get_fx_topology + + +import os +from functools import partial +import random + +rpc_is_initialized = _is_current_rpc_agent_set + +class PipelineModel(torch.nn.Module): + ''' + Actor has 2 kinds of jobs: forward and generate. + better to just pipelinize the inner model + ''' + def __init__(self, + model: torch.nn.Module, + stage_num: int, + num_microbatches: int, + data_kwargs = None, + ): + super().__init__() + # create partition module + def create_partition_module(pp_rank:int, stage_num: int, model, data_kwargs): + model.eval() + tracer = ColoTracer() + meta_args = {k: v.to('meta') for k, v in data_kwargs.items()} + graph = tracer.trace(root=model, meta_args=meta_args) + gm = torch.fx.GraphModule(model, graph, model.__class__.__name__) + annotated_model = balanced_split_pass(gm, stage_num) + top_module, split_submodules = split_with_split_nodes_pass(annotated_model, merge_output=True) + topo = get_fx_topology(top_module) + for submodule in split_submodules: + if isinstance(submodule, torch.fx.GraphModule): + setattr(submodule, '_topo', topo) + return split_submodules[pp_rank + 1] + + def partition(model, data_kwargs: dict, pp_rank: int, chunk: int, stage_num: int): + partition = create_partition_module(pp_rank, stage_num, model, data_kwargs) + return partition + self.inference_engine = OneFOneBPipelineEngine( + partition_fn=partial(partition, model, data_kwargs), + stage_num=stage_num, + num_microbatches=num_microbatches, + device='cuda', + ) + + def forward(self, + **model_inputs): + return self.inference_engine.forward_backward(**model_inputs, forward_only=True) + + + +class PPStrategy(NaiveStrategy): + """ + Strategy for Pipeline inference (inference only!) 
+ + master node only + """ + def __init__( + self, + seed: int = 42 + ): + self.seed = seed + super().__init__() + + + def setup_distributed(self) -> None: + colossalai.launch_from_torch({}, seed=self.seed) + ppg.set_global_info(rank = int(os.environ['RANK']), + world_size=int(os.environ['WORLD_SIZE']), + dp_degree=1, + tp_degree=1, + num_worker_threads=128, + device="cuda") + + def model_init_context(self): + return super().model_init_context() + + def setup_model(self, model: torch.nn.Module) -> torch.nn.Module: + if isinstance(model, Actor) or \ + isinstance(model, RewardModel) or \ + isinstance(model, Critic): + model.model = PipelineModel(model.model) + + def set_seed(self, seed: int) -> None: + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + diff --git a/applications/Chat/coati/trainer/utils.py b/applications/Chat/coati/trainer/utils.py index 9ffc39f1ffe9..1b17a0421656 100644 --- a/applications/Chat/coati/trainer/utils.py +++ b/applications/Chat/coati/trainer/utils.py @@ -12,38 +12,3 @@ def is_rank_0() -> bool: return not dist.is_initialized() or dist.get_rank() == 0 -def get_cuda_actor_critic_from_args(model: str, pretrained: str = None, lora_rank=0): - if model == 'gpt2': - actor = GPTActor(pretrained=pretrained, lora_rank=lora_rank).to(torch.cuda.current_device()) - critic = GPTCritic(pretrained=pretrained, lora_rank=lora_rank).to(torch.cuda.current_device()) - elif model == 'bloom': - actor = BLOOMActor(pretrained=pretrained, lora_rank=lora_rank).to(torch.cuda.current_device()) - critic = BLOOMCritic(pretrained=pretrained, lora_rank=lora_rank).to(torch.cuda.current_device()) - elif model == 'opt': - actor = OPTActor(pretrained=pretrained, lora_rank=lora_rank).to(torch.cuda.current_device()) - critic = OPTCritic(pretrained=pretrained, lora_rank=lora_rank).to(torch.cuda.current_device()) - else: - raise ValueError(f'Unsupported model "{model}"') - return actor, critic - - -def get_strategy_from_args(strategy: str): - if strategy == 'naive': - strategy_ = NaiveStrategy() - elif strategy == 'ddp': - strategy_ = DDPStrategy() - elif strategy == 'colossalai_gemini': - strategy_ = ColossalAIStrategy(stage=3, placement_policy='cuda', initial_scale=2**5) - elif strategy == 'colossalai_zero2': - strategy_ = ColossalAIStrategy(stage=2, placement_policy='cuda') - else: - raise ValueError(f'Unsupported strategy "{strategy}"') - return strategy_ - - -def set_dist_env(env_info: Dict[str, str]): - os.environ["RANK"] = env_info['rank'] - os.environ["LOCAL_RANK"] = env_info['local_rank'] - os.environ["WORLD_SIZE"] = env_info['world_size'] - os.environ['MASTER_PORT'] = env_info['master_port'] - os.environ['MASTER_ADDR'] = env_info['master_addr'] From 19fab46a015e5517411820384f0a89e6c5eea2bc Mon Sep 17 00:00:00 2001 From: csric Date: Fri, 14 Apr 2023 10:46:07 +0800 Subject: [PATCH 36/36] apply comments --- applications/Chat/coati/ray/2m2t.sh | 24 ------------------- applications/Chat/coati/ray/__init__.py | 2 ++ .../Chat/coati/ray/{ => example}/1m1t.py | 7 ++++-- .../Chat/coati/ray/{ => example}/1m1t.sh | 0 .../Chat/coati/ray/{ => example}/1m2t.py | 8 +++++-- .../Chat/coati/ray/{ => example}/1m2t.sh | 0 .../Chat/coati/ray/{ => example}/2m1t.py | 8 +++++-- .../Chat/coati/ray/{ => example}/2m1t.sh | 0 .../Chat/coati/ray/{ => example}/2m2t.py | 8 +++++-- applications/Chat/coati/ray/example/2m2t.sh | 23 ++++++++++++++++++ applications/Chat/coati/ray/src/__init__.py | 0 .../Chat/coati/ray/src/pipeline_strategy.py | 3 +++ applications/Chat/examples/train_dummy.sh | 2 +- 
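The flow in pipeline_strategy.py above is: trace the wrapped module against meta-shaped inputs (ColoTracer), split the graph into balanced stages, attach the pipeline topology to each stage, and hand one stage per rank to the 1F1B engine. A hedged usage sketch; the GPT-2 model, batch shapes, and stage count are illustrative assumptions, and it presumes PPStrategy.setup_distributed() has already initialized the RPC pipeline process group on every rank:

    import torch
    from transformers import GPT2LMHeadModel

    model = GPT2LMHeadModel.from_pretrained('gpt2')   # illustrative model choice
    trace_inputs = {                                  # meta-traced by ColoTracer inside
        'input_ids': torch.zeros((4, 128), dtype=torch.long),
        'attention_mask': torch.ones((4, 128), dtype=torch.long),
    }
    pipe = PipelineModel(model,
                         stage_num=2,                 # stages after the balanced split
                         num_microbatches=4,
                         data_kwargs=trace_inputs)
    logits = pipe(**trace_inputs)                     # forward-only 1F1B pass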
applications/Chat/examples/train_prompts.sh | 2 +- 14 files changed, 53 insertions(+), 34 deletions(-) delete mode 100644 applications/Chat/coati/ray/2m2t.sh create mode 100644 applications/Chat/coati/ray/__init__.py rename applications/Chat/coati/ray/{ => example}/1m1t.py (97%) rename applications/Chat/coati/ray/{ => example}/1m1t.sh (100%) rename applications/Chat/coati/ray/{ => example}/1m2t.py (98%) rename applications/Chat/coati/ray/{ => example}/1m2t.sh (100%) rename applications/Chat/coati/ray/{ => example}/2m1t.py (97%) rename applications/Chat/coati/ray/{ => example}/2m1t.sh (100%) rename applications/Chat/coati/ray/{ => example}/2m2t.py (98%) create mode 100644 applications/Chat/coati/ray/example/2m2t.sh create mode 100644 applications/Chat/coati/ray/src/__init__.py diff --git a/applications/Chat/coati/ray/2m2t.sh b/applications/Chat/coati/ray/2m2t.sh deleted file mode 100644 index 1dee6d65c8c3..000000000000 --- a/applications/Chat/coati/ray/2m2t.sh +++ /dev/null @@ -1,24 +0,0 @@ -# set_n_least_used_CUDA_VISIBLE_DEVICES() { -# local n=${1:-"9999"} -# echo "GPU Memory Usage:" -# local FIRST_N_GPU_IDS=$(nvidia-smi --query-gpu=memory.used --format=csv \ -# | tail -n +2 \ -# | nl -v 0 \ -# | tee /dev/tty \ -# | sort -g -k 2 \ -# | awk '{print $1}' \ -# | head -n $n) -# export CUDA_VISIBLE_DEVICES=$(echo $FIRST_N_GPU_IDS | sed 's/ /,/g') -# echo "Now CUDA_VISIBLE_DEVICES is set to:" -# echo "CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES" -# } -# -# set_n_least_used_CUDA_VISIBLE_DEVICES 2 - -# export CUDA_VISIBLE_DEVICES="0,1,2,3" -export RAY_NAMESPACE="admin" - -python 2m2t.py "path/to/prompts.csv" \ - --maker_strategy naive --trainer_strategy colossalai_zero2 --lora_rank 2 \ - --num_episodes 10 --max_timesteps 10 --update_timesteps 10 \ - --max_epochs 10 --debug \ No newline at end of file diff --git a/applications/Chat/coati/ray/__init__.py b/applications/Chat/coati/ray/__init__.py new file mode 100644 index 000000000000..5802c05bc03f --- /dev/null +++ b/applications/Chat/coati/ray/__init__.py @@ -0,0 +1,2 @@ +from .src.detached_replay_buffer import DetachedReplayBuffer +from .src.detached_trainer_ppo import DetachedPPOTrainer diff --git a/applications/Chat/coati/ray/1m1t.py b/applications/Chat/coati/ray/example/1m1t.py similarity index 97% rename from applications/Chat/coati/ray/1m1t.py rename to applications/Chat/coati/ray/example/1m1t.py index abfd4f3d46e7..a6527370505b 100644 --- a/applications/Chat/coati/ray/1m1t.py +++ b/applications/Chat/coati/ray/example/1m1t.py @@ -4,8 +4,11 @@ import pandas as pd import torch from coati.trainer import PPOTrainer -from src.detached_trainer_ppo import DetachedPPOTrainer -from src.experience_maker_holder import ExperienceMakerHolder + + +from coati.ray.src.experience_maker_holder import ExperienceMakerHolder +from coati.ray.src.detached_trainer_ppo import DetachedPPOTrainer + from coati.trainer.strategies import ColossalAIStrategy, DDPStrategy, NaiveStrategy from coati.experience_maker import NaiveExperienceMaker from torch.optim import Adam diff --git a/applications/Chat/coati/ray/1m1t.sh b/applications/Chat/coati/ray/example/1m1t.sh similarity index 100% rename from applications/Chat/coati/ray/1m1t.sh rename to applications/Chat/coati/ray/example/1m1t.sh diff --git a/applications/Chat/coati/ray/1m2t.py b/applications/Chat/coati/ray/example/1m2t.py similarity index 98% rename from applications/Chat/coati/ray/1m2t.py rename to applications/Chat/coati/ray/example/1m2t.py index b4a2cc0b1025..3883c364a8e0 100644 --- 
a/applications/Chat/coati/ray/1m2t.py +++ b/applications/Chat/coati/ray/example/1m2t.py @@ -4,8 +4,11 @@ import pandas as pd import torch from coati.trainer import PPOTrainer -from src.detached_trainer_ppo import DetachedPPOTrainer -from src.experience_maker_holder import ExperienceMakerHolder + + +from coati.ray.src.experience_maker_holder import ExperienceMakerHolder +from coati.ray.src.detached_trainer_ppo import DetachedPPOTrainer + from coati.trainer.strategies import ColossalAIStrategy, DDPStrategy, NaiveStrategy from coati.experience_maker import NaiveExperienceMaker from torch.optim import Adam @@ -18,6 +21,7 @@ import os import socket + def get_free_port(): with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: s.bind(('', 0)) diff --git a/applications/Chat/coati/ray/1m2t.sh b/applications/Chat/coati/ray/example/1m2t.sh similarity index 100% rename from applications/Chat/coati/ray/1m2t.sh rename to applications/Chat/coati/ray/example/1m2t.sh diff --git a/applications/Chat/coati/ray/2m1t.py b/applications/Chat/coati/ray/example/2m1t.py similarity index 97% rename from applications/Chat/coati/ray/2m1t.py rename to applications/Chat/coati/ray/example/2m1t.py index 67b196e3b214..b655de1ab1fa 100644 --- a/applications/Chat/coati/ray/2m1t.py +++ b/applications/Chat/coati/ray/example/2m1t.py @@ -4,8 +4,11 @@ import pandas as pd import torch from coati.trainer import PPOTrainer -from src.detached_trainer_ppo import DetachedPPOTrainer -from src.experience_maker_holder import ExperienceMakerHolder + + +from coati.ray.src.experience_maker_holder import ExperienceMakerHolder +from coati.ray.src.detached_trainer_ppo import DetachedPPOTrainer + from coati.trainer.strategies import ColossalAIStrategy, DDPStrategy, NaiveStrategy from coati.experience_maker import NaiveExperienceMaker from torch.optim import Adam @@ -18,6 +21,7 @@ import os import socket + def main(args): # configure tokenizer if args.model == 'gpt2': diff --git a/applications/Chat/coati/ray/2m1t.sh b/applications/Chat/coati/ray/example/2m1t.sh similarity index 100% rename from applications/Chat/coati/ray/2m1t.sh rename to applications/Chat/coati/ray/example/2m1t.sh diff --git a/applications/Chat/coati/ray/2m2t.py b/applications/Chat/coati/ray/example/2m2t.py similarity index 98% rename from applications/Chat/coati/ray/2m2t.py rename to applications/Chat/coati/ray/example/2m2t.py index f4228d47bedd..435c71915fc2 100644 --- a/applications/Chat/coati/ray/2m2t.py +++ b/applications/Chat/coati/ray/example/2m2t.py @@ -4,8 +4,11 @@ import pandas as pd import torch from coati.trainer import PPOTrainer -from src.detached_trainer_ppo import DetachedPPOTrainer -from src.experience_maker_holder import ExperienceMakerHolder + + +from coati.ray.src.experience_maker_holder import ExperienceMakerHolder +from coati.ray.src.detached_trainer_ppo import DetachedPPOTrainer + from coati.trainer.strategies import ColossalAIStrategy, DDPStrategy, NaiveStrategy from coati.experience_maker import NaiveExperienceMaker from torch.optim import Adam @@ -18,6 +21,7 @@ import os import socket + def get_free_port(): with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: s.bind(('', 0)) diff --git a/applications/Chat/coati/ray/example/2m2t.sh b/applications/Chat/coati/ray/example/2m2t.sh new file mode 100644 index 000000000000..fb4024766c54 --- /dev/null +++ b/applications/Chat/coati/ray/example/2m2t.sh @@ -0,0 +1,23 @@ +set_n_least_used_CUDA_VISIBLE_DEVICES() { + local n=${1:-"9999"} + echo "GPU Memory Usage:" + local FIRST_N_GPU_IDS=$(nvidia-smi 
--query-gpu=memory.used --format=csv \ + | tail -n +2 \ + | nl -v 0 \ + | tee /dev/tty \ + | sort -g -k 2 \ + | awk '{print $1}' \ + | head -n $n) + export CUDA_VISIBLE_DEVICES=$(echo $FIRST_N_GPU_IDS | sed 's/ /,/g') + echo "Now CUDA_VISIBLE_DEVICES is set to:" + echo "CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES" +} + +set_n_least_used_CUDA_VISIBLE_DEVICES 2 + +export RAY_NAMESPACE="admin" + +python 2m2t.py "path/to/prompts.csv" \ + --maker_strategy naive --trainer_strategy colossalai_zero2 --lora_rank 2 \ + --num_episodes 10 --max_timesteps 10 --update_timesteps 10 \ + --max_epochs 10 --debug \ No newline at end of file diff --git a/applications/Chat/coati/ray/src/__init__.py b/applications/Chat/coati/ray/src/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/applications/Chat/coati/ray/src/pipeline_strategy.py b/applications/Chat/coati/ray/src/pipeline_strategy.py index 1ad65aee0787..1780839c62ee 100644 --- a/applications/Chat/coati/ray/src/pipeline_strategy.py +++ b/applications/Chat/coati/ray/src/pipeline_strategy.py @@ -1,3 +1,6 @@ +# WIP + + from coati.trainer.strategies import Strategy from coati.trainer.strategies import NaiveStrategy from coati.models.base import Actor, RewardModel, Critic diff --git a/applications/Chat/examples/train_dummy.sh b/applications/Chat/examples/train_dummy.sh index f05249a38d21..595da573e2b1 100755 --- a/applications/Chat/examples/train_dummy.sh +++ b/applications/Chat/examples/train_dummy.sh @@ -14,5 +14,5 @@ set_n_least_used_CUDA_VISIBLE_DEVICES() { } set_n_least_used_CUDA_VISIBLE_DEVICES 2 -export CUDA_VISIBLE_DEVICES="0,1,2,3" + torchrun --standalone --nproc_per_node=2 train_dummy.py --strategy colossalai_zero2 diff --git a/applications/Chat/examples/train_prompts.sh b/applications/Chat/examples/train_prompts.sh index 328a5393b13e..8e1ce67ecc64 100755 --- a/applications/Chat/examples/train_prompts.sh +++ b/applications/Chat/examples/train_prompts.sh @@ -14,7 +14,7 @@ set_n_least_used_CUDA_VISIBLE_DEVICES() { } set_n_least_used_CUDA_VISIBLE_DEVICES 2 -export CUDA_VISIBLE_DEVICES="0,1,2,3" + # torchrun --standalone --nproc_per_node=2 train_prompts.py prompts.csv --strategy colossalai_zero2 torchrun --standalone --nproc_per_node=2 train_prompts.py --prompt_path /path/to/data.json --strategy colossalai_zero2
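Taken together, the series ends with detached PPO split across named Ray actors that find each other through the shared namespace. A schematic of the one-maker/one-trainer wiring the example scripts drive, assuming the post-rename import paths; only keyword names visible in the diffs are used, and all remaining constructor arguments (model, pretrain, batch sizes, ...) are passed through opaquely:

    import os
    import ray
    from coati.ray.src.detached_trainer_ppo import DetachedPPOTrainer
    from coati.ray.src.experience_maker_holder import ExperienceMakerHolder

    def launch_1m1t(trainer_kwargs: dict, maker_kwargs: dict):
        ray.init(namespace=os.environ.setdefault("RAY_NAMESPACE", "admin"))
        # Trainer first: the maker resolves trainer handles by name via
        # ray.get_actor() inside its own __init__.
        trainer = DetachedPPOTrainer.options(name="trainer1").remote(
            experience_maker_holder_name_list=["maker1"], **trainer_kwargs)
        maker = ExperienceMakerHolder.options(name="maker1").remote(
            detached_trainer_name_list=["trainer1"], **maker_kwargs)
        return trainer, maker

The example scripts then have the trainer seed the maker with initial weights (initialize_experience_maker) before the maker's workingloop and the trainer's training loop run in parallel, with update_experience_maker shipping refreshed weights back between updates.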