Skip to content
Merged

Ra #70

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,12 @@ pip install <options> -e .
### Unit Tests
We use [PyTest](https://docs.pytest.org/en/latest/) to execute tests. You can install pytest by `pip install pytest`. As some of the tests require initialization of the distributed backend, GPUs are needed to execute these tests.

To set up the environment for unit testing, first change your current directory to the root directory of your local ColossalAI repository, then run
```bash
pip install -r requirements/requirements-test.txt
```
If you encounter an error telling "Could not find a version that satisfies the requirement fbgemm-gpu==0.2.0", please downgrade your python version to 3.8 or 3.9 and try again.

If you only want to run CPU tests, you can run

```bash
Expand Down Expand Up @@ -138,4 +144,4 @@ You can now create a pull request on the GitHub webpage of your repository. The

Do write clearly the description of your pull request and [link the pull request to your target issue](https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue). This will automatically close the issue when the pull request is approved.

In case of code conflict, you should rebase your branch and resolve the conflicts manually.
In case of code conflict, you should rebase your branch and resolve the conflicts manually.
8 changes: 4 additions & 4 deletions applications/Chat/coati/ray/experience_maker_holder.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,15 +205,15 @@ def update_experience_maker(self,
self.experience_maker.actor.model.load_state_dict(new_actor_state_dict, strict=False)
else:
new_actor_state_dict = state_dict_to(new_actor_state_dict, device=torch.cuda.current_device())
state_dict_increasae = self.actor_lora_constructor.reconstruct_increase(new_actor_state_dict, new_actor_lora_config_dict)
self.actor_lora_constructor.load_state_dict_increase(self.experience_maker.actor.model, state_dict_increasae)
state_dict_increase = self.actor_lora_constructor.reconstruct_increase(new_actor_state_dict, new_actor_lora_config_dict)
self.actor_lora_constructor.load_state_dict_increase(self.experience_maker.actor.model, state_dict_increase)
if new_critic_state_dict is not None:
if not self._update_lora_weights or fully_update:
self.experience_maker.critic.load_state_dict(new_critic_state_dict, strict=False)
else:
new_critic_state_dict = state_dict_to(new_critic_state_dict, device=torch.cuda.current_device())
state_dict_increasae = self.critic_lora_constructor.reconstruct_increase(new_critic_state_dict, new_critic_lora_config_dict)
self.critic_lora_constructor.load_state_dict_increase(self.experience_maker.critic, state_dict_increasae)
state_dict_increase = self.critic_lora_constructor.reconstruct_increase(new_critic_state_dict, new_critic_lora_config_dict)
self.critic_lora_constructor.load_state_dict_increase(self.experience_maker.critic, state_dict_increase)

# the lock must be released after both actor and critic being updated
if chunk_end:
Expand Down
12 changes: 6 additions & 6 deletions applications/Chat/coati/ray/lora_constructor.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ class LoRAConfig:
class LoRAConstructor:
'''
Tools for reconstructing a model from a remote LoRA model.
(Transfering only LoRA data costs much less!)
(Transferring only LoRA data costs much less!)
Usage:
Step 1 (Sender):
filter_state_dict_lora()
Expand Down Expand Up @@ -52,7 +52,7 @@ def reconstruct_increase(self, state_dict_lora: Dict[str, Any], lora_config_dict
if lora_config_dict is not None:
self.register_lora_config(lora_config_dict)

state_dict_increasae = OrderedDict()
state_dict_increase = OrderedDict()
config_iter = iter(self.lora_config_dict.items())
lora_A, lora_B, layer_prefix = None, None, None
for k, v in state_dict_lora.items():
Expand All @@ -65,11 +65,11 @@ def reconstruct_increase(self, state_dict_lora: Dict[str, Any], lora_config_dict
assert layer_prefix_2 == layer_prefix, "unmatched (state_dict, config_dict) pair"
lora_B = v
weight_data_increase = self._compute(lora_A, lora_B, config)
state_dict_increasae[layer_prefix + '.weight'] = weight_data_increase
state_dict_increase[layer_prefix + '.weight'] = weight_data_increase
lora_A, lora_B, layer_prefix = None, None, None
else:
raise ValueError('unexpected key')
return state_dict_increasae
return state_dict_increase

def _compute(self, lora_A, lora_B, config=LoRAConfig()):
def T(w):
Expand All @@ -80,12 +80,12 @@ def T(w):
return weight_data_increase
return 0

def load_state_dict_increase(self, model: nn.Module, state_dict_increasae: Dict[str, Any]):
def load_state_dict_increase(self, model: nn.Module, state_dict_increase: Dict[str, Any]):
'''
The final reconstruction step
'''
# naive approach
model.load_state_dict({k: v + model.state_dict()[k] for k, v in state_dict_increasae.items()}, strict=False)
model.load_state_dict({k: v + model.state_dict()[k] for k, v in state_dict_increase.items()}, strict=False)

@staticmethod
def filter_state_dict_lora(state_dict: Dict[str, Any], keep_non_lora=False):
Expand Down
4 changes: 2 additions & 2 deletions applications/Chat/coati/trainer/strategies/colossalai.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ class ColossalAIStrategy(DDPStrategy):
precision(str): The precision to use. Choose in ('fp32', 'fp16'). Stage 3 only supports fp16.
seed(int): The seed for the random number generator.
shard_init(bool): Whether to shard the model parameters during initialization. Only for ZeRO-3.
This is not compativle with `from_pretrained()`. We temporarily disable this and will support it in the future.
This is not compatible with `from_pretrained()`. We temporarily disable this and will support it in the future.
placement_policy(str): The placement policy for gemini. Choose in ('cpu', 'cuda')
If it is “cpu”, parameters, gradients and optimizer states will be offloaded to CPU,
If it is “cuda”, they will not be offloaded, which means max CUDA memory will be used. It is the fastest.
Expand All @@ -39,7 +39,7 @@ class ColossalAIStrategy(DDPStrategy):
hidden_dim(optional, int): The hidden dimension for the gemini. Only for ZeRO-3.
min_chunk_size_mb(float): The minimum chunk size in MB. Only for ZeRO-3.
gpu_margin_mem_ratio(float): The margin memory ratio for the GPU. Only for ZeRO-3.
reduce_bugket_size(int): The reduce bucket size in bytes. Only for ZeRO-1 and ZeRO-2.
reduce_bucket_size(int): The reduce bucket size in bytes. Only for ZeRO-1 and ZeRO-2.
overlap_communication(bool): Whether to overlap communication and computation. Only for ZeRO-1 and ZeRO-2.
initial_scale(float): The initial scale for the optimizer.
growth_factor(float): The growth factor for the optimizer.
Expand Down