From 9bc702ab4823c78b33c31f50e68363c1ad157ae8 Mon Sep 17 00:00:00 2001
From: Camille Zhong <44392324+Camille7777@users.noreply.github.com>
Date: Fri, 24 Mar 2023 11:21:39 +0800
Subject: [PATCH 01/26] [doc] update chatgpt doc paper link (#3229)
#issue 3189
---
applications/ChatGPT/examples/README.md | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/applications/ChatGPT/examples/README.md b/applications/ChatGPT/examples/README.md
index ce73a5407944..60e6d68bdc0f 100644
--- a/applications/ChatGPT/examples/README.md
+++ b/applications/ChatGPT/examples/README.md
@@ -22,10 +22,10 @@ torchrun --standalone --nproc_per_node=2 train_reward_model.py --pretrain "faceb
- We add special token to the end of the sequence to get better result.
- We use cosine-reducing lr-scheduler for RM training.
- We set value_head as 1 liner layer and initialize the weight of value_head using N(0,1/(d_model + 1)) distribution.
-- We train a Bloom-560m reward model for 1 epoch and find the test acc of the model achieve the performance mentions in [Anthropics paper](https://arxiv.org/abs/2112.00861).
+- We train a Bloom-560m reward model for 1 epoch and find the test acc of the model achieve the performance mentions in [Anthropics paper](https://arxiv.org/abs/2204.05862).
### Experiment result
-Model performance in [Anthropics paper](https://arxiv.org/abs/2112.00861):
+Model performance in [Anthropics paper](https://arxiv.org/abs/2204.05862):

From bd39877da41622240fa7b93c079433260173d5e0 Mon Sep 17 00:00:00 2001
From: Fazzie-Maqianli <55798671+Fazziekey@users.noreply.github.com>
Date: Fri, 24 Mar 2023 11:45:01 +0800
Subject: [PATCH 02/26] support instruct training (#3230)
---
.../ChatGPT/chatgpt/dataset/sft_dataset.py | 7 ++++-
.../ChatGPT/chatgpt/models/base/actor.py | 3 ++
.../ChatGPT/chatgpt/models/llama/llama_lm.py | 2 ++
applications/ChatGPT/chatgpt/trainer/sft.py | 29 +++++++++++--------
.../chatgpt/trainer/strategies/colossalai.py | 1 -
.../ChatGPT/chatgpt/utils/tokenizer_utils.py | 6 ++++
applications/ChatGPT/examples/train_sft.py | 18 +++++++-----
applications/ChatGPT/examples/train_sft.sh | 8 ++++-
applications/ChatGPT/version.txt | 2 +-
9 files changed, 52 insertions(+), 24 deletions(-)
diff --git a/applications/ChatGPT/chatgpt/dataset/sft_dataset.py b/applications/ChatGPT/chatgpt/dataset/sft_dataset.py
index 11ec61908aef..5a5d37f695f3 100644
--- a/applications/ChatGPT/chatgpt/dataset/sft_dataset.py
+++ b/applications/ChatGPT/chatgpt/dataset/sft_dataset.py
@@ -119,10 +119,15 @@ def preprocess(
class AlpacaDataset(Dataset):
"""Dataset for supervised fine-tuning."""
- def __init__(self, data_path: str, tokenizer: transformers.PreTrainedTokenizer):
+ def __init__(self, data_path: str, tokenizer: transformers.PreTrainedTokenizer, max_length: int=None):
super(AlpacaDataset, self).__init__()
logger.info("Loading data...")
list_data_dict = jload(data_path)
+ logger.info(f"Loaded {len(list_data_dict)} examples.")
+
+ if max_length is not None:
+ logger.info(f"Truncating data to max length {max_length}...")
+ list_data_dict = [example for example in list_data_dict if len(example["input"]) <= max_length]
logger.info("Formatting inputs...")
prompt_input, prompt_no_input = PROMPT_DICT["prompt_input"], PROMPT_DICT["prompt_no_input"]
diff --git a/applications/ChatGPT/chatgpt/models/base/actor.py b/applications/ChatGPT/chatgpt/models/base/actor.py
index 57db2bb11a6a..a364f879a850 100644
--- a/applications/ChatGPT/chatgpt/models/base/actor.py
+++ b/applications/ChatGPT/chatgpt/models/base/actor.py
@@ -60,3 +60,6 @@ def forward(self,
logits = output['logits']
log_probs = log_probs_from_logits(logits[:, :-1, :], sequences[:, 1:])
return log_probs[:, -num_actions:]
+
+ def get_base_model(self):
+ return self.model
\ No newline at end of file
diff --git a/applications/ChatGPT/chatgpt/models/llama/llama_lm.py b/applications/ChatGPT/chatgpt/models/llama/llama_lm.py
index c63077b1ac04..5a1a88e0d253 100644
--- a/applications/ChatGPT/chatgpt/models/llama/llama_lm.py
+++ b/applications/ChatGPT/chatgpt/models/llama/llama_lm.py
@@ -36,3 +36,5 @@ def __init__(self,
super().__init__(model, lora_rank, lora_train_bias)
+ def forward(self, input_ids, attention_mask=None, labels=None, **kwargs):
+ return self.model(input_ids, attention_mask=attention_mask, labels=labels, **kwargs)
diff --git a/applications/ChatGPT/chatgpt/trainer/sft.py b/applications/ChatGPT/chatgpt/trainer/sft.py
index 3b35f516816f..d524ded3e825 100644
--- a/applications/ChatGPT/chatgpt/trainer/sft.py
+++ b/applications/ChatGPT/chatgpt/trainer/sft.py
@@ -61,13 +61,15 @@ def fit(self, logger, use_lora, log_interval=10):
# train
self.model.train()
for batch_id, batch in enumerate(self.train_dataloader):
- prompt_ids = batch["input_ids"]
- p_mask = batch["attention_mask"]
- labels = batch["labels"]
- prompt_ids = prompt_ids.squeeze(1).cuda()
- p_mask = p_mask.squeeze(1).cuda()
+ prompt_ids = batch["input_ids"].to(torch.cuda.current_device())
+ p_mask = batch["attention_mask"].to(torch.cuda.current_device())
+ labels = batch["labels"].to(torch.cuda.current_device())
+ # prompt_ids = prompt_ids.squeeze(1).cuda()
+ # p_mask = p_mask.squeeze(1).cuda()
# prompt_logits = self.model(prompt_ids, attention_mask=p_mask, labels=labels)
- loss, prompt_logits = self.model(prompt_ids, attention_mask=p_mask, labels=labels)
+ outputs = self.model(prompt_ids, attention_mask=p_mask, labels=labels)
+ loss = outputs.loss
+ prompt_logits = outputs.logits
# loss = self.loss_fn(prompt_logits, labels)
self.strategy.backward(loss, self.model, self.optimizer)
@@ -83,13 +85,16 @@ def fit(self, logger, use_lora, log_interval=10):
loss_sum = 0
num_seen = 0
for batch in self.eval_dataloader:
- prompt_ids = batch["input_ids"]
- p_mask = batch["attention_mask"]
- prompt_ids = prompt_ids.squeeze(1).cuda()
- p_mask = p_mask.squeeze(1).cuda()
+ prompt_ids = batch["input_ids"].to(torch.cuda.current_device())
+ p_mask = batch["attention_mask"].to(torch.cuda.current_device())
+ labels = batch["labels"].to(torch.cuda.current_device())
+ # prompt_ids = prompt_ids.squeeze(1).cuda()
+ # p_mask = p_mask.squeeze(1).cuda()
+
+ outputs = self.model(prompt_ids, attention_mask=p_mask, labels=labels)
+ loss = outputs.loss
+ # prompt_logits = outputs.logits
- prompt_logits = self.model(prompt_ids, attention_mask=p_mask)
- loss = self.loss_fn(prompt_logits, prompt_ids)
loss_sum += loss.item()
num_seen += prompt_ids.size(0)
diff --git a/applications/ChatGPT/chatgpt/trainer/strategies/colossalai.py b/applications/ChatGPT/chatgpt/trainer/strategies/colossalai.py
index 64ebf12f1922..f11dc6f7544b 100644
--- a/applications/ChatGPT/chatgpt/trainer/strategies/colossalai.py
+++ b/applications/ChatGPT/chatgpt/trainer/strategies/colossalai.py
@@ -9,7 +9,6 @@
from chatgpt.models.lora import LoraLinear
from torch.optim import Optimizer
-
from transformers.modeling_utils import PreTrainedModel
from transformers.tokenization_utils_base import PreTrainedTokenizerBase
diff --git a/applications/ChatGPT/chatgpt/utils/tokenizer_utils.py b/applications/ChatGPT/chatgpt/utils/tokenizer_utils.py
index 8699bf64c7b5..9cfae61ebeda 100644
--- a/applications/ChatGPT/chatgpt/utils/tokenizer_utils.py
+++ b/applications/ChatGPT/chatgpt/utils/tokenizer_utils.py
@@ -16,6 +16,8 @@
import transformers
+from ..models.llama.llama_lm import LlamaLM
+
DEFAULT_PAD_TOKEN = "[PAD]"
DEFAULT_EOS_TOKEN = ""
DEFAULT_BOS_TOKEN = ""
@@ -60,6 +62,10 @@ def smart_tokenizer_and_embedding_resize(
if tokenizer.pad_token is None:
num_new_tokens = tokenizer.add_special_tokens(special_tokens_dict)
+
+ if isinstance(model, LlamaLM):
+ model = model.get_base_model()
+
model.resize_token_embeddings(len(tokenizer))
if num_new_tokens > 0:
diff --git a/applications/ChatGPT/examples/train_sft.py b/applications/ChatGPT/examples/train_sft.py
index 83b34f9dd1ea..ffbf89ccd9bc 100644
--- a/applications/ChatGPT/examples/train_sft.py
+++ b/applications/ChatGPT/examples/train_sft.py
@@ -93,25 +93,27 @@ def train(args):
elif 'alpaca' in args.dataset:
train_dataset = AlpacaDataset(tokenizer=tokenizer, data_path=args.dataset)
eval_dataset = None
- eval_dataset
data_collator = AlpacaDataCollator(tokenizer=tokenizer)
if dist.is_initialized() and dist.get_world_size() > 1:
- sampler = DistributedSampler(train_dataset, shuffle=True, seed=42, drop_last=True)
- logger.info("Using Distributed Sampler")
+ train_sampler = DistributedSampler(train_dataset, shuffle=True, seed=42, drop_last=True)
+ if eval_dataset is not None:
+ eval_sampler = DistributedSampler(eval_dataset, shuffle=False, seed=42, drop_last=False)
else:
- sampler = None
+ train_sampler = None
+ eval_sampler = None
- train_dataloader = DataLoader(train_dataset, shuffle=(sampler is None), sampler=sampler, batch_size=args.batch_size)
+ train_dataloader = DataLoader(train_dataset, shuffle=(train_sampler is None), sampler=train_sampler, batch_size=args.batch_size, collate_fn=data_collator)
if eval_dataset is not None:
- eval_dataloader = DataLoader(eval_dataset, batch_size=args.batch_size)
+ eval_dataloader = DataLoader(eval_dataset, shuffle=(eval_sampler is None), sampler=eval_sampler, batch_size=args.batch_size, collate_fn=data_collator)
+ else:
+ eval_dataloader = None
trainer = SFTTrainer(model=model,
strategy=strategy,
optim=optim,
train_dataloader=train_dataloader,
eval_dataloader=eval_dataloader,
- sampler=sampler,
batch_size=args.batch_size,
max_epochs=args.max_epochs)
@@ -128,7 +130,7 @@ def train(args):
parser.add_argument('--strategy',
choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'],
default='naive')
- parser.add_argument('--model', choices=['gpt2', 'bloom', 'opt'], default='bloom')
+ parser.add_argument('--model', choices=['gpt2', 'bloom', 'opt', 'llama'], default='bloom')
parser.add_argument('--pretrain', type=str, default=None)
parser.add_argument('--dataset', type=str, default='yizhongw/self_instruct')
parser.add_argument('--save_path', type=str, default='sft_ckpt.pth')
diff --git a/applications/ChatGPT/examples/train_sft.sh b/applications/ChatGPT/examples/train_sft.sh
index 9f747b24689e..1b85e83b6880 100755
--- a/applications/ChatGPT/examples/train_sft.sh
+++ b/applications/ChatGPT/examples/train_sft.sh
@@ -17,4 +17,10 @@ set_n_least_used_CUDA_VISIBLE_DEVICES 8
#torchrun --standalone --nproc_per_node=2 train_sft.py --pretrain 'bigscience/bloomz-560m' --model 'bloom' --strategy colossalai_zero2 --log_interval 10
#torchrun --standalone --nproc_per_node=8 train_sft.py --model 'gpt2' --strategy colossalai_zero2 --batch_size 1 --log_interval 10
-torchrun --standalone --nproc_per_node=2 train_sft.py --pretrain "facebook/opt-350m" --model 'opt' --strategy colossalai_zero2 --log_interval 10
+torchrun --standalone --nproc_per_node=8 train_sft.py \
+ --pretrain "/data/personal/nus-mql/LLAMA-7B" \
+ --model 'llama' \
+ --strategy colossalai_zero2 \
+ --log_interval 10 \
+ --save_path /data/personal/nus-mql/Coati-7B \
+ --dataset /data/personal/nus-mql/stanford_alpaca/alpaca_data.json
diff --git a/applications/ChatGPT/version.txt b/applications/ChatGPT/version.txt
index 6e8bf73aa550..3eefcb9dd5b3 100644
--- a/applications/ChatGPT/version.txt
+++ b/applications/ChatGPT/version.txt
@@ -1 +1 @@
-0.1.0
+1.0.0
From 78fd31f9c15b698a4ed07748096684fa40bbc11a Mon Sep 17 00:00:00 2001
From: ver217
Date: Fri, 24 Mar 2023 12:15:06 +0800
Subject: [PATCH 03/26] [chatgpt] add precision option for colossalai (#3233)
---
.../chatgpt/trainer/strategies/colossalai.py | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)
diff --git a/applications/ChatGPT/chatgpt/trainer/strategies/colossalai.py b/applications/ChatGPT/chatgpt/trainer/strategies/colossalai.py
index f11dc6f7544b..0a7c9173283c 100644
--- a/applications/ChatGPT/chatgpt/trainer/strategies/colossalai.py
+++ b/applications/ChatGPT/chatgpt/trainer/strategies/colossalai.py
@@ -30,6 +30,7 @@ class ColossalAIStrategy(DDPStrategy):
Args:
stage(int): The stage to use in ZeRO. Choose in (1, 2, 3)
+ precision(str): The precision to use. Choose in ('fp32', 'fp16'). Stage 3 only supports fp16.
seed(int): The seed for the random number generator.
shard_init(bool): Whether to shard the model parameters during initialization. Only for ZeRO-3.
This is not compativle with `from_pretrained()`. We temporarily disable this and will support it in the future.
@@ -59,6 +60,7 @@ class ColossalAIStrategy(DDPStrategy):
def __init__(
self,
stage: int = 3,
+ precision: str = 'fp16',
seed: int = 42,
shard_init: bool = False, # only for stage 3
placement_policy: str = 'cuda',
@@ -81,12 +83,17 @@ def __init__(
norm_type: float = 2.0) -> None:
super().__init__(seed)
assert placement_policy in ('cpu', 'cuda'), f'Unsupported placement policy "{placement_policy}"'
+ assert precision in ('fp32', 'fp16'), f'Unsupported precision "{precision}"'
self.stage = stage
# TODO(ver217): support shard_init when using from_pretrained()
if shard_init:
warnings.warn(
f'Shard init is not supported model.from_pretrained() yet. Please load weights after strategy.prepare()'
)
+ if stage == 3 and precision == 'fp32':
+ warnings.warn(f'Stage 3 only supports fp16. Precision is set to fp16.')
+ precision = 'fp16'
+ self.precision = precision
self.shard_init = shard_init
self.gemini_config = dict(device=get_current_device(),
placement_policy=placement_policy,
@@ -127,7 +134,10 @@ def model_init_context(self):
return super().model_init_context()
def setup_model(self, model: nn.Module) -> nn.Module:
- return zero_model_wrapper(model, zero_stage=self.stage, gemini_config=self.gemini_config)
+ model = zero_model_wrapper(model, zero_stage=self.stage, gemini_config=self.gemini_config)
+ if self.stage != 3 and self.precision == 'fp16':
+ model = model.half()
+ return model
def setup_optimizer(self, optimizer: optim.Optimizer, model: nn.Module) -> optim.Optimizer:
assert isinstance(optimizer, (CPUAdam, HybridAdam)), f'Unsupported optimizer {type(optimizer)}'
@@ -159,7 +169,7 @@ def save_model(self, model: nn.Module, path: str, only_rank0: bool = False, toke
# merge lora_weights into weights
for module in unwrapped_model.modules():
if isinstance(module, LoraLinear):
- module.merge_weights=True
+ module.merge_weights = True
module.eval()
# get state_dict and save
From 045afa3ea20206f28ec43794fa4c23840ae64a5b Mon Sep 17 00:00:00 2001
From: YuliangLiu0306 <72588413+YuliangLiu0306@users.noreply.github.com>
Date: Fri, 24 Mar 2023 12:15:33 +0800
Subject: [PATCH 04/26] [hotfix] skip torchaudio tracing test (#3211)
* [hotfix] skip torchaudio tracing test
* fix lazy init test issue
---
.../test_torchaudio_model/test_torchaudio_model.py | 4 +++-
tests/test_utils/test_lazy_init/test_distribute.py | 6 +++++-
2 files changed, 8 insertions(+), 2 deletions(-)
diff --git a/tests/test_fx/test_tracer/test_torchaudio_model/test_torchaudio_model.py b/tests/test_fx/test_tracer/test_torchaudio_model/test_torchaudio_model.py
index 65f9f5149dda..66f4be5a6f7f 100644
--- a/tests/test_fx/test_tracer/test_torchaudio_model/test_torchaudio_model.py
+++ b/tests/test_fx/test_tracer/test_torchaudio_model/test_torchaudio_model.py
@@ -6,7 +6,9 @@
from tests.kit.model_zoo import model_zoo
-@pytest.mark.skipif(version.parse(torch.__version__) < version.parse('1.12.0'), reason='torch version < 12')
+# We cannot handle the tensors constructed with constant during forward, such as ``torch.empty(0).to(device=Proxy.device)``
+# TODO: We could handle this case by hijacking torch.Tensor.to function.
+@pytest.mark.skip
def test_torchaudio_models():
torch.backends.cudnn.deterministic = True
diff --git a/tests/test_utils/test_lazy_init/test_distribute.py b/tests/test_utils/test_lazy_init/test_distribute.py
index 37b2c5da1efa..1e32814ab147 100644
--- a/tests/test_utils/test_lazy_init/test_distribute.py
+++ b/tests/test_utils/test_lazy_init/test_distribute.py
@@ -13,7 +13,11 @@
from colossalai.testing import parameterize, rerun_if_address_is_in_use
from colossalai.utils import free_port
from colossalai.utils.common import print_rank_0
-from colossalai.utils.model.experimental import LazyInitContext, LazyTensor, _MyTensor
+
+try:
+ from colossalai.utils.model.experimental import LazyInitContext, LazyTensor, _MyTensor
+except:
+ pass
from tests.kit.model_zoo import model_zoo
# from utils import assert_dist_model_equal, set_seed
From d32ef94ad9fdd50e101ae4b6a6e2ff567f9acf4c Mon Sep 17 00:00:00 2001
From: binmakeswell
Date: Fri, 24 Mar 2023 13:33:35 +0800
Subject: [PATCH 05/26] [doc] fix typo (#3222)
* [doc] fix typo
* [doc] fix typo
---
README.md | 4 ++--
applications/ChatGPT/examples/README.md | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/README.md b/README.md
index 5ce18650fb41..3098d72b4591 100644
--- a/README.md
+++ b/README.md
@@ -80,7 +80,7 @@
Use Docker
Community
- Contributing
+ Contributing
Cite Us
@@ -375,7 +375,7 @@ Join the Colossal-AI community on [Forum](https://github.com/hpcaitech/ColossalA
[Slack](https://join.slack.com/t/colossalaiworkspace/shared_invite/zt-z7b26eeb-CBp7jouvu~r0~lcFzX832w),
and [WeChat(微信)](https://raw.githubusercontent.com/hpcaitech/public_assets/main/colossalai/img/WeChat.png "qrcode") to share your suggestions, feedback, and questions with our engineering team.
-## Invitation to open-source contribution
+## Contributing
Referring to the successful attempts of [BLOOM](https://bigscience.huggingface.co/) and [Stable Diffusion](https://en.wikipedia.org/wiki/Stable_Diffusion), any and all developers and partners with computing powers, datasets, models are welcome to join and build the Colossal-AI community, making efforts towards the era of big AI models!
You may contact us or participate in the following ways:
diff --git a/applications/ChatGPT/examples/README.md b/applications/ChatGPT/examples/README.md
index 60e6d68bdc0f..203e4b4950bd 100644
--- a/applications/ChatGPT/examples/README.md
+++ b/applications/ChatGPT/examples/README.md
@@ -16,7 +16,7 @@ torchrun --standalone --nproc_per_node=2 train_reward_model.py --pretrain "faceb
```
### Features and tricks in RM training
-- We support [Anthropic/hh-rlhf](https://huggingface.co/datasets/Anthropic/hh-rlhf)and[rm-static](https://huggingface.co/datasets/Dahoas/rm-static) datasets.
+- We support [Anthropic/hh-rlhf](https://huggingface.co/datasets/Anthropic/hh-rlhf) and [rm-static](https://huggingface.co/datasets/Dahoas/rm-static) datasets.
- We support 2 kinds of loss_function named 'log_sig'(used by OpenAI) and 'log_exp'(used by Anthropic).
- We change the loss to valid_acc and pair_dist to monitor progress during training.
- We add special token to the end of the sequence to get better result.
From 052b03e83f30f46a43f87e2c9739ab04f56b6460 Mon Sep 17 00:00:00 2001
From: CsRic <59389055+CsRic@users.noreply.github.com>
Date: Fri, 24 Mar 2023 13:36:16 +0800
Subject: [PATCH 06/26] limit torch version (#3213)
Co-authored-by: csric
---
requirements/requirements.txt | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/requirements/requirements.txt b/requirements/requirements.txt
index 8e619ac24477..e32b3ecda063 100644
--- a/requirements/requirements.txt
+++ b/requirements/requirements.txt
@@ -8,4 +8,4 @@ click
fabric
contexttimer
ninja
-torch
+torch>=1.11,<2.0
From 4d5d8f98a49dbbd842742bfd1010384aee70312b Mon Sep 17 00:00:00 2001
From: YuliangLiu0306 <72588413+YuliangLiu0306@users.noreply.github.com>
Date: Fri, 24 Mar 2023 13:39:12 +0800
Subject: [PATCH 07/26] [API] implement device mesh manager (#3221)
* [API] implement device mesh manager
* polish
---
colossalai/cluster/device_mesh_manager.py | 103 ++++++++++++++++--
.../test_cluster/test_device_mesh_manager.py | 40 +++++++
2 files changed, 132 insertions(+), 11 deletions(-)
create mode 100644 tests/test_cluster/test_device_mesh_manager.py
diff --git a/colossalai/cluster/device_mesh_manager.py b/colossalai/cluster/device_mesh_manager.py
index 744799182e22..8754baa19792 100644
--- a/colossalai/cluster/device_mesh_manager.py
+++ b/colossalai/cluster/device_mesh_manager.py
@@ -1,36 +1,117 @@
+from dataclasses import dataclass
+from typing import Dict, List, Tuple, Union
+
+import torch
+import torch.distributed as dist
+
+from colossalai.device.alpha_beta_profiler import AlphaBetaProfiler
from colossalai.device.device_mesh import DeviceMesh
+@dataclass
+class DeviceMeshInfo:
+ '''
+ This class is used to store the information used to initialize the device mesh.
+
+ Args:
+ physical_ids (List[int]): The physical ids of the current booster. For example, if we have the last 4 GPUs on a 8-devices cluster, then the physical ids should be [4, 5, 6, 7].
+        mesh_shape (Union[torch.Size, List[int], Tuple[int]]): The shape of the mesh. For example, if we have 4 GPUs and we want to use 2D mesh with mesh shape [2, 2], then the mesh shape should be [2, 2].
+ '''
+ physical_ids: List[int]
+ mesh_shape: Union[torch.Size, List[int], Tuple[int]] = None
+
+ def __post_init__(self):
+ if self.mesh_shape is not None:
+ world_size = len(self.physical_ids)
+ mesh_shape_numel = torch.Size(self.mesh_shape).numel()
+ assert world_size == mesh_shape_numel, f'the numel of mesh_shape should be equal to world size, but got {world_size} != {mesh_shape_numel}'
+
+
+def initialize_device_mesh(device_mesh_info: DeviceMeshInfo):
+ '''
+ This method is used to initialize the device mesh.
+
+ Args:
+ device_mesh_info (DeviceMeshInfo): The information used to initialize device mesh.
+ '''
+ # parse the device mesh info
+ physical_devices = device_mesh_info.physical_ids
+ physical_mesh = torch.tensor(physical_devices)
+ logical_mesh_shape = device_mesh_info.mesh_shape
+
+ if logical_mesh_shape is None:
+ ab_profiler = AlphaBetaProfiler(physical_devices)
+ # search for the best logical mesh shape
+ logical_mesh_id = ab_profiler.search_best_logical_mesh()
+ logical_mesh_id = torch.Tensor(logical_mesh_id).to(torch.int)
+
+ else:
+ logical_mesh_id = physical_mesh.reshape(logical_mesh_shape)
+
+ device_mesh = DeviceMesh(physical_mesh_id=physical_mesh, logical_mesh_id=logical_mesh_id, init_process_group=True)
+ return device_mesh
+
+
class DeviceMeshManager:
"""
Device mesh manager is responsible for creating and managing device meshes.
"""
def __init__(self):
- self.device_mesh_store = dict()
+ self.device_mesh_store: Dict[str, DeviceMesh] = dict()
- def create_device_mesh(self, name, *args, **kwargs) -> DeviceMesh:
+ def create_device_mesh(self, name, device_mesh_info: DeviceMeshInfo) -> DeviceMesh:
"""
Create a device mesh and store it in the manager.
Args:
name (str): name of the device mesh
- *args: args for DeviceMesh
- **kwargs: kwargs for DeviceMesh
- """
- # TODO(Yuliang): replace *args, **kwargs with explicit arguments
+ device_mesh_info (DeviceMeshInfo): the information used to initialize the device mesh
+ """
if name not in self.device_mesh_store:
- device_mesh = DeviceMesh(*args, **kwargs)
+ device_mesh = initialize_device_mesh(device_mesh_info)
self.device_mesh_store[name] = device_mesh
return device_mesh
else:
raise ValueError(f'Device mesh {name} already exists.')
def get(self, name: str) -> DeviceMesh:
- pass
+ """
+ Get a device mesh by name.
- def destroy(self):
- pass
+ Args:
+ name (str): name of the device mesh
+
+ Returns:
+ DeviceMesh: the device mesh
+ """
+ if name in self.device_mesh_store:
+ return self.device_mesh_store[name]
+ else:
+ raise ValueError(f'Device mesh {name} does not exist.')
+
+ def destroy(self, name: str) -> None:
+ """
+ Destroy a device mesh by name.
+
+ Args:
+ name (str): name of the device mesh
+ """
+ if name in self.device_mesh_store:
+ for pgs in self.device_mesh_store[name].process_groups_dict.values():
+ for pg in pgs:
+ dist.destroy_process_group(pg)
+ del self.device_mesh_store[name]
+ else:
+ raise ValueError(f'Device mesh {name} does not exist.')
def destroy_all(self):
- pass
+ """
+ Destroy all device meshes.
+ """
+ for name in self.device_mesh_store:
+ for pgs in self.device_mesh_store[name].process_groups_dict.values():
+ for pg in pgs:
+ dist.destroy_process_group(pg)
+
+ self.device_mesh_store.clear()
diff --git a/tests/test_cluster/test_device_mesh_manager.py b/tests/test_cluster/test_device_mesh_manager.py
new file mode 100644
index 000000000000..b79814735325
--- /dev/null
+++ b/tests/test_cluster/test_device_mesh_manager.py
@@ -0,0 +1,40 @@
+from functools import partial
+
+import torch
+import torch.multiprocessing as mp
+
+from colossalai.cluster.device_mesh_manager import DeviceMeshInfo, DeviceMeshManager
+from colossalai.device.device_mesh import DeviceMesh
+from colossalai.fx.tracer import ColoTracer
+from colossalai.initialize import launch
+from colossalai.logging import disable_existing_loggers
+from colossalai.utils import free_port
+
+
+def check_device_mesh_manager(rank, world_size, port):
+ disable_existing_loggers()
+ launch(config={}, rank=rank, world_size=world_size, host='localhost', port=port, backend='nccl')
+ device_mesh_manager = DeviceMeshManager()
+ device_mesh_info_auto = DeviceMeshInfo(physical_ids=[0, 1, 2, 3],)
+ device_mesh_auto = device_mesh_manager.create_device_mesh('0', device_mesh_info_auto)
+ assert device_mesh_auto.shape == (2, 2)
+ assert device_mesh_auto._logical_mesh_id.tolist() == [[0, 1], [2, 3]]
+
+ device_mesh_info_with_shape = DeviceMeshInfo(
+ physical_ids=[0, 1, 2, 3],
+ mesh_shape=(2, 2),
+ )
+ device_mesh_with_shape = device_mesh_manager.create_device_mesh('1', device_mesh_info_with_shape)
+
+ assert device_mesh_with_shape.shape == (2, 2)
+ assert device_mesh_with_shape._logical_mesh_id.tolist() == [[0, 1], [2, 3]]
+
+
+def test_device_mesh_manager():
+ world_size = 4
+ run_func = partial(check_device_mesh_manager, world_size=world_size, port=free_port())
+ mp.spawn(run_func, nprocs=world_size)
+
+
+if __name__ == '__main__':
+ test_device_mesh_manager()
From 280fcdc4856e061688e3aba49c3b53427967fd7b Mon Sep 17 00:00:00 2001
From: NatalieC323 <127177614+NatalieC323@users.noreply.github.com>
Date: Fri, 24 Mar 2023 18:44:43 +0800
Subject: [PATCH 08/26] polish code (#3194)
Co-authored-by: YuliangLiu0306 <72588413+YuliangLiu0306@users.noreply.github.com>
---
examples/images/diffusion/README.md | 37 ++---
examples/images/diffusion/ldm/data/lsun.py | 74 +++++----
examples/images/diffusion/main.py | 178 ++++++++++++++++-----
3 files changed, 190 insertions(+), 99 deletions(-)
diff --git a/examples/images/diffusion/README.md b/examples/images/diffusion/README.md
index a70792b9f4a4..3f9690500130 100644
--- a/examples/images/diffusion/README.md
+++ b/examples/images/diffusion/README.md
@@ -47,40 +47,21 @@ conda env create -f environment.yaml
conda activate ldm
```
-You can also update an existing [latent diffusion](https://github.com/CompVis/latent-diffusion) environment by running
+You can also update an existing [latent diffusion](https://github.com/CompVis/latent-diffusion) environment by running:
```
conda install pytorch==1.12.1 torchvision==0.13.1 torchaudio==0.12.1 cudatoolkit=11.3 -c pytorch
pip install transformers diffusers invisible-watermark
```
-#### Step 2: install lightning
-
-Install Lightning version later than 2022.01.04. We suggest you install lightning from source. Notice that the default download path of pip should be within the conda environment, or you may need to specify using 'which pip' and redirect the path into conda environment.
-
-##### From Source
-```
-git clone https://github.com/Lightning-AI/lightning.git
-pip install -r requirements.txt
-python setup.py install
-```
-
-##### From pip
-
-```
-pip install pytorch-lightning
-```
-
-#### Step 3:Install [Colossal-AI](https://colossalai.org/download/) From Our Official Website
+#### Step 2:Install [Colossal-AI](https://colossalai.org/download/) From Our Official Website
You can install the latest version (0.2.7) from our official website or from source. Notice that the suitable version for this training is colossalai(0.2.5), which stands for torch(1.12.1).
##### Download suggested verision for this training
```
-
pip install colossalai==0.2.5
-
```
##### Download the latest version from pip for latest torch version
@@ -89,7 +70,7 @@ pip install colossalai==0.2.5
pip install colossalai
```
-##### From source
+##### From source:
```
git clone https://github.com/hpcaitech/ColossalAI.git
@@ -99,7 +80,7 @@ cd ColossalAI
CUDA_EXT=1 pip install .
```
-#### Step 4:Accelerate with flash attention by xformers(Optional)
+#### Step 3:Accelerate with flash attention by xformers(Optional)
Notice that xformers will accelerate the training process in cost of extra disk space. The suitable version of xformers for this training process is 0.12.0. You can download xformers directly via pip. For more release versions, feel free to check its official website: [XFormers](./https://pypi.org/project/xformers/)
@@ -113,7 +94,7 @@ To use the stable diffusion Docker image, you can either build using the provide
```
# 1. build from dockerfile
-cd docker
+cd ColossalAI/examples/images/diffusion/docker
docker build -t hpcaitech/diffusion:0.2.0 .
# 2. pull from our docker hub
@@ -127,7 +108,7 @@ Once you have the image ready, you can launch the image with the following comma
# On Your Host Machine #
########################
# make sure you start your image in the repository root directory
-cd Colossal-AI
+cd ColossalAI
# run the docker container
docker run --rm \
@@ -144,13 +125,15 @@ docker run --rm \
# Once you have entered the docker container, go to the stable diffusion directory for training
cd examples/images/diffusion/
+# Download the model checkpoint from pretrained (See the following steps)
+# Set up your configuration the "train_colossalai.sh" (See the following steps)
# start training with colossalai
bash train_colossalai.sh
```
It is important for you to configure your volume mapping in order to get the best training experience.
-1. **Mandatory**, mount your prepared data to `/data/scratch` via `-v :/data/scratch`, where you need to replace `` with the actual data path on your machine.
-2. **Recommended**, store the downloaded model weights to your host machine instead of the container directory via `-v :/root/.cache/huggingface`, where you need to repliace the `` with the actual path. In this way, you don't have to repeatedly download the pretrained weights for every `docker run`.
+1. **Mandatory**, mount your prepared data to `/data/scratch` via `-v :/data/scratch`, where you need to replace `` with the actual data path on your machine. Notice that within docker we need to transform Windows path expressions into Linux format, e.g. C:\User\Desktop into /c/User/Desktop.
+2. **Recommended**, store the downloaded model weights to your host machine instead of the container directory via `-v :/root/.cache/huggingface`, where you need to replace the `` with the actual path. In this way, you don't have to repeatedly download the pretrained weights for every `docker run`.
3. **Optional**, if you encounter any problem stating that shared memory is insufficient inside container, please add `-v /dev/shm:/dev/shm` to your `docker run` command.
diff --git a/examples/images/diffusion/ldm/data/lsun.py b/examples/images/diffusion/ldm/data/lsun.py
index 6256e45715ff..f5bf26c14254 100644
--- a/examples/images/diffusion/ldm/data/lsun.py
+++ b/examples/images/diffusion/ldm/data/lsun.py
@@ -5,87 +5,105 @@
from torch.utils.data import Dataset
from torchvision import transforms
-
+# This class is used to create a dataset of images from LSUN dataset for training
class LSUNBase(Dataset):
def __init__(self,
- txt_file,
- data_root,
- size=None,
- interpolation="bicubic",
- flip_p=0.5
+ txt_file, # path to the text file containing the list of image paths
+ data_root, # root directory of the LSUN dataset
+ size=None, # the size of images to resize to
+ interpolation="bicubic", # interpolation method to be used while resizing
+ flip_p=0.5 # probability of random horizontal flipping
):
- self.data_paths = txt_file
- self.data_root = data_root
- with open(self.data_paths, "r") as f:
- self.image_paths = f.read().splitlines()
- self._length = len(self.image_paths)
+ self.data_paths = txt_file # store path to text file containing list of images
+ self.data_root = data_root # store path to root directory of the dataset
+ with open(self.data_paths, "r") as f: # open and read the text file
+ self.image_paths = f.read().splitlines() # read the lines of the file and store as list
+ self._length = len(self.image_paths) # store the number of images
+
+ # create dictionary to hold image path information
self.labels = {
"relative_file_path_": [l for l in self.image_paths],
"file_path_": [os.path.join(self.data_root, l)
for l in self.image_paths],
}
- self.size = size
+ # set the image size to be resized
+ self.size = size
+ # set the interpolation method for resizing the image
self.interpolation = {"linear": PIL.Image.LINEAR,
"bilinear": PIL.Image.BILINEAR,
"bicubic": PIL.Image.BICUBIC,
"lanczos": PIL.Image.LANCZOS,
}[interpolation]
+ # randomly flip the image horizontally with a given probability
self.flip = transforms.RandomHorizontalFlip(p=flip_p)
def __len__(self):
+ # return the length of dataset
return self._length
+
def __getitem__(self, i):
+ # get the image path for the given index
example = dict((k, self.labels[k][i]) for k in self.labels)
image = Image.open(example["file_path_"])
+ # convert it to RGB format
if not image.mode == "RGB":
image = image.convert("RGB")
# default to score-sde preprocessing
- img = np.array(image).astype(np.uint8)
- crop = min(img.shape[0], img.shape[1])
- h, w, = img.shape[0], img.shape[1]
+
+ img = np.array(image).astype(np.uint8) # convert image to numpy array
+ crop = min(img.shape[0], img.shape[1]) # crop the image to a square shape
+ h, w, = img.shape[0], img.shape[1] # get the height and width of image
img = img[(h - crop) // 2:(h + crop) // 2,
- (w - crop) // 2:(w + crop) // 2]
+ (w - crop) // 2:(w + crop) // 2] # crop the image to a square shape
- image = Image.fromarray(img)
- if self.size is not None:
+ image = Image.fromarray(img) # create an image from numpy array
+ if self.size is not None: # if image size is provided, resize the image
image = image.resize((self.size, self.size), resample=self.interpolation)
- image = self.flip(image)
- image = np.array(image).astype(np.uint8)
- example["image"] = (image / 127.5 - 1.0).astype(np.float32)
- return example
-
+ image = self.flip(image) # flip the image horizontally with the given probability
+ image = np.array(image).astype(np.uint8)
+ example["image"] = (image / 127.5 - 1.0).astype(np.float32) # normalize the image values and convert to float32
+ return example # return the example dictionary containing the image and its file paths
+#A dataset class for LSUN Churches training set.
+# It initializes by calling the constructor of LSUNBase class and passing the appropriate arguments.
+# The text file containing the paths to the images and the root directory where the images are stored are passed as arguments. Any additional keyword arguments passed to this class will be forwarded to the constructor of the parent class.
class LSUNChurchesTrain(LSUNBase):
def __init__(self, **kwargs):
super().__init__(txt_file="data/lsun/church_outdoor_train.txt", data_root="data/lsun/churches", **kwargs)
-
+#A dataset class for LSUN Churches validation set.
+# It is similar to LSUNChurchesTrain except that it uses a different text file and sets the flip probability to zero by default.
class LSUNChurchesValidation(LSUNBase):
def __init__(self, flip_p=0., **kwargs):
super().__init__(txt_file="data/lsun/church_outdoor_val.txt", data_root="data/lsun/churches",
flip_p=flip_p, **kwargs)
-
+# A dataset class for LSUN Bedrooms training set.
+# It initializes by calling the constructor of LSUNBase class and passing the appropriate arguments.
class LSUNBedroomsTrain(LSUNBase):
def __init__(self, **kwargs):
super().__init__(txt_file="data/lsun/bedrooms_train.txt", data_root="data/lsun/bedrooms", **kwargs)
-
+# A dataset class for LSUN Bedrooms validation set.
+# It is similar to LSUNBedroomsTrain except that it uses a different text file and sets the flip probability to zero by default.
class LSUNBedroomsValidation(LSUNBase):
def __init__(self, flip_p=0.0, **kwargs):
super().__init__(txt_file="data/lsun/bedrooms_val.txt", data_root="data/lsun/bedrooms",
flip_p=flip_p, **kwargs)
-
+# A dataset class for LSUN Cats training set.
+# It initializes by calling the constructor of LSUNBase class and passing the appropriate arguments.
+# The text file containing the paths to the images and the root directory where the images are stored are passed as arguments.
class LSUNCatsTrain(LSUNBase):
def __init__(self, **kwargs):
super().__init__(txt_file="data/lsun/cat_train.txt", data_root="data/lsun/cats", **kwargs)
-
+# A dataset class for LSUN Cats validation set.
+# It is similar to LSUNCatsTrain except that it uses a different text file and sets the flip probability to zero by default.
class LSUNCatsValidation(LSUNBase):
def __init__(self, flip_p=0., **kwargs):
super().__init__(txt_file="data/lsun/cat_val.txt", data_root="data/lsun/cats",
diff --git a/examples/images/diffusion/main.py b/examples/images/diffusion/main.py
index 4dd88a5eca44..91b809d5a65c 100644
--- a/examples/images/diffusion/main.py
+++ b/examples/images/diffusion/main.py
@@ -44,14 +44,18 @@
class DataLoaderX(DataLoader):
-
+# A custom data loader class that inherits from DataLoader
def __iter__(self):
+ # Overriding the __iter__ method of DataLoader to return a BackgroundGenerator
+    # This is to enable data loading in the background to improve training performance
return BackgroundGenerator(super().__iter__())
def get_parser(**parser_kwargs):
+ #A function to create an ArgumentParser object and add arguments to it
def str2bool(v):
+ # A helper function to parse boolean values from command line arguments
if isinstance(v, bool):
return v
if v.lower() in ("yes", "true", "t", "y", "1"):
@@ -60,8 +64,10 @@ def str2bool(v):
return False
else:
raise argparse.ArgumentTypeError("Boolean value expected.")
-
+ # Create an ArgumentParser object with the specified kwargs
parser = argparse.ArgumentParser(**parser_kwargs)
+
+ # Add various command line arguments with their default values and descriptions
parser.add_argument(
"-n",
"--name",
@@ -161,14 +167,18 @@ def str2bool(v):
return parser
-
+# A function that returns the non-default arguments between two objects
def nondefault_trainer_args(opt):
+ # create an argument parser
parser = argparse.ArgumentParser()
+ # add pytorch lightning trainer default arguments
parser = Trainer.add_argparse_args(parser)
+ # parse the empty arguments to obtain the default values
args = parser.parse_args([])
+ # return all non-default arguments
return sorted(k for k in vars(args) if getattr(opt, k) != getattr(args, k))
-
+# A dataset wrapper class to create a pytorch dataset from an arbitrary object
class WrappedDataset(Dataset):
"""Wraps an arbitrary object with __len__ and __getitem__ into a pytorch dataset"""
@@ -181,7 +191,7 @@ def __len__(self):
def __getitem__(self, idx):
return self.data[idx]
-
+# A function to initialize worker processes
def worker_init_fn(_):
worker_info = torch.utils.data.get_worker_info()
@@ -189,15 +199,18 @@ def worker_init_fn(_):
worker_id = worker_info.id
if isinstance(dataset, Txt2ImgIterableBaseDataset):
+ #divide the dataset into equal parts for each worker
split_size = dataset.num_records // worker_info.num_workers
+ #set the sample IDs for the current worker
# reset num_records to the true number to retain reliable length information
dataset.sample_ids = dataset.valid_ids[worker_id * split_size:(worker_id + 1) * split_size]
+ # set the seed for the current worker
current_id = np.random.choice(len(np.random.get_state()[1]), 1)
return np.random.seed(np.random.get_state()[1][current_id] + worker_id)
else:
return np.random.seed(np.random.get_state()[1][0] + worker_id)
-
+# Provide functionality for creating data loaders based on provided dataset configurations
class DataModuleFromConfig(pl.LightningDataModule):
def __init__(self,
@@ -212,10 +225,12 @@ def __init__(self,
use_worker_init_fn=False,
shuffle_val_dataloader=False):
super().__init__()
+ # Set data module attributes
self.batch_size = batch_size
self.dataset_configs = dict()
self.num_workers = num_workers if num_workers is not None else batch_size * 2
self.use_worker_init_fn = use_worker_init_fn
+ # If a dataset is passed, add it to the dataset configs and create a corresponding dataloader method
if train is not None:
self.dataset_configs["train"] = train
self.train_dataloader = self._train_dataloader
@@ -231,21 +246,28 @@ def __init__(self,
self.wrap = wrap
def prepare_data(self):
+ # Instantiate datasets
for data_cfg in self.dataset_configs.values():
instantiate_from_config(data_cfg)
def setup(self, stage=None):
+ # Instantiate datasets from the dataset configs
self.datasets = dict((k, instantiate_from_config(self.dataset_configs[k])) for k in self.dataset_configs)
+
+ # If wrap is true, create a WrappedDataset for each dataset
if self.wrap:
for k in self.datasets:
self.datasets[k] = WrappedDataset(self.datasets[k])
def _train_dataloader(self):
+ #Check if the train dataset is iterable
is_iterable_dataset = isinstance(self.datasets['train'], Txt2ImgIterableBaseDataset)
+ # Set the worker initialization function if the dataset is iterable or use_worker_init_fn is True
if is_iterable_dataset or self.use_worker_init_fn:
init_fn = worker_init_fn
else:
init_fn = None
+ # Return a DataLoaderX object for the train dataset
return DataLoaderX(self.datasets["train"],
batch_size=self.batch_size,
num_workers=self.num_workers,
@@ -253,10 +275,12 @@ def _train_dataloader(self):
worker_init_fn=init_fn)
def _val_dataloader(self, shuffle=False):
+ #Check if the validation dataset is iterable
if isinstance(self.datasets['validation'], Txt2ImgIterableBaseDataset) or self.use_worker_init_fn:
init_fn = worker_init_fn
else:
init_fn = None
+ # Return a DataLoaderX object for the validation dataset
return DataLoaderX(self.datasets["validation"],
batch_size=self.batch_size,
num_workers=self.num_workers,
@@ -264,7 +288,9 @@ def _val_dataloader(self, shuffle=False):
shuffle=shuffle)
def _test_dataloader(self, shuffle=False):
+ # Check if the test dataset is iterable
is_iterable_dataset = isinstance(self.datasets['train'], Txt2ImgIterableBaseDataset)
+ # Set the worker initialization function if the dataset is iterable or use_worker_init_fn is True
if is_iterable_dataset or self.use_worker_init_fn:
init_fn = worker_init_fn
else:
@@ -291,6 +317,7 @@ def _predict_dataloader(self, shuffle=False):
class SetupCallback(Callback):
+ # Initialize the callback with the necessary parameters
def __init__(self, resume, now, logdir, ckptdir, cfgdir, config, lightning_config):
super().__init__()
@@ -302,12 +329,14 @@ def __init__(self, resume, now, logdir, ckptdir, cfgdir, config, lightning_confi
self.config = config
self.lightning_config = lightning_config
+ # Save a checkpoint if training is interrupted with keyboard interrupt
def on_keyboard_interrupt(self, trainer, pl_module):
if trainer.global_rank == 0:
print("Summoning checkpoint.")
ckpt_path = os.path.join(self.ckptdir, "last.ckpt")
trainer.save_checkpoint(ckpt_path)
+ # Create necessary directories and save configuration files before training starts
# def on_pretrain_routine_start(self, trainer, pl_module):
def on_fit_start(self, trainer, pl_module):
if trainer.global_rank == 0:
@@ -316,6 +345,7 @@ def on_fit_start(self, trainer, pl_module):
os.makedirs(self.ckptdir, exist_ok=True)
os.makedirs(self.cfgdir, exist_ok=True)
+ #Create trainstep checkpoint directory if necessary
if "callbacks" in self.lightning_config:
if 'metrics_over_trainsteps_checkpoint' in self.lightning_config['callbacks']:
os.makedirs(os.path.join(self.ckptdir, 'trainstep_checkpoints'), exist_ok=True)
@@ -323,11 +353,13 @@ def on_fit_start(self, trainer, pl_module):
print(OmegaConf.to_yaml(self.config))
OmegaConf.save(self.config, os.path.join(self.cfgdir, "{}-project.yaml".format(self.now)))
+ # Save project config and lightning config as YAML files
print("Lightning config")
print(OmegaConf.to_yaml(self.lightning_config))
OmegaConf.save(OmegaConf.create({"lightning": self.lightning_config}),
os.path.join(self.cfgdir, "{}-lightning.yaml".format(self.now)))
+ # Remove log directory if resuming training and directory already exists
else:
# ModelCheckpoint callback created log directory --- remove it
if not self.resume and os.path.exists(self.logdir):
@@ -346,25 +378,28 @@ def on_fit_start(self, trainer, pl_module):
# trainer.save_checkpoint(ckpt_path)
+# PyTorch Lightning callback for logging images during training and validation of a deep learning model
class ImageLogger(Callback):
def __init__(self,
- batch_frequency,
- max_images,
- clamp=True,
- increase_log_steps=True,
- rescale=True,
- disabled=False,
- log_on_batch_idx=False,
- log_first_step=False,
- log_images_kwargs=None):
+ batch_frequency, # Frequency of batches on which to log images
+ max_images, # Maximum number of images to log
+ clamp=True, # Whether to clamp pixel values to [-1,1]
+ increase_log_steps=True, # Whether to increase frequency of log steps exponentially
+                 rescale=True, # Whether to rescale pixel values to [0,1]
+                 disabled=False, # Whether to disable logging
+                 log_on_batch_idx=False, # Whether to log on batch index instead of global step
+                 log_first_step=False, # Whether to log on the first step
+ log_images_kwargs=None): # Additional keyword arguments to pass to log_images method
super().__init__()
self.rescale = rescale
self.batch_freq = batch_frequency
self.max_images = max_images
self.logger_log_images = {
- pl.loggers.CSVLogger: self._testtube,
+ # Dictionary of logger classes and their corresponding logging methods
+ pl.loggers.CSVLogger: self._testtube,
}
+ # Create a list of exponentially increasing log steps, starting from 1 and ending at batch_frequency
self.log_steps = [2**n for n in range(int(np.log2(self.batch_freq)) + 1)]
if not increase_log_steps:
self.log_steps = [self.batch_freq]
@@ -374,17 +409,32 @@ def __init__(self,
self.log_images_kwargs = log_images_kwargs if log_images_kwargs else {}
self.log_first_step = log_first_step
- @rank_zero_only
- def _testtube(self, pl_module, images, batch_idx, split):
+ @rank_zero_only # Ensure that only the first process in distributed training executes this method
+    def _testtube(self,
+                  pl_module, # The PyTorch Lightning module
+                  images, # A dictionary of images to log
+                  batch_idx, # The batch index
+                  split # The split (train/val) on which to log the images
+ ):
+ # Method for logging images using test-tube logger
for k in images:
grid = torchvision.utils.make_grid(images[k])
grid = (grid + 1.0) / 2.0 # -1,1 -> 0,1; c,h,w
tag = f"{split}/{k}"
+ # Add image grid to logger's experiment
pl_module.logger.experiment.add_image(tag, grid, global_step=pl_module.global_step)
@rank_zero_only
- def log_local(self, save_dir, split, images, global_step, current_epoch, batch_idx):
+ def log_local(self,
+ save_dir,
+ split, # The split (train/val) on which to log the images
+ images, # A dictionary of images to log
+ global_step, # The global step
+ current_epoch, # The current epoch.
+ batch_idx
+ ):
+ # Method for saving image grids to local file system
root = os.path.join(save_dir, "images", split)
for k in images:
grid = torchvision.utils.make_grid(images[k], nrow=4)
@@ -396,12 +446,16 @@ def log_local(self, save_dir, split, images, global_step, current_epoch, batch_i
filename = "{}_gs-{:06}_e-{:06}_b-{:06}.png".format(k, global_step, current_epoch, batch_idx)
path = os.path.join(root, filename)
os.makedirs(os.path.split(path)[0], exist_ok=True)
+ # Save image grid as PNG file
Image.fromarray(grid).save(path)
def log_img(self, pl_module, batch, batch_idx, split="train"):
+ #Function for logging images to both the logger and local file system.
check_idx = batch_idx if self.log_on_batch_idx else pl_module.global_step
+ # check if it's time to log an image batch
if (self.check_frequency(check_idx) and # batch_idx % self.batch_freq == 0
hasattr(pl_module, "log_images") and callable(pl_module.log_images) and self.max_images > 0):
+ # Get logger type and check if training mode is on
logger = type(pl_module.logger)
is_train = pl_module.training
@@ -409,8 +463,10 @@ def log_img(self, pl_module, batch, batch_idx, split="train"):
pl_module.eval()
with torch.no_grad():
+ # Get images from log_images method of the pl_module
images = pl_module.log_images(batch, split=split, **self.log_images_kwargs)
+ # Clip images if specified and convert to CPU tensor
for k in images:
N = min(images[k].shape[0], self.max_images)
images[k] = images[k][:N]
@@ -419,15 +475,19 @@ def log_img(self, pl_module, batch, batch_idx, split="train"):
if self.clamp:
images[k] = torch.clamp(images[k], -1., 1.)
+ # Log images locally to file system
self.log_local(pl_module.logger.save_dir, split, images, pl_module.global_step, pl_module.current_epoch,
batch_idx)
+ # log the images using the logger
logger_log_images = self.logger_log_images.get(logger, lambda *args, **kwargs: None)
logger_log_images(pl_module, images, pl_module.global_step, split)
+ # switch back to training mode if necessary
if is_train:
pl_module.train()
+ # The function checks if it's time to log an image batch
def check_frequency(self, check_idx):
if ((check_idx % self.batch_freq) == 0 or
(check_idx in self.log_steps)) and (check_idx > 0 or self.log_first_step):
@@ -439,14 +499,17 @@ def check_frequency(self, check_idx):
return True
return False
+ # Log images on train batch end if logging is not disabled
def on_train_batch_end(self, trainer, pl_module, outputs, batch, batch_idx):
# if not self.disabled and (pl_module.global_step > 0 or self.log_first_step):
# self.log_img(pl_module, batch, batch_idx, split="train")
pass
+ # Log images on validation batch end if logging is not disabled and in validation mode
def on_validation_batch_end(self, trainer, pl_module, outputs, batch, batch_idx):
if not self.disabled and pl_module.global_step > 0:
self.log_img(pl_module, batch, batch_idx, split="val")
+ # log gradients during calibration if necessary
if hasattr(pl_module, 'calibrate_grad_norm'):
if (pl_module.calibrate_grad_norm and batch_idx % 25 == 0) and batch_idx > 0:
self.log_gradients(trainer, pl_module, batch_idx=batch_idx)
@@ -458,6 +521,7 @@ class CUDACallback(Callback):
def on_train_start(self, trainer, pl_module):
rank_zero_info("Training is starting")
+    # This method is called at the end of training
def on_train_end(self, trainer, pl_module):
rank_zero_info("Training is ending")
@@ -524,6 +588,7 @@ def on_train_epoch_end(self, trainer, pl_module):
# params:
# key: value
+ # get the current time to create a new logging directory
now = datetime.datetime.now().strftime("%Y-%m-%dT%H-%M-%S")
# add cwd for convenience and to make classes in this file available when
@@ -535,11 +600,13 @@ def on_train_epoch_end(self, trainer, pl_module):
parser = Trainer.add_argparse_args(parser)
opt, unknown = parser.parse_known_args()
+    # Raise an error if both the name and resume arguments are specified
if opt.name and opt.resume:
raise ValueError("-n/--name and -r/--resume cannot be specified both."
"If you want to resume training in a new log folder, "
"use -n/--name in combination with --resume_from_checkpoint")
+ # Check if the "resume" option is specified, resume training from the checkpoint if it is true
ckpt = None
if opt.resume:
rank_zero_info("Resuming from {}".format(opt.resume))
@@ -557,8 +624,10 @@ def on_train_epoch_end(self, trainer, pl_module):
logdir = opt.resume.rstrip("/")
ckpt = os.path.join(logdir, "checkpoints", "last.ckpt")
+ # Finds all ".yaml" configuration files in the log directory and adds them to the list of base configurations
base_configs = sorted(glob.glob(os.path.join(logdir, "configs/*.yaml")))
opt.base = base_configs + opt.base
+ # Gets the name of the current log directory by splitting the path and taking the last element.
_tmp = logdir.split("/")
nowname = _tmp[-1]
else:
@@ -574,13 +643,17 @@ def on_train_epoch_end(self, trainer, pl_module):
nowname = now + name + opt.postfix
logdir = os.path.join(opt.logdir, nowname)
+    # Sets the checkpoint path if the 'ckpt' option is specified
if opt.ckpt:
ckpt = opt.ckpt
+ # Create the checkpoint and configuration directories within the log directory.
ckptdir = os.path.join(logdir, "checkpoints")
cfgdir = os.path.join(logdir, "configs")
+ # Sets the seed for the random number generator to ensure reproducibility
seed_everything(opt.seed)
+    # Initialize and save configuration using the OmegaConf library.
try:
# init and save configs
configs = [OmegaConf.load(cfg) for cfg in opt.base]
@@ -593,6 +666,7 @@ def on_train_epoch_end(self, trainer, pl_module):
for k in nondefault_trainer_args(opt):
trainer_config[k] = getattr(opt, k)
+ # Check whether the accelerator is gpu
if not trainer_config["accelerator"] == "gpu":
del trainer_config["accelerator"]
cpu = True
@@ -609,6 +683,7 @@ def on_train_epoch_end(self, trainer, pl_module):
config.model["params"].update({"use_fp16": False})
if ckpt is not None:
+ #If a checkpoint path is specified in the ckpt variable, the code updates the "ckpt" key in the "params" dictionary of the config.model configuration with the value of ckpt
config.model["params"].update({"ckpt": ckpt})
rank_zero_info("Using ckpt_path = {}".format(config.model["params"]["ckpt"]))
@@ -617,7 +692,8 @@ def on_train_epoch_end(self, trainer, pl_module):
trainer_kwargs = dict()
# config the logger
- # default logger configs
+ # Default logger configs to log training metrics during the training process.
+ # These loggers are specified as targets in the dictionary, along with the configuration settings specific to each logger.
default_logger_cfgs = {
"wandb": {
"target": LIGHTNING_PACK_NAME + "loggers.WandbLogger",
@@ -638,6 +714,7 @@ def on_train_epoch_end(self, trainer, pl_module):
}
}
+ # Set up the logger for TensorBoard
default_logger_cfg = default_logger_cfgs["tensorboard"]
if "logger" in lightning_config:
logger_cfg = lightning_config.logger
@@ -660,6 +737,7 @@ def on_train_epoch_end(self, trainer, pl_module):
trainer_kwargs["strategy"] = instantiate_from_config(strategy_cfg)
+ # Set up ModelCheckpoint callback to save best models
# modelcheckpoint - use TrainResult/EvalResult(checkpoint_on=metric) to
# specify which metric is used to determine best models
default_modelckpt_cfg = {
@@ -683,45 +761,50 @@ def on_train_epoch_end(self, trainer, pl_module):
if version.parse(pl.__version__) < version.parse('1.4.0'):
trainer_kwargs["checkpoint_callback"] = instantiate_from_config(modelckpt_cfg)
+ # Set up various callbacks, including logging, learning rate monitoring, and CUDA management
# add callback which sets up log directory
default_callbacks_cfg = {
- "setup_callback": {
+ "setup_callback": { # callback to set up the training
"target": "main.SetupCallback",
"params": {
- "resume": opt.resume,
- "now": now,
- "logdir": logdir,
- "ckptdir": ckptdir,
- "cfgdir": cfgdir,
- "config": config,
- "lightning_config": lightning_config,
+ "resume": opt.resume, # resume training if applicable
+ "now": now,
+ "logdir": logdir, # directory to save the log file
+ "ckptdir": ckptdir, # directory to save the checkpoint file
+ "cfgdir": cfgdir, # directory to save the configuration file
+ "config": config, # configuration dictionary
+ "lightning_config": lightning_config, # LightningModule configuration
}
},
- "image_logger": {
+ "image_logger": { # callback to log image data
"target": "main.ImageLogger",
"params": {
- "batch_frequency": 750,
- "max_images": 4,
- "clamp": True
+ "batch_frequency": 750, # how frequently to log images
+ "max_images": 4, # maximum number of images to log
+ "clamp": True # whether to clamp pixel values to [0,1]
}
},
- "learning_rate_logger": {
+ "learning_rate_logger": { # callback to log learning rate
"target": "main.LearningRateMonitor",
"params": {
- "logging_interval": "step",
- # "log_momentum": True
+ "logging_interval": "step", # logging frequency (either 'step' or 'epoch')
+ # "log_momentum": True # whether to log momentum (currently commented out)
}
},
- "cuda_callback": {
+ "cuda_callback": { # callback to handle CUDA-related operations
"target": "main.CUDACallback"
},
}
+ # If the LightningModule configuration has specified callbacks, use those
+ # Otherwise, create an empty OmegaConf configuration object
if "callbacks" in lightning_config:
callbacks_cfg = lightning_config.callbacks
else:
callbacks_cfg = OmegaConf.create()
-
+
+ # If the 'metrics_over_trainsteps_checkpoint' callback is specified in the
+ # LightningModule configuration, update the default callbacks configuration
if 'metrics_over_trainsteps_checkpoint' in callbacks_cfg:
print(
'Caution: Saving checkpoints every n train steps without deleting. This might require some free space.')
@@ -739,15 +822,17 @@ def on_train_epoch_end(self, trainer, pl_module):
}
}
default_callbacks_cfg.update(default_metrics_over_trainsteps_ckpt_dict)
-
+
+ # Merge the default callbacks configuration with the specified callbacks configuration, and instantiate the callbacks
callbacks_cfg = OmegaConf.merge(default_callbacks_cfg, callbacks_cfg)
trainer_kwargs["callbacks"] = [instantiate_from_config(callbacks_cfg[k]) for k in callbacks_cfg]
+ # Create a Trainer object with the specified command-line arguments and keyword arguments, and set the log directory
trainer = Trainer.from_argparse_args(trainer_opt, **trainer_kwargs)
trainer.logdir = logdir
- # data
+ # Create a data module based on the configuration file
data = instantiate_from_config(config.data)
# NOTE according to https://pytorch-lightning.readthedocs.io/en/latest/datamodules.html
# calling these ourselves should not be necessary but it is.
@@ -755,10 +840,12 @@ def on_train_epoch_end(self, trainer, pl_module):
data.prepare_data()
data.setup()
+ # Print some information about the datasets in the data module
for k in data.datasets:
rank_zero_info(f"{k}, {data.datasets[k].__class__.__name__}, {len(data.datasets[k])}")
- # configure learning rate
+ # Configure learning rate based on the batch size, base learning rate and number of GPUs
+ # If scale_lr is true, calculate the learning rate based on additional factors
bs, base_lr = config.data.params.batch_size, config.model.base_learning_rate
if not cpu:
ngpu = trainer_config["devices"]
@@ -780,7 +867,7 @@ def on_train_epoch_end(self, trainer, pl_module):
rank_zero_info("++++ NOT USING LR SCALING ++++")
rank_zero_info(f"Setting learning rate to {model.learning_rate:.2e}")
- # allow checkpointing via USR1
+ # Allow checkpointing via USR1
def melk(*args, **kwargs):
# run all checkpoint hooks
if trainer.global_rank == 0:
@@ -794,20 +881,23 @@ def divein(*args, **kwargs):
pudb.set_trace()
import signal
-
+ # Assign melk to SIGUSR1 signal and divein to SIGUSR2 signal
signal.signal(signal.SIGUSR1, melk)
signal.signal(signal.SIGUSR2, divein)
- # run
+ # Run the training and validation
if opt.train:
try:
trainer.fit(model, data)
except Exception:
melk()
raise
+ # Print the maximum GPU memory allocated during training
+ print(f"GPU memory usage: {torch.cuda.max_memory_allocated() / 1024**2:.0f} MB")
# if not opt.no_test and not trainer.interrupted:
# trainer.test(model, data)
except Exception:
+ # If there's an exception, debug it if opt.debug is true and the trainer's global rank is 0
if opt.debug and trainer.global_rank == 0:
try:
import pudb as debugger
@@ -816,7 +906,7 @@ def divein(*args, **kwargs):
debugger.post_mortem()
raise
finally:
- # move newly created debug project to debug_runs
+    # Move the log directory to debug_runs if opt.debug is true and the trainer's global rank is 0
if opt.debug and not opt.resume and trainer.global_rank == 0:
dst, name = os.path.split(logdir)
dst = os.path.join(dst, "debug_runs", name)
From 1653063fce7062dfd652e73691fac44a24698cd5 Mon Sep 17 00:00:00 2001
From: Hakjin Lee
Date: Mon, 27 Mar 2023 10:41:08 +0900
Subject: [PATCH 09/26] [CI] Fix pre-commit workflow (#3238)
---
.github/ISSUE_TEMPLATE/config.yml | 2 +-
.github/ISSUE_TEMPLATE/feature_request.yml | 2 +-
.github/workflows/doc_test_on_pr.yml | 2 +-
.github/workflows/post_commit.yml | 4 +-
.../example_checks/check_dispatch_inputs.py | 54 +++++++-------
.../example_checks/check_example_weekly.py | 74 +++++++++----------
.../example_checks/detect_changed_example.py | 48 ++++++------
7 files changed, 93 insertions(+), 93 deletions(-)
diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml
index 673b1274c94b..b310fcfefc15 100644
--- a/.github/ISSUE_TEMPLATE/config.yml
+++ b/.github/ISSUE_TEMPLATE/config.yml
@@ -8,4 +8,4 @@ contact_links:
about: This issue tracker is not for technical support. Please use WeChat, and ask the community for help.
- name: 😊 Advanced question - GitHub Discussions
url: https://github.com/hpcaitech/ColossalAI/discussions
- about: Use GitHub Discussions for advanced and unanswered technical questions, requiring a maintainer's answer.
\ No newline at end of file
+ about: Use GitHub Discussions for advanced and unanswered technical questions, requiring a maintainer's answer.
diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml
index d05bc25f6f41..f12c41b52e6f 100644
--- a/.github/ISSUE_TEMPLATE/feature_request.yml
+++ b/.github/ISSUE_TEMPLATE/feature_request.yml
@@ -22,7 +22,7 @@ body:
If applicable, add screenshots to help explain your problem.
**Suggest a potential alternative/fix**
Tell us how we could improve this project.
- **Optional: Affiliation**
+ **Optional: Affiliation**
Institution/email information helps better analyze and evaluate users to improve the project. Welcome to establish in-depth cooperation.
placeholder: |
A clear and concise description of your idea.
diff --git a/.github/workflows/doc_test_on_pr.yml b/.github/workflows/doc_test_on_pr.yml
index a083362a7f0f..fbe669582c20 100644
--- a/.github/workflows/doc_test_on_pr.yml
+++ b/.github/workflows/doc_test_on_pr.yml
@@ -71,7 +71,7 @@ jobs:
- name: Checkout ColossalAI
uses: actions/checkout@v3
-
+
- name: Install Doc Test Requirements
run: |
source activate pytorch
diff --git a/.github/workflows/post_commit.yml b/.github/workflows/post_commit.yml
index bf93eabbf43f..1bbc0d2f5c34 100644
--- a/.github/workflows/post_commit.yml
+++ b/.github/workflows/post_commit.yml
@@ -82,7 +82,7 @@ jobs:
# create pull request
- name: Create Pull Request
- if: steps.commit.outputs.status == 'success'
+ if: steps.commit.outcome == 'success'
id: cpr
uses: peter-evans/create-pull-request@v4
with:
@@ -90,7 +90,7 @@ jobs:
title: "[format] applied code formatting on changed files in PR ${{ github.event.pull_request.number }}"
- name: Enable Auto-merge for the New PR
- if: steps.commit.outputs.status == 'success'
+ if: steps.commit.outcome == 'success'
uses: peter-evans/enable-pull-request-automerge@v2
with:
pull-request-number: ${{ steps.cpr.outputs.pull-request-number }}
diff --git a/.github/workflows/scripts/example_checks/check_dispatch_inputs.py b/.github/workflows/scripts/example_checks/check_dispatch_inputs.py
index 04d2063ec5fc..5bec96187e0c 100644
--- a/.github/workflows/scripts/example_checks/check_dispatch_inputs.py
+++ b/.github/workflows/scripts/example_checks/check_dispatch_inputs.py
@@ -1,27 +1,27 @@
-import argparse
-import os
-
-
-def check_inputs(input_list):
- for path in input_list:
- real_path = os.path.join('examples', path)
- if not os.path.exists(real_path):
- return False
- return True
-
-
-def main():
- parser = argparse.ArgumentParser()
- parser.add_argument('-f', '--fileNameList', type=str, help="List of file names")
- args = parser.parse_args()
- name_list = args.fileNameList.split(",")
- is_correct = check_inputs(name_list)
-
- if is_correct:
- print('success')
- else:
- print('failure')
-
-
-if __name__ == '__main__':
- main()
+import argparse
+import os
+
+
+def check_inputs(input_list):
+ for path in input_list:
+ real_path = os.path.join('examples', path)
+ if not os.path.exists(real_path):
+ return False
+ return True
+
+
+def main():
+ parser = argparse.ArgumentParser()
+ parser.add_argument('-f', '--fileNameList', type=str, help="List of file names")
+ args = parser.parse_args()
+ name_list = args.fileNameList.split(",")
+ is_correct = check_inputs(name_list)
+
+ if is_correct:
+ print('success')
+ else:
+ print('failure')
+
+
+if __name__ == '__main__':
+ main()
diff --git a/.github/workflows/scripts/example_checks/check_example_weekly.py b/.github/workflows/scripts/example_checks/check_example_weekly.py
index 941e90901f3d..83eff644e315 100644
--- a/.github/workflows/scripts/example_checks/check_example_weekly.py
+++ b/.github/workflows/scripts/example_checks/check_example_weekly.py
@@ -1,37 +1,37 @@
-import os
-
-
-def show_files(path, all_files):
- # Traverse all the folder/file in current directory
- file_list = os.listdir(path)
- # Determine the element is folder or file. If file, pass it into list, if folder, recurse.
- for file_name in file_list:
- # Get the abs directory using os.path.join() and store into cur_path.
- cur_path = os.path.join(path, file_name)
- # Determine whether folder
- if os.path.isdir(cur_path):
- show_files(cur_path, all_files)
- else:
- all_files.append(cur_path)
- return all_files
-
-
-def join(input_list, sep=None):
- return (sep or ' ').join(input_list)
-
-
-def main():
- contents = show_files('examples/', [])
- all_loc = []
- for file_loc in contents:
- split_loc = file_loc.split('/')
- # must have two sub-folder levels after examples folder, such as examples/images/vit is acceptable, examples/images/README.md is not, examples/requirements.txt is not.
- if len(split_loc) >= 4:
- re_loc = '/'.join(split_loc[1:3])
- if re_loc not in all_loc:
- all_loc.append(re_loc)
- print(all_loc)
-
-
-if __name__ == '__main__':
- main()
+import os
+
+
+def show_files(path, all_files):
+ # Traverse all the folder/file in current directory
+ file_list = os.listdir(path)
+ # Determine the element is folder or file. If file, pass it into list, if folder, recurse.
+ for file_name in file_list:
+ # Get the abs directory using os.path.join() and store into cur_path.
+ cur_path = os.path.join(path, file_name)
+ # Determine whether folder
+ if os.path.isdir(cur_path):
+ show_files(cur_path, all_files)
+ else:
+ all_files.append(cur_path)
+ return all_files
+
+
+def join(input_list, sep=None):
+ return (sep or ' ').join(input_list)
+
+
+def main():
+ contents = show_files('examples/', [])
+ all_loc = []
+ for file_loc in contents:
+ split_loc = file_loc.split('/')
+ # must have two sub-folder levels after examples folder, such as examples/images/vit is acceptable, examples/images/README.md is not, examples/requirements.txt is not.
+ if len(split_loc) >= 4:
+ re_loc = '/'.join(split_loc[1:3])
+ if re_loc not in all_loc:
+ all_loc.append(re_loc)
+ print(all_loc)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/.github/workflows/scripts/example_checks/detect_changed_example.py b/.github/workflows/scripts/example_checks/detect_changed_example.py
index df4fd67368fc..c69d95a552e9 100644
--- a/.github/workflows/scripts/example_checks/detect_changed_example.py
+++ b/.github/workflows/scripts/example_checks/detect_changed_example.py
@@ -1,24 +1,24 @@
-import argparse
-
-
-def main():
- parser = argparse.ArgumentParser()
- parser.add_argument('-f', '--fileNameList', type=str, help="The list of changed files")
- args = parser.parse_args()
- name_list = args.fileNameList.split(":")
- folder_need_check = set()
- for loc in name_list:
- # Find only the sub-sub-folder of 'example' folder
- # the examples folder structure is like
- # - examples
- # - area
- # - application
- # - file
- if loc.split("/")[0] == "examples" and len(loc.split("/")) >= 4:
- folder_need_check.add('/'.join(loc.split("/")[1:3]))
- # Output the result using print. Then the shell can get the values.
- print(list(folder_need_check))
-
-
-if __name__ == '__main__':
- main()
+import argparse
+
+
+def main():
+ parser = argparse.ArgumentParser()
+ parser.add_argument('-f', '--fileNameList', type=str, help="The list of changed files")
+ args = parser.parse_args()
+ name_list = args.fileNameList.split(":")
+ folder_need_check = set()
+ for loc in name_list:
+ # Find only the sub-sub-folder of 'example' folder
+ # the examples folder structure is like
+ # - examples
+ # - area
+ # - application
+ # - file
+ if loc.split("/")[0] == "examples" and len(loc.split("/")) >= 4:
+ folder_need_check.add('/'.join(loc.split("/")[1:3]))
+ # Output the result using print. Then the shell can get the values.
+ print(list(folder_need_check))
+
+
+if __name__ == '__main__':
+ main()
From 1a229045af97767a21223ee1b3694c9aedac154e Mon Sep 17 00:00:00 2001
From: YH <100389977+yhna940@users.noreply.github.com>
Date: Mon, 27 Mar 2023 10:42:21 +0900
Subject: [PATCH 10/26] Add interface for colo tesnor dp size (#3227)
---
colossalai/gemini/chunk/manager.py | 2 +-
colossalai/tensor/colo_tensor.py | 9 +++++++++
2 files changed, 10 insertions(+), 1 deletion(-)
diff --git a/colossalai/gemini/chunk/manager.py b/colossalai/gemini/chunk/manager.py
index 30ac4d354647..2fa65c970316 100644
--- a/colossalai/gemini/chunk/manager.py
+++ b/colossalai/gemini/chunk/manager.py
@@ -72,7 +72,7 @@ def register_tensor(self,
if tensor.numel() > chunk_size:
chunk_size = tensor.numel()
- dp_size = tensor.process_group.dp_world_size()
+ dp_size = tensor.get_dp_world_size()
chunk_size = chunk_size + (-chunk_size % dp_size)
chunk = Chunk(
diff --git a/colossalai/tensor/colo_tensor.py b/colossalai/tensor/colo_tensor.py
index bbed8847abbc..40eefc3ec5d1 100644
--- a/colossalai/tensor/colo_tensor.py
+++ b/colossalai/tensor/colo_tensor.py
@@ -138,6 +138,15 @@ def set_process_group(self, pg: ProcessGroup):
def get_tp_world_size(self) -> int:
return self.process_group.tp_world_size()
+ def get_dp_world_size(self) -> int:
+ """get_dp_world_size
+ get the dp world size of the tensor.
+
+ Returns:
+ int: dp world size
+ """
+ return self.process_group.dp_world_size()
+
def set_dist_spec(self, dist_spec: _DistSpec):
"""set_dist_spec
set dist spec and change the payloads.
From 73d3e4d3091a6f87cd5153194989713dd2429210 Mon Sep 17 00:00:00 2001
From: Frank Lee
Date: Mon, 27 Mar 2023 10:24:14 +0800
Subject: [PATCH 11/26] [booster] implemented the torch ddd + resnet example
(#3232)
* [booster] implemented the torch ddd + resnet example
* polish code
---
colossalai/booster/__init__.py | 1 -
colossalai/booster/booster.py | 57 ++++--
colossalai/booster/environment_table.py | 18 --
colossalai/booster/interface/__init__.py | 3 -
.../booster/mixed_precision/fp16_torch.py | 12 +-
.../mixed_precision/mixed_precision_base.py | 2 +-
colossalai/booster/plugin/plugin_base.py | 22 ++-
colossalai/booster/plugin/torch_ddp_plugin.py | 61 +++++-
.../checkpoint_io/checkpoint_io_base.py | 179 ++++++++++++++++--
.../checkpoint_io/general_checkpoint_io.py | 73 +++----
colossalai/cluster/dist_coordinator.py | 40 +++-
colossalai/interface/__init__.py | 4 +
colossalai/interface/model.py | 25 +++
.../{booster => }/interface/optimizer.py | 0
examples/tutorial/new_api/README.md | 5 +
examples/tutorial/new_api/test_ci.sh | 2 +
.../tutorial/new_api/torch_ddp/.gitignore | 4 +
examples/tutorial/new_api/torch_ddp/README.md | 44 +++++
examples/tutorial/new_api/torch_ddp/eval.py | 48 +++++
examples/tutorial/new_api/torch_ddp/train.py | 128 +++++++++++++
.../test_plugin/test_torch_ddp_plugin.py | 4 +-
.../test_general_checkpoint_io.py | 4 +-
22 files changed, 608 insertions(+), 128 deletions(-)
delete mode 100644 colossalai/booster/environment_table.py
delete mode 100644 colossalai/booster/interface/__init__.py
create mode 100644 colossalai/interface/__init__.py
create mode 100644 colossalai/interface/model.py
rename colossalai/{booster => }/interface/optimizer.py (100%)
create mode 100644 examples/tutorial/new_api/README.md
create mode 100644 examples/tutorial/new_api/test_ci.sh
create mode 100644 examples/tutorial/new_api/torch_ddp/.gitignore
create mode 100644 examples/tutorial/new_api/torch_ddp/README.md
create mode 100644 examples/tutorial/new_api/torch_ddp/eval.py
create mode 100644 examples/tutorial/new_api/torch_ddp/train.py
diff --git a/colossalai/booster/__init__.py b/colossalai/booster/__init__.py
index 3b3f45bb0fe2..841054a9c672 100644
--- a/colossalai/booster/__init__.py
+++ b/colossalai/booster/__init__.py
@@ -1,4 +1,3 @@
from .accelerator import Accelerator
from .booster import Booster
-from .environment_table import EnvironmentTable
from .plugin import Plugin
diff --git a/colossalai/booster/booster.py b/colossalai/booster/booster.py
index 230c65a9e0a1..1ad9f7f20ec1 100644
--- a/colossalai/booster/booster.py
+++ b/colossalai/booster/booster.py
@@ -8,6 +8,8 @@
from torch.optim.lr_scheduler import _LRScheduler as LRScheduler
from torch.utils.data import DataLoader
+from colossalai.checkpoint_io import GeneralCheckpointIO
+
from .accelerator import Accelerator
from .mixed_precision import MixedPrecision, mixed_precision_factory
from .plugin import Plugin
@@ -61,19 +63,21 @@ def __init__(self,
self.plugin = plugin
# set accelerator
- if self.plugin and self.plugin.control_device:
+ if self.plugin and self.plugin.control_device():
self.accelerator = None
warnings.warn('The plugin will control the accelerator, so the device argument will be ignored.')
else:
self.accelerator = Accelerator(device)
# set precision
- if mixed_precision is None or (self.plugin and self.plugin.control_precision):
- self.mixed_precision = None
+ if self.plugin and self.plugin.control_precision():
warnings.warn('The plugin will control the precision, so the mixed_precision argument will be ignored.')
+ self.mixed_precision = None
+ elif mixed_precision is None:
+ self.mixed_precision = None
else:
# validate and set precision
- if isinstance(MixedPrecision, str):
+ if isinstance(mixed_precision, str):
# the user will take the default arguments for amp training
self.mixed_precision = mixed_precision_factory(mixed_precision)
elif isinstance(mixed_precision, MixedPrecision):
@@ -84,6 +88,11 @@ def __init__(self,
f'Expected the argument mixed_precision to be a string or an instance of Precision, but got {type(mixed_precision)}.'
)
+ if self.plugin is not None and self.plugin.control_checkpoint_io():
+ self.checkpoint_io = self.plugin.get_checkpoint_io()
+ else:
+ self.checkpoint_io = GeneralCheckpointIO()
+
def boost(
self,
model: nn.Module,
@@ -109,12 +118,13 @@ def boost(
model, optimizer, criterion, dataloader, lr_scheduler = self.plugin.configure(
model, optimizer, criterion, dataloader, lr_scheduler)
- if self.plugin and not self.plugin.control_device:
+ if self.plugin and not self.plugin.control_device():
# transform model for accelerator
model = self.accelerator.configure(model)
- if self.mixed_precision and self.plugin and not self.plugin.control_precision:
+ if self.mixed_precision and (self.plugin is None or self.plugin and not self.plugin.control_precision()):
# transform model for mixed precision
+ # when mixed_precision is specified and the plugin is not given or does not control the precision
model, optimizer, criterion = self.mixed_precision.configure(model, optimizer, criterion)
return model, optimizer, criterion, dataloader, lr_scheduler
@@ -140,18 +150,25 @@ def no_sync(self, model: nn.Module) -> contextmanager:
assert self.plugin.support_no_sync, f'The plugin {self.plugin.__class__.__name__} does not support no_sync.'
return self.plugin.no_sync(model)
- def save(self,
- obj: Union[nn.Module, Optimizer, LRScheduler],
- path_like: str,
- plan: str = 'torch',
- **kwargs) -> None:
- # TODO: implement this method
- pass
+ def load_model(self, model: nn.Module, checkpoint: str, strict: bool = True):
+ self.checkpoint_io.load_model(model, checkpoint, strict)
- def load(self,
- obj: Union[nn.Module, Optimizer, LRScheduler],
- path_like: str,
- plan: str = 'torch',
- **kwargs) -> None:
- # TODO: implement this method
- pass
+ def save_model(self,
+ model: nn.Module,
+ checkpoint: str,
+ prefix: str = None,
+ shard: bool = False,
+ size_per_shard: int = 1024):
+ self.checkpoint_io.save_model(model, checkpoint, prefix, shard, size_per_shard)
+
+ def load_optimizer(self, optimizer: Optimizer, checkpoint: str):
+ self.checkpoint_io.load_optimizer(optimizer, checkpoint)
+
+ def save_optimizer(self, optimizer: Optimizer, checkpoint: str, shard: bool = False, size_per_shard: int = 1024):
+ self.checkpoint_io.save_optimizer(optimizer, checkpoint, shard, size_per_shard)
+
+ def save_lr_scheduler(self, lr_scheduler: LRScheduler, checkpoint: str):
+ self.checkpoint_io.save_lr_scheduler(lr_scheduler, checkpoint)
+
+ def load_lr_scheduler(self, lr_scheduler: LRScheduler, checkpoint: str):
+ self.checkpoint_io.load_lr_scheduler(lr_scheduler, checkpoint)
diff --git a/colossalai/booster/environment_table.py b/colossalai/booster/environment_table.py
deleted file mode 100644
index 4b16f120c1b9..000000000000
--- a/colossalai/booster/environment_table.py
+++ /dev/null
@@ -1,18 +0,0 @@
-from typing import List
-
-__all__ = ['EnvironmentTable']
-
-
-class EnvironmentTable:
-
- def __init__(self, intra_op_world_sizes: List[int]):
- # TODO: implement this method
- pass
-
- @property
- def is_master(self) -> bool:
- # TODO: implement this method
- pass
-
- # TODO: implement more utility methods as given in
- # https://github.com/hpcaitech/ColossalAI/issues/3051
diff --git a/colossalai/booster/interface/__init__.py b/colossalai/booster/interface/__init__.py
deleted file mode 100644
index 8892a13e1814..000000000000
--- a/colossalai/booster/interface/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from .optimizer import OptimizerWrapper
-
-__all__ = ['OptimizerWrapper']
diff --git a/colossalai/booster/mixed_precision/fp16_torch.py b/colossalai/booster/mixed_precision/fp16_torch.py
index 054f78d2e226..9999aa5e0eb4 100644
--- a/colossalai/booster/mixed_precision/fp16_torch.py
+++ b/colossalai/booster/mixed_precision/fp16_torch.py
@@ -5,7 +5,8 @@
from torch import Tensor
from torch.optim import Optimizer
-from ..interface import OptimizerWrapper
+from colossalai.interface import ModelWrapper, OptimizerWrapper
+
from .mixed_precision_base import MixedPrecision
__all__ = ['FP16_Torch_MixedPrecision', 'TorchAMPOptimizer', 'TorchAMPModule']
@@ -45,7 +46,9 @@ def backward(self, loss: Tensor, *args, **kwargs) -> None:
scaled_loss.backward(*args, **kwargs)
def step(self, *args, **kwargs) -> Optional[float]:
- return self.scaler.step(self.optim, *args, **kwargs)
+ out = self.scaler.step(self.optim, *args, **kwargs)
+ self.scaler.update()
+ return out
def scale_loss(self, loss: Tensor) -> Tensor:
return self.scaler.scale(loss)
@@ -67,7 +70,7 @@ def clip_grad_by_norm(self,
super().clip_grad_by_norm(max_norm, norm_type, error_if_nonfinite, *args, **kwargs)
-class TorchAMPModule(nn.Module):
+class TorchAMPModule(ModelWrapper):
"""
Module wrapper for mixed precision training in FP16 using PyTorch AMP.
@@ -76,8 +79,7 @@ class TorchAMPModule(nn.Module):
"""
def __init__(self, module: nn.Module):
- super().__init__()
- self.module = module
+ super().__init__(module)
def forward(self, *args, **kwargs):
with torch.cuda.amp.autocast():
diff --git a/colossalai/booster/mixed_precision/mixed_precision_base.py b/colossalai/booster/mixed_precision/mixed_precision_base.py
index d1e8acc82cc6..2490e9811ccf 100644
--- a/colossalai/booster/mixed_precision/mixed_precision_base.py
+++ b/colossalai/booster/mixed_precision/mixed_precision_base.py
@@ -4,7 +4,7 @@
import torch.nn as nn
from torch.optim import Optimizer
-from ..interface import OptimizerWrapper
+from colossalai.interface import OptimizerWrapper
class MixedPrecision(ABC):
diff --git a/colossalai/booster/plugin/plugin_base.py b/colossalai/booster/plugin/plugin_base.py
index 3c347cb4252d..7a222022c1b2 100644
--- a/colossalai/booster/plugin/plugin_base.py
+++ b/colossalai/booster/plugin/plugin_base.py
@@ -6,34 +6,30 @@
from torch.optim.lr_scheduler import _LRScheduler as LRScheduler
from torch.utils.data import DataLoader
-from colossalai.booster.interface import OptimizerWrapper
+from colossalai.checkpoint_io import CheckpointIO
+from colossalai.interface import OptimizerWrapper
__all__ = ['Plugin']
class Plugin(ABC):
- @property
@abstractmethod
def supported_devices(self) -> List[str]:
pass
- @property
@abstractmethod
def supported_precisions(self) -> List[str]:
pass
- @property
@abstractmethod
def control_precision(self) -> bool:
pass
- @property
@abstractmethod
def control_device(self) -> bool:
pass
- @property
@abstractmethod
def support_no_sync(self) -> bool:
pass
@@ -49,3 +45,17 @@ def configure(
) -> Tuple[Union[nn.Module, OptimizerWrapper, LRScheduler, DataLoader]]:
# implement this method
pass
+
+ @abstractmethod
+ def control_checkpoint_io(self) -> bool:
+ """
+ Whether the plugin controls the checkpoint io
+ """
+ pass
+
+ @abstractmethod
+ def get_checkpoint_io(self) -> CheckpointIO:
+ """
+ Get checkpoint io object for this plugin, only invoked when control_checkpoint_io is True.
+ """
+ pass
diff --git a/colossalai/booster/plugin/torch_ddp_plugin.py b/colossalai/booster/plugin/torch_ddp_plugin.py
index 07d6be8c748d..d7f3d22d93cc 100644
--- a/colossalai/booster/plugin/torch_ddp_plugin.py
+++ b/colossalai/booster/plugin/torch_ddp_plugin.py
@@ -11,13 +11,61 @@
from torch.utils.data import DataLoader
from torch.utils.data.distributed import DistributedSampler
-from colossalai.booster.interface import OptimizerWrapper
+from colossalai.checkpoint_io import CheckpointIO, GeneralCheckpointIO
+from colossalai.cluster import DistCoordinator
+from colossalai.interface import ModelWrapper, OptimizerWrapper
from .plugin_base import Plugin
__all__ = ['TorchDDPPlugin']
+class TorchDDPCheckpointIO(GeneralCheckpointIO):
+
+ def __init__(self) -> None:
+ super().__init__()
+ self.coordinator = DistCoordinator()
+
+ def load_unsharded_model(self, model: nn.Module, checkpoint: str, strict: bool = True):
+ """
+ Load model from checkpoint with automatic unwrapping.
+ """
+ # the model should be unwrapped in self.load_model via ModelWrapper.unwrap
+ return super().load_unsharded_model(model, checkpoint, strict=strict)
+
+ def save_unsharded_model(self, model: nn.Module, checkpoint: str):
+ """
+ Save model to checkpoint but only on master process.
+ """
+        # the model should be unwrapped in self.save_model via ModelWrapper.unwrap
+ if self.coordinator.is_master():
+ super().save_unsharded_model(model, checkpoint)
+
+ def save_unsharded_optimizer(self, optimizer: Optimizer, checkpoint: str):
+ """
+ Save optimizer to checkpoint but only on master process.
+ """
+ if self.coordinator.is_master():
+ super().save_unsharded_optimizer(optimizer, checkpoint)
+
+ def save_lr_scheduler(self, lr_scheduler: LRScheduler, checkpoint: str):
+ """
+        Save lr scheduler to checkpoint but only on master process.
+ """
+ if self.coordinator.is_master():
+ super().save_lr_scheduler(lr_scheduler, checkpoint)
+
+
+class TorchDDPModel(ModelWrapper):
+
+ def __init__(self, module: nn.Module, *args, **kwargs) -> None:
+ super().__init__(module)
+ self.module = DDP(module, *args, **kwargs)
+
+ def unwrap(self):
+ return self.module.module
+
+
class TorchDDPPlugin(Plugin):
"""
Plugin for PyTorch DDP.
@@ -138,10 +186,19 @@ def configure(
# cast model to cuda
model = model.cuda()
+ # convert model to sync bn
+ model = nn.SyncBatchNorm.convert_sync_batchnorm(model, None)
+
# wrap the model with PyTorch DDP
- model = DDP(model, **self.ddp_kwargs)
+ model = TorchDDPModel(model, **self.ddp_kwargs)
if not isinstance(optimizer, OptimizerWrapper):
optimizer = OptimizerWrapper(optimizer)
return model, optimizer, criterion, dataloader, lr_scheduler
+
+ def control_checkpoint_io(self) -> bool:
+ return True
+
+ def get_checkpoint_io(self) -> CheckpointIO:
+ return TorchDDPCheckpointIO()
diff --git a/colossalai/checkpoint_io/checkpoint_io_base.py b/colossalai/checkpoint_io/checkpoint_io_base.py
index 00a65424bece..d6eef7a96cdc 100644
--- a/colossalai/checkpoint_io/checkpoint_io_base.py
+++ b/colossalai/checkpoint_io/checkpoint_io_base.py
@@ -1,13 +1,15 @@
import json
from abc import ABC, abstractmethod
from pathlib import Path
-from typing import Any
+from typing import Any, Union
import torch
import torch.nn as nn
from torch.optim import Optimizer
from torch.optim.lr_scheduler import _LRScheduler as LRScheduler
+from colossalai.interface import ModelWrapper
+
__all__ = ['CheckpointIO', 'ShardCheckpointIndexFile']
@@ -37,15 +39,15 @@ class CheckpointIO(ABC):
>>>
>>> # save optimizer to checkpoint
>>> checkpoint_io.save_optimizer(optimizer, 'optimizer.pt')
-
"""
# ======================================
- # Abstract methods for implementation
+ # Public methods
# ======================================
-
- @abstractmethod
- def load_model(self, model: nn.Module, checkpoint: str, strict: bool = True):
+ def load_model(self,
+ model: Union[nn.Module, ModelWrapper],
+ checkpoint: str,
+ strict: bool = True) -> Union[nn.Module, ModelWrapper]:
"""
Load model from checkpoint.
@@ -59,14 +61,26 @@ def load_model(self, model: nn.Module, checkpoint: str, strict: bool = True):
strict (bool): whether to strictly enforce that the param name in
the checkpoint match the keys returned by this module's.
"""
- pass
+ ckpt_path = Path(checkpoint)
+ is_sharded = self.is_sharded_checkpoint(ckpt_path)
+
+ origin_model = model
+
+ if isinstance(model, ModelWrapper):
+ model = model.unwrap()
+
+ if is_sharded:
+ self.load_sharded_model(model, ckpt_path, strict)
+ else:
+ self.load_unsharded_model(model, ckpt_path, strict)
+
+ return origin_model
- @abstractmethod
def save_model(self,
- model: nn.Module,
+ model: Union[nn.Module, ModelWrapper],
checkpoint: str,
- prefix: str = None,
shard: bool = False,
+ prefix: str = None,
size_per_shard: int = 1024):
"""
Save model to checkpoint.
@@ -83,17 +97,24 @@ def save_model(self,
Args:
model (nn.Module): model to be saved.
- checkpoint: checkpoint path. The checkpoint path can be :
+ checkpoint (str): checkpoint path. The checkpoint path can be :
1. a file path, e.g. 'model.pt'
2. a directory path to save the sharded checkpoint, e.g. './checkpoints/' when shard = True.
- shard: whether to shard the checkpoint. Default: False. If set to True, the checkpoint will be sharded into
+ shard (bool): whether to shard the checkpoint. Default: False. If set to True, the checkpoint will be sharded into
multiple files. The model shards will be specificed by a `model.index.json` file. When shard = True, please ensure
that the checkpoint path is a directory path instead of a file path.
- size_per_shard (int): size per shard in MB. Default: 1024. This value is only used when shard is set to True.
+ prefix (str): prefix for the model checkpoint file name when shard=True. Default: None.
+ size_per_shard (int): size per shard in MB. Default: 1024. This value is only used when shard = True.
"""
- pass
- @abstractmethod
+ if isinstance(model, ModelWrapper):
+ model = model.unwrap()
+
+ if shard:
+ self.save_sharded_model(model, checkpoint, prefix, size_per_shard)
+ else:
+ self.save_unsharded_model(model, checkpoint)
+
def load_optimizer(self, optimizer: Optimizer, checkpoint: str):
"""
Load optimizer from checkpoint.
@@ -102,19 +123,139 @@ def load_optimizer(self, optimizer: Optimizer, checkpoint: str):
optimizer (Optimizer): optimizer to be loaded.
checkpoint (str): checkpoint path. This value is made compatiblity with the model checkpoints in the
"""
- pass
+ ckpt_path = Path(checkpoint)
+ is_sharded = self.is_sharded_checkpoint(ckpt_path)
- @abstractmethod
- def save_optimizer(self, optimizer: Optimizer, checkpoint: str, shard: bool = False, size_per_shard: int = 1024):
+ if is_sharded:
+ self.load_sharded_optimizer(optimizer, ckpt_path)
+ else:
+ self.load_unsharded_optimizer(optimizer, ckpt_path)
+
+ def save_optimizer(self,
+ optimizer: Optimizer,
+ checkpoint: str,
+ shard: bool = False,
+ prefix: str = None,
+ size_per_shard: int = 1024):
"""
Save optimizer to checkpoint.
Args:
optimizer (Optimizer): optimizer to be saved.
- checkpoint: checkpoint path. The checkpoint path can be :
+ checkpoint (str): checkpoint path. The checkpoint path can be :
1. a file path, e.g. 'model.pt'
2. a path to a json file which defines the index to the sharded checkpoint for the optimizer
3. a path to a folder containing a unique .index.json file for sharded checkpoint
+ shard (bool): whether to shard the checkpoint. Default: False. If set to True, the checkpoint will be sharded into
+                multiple files. The optimizer shards will be specified by an `optimizer.index.json` file.
+ prefix (str): prefix for the optimizer checkpoint when shard = True. Default: None.
+ size_per_shard (int): size per shard in MB. Default: 1024. This value is only used when shard is set to True.
+ """
+ if shard:
+ self.save_sharded_optimizer(optimizer, checkpoint, prefix, size_per_shard)
+ else:
+ self.save_unsharded_optimizer(optimizer, checkpoint)
+
+ # ========================================================
+ # Abstract methods for model loading/saving implementation
+ # ========================================================
+ @abstractmethod
+ def load_sharded_model(self, model: nn.Module, checkpoint: Path, strict: bool):
+ """
+ Load model from sharded checkpoint.
+
+ Args:
+ model (nn.Module): model to be loaded.
+            checkpoint (str): checkpoint path. It should be a path to the .index.json file or a path to a directory which contains a .index.json file.
+ """
+ pass
+
+ @abstractmethod
+ def load_unsharded_model(self, model: nn.Module, checkpoint: Path, strict: bool):
+ """
+ Load model from unsharded checkpoint.
+
+ Args:
+ model (nn.Module): model to be loaded.
+ checkpoint (str): checkpoint path. It should be a single file path pointing to a model weight binary.
+ strict (bool): whether to strictly enforce that the param name in
+ the checkpoint match the keys returned by this module's.
+ """
+ pass
+
+ @abstractmethod
+ def save_sharded_model(self, model: nn.Module, checkpoint: Path, prefix: str, size_per_shard: int):
+ """
+ Save model to sharded checkpoint.
+
+ Args:
+ model (nn.Module): model to be saved.
+ checkpoint (Path): checkpoint path. It should be a directory path.
+ prefix (str): prefix for the model checkpoint.
+ size_per_shard (int): size per shard in MB.
+ """
+ pass
+
+ @abstractmethod
+ def save_unsharded_model(self, model: nn.Module, checkpoint: Path):
+ """
+ Save model to unsharded checkpoint.
+
+ Args:
+ model (nn.Module): model to be saved.
+ checkpoint (Path): checkpoint path. It should be a single file path pointing to a model weight binary.
+ """
+ pass
+
+ # ========================================================
+ # Abstract methods for optimizer loading/saving implementation
+ # ========================================================
+
+ @abstractmethod
+ def load_sharded_optimizer(self, optimizer: Optimizer, checkpoint: Path, prefix: str, size_per_shard: int):
+ """
+ Load optimizer from sharded checkpoint.
+
+ Args:
+ optimizer (Optimizer): optimizer to be loaded.
+            checkpoint (str): checkpoint path. It should be a path to the .index.json file or a path to a directory which contains a .index.json file.
+ prefix (str): prefix for the optimizer checkpoint.
+ size_per_shard (int): size per shard in MB.
+ """
+ pass
+
+ @abstractmethod
+ def load_unsharded_optimizer(self, optimizer: Optimizer, checkpoint: Path):
+ """
+ Load optimizer from unsharded checkpoint.
+
+ Args:
+ optimizer (Optimizer): optimizer to be loaded.
+            checkpoint (str): checkpoint path. It should be a single file path pointing to an optimizer state binary.
+ """
+ pass
+
+ @abstractmethod
+ def save_sharded_optimizer(self, optimizer: Optimizer, checkpoint: Path, prefix: str, size_per_shard: int):
+ """
+ Save optimizer to sharded checkpoint.
+
+ Args:
+ optimizer (Optimizer): optimizer to be saved.
+ checkpoint (Path): checkpoint path. It should be a directory path.
+ prefix (str): prefix for the optimizer checkpoint.
+ size_per_shard (int): size per shard in MB.
+ """
+ pass
+
+ @abstractmethod
+ def save_unsharded_optimizer(self, optimizer: Optimizer, checkpoint: Path):
+ """
+ Save optimizer to unsharded checkpoint.
+
+ Args:
+ optimizer (Optimizer): optimizer to be saved.
+            checkpoint (str): checkpoint path. It should be a single file path pointing to an optimizer state binary.
"""
pass
diff --git a/colossalai/checkpoint_io/general_checkpoint_io.py b/colossalai/checkpoint_io/general_checkpoint_io.py
index 0a3636655530..cfabcfa5589f 100644
--- a/colossalai/checkpoint_io/general_checkpoint_io.py
+++ b/colossalai/checkpoint_io/general_checkpoint_io.py
@@ -10,57 +10,36 @@
class GeneralCheckpointIO(CheckpointIO):
- def load_model(self, model: nn.Module, checkpoint: str, strict: bool = True):
- checkpoint = Path(checkpoint)
- is_sharded = self.is_sharded_checkpoint(checkpoint)
+ def load_sharded_model(self, model: nn.Module, checkpoint: Path, strict: bool):
+ index_file_path = self.get_sharded_checkpoint_index_file(checkpoint)
- if not is_sharded:
- checkpoint = self.load_state_dict(checkpoint)
- model.load_state_dict(checkpoint, strict=strict)
- else:
- # find the index file
- checkpoint_path = Path(checkpoint)
- index_file_path = self.get_sharded_checkpoint_index_file(checkpoint_path)
+ # iterate over the shard checkpoint files
+ # and load each
+ shard_files = self.get_checkpoint_shard_filenames(index_file_path)
+ for shard_file in shard_files:
+ shard_checkpoint = self.load_state_dict(shard_file)
+ model.load_state_dict(shard_checkpoint, strict=strict)
- # iterate over the shard checkpoint files
- # and load each
- shard_files = self.get_checkpoint_shard_filenames(index_file_path)
- for shard_file in shard_files:
- shard_checkpoint = self.load_state_dict(shard_file)
- model.load_state_dict(shard_checkpoint, strict=strict)
+ def load_unsharded_model(self, model: nn.Module, checkpoint: Path, strict: bool):
+ checkpoint = self.load_state_dict(str(checkpoint))
+ model.load_state_dict(checkpoint, strict=strict)
- return model
+ def save_sharded_model(self, model: nn.Module, checkpoint: Path, prefix: str, size_per_shard: int):
+ # TODO(FrankLeeeee): implement this method as it can be supported by Huggingface model
+ raise NotImplementedError("Sharded model checkpoint is not supported yet.")
- def save_model(self,
- model: nn.Module,
- checkpoint: str,
- prefix: str = None,
- shard: bool = False,
- size_per_shard: int = 1024):
- checkpoint = Path(checkpoint)
- if shard:
- # TODO(FrankLeeeee): implement checkpoint saving to sharded checkpoint
- raise NotImplementedError("Not implemented yet")
- else:
- self.save_checkpoint(model.state_dict(), checkpoint)
+ def save_unsharded_model(self, model: nn.Module, checkpoint: Path):
+ self.save_checkpoint(model.state_dict(), checkpoint)
- def load_optimizer(self, optimizer: Optimizer, checkpoint: str):
- checkpoint = Path(checkpoint)
- is_sharded = self.is_sharded_checkpoint(checkpoint)
+ def load_sharded_optimizer(self, optimizer: Optimizer, checkpoint: Path, prefix: str, size_per_shard: int):
+ raise NotImplementedError("Sharded optimizer checkpoint is not supported yet.")
- if not is_sharded:
- checkpoint = self.load_state_dict(checkpoint)
- optimizer.load_state_dict(checkpoint)
- else:
- # TODO(FrankLeeeee): implement checkpoint loading from sharded checkpoint
- # This is not an urgent feature, so we can leave it for later
- # let's implement this when we test large-scale models
- pass
- return optimizer
+ def load_unsharded_optimizer(self, optimizer: Optimizer, checkpoint: Path):
+ checkpoint = self.load_state_dict(checkpoint)
+ optimizer.load_state_dict(checkpoint)
- def save_optimizer(self, optimizer: Optimizer, checkpoint: str, shard: bool = False, size_per_shard: int = 1024):
- if shard:
- # TODO(FrankLeeeee): implement checkpoint saving to sharded checkpoint
- pass
- else:
- self.save_checkpoint(optimizer.state_dict(), checkpoint)
+ def save_sharded_optimizer(self, optimizer: Optimizer, checkpoint: Path, prefix: str, size_per_shard: int):
+ raise NotImplementedError("Sharded optimizer checkpoint is not supported yet.")
+
+ def save_unsharded_optimizer(self, optimizer: Optimizer, checkpoint: Path):
+ self.save_checkpoint(optimizer.state_dict(), checkpoint)
diff --git a/colossalai/cluster/dist_coordinator.py b/colossalai/cluster/dist_coordinator.py
index 6b48faf5b720..99dde810e112 100644
--- a/colossalai/cluster/dist_coordinator.py
+++ b/colossalai/cluster/dist_coordinator.py
@@ -1,3 +1,4 @@
+import functools
import os
from contextlib import contextmanager
@@ -141,12 +142,12 @@ def priority_execution(self, executor_rank: int = 0, process_group: ProcessGroup
should_block = rank != executor_rank
if should_block:
- dist.barrier(group=process_group)
+ self.block_all(process_group)
yield
if not should_block:
- dist.barrier(group=process_group)
+ self.block_all(process_group)
def destroy(self, process_group: ProcessGroup = None):
"""
@@ -156,3 +157,38 @@ def destroy(self, process_group: ProcessGroup = None):
process_group (ProcessGroup, optional): process group to destroy. Defaults to None, which refers to the default process group.
"""
dist.destroy_process_group(process_group)
+
+ def block_all(self, process_group: ProcessGroup = None):
+ """
+ Block all processes in the process group.
+
+ Args:
+ process_group (ProcessGroup, optional): process group to block. Defaults to None, which refers to the default process group.
+ """
+ dist.barrier(group=process_group)
+
+ def on_master_only(self, process_group: ProcessGroup = None):
+ """
+ A function wrapper that only executes the wrapped function on the master process (rank 0).
+
+ Example:
+ >>> from colossalai.cluster import DistCoordinator
+ >>> dist_coordinator = DistCoordinator()
+ >>>
+ >>> @dist_coordinator.on_master_only()
+ >>> def print_on_master(msg):
+ >>> print(msg)
+ """
+ is_master = self.is_master(process_group)
+
+    # define an inner function
+ def decorator(func):
+
+ @functools.wraps(func)
+ def wrapper(*args, **kwargs):
+ if is_master:
+ return func(*args, **kwargs)
+
+ return wrapper
+
+ return decorator
diff --git a/colossalai/interface/__init__.py b/colossalai/interface/__init__.py
new file mode 100644
index 000000000000..8c658e375146
--- /dev/null
+++ b/colossalai/interface/__init__.py
@@ -0,0 +1,4 @@
+from .model import ModelWrapper
+from .optimizer import OptimizerWrapper
+
+__all__ = ['OptimizerWrapper', 'ModelWrapper']
diff --git a/colossalai/interface/model.py b/colossalai/interface/model.py
new file mode 100644
index 000000000000..a067d7671ce7
--- /dev/null
+++ b/colossalai/interface/model.py
@@ -0,0 +1,25 @@
+import torch.nn as nn
+
+
+class ModelWrapper(nn.Module):
+ """
+ A wrapper class to define the common interface used by booster.
+
+ Args:
+ module (nn.Module): The model to be wrapped.
+ """
+
+ def __init__(self, module: nn.Module) -> None:
+ super().__init__()
+ self.module = module
+
+ def unwrap(self):
+ """
+ Unwrap the model to return the original model for checkpoint saving/loading.
+ """
+ if isinstance(self.module, ModelWrapper):
+ return self.module.unwrap()
+ return self.module
+
+ def forward(self, *args, **kwargs):
+ return self.module(*args, **kwargs)
diff --git a/colossalai/booster/interface/optimizer.py b/colossalai/interface/optimizer.py
similarity index 100%
rename from colossalai/booster/interface/optimizer.py
rename to colossalai/interface/optimizer.py
diff --git a/examples/tutorial/new_api/README.md b/examples/tutorial/new_api/README.md
new file mode 100644
index 000000000000..cec88f41caf1
--- /dev/null
+++ b/examples/tutorial/new_api/README.md
@@ -0,0 +1,5 @@
+# New API Features
+
+**The New API is not officially released yet.**
+
+This folder contains some of the demonstrations of the new API. The new API is still under intensive development and will be released soon.
diff --git a/examples/tutorial/new_api/test_ci.sh b/examples/tutorial/new_api/test_ci.sh
new file mode 100644
index 000000000000..8b4475e9f147
--- /dev/null
+++ b/examples/tutorial/new_api/test_ci.sh
@@ -0,0 +1,2 @@
+#!/usr/bin/env
+echo "The CI integration will be completed when the API is stable"
diff --git a/examples/tutorial/new_api/torch_ddp/.gitignore b/examples/tutorial/new_api/torch_ddp/.gitignore
new file mode 100644
index 000000000000..a79cf5236c08
--- /dev/null
+++ b/examples/tutorial/new_api/torch_ddp/.gitignore
@@ -0,0 +1,4 @@
+data
+checkpoint
+ckpt-fp16
+ckpt-fp32
diff --git a/examples/tutorial/new_api/torch_ddp/README.md b/examples/tutorial/new_api/torch_ddp/README.md
new file mode 100644
index 000000000000..62d5a083d0a1
--- /dev/null
+++ b/examples/tutorial/new_api/torch_ddp/README.md
@@ -0,0 +1,44 @@
+# Distributed Data Parallel
+
+## 🚀 Quick Start
+
+This example provides a training script and an evaluation script. The training script provides an example of training ResNet on the CIFAR10 dataset from scratch.
+
+- Training Arguments
+  - `-r`, `--resume`: resume from checkpoint file path
+ - `-c`, `--checkpoint`: the folder to save checkpoints
+ - `-i`, `--interval`: epoch interval to save checkpoints
+ - `-f`, `--fp16`: use fp16
+
+- Eval Arguments
+ - `-e`, `--epoch`: select the epoch to evaluate
+ - `-c`, `--checkpoint`: the folder where checkpoints are found
+
+
+### Train
+
+```bash
+# train with torch DDP with fp32
+colossalai run --nproc_per_node 2 train.py -c ./ckpt-fp32
+
+# train with torch DDP with mixed precision training
+colossalai run --nproc_per_node 2 train.py -c ./ckpt-fp16 --fp16
+```
+
+### Eval
+
+```bash
+# evaluate fp32 training
+python eval.py -c ./ckpt-fp32 -e 80
+
+# evaluate fp16 mixed precision training
+python eval.py -c ./ckpt-fp16 -e 80
+```
+
+Expected accuracy performance will be:
+
+| Model | Single-GPU Baseline FP32 | Booster DDP with FP32 | Booster DDP with FP16 |
+| --------- | ------------------------ | --------------------- | --------------------- |
+| ResNet-18 | 85.85% | 85.03% | 85.12% |
+
+**Note: the baseline is adapted from the [script](https://pytorch-tutorial.readthedocs.io/en/latest/tutorial/chapter03_intermediate/3_2_2_cnn_resnet_cifar10/) to use `torchvision.models.resnet18`**
diff --git a/examples/tutorial/new_api/torch_ddp/eval.py b/examples/tutorial/new_api/torch_ddp/eval.py
new file mode 100644
index 000000000000..657708ec3ff2
--- /dev/null
+++ b/examples/tutorial/new_api/torch_ddp/eval.py
@@ -0,0 +1,48 @@
+import argparse
+
+import torch
+import torch.nn as nn
+import torchvision
+import torchvision.transforms as transforms
+
+# ==============================
+# Parse Arguments
+# ==============================
+parser = argparse.ArgumentParser()
+parser.add_argument('-e', '--epoch', type=int, default=80, help="resume from the epoch's checkpoint")
+parser.add_argument('-c', '--checkpoint', type=str, default='./checkpoint', help="checkpoint directory")
+args = parser.parse_args()
+
+# ==============================
+# Prepare Test Dataset
+# ==============================
+# CIFAR-10 dataset
+test_dataset = torchvision.datasets.CIFAR10(root='./data/', train=False, transform=transforms.ToTensor())
+
+# Data loader
+test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=128, shuffle=False)
+
+# ==============================
+# Load Model
+# ==============================
+model = torchvision.models.resnet18(num_classes=10).cuda()
+state_dict = torch.load(f'{args.checkpoint}/model_{args.epoch}.pth')
+model.load_state_dict(state_dict)
+
+# ==============================
+# Run Evaluation
+# ==============================
+model.eval()
+
+with torch.no_grad():
+ correct = 0
+ total = 0
+ for images, labels in test_loader:
+ images = images.cuda()
+ labels = labels.cuda()
+ outputs = model(images)
+ _, predicted = torch.max(outputs.data, 1)
+ total += labels.size(0)
+ correct += (predicted == labels).sum().item()
+
+ print('Accuracy of the model on the test images: {} %'.format(100 * correct / total))
diff --git a/examples/tutorial/new_api/torch_ddp/train.py b/examples/tutorial/new_api/torch_ddp/train.py
new file mode 100644
index 000000000000..4741c3151cbb
--- /dev/null
+++ b/examples/tutorial/new_api/torch_ddp/train.py
@@ -0,0 +1,128 @@
+import argparse
+from pathlib import Path
+
+import torch
+import torch.nn as nn
+import torchvision
+import torchvision.transforms as transforms
+from torch.optim.lr_scheduler import MultiStepLR
+
+import colossalai
+from colossalai.booster import Booster
+from colossalai.booster.plugin import TorchDDPPlugin
+from colossalai.cluster import DistCoordinator
+
+# ==============================
+# Parse Arguments
+# ==============================
+parser = argparse.ArgumentParser()
+parser.add_argument('-r', '--resume', type=int, default=-1, help="resume from the epoch's checkpoint")
+parser.add_argument('-c', '--checkpoint', type=str, default='./checkpoint', help="checkpoint directory")
+parser.add_argument('-i', '--interval', type=int, default=5, help="interval of saving checkpoint")
+parser.add_argument('-f', '--fp16', action='store_true', help="use fp16")
+args = parser.parse_args()
+
+# ==============================
+# Prepare Checkpoint Directory
+# ==============================
+Path(args.checkpoint).mkdir(parents=True, exist_ok=True)
+
+# ==============================
+# Prepare Hyperparameters
+# ==============================
+NUM_EPOCHS = 80
+LEARNING_RATE = 1e-3
+START_EPOCH = args.resume if args.resume >= 0 else 0
+
+# ==============================
+# Launch Distributed Environment
+# ==============================
+colossalai.launch_from_torch(config={})
+coordinator = DistCoordinator()
+
+# update the learning rate with linear scaling
+# old_gpu_num / old_lr = new_gpu_num / new_lr
+LEARNING_RATE *= coordinator.world_size
+
+# ==============================
+# Prepare Booster
+# ==============================
+plugin = TorchDDPPlugin()
+if args.fp16:
+ booster = Booster(mixed_precision='fp16', plugin=plugin)
+else:
+ booster = Booster(plugin=plugin)
+
+# ==============================
+# Prepare Train Dataset
+# ==============================
+transform = transforms.Compose(
+ [transforms.Pad(4),
+ transforms.RandomHorizontalFlip(),
+ transforms.RandomCrop(32),
+ transforms.ToTensor()])
+
+# CIFAR-10 dataset
+with coordinator.priority_execution():
+ train_dataset = torchvision.datasets.CIFAR10(root='./data/', train=True, transform=transform, download=True)
+
+# ====================================
+# Prepare model, optimizer, criterion
+# ====================================
+# resnet18
+model = torchvision.models.resnet18(num_classes=10).cuda()
+
+# Loss and optimizer
+criterion = nn.CrossEntropyLoss()
+optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
+
+# lr scheduler
+lr_scheduler = MultiStepLR(optimizer, milestones=[20, 40, 60, 80], gamma=1 / 3)
+
+# prepare dataloader with torch ddp plugin
+train_dataloader = plugin.prepare_train_dataloader(train_dataset, batch_size=100, shuffle=True)
+
+# ==============================
+# Resume from checkpoint
+# ==============================
+if args.resume >= 0:
+ booster.load_model(model, f'{args.checkpoint}/model_{args.resume}.pth')
+ booster.load_optimizer(optimizer, f'{args.checkpoint}/optimizer_{args.resume}.pth')
+ booster.load_lr_scheduler(lr_scheduler, f'{args.checkpoint}/lr_scheduler_{args.resume}.pth')
+
+# ==============================
+# Boost with ColossalAI
+# ==============================
+model, optimizer, criterion, train_dataloader, lr_scheduler = booster.boost(model, optimizer, criterion,
+ train_dataloader, lr_scheduler)
+
+# ==============================
+# Train model
+# ==============================
+total_step = len(train_dataloader)
+
+for epoch in range(START_EPOCH, NUM_EPOCHS):
+ for i, (images, labels) in enumerate(train_dataloader):
+ images = images.cuda()
+ labels = labels.cuda()
+
+ # Forward pass
+ outputs = model(images)
+ loss = criterion(outputs, labels)
+
+ # Backward and optimize
+ optimizer.zero_grad()
+ booster.backward(loss, optimizer)
+ optimizer.step()
+
+ if (i + 1) % 100 == 0:
+ print("Epoch [{}/{}], Step [{}/{}] Loss: {:.4f}".format(epoch + 1, NUM_EPOCHS, i + 1, total_step,
+ loss.item()))
+
+ lr_scheduler.step()
+
+    # save checkpoint every `args.interval` epochs
+ if (epoch + 1) % args.interval == 0:
+ booster.save_model(model, f'{args.checkpoint}/model_{epoch + 1}.pth')
+ booster.save_optimizer(optimizer, f'{args.checkpoint}/optimizer_{epoch + 1}.pth')
+ booster.save_lr_scheduler(lr_scheduler, f'{args.checkpoint}/lr_scheduler_{epoch + 1}.pth')
diff --git a/tests/test_booster/test_plugin/test_torch_ddp_plugin.py b/tests/test_booster/test_plugin/test_torch_ddp_plugin.py
index 58aef54c4967..2dcc5a5bba27 100644
--- a/tests/test_booster/test_plugin/test_torch_ddp_plugin.py
+++ b/tests/test_booster/test_plugin/test_torch_ddp_plugin.py
@@ -8,8 +8,8 @@
import colossalai
from colossalai.booster import Booster
-from colossalai.booster.interface import OptimizerWrapper
from colossalai.booster.plugin import TorchDDPPlugin
+from colossalai.interface import OptimizerWrapper
from colossalai.testing import rerun_if_address_is_in_use
from colossalai.utils import free_port
from tests.kit.model_zoo import model_zoo
@@ -34,7 +34,7 @@ def check_torch_ddp_plugin():
model, optimizer, criterion, _, _ = booster.boost(model, optimizer, criterion)
- assert isinstance(model, DDP)
+ assert isinstance(model.module, DDP)
assert isinstance(optimizer, OptimizerWrapper)
output = model(**data)
diff --git a/tests/test_checkpoint_io/test_general_checkpoint_io.py b/tests/test_checkpoint_io/test_general_checkpoint_io.py
index 48376aaa88bf..f9f0e03c4fa1 100644
--- a/tests/test_checkpoint_io/test_general_checkpoint_io.py
+++ b/tests/test_checkpoint_io/test_general_checkpoint_io.py
@@ -42,8 +42,8 @@ def test_unsharded_checkpoint():
new_optimizer = Adam(new_model.parameters(), lr=0.001)
# load the model and optimizer
- new_model = ckpt_io.load_model(new_model, model_ckpt_tempfile.name)
- new_optimizer = ckpt_io.load_optimizer(new_optimizer, optimizer_ckpt_tempfile.name)
+ ckpt_io.load_model(new_model, model_ckpt_tempfile.name)
+ ckpt_io.load_optimizer(new_optimizer, optimizer_ckpt_tempfile.name)
# do recursive check for the optimizer state dict
# if the value is a dict, compare its values
From 02b058032db1b3baa2a74fb2313de5c09830272d Mon Sep 17 00:00:00 2001
From: HELSON
Date: Mon, 27 Mar 2023 15:22:17 +0800
Subject: [PATCH 12/26] [fx] meta registration compatibility (#3253)
* [fx] meta registration compatibility
* fix error
---
colossalai/fx/_compatibility.py | 18 ++++--
...ta_registrations.py => _meta_regist_12.py} | 0
colossalai/fx/_meta_regist_13.py | 57 +++++++++++++++++++
3 files changed, 71 insertions(+), 4 deletions(-)
rename colossalai/fx/{_meta_registrations.py => _meta_regist_12.py} (100%)
create mode 100644 colossalai/fx/_meta_regist_13.py
diff --git a/colossalai/fx/_compatibility.py b/colossalai/fx/_compatibility.py
index 126403270301..6caad920d2ae 100644
--- a/colossalai/fx/_compatibility.py
+++ b/colossalai/fx/_compatibility.py
@@ -2,11 +2,21 @@
import torch
-try:
- from . import _meta_registrations
- META_COMPATIBILITY = True
-except:
+TORCH_MAJOR = int(torch.__version__.split('.')[0])
+TORCH_MINOR = int(torch.__version__.split('.')[1])
+
+if TORCH_MAJOR == 1 and TORCH_MINOR < 12:
META_COMPATIBILITY = False
+elif TORCH_MAJOR == 1 and TORCH_MINOR == 12:
+ from . import _meta_regist_12
+ META_COMPATIBILITY = True
+elif TORCH_MAJOR == 1 and TORCH_MINOR == 13:
+ from . import _meta_regist_13
+ META_COMPATIBILITY = True
+elif TORCH_MAJOR == 2:
+ from . import _meta_regist_13
+ META_COMPATIBILITY = True
+ raise UserWarning("Colossalai is not tested with torch2.0 yet!!!")
def compatibility(is_backward_compatible: bool = False) -> Callable:
diff --git a/colossalai/fx/_meta_registrations.py b/colossalai/fx/_meta_regist_12.py
similarity index 100%
rename from colossalai/fx/_meta_registrations.py
rename to colossalai/fx/_meta_regist_12.py
diff --git a/colossalai/fx/_meta_regist_13.py b/colossalai/fx/_meta_regist_13.py
new file mode 100644
index 000000000000..6caa87c449ab
--- /dev/null
+++ b/colossalai/fx/_meta_regist_13.py
@@ -0,0 +1,57 @@
+import torch
+from torch._meta_registrations import register_meta
+from torch._prims_common import check
+
+aten = torch.ops.aten
+
+
+# since we fix the torch version to 1.13.1, we have to add unimplemented meta ops
+# all these functions are from here https://github.com/pytorch/pytorch/blob/master/torch/_meta_registrations.py
+@register_meta([aten.convolution_backward.default])
+def meta_convolution_backward(
+ grad_output_,
+ input_,
+ weight_,
+ bias_sizes_opt,
+ stride,
+ padding,
+ dilation,
+ transposed,
+ output_padding,
+ groups,
+ output_mask,
+):
+ # High level logic taken from slow_conv3d_backward_cpu which should
+ # be representative of all convolution_backward impls
+ backend_grad_input = None
+ backend_grad_weight = None
+ backend_grad_bias = None
+
+ if output_mask[0]:
+ backend_grad_input = grad_output_.new_empty(input_.size())
+ if output_mask[1]:
+ backend_grad_weight = grad_output_.new_empty(weight_.size())
+ if output_mask[2]:
+ backend_grad_bias = grad_output_.new_empty(bias_sizes_opt)
+
+ return (backend_grad_input, backend_grad_weight, backend_grad_bias)
+
+
+@register_meta(aten._adaptive_avg_pool2d_backward.default)
+def meta__adaptive_avg_pool2d_backward(grad_out, self):
+ ndim = grad_out.ndim
+ for i in range(1, ndim):
+ check(
+ grad_out.size(i) > 0,
+ lambda: f"adaptive_avg_pool2d_backward(): Expected grad_output to have non-zero \
+ size for non-batch dimensions, {grad_out.shape} with dimension {i} being empty",
+ )
+ check(
+ ndim == 3 or ndim == 4,
+ lambda: f"adaptive_avg_pool2d_backward(): Expected 3D or 4D tensor, but got {self.shape}",
+ )
+ check(
+ self.dtype == grad_out.dtype,
+ lambda: f"expected dtype {self.dtype} for `grad_output` but got dtype {grad_out.dtype}",
+ )
+ return self.new_empty(self.shape)
From fd6add575d87728dbf27f682495fbbbe46c4f5bb Mon Sep 17 00:00:00 2001
From: YuliangLiu0306 <72588413+YuliangLiu0306@users.noreply.github.com>
Date: Tue, 28 Mar 2023 10:40:07 +0800
Subject: [PATCH 13/26] [examples] polish AutoParallel readme (#3270)
---
examples/tutorial/auto_parallel/README.md | 1 +
examples/tutorial/auto_parallel/requirements.txt | 4 ++--
2 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/examples/tutorial/auto_parallel/README.md b/examples/tutorial/auto_parallel/README.md
index bb014b9067b2..6a12e0dd5a48 100644
--- a/examples/tutorial/auto_parallel/README.md
+++ b/examples/tutorial/auto_parallel/README.md
@@ -45,6 +45,7 @@ colossalai run --nproc_per_node 4 auto_parallel_with_resnet.py
You should expect to the log like this. This log shows the edge cost on the computation graph as well as the sharding strategy for an operation. For example, `layer1_0_conv1 S01R = S01R X RR` means that the first dimension (batch) of the input and output is sharded while the weight is not sharded (S means sharded, R means replicated), simply equivalent to data parallel training.

+**Note: This experimental feature has been tested on torch 1.12.1 and transformer 4.22.2. If you are using other versions, you may need to modify the code to make it work.**
### Auto-Checkpoint Tutorial
diff --git a/examples/tutorial/auto_parallel/requirements.txt b/examples/tutorial/auto_parallel/requirements.txt
index ce89e7c80070..cc61362ba6f9 100644
--- a/examples/tutorial/auto_parallel/requirements.txt
+++ b/examples/tutorial/auto_parallel/requirements.txt
@@ -1,7 +1,7 @@
-torch
+torch==1.12.1
colossalai
titans
pulp
datasets
matplotlib
-transformers
+transformers==4.22.1
From b0ce5a10326912961f0bc07cbbd250bab7b9c399 Mon Sep 17 00:00:00 2001
From: Fazzie-Maqianli <55798671+Fazziekey@users.noreply.github.com>
Date: Tue, 28 Mar 2023 20:25:36 +0800
Subject: [PATCH 14/26] [Coati] first commit (#3283)
---
applications/Chat/.gitignore | 146 +++++++++
applications/Chat/LICENSE | 202 +++++++++++++
applications/Chat/README.md | 269 +++++++++++++++++
applications/Chat/assets/data-collect.png | Bin 0 -> 410749 bytes
applications/Chat/assets/logo_coati.png | Bin 0 -> 655366 bytes
applications/Chat/assets/stage-3.jpeg | Bin 0 -> 378888 bytes
applications/Chat/benchmarks/README.md | 94 ++++++
.../Chat/benchmarks/benchmark_gpt_dummy.py | 184 ++++++++++++
.../Chat/benchmarks/benchmark_gpt_dummy.sh | 45 +++
.../benchmarks/benchmark_opt_lora_dummy.py | 179 +++++++++++
applications/Chat/coati/__init__.py | 0
applications/Chat/coati/dataset/__init__.py | 9 +
.../Chat/coati/dataset/prompt_dataset.py | 46 +++
.../Chat/coati/dataset/reward_dataset.py | 112 +++++++
.../Chat/coati/dataset/sft_dataset.py | 169 +++++++++++
applications/Chat/coati/dataset/utils.py | 22 ++
.../Chat/coati/experience_maker/__init__.py | 4 +
.../Chat/coati/experience_maker/base.py | 77 +++++
.../Chat/coati/experience_maker/naive.py | 35 +++
applications/Chat/coati/models/__init__.py | 4 +
.../Chat/coati/models/base/__init__.py | 6 +
applications/Chat/coati/models/base/actor.py | 65 ++++
applications/Chat/coati/models/base/critic.py | 54 ++++
applications/Chat/coati/models/base/lm.py | 30 ++
.../Chat/coati/models/base/reward_model.py | 41 +++
.../Chat/coati/models/bloom/__init__.py | 6 +
.../Chat/coati/models/bloom/bloom_actor.py | 35 +++
.../Chat/coati/models/bloom/bloom_critic.py | 38 +++
.../Chat/coati/models/bloom/bloom_lm.py | 35 +++
.../Chat/coati/models/bloom/bloom_rm.py | 37 +++
.../Chat/coati/models/deberta/__init__.py | 4 +
.../coati/models/deberta/deberta_critic.py | 36 +++
.../Chat/coati/models/deberta/deberta_rm.py | 37 +++
applications/Chat/coati/models/generation.py | 146 +++++++++
.../Chat/coati/models/generation_utils.py | 92 ++++++
.../Chat/coati/models/gpt/__init__.py | 6 +
.../Chat/coati/models/gpt/gpt_actor.py | 35 +++
.../Chat/coati/models/gpt/gpt_critic.py | 37 +++
applications/Chat/coati/models/gpt/gpt_lm.py | 35 +++
applications/Chat/coati/models/gpt/gpt_rm.py | 39 +++
.../Chat/coati/models/llama/__init__.py | 6 +
.../Chat/coati/models/llama/llama_actor.py | 38 +++
.../Chat/coati/models/llama/llama_critic.py | 42 +++
.../Chat/coati/models/llama/llama_lm.py | 40 +++
.../Chat/coati/models/llama/llama_rm.py | 40 +++
applications/Chat/coati/models/lora.py | 129 ++++++++
applications/Chat/coati/models/loss.py | 117 ++++++++
.../Chat/coati/models/opt/__init__.py | 6 +
.../Chat/coati/models/opt/opt_actor.py | 35 +++
.../Chat/coati/models/opt/opt_critic.py | 38 +++
applications/Chat/coati/models/opt/opt_lm.py | 35 +++
applications/Chat/coati/models/opt/opt_rm.py | 38 +++
applications/Chat/coati/models/utils.py | 92 ++++++
.../Chat/coati/replay_buffer/__init__.py | 4 +
applications/Chat/coati/replay_buffer/base.py | 43 +++
.../Chat/coati/replay_buffer/naive.py | 57 ++++
.../Chat/coati/replay_buffer/utils.py | 73 +++++
applications/Chat/coati/trainer/__init__.py | 6 +
applications/Chat/coati/trainer/base.py | 168 +++++++++++
.../Chat/coati/trainer/callbacks/__init__.py | 5 +
.../Chat/coati/trainer/callbacks/base.py | 39 +++
.../callbacks/performance_evaluator.py | 133 ++++++++
.../trainer/callbacks/save_checkpoint.py | 75 +++++
applications/Chat/coati/trainer/ppo.py | 135 +++++++++
applications/Chat/coati/trainer/rm.py | 135 +++++++++
applications/Chat/coati/trainer/sft.py | 158 ++++++++++
.../Chat/coati/trainer/strategies/__init__.py | 6 +
.../Chat/coati/trainer/strategies/base.py | 136 +++++++++
.../coati/trainer/strategies/colossalai.py | 213 +++++++++++++
.../Chat/coati/trainer/strategies/ddp.py | 93 ++++++
.../Chat/coati/trainer/strategies/naive.py | 55 ++++
.../Chat/coati/trainer/strategies/sampler.py | 32 ++
applications/Chat/coati/trainer/utils.py | 5 +
applications/Chat/coati/utils/__init__.py | 3 +
.../Chat/coati/utils/tokenizer_utils.py | 78 +++++
applications/Chat/examples/README.md | 141 +++++++++
applications/Chat/examples/inference.py | 59 ++++
applications/Chat/examples/requirements.txt | 2 +
applications/Chat/examples/test_ci.sh | 97 ++++++
applications/Chat/examples/train_dummy.py | 148 +++++++++
applications/Chat/examples/train_dummy.sh | 18 ++
applications/Chat/examples/train_prompts.py | 199 ++++++++++++
applications/Chat/examples/train_prompts.sh | 18 ++
.../Chat/examples/train_reward_model.py | 160 ++++++++++
applications/Chat/examples/train_rm.sh | 8 +
applications/Chat/examples/train_sft.py | 184 ++++++++++++
applications/Chat/examples/train_sft.sh | 12 +
applications/Chat/inference/README.md | 111 +++++++
applications/Chat/inference/benchmark.py | 132 ++++++++
.../Chat/inference/llama_gptq/__init__.py | 5 +
.../Chat/inference/llama_gptq/loader.py | 41 +++
.../Chat/inference/llama_gptq/model_utils.py | 13 +
.../Chat/inference/llama_gptq/quant.py | 283 ++++++++++++++++++
applications/Chat/inference/locustfile.py | 27 ++
applications/Chat/inference/requirements.txt | 10 +
applications/Chat/inference/server.py | 165 ++++++++++
.../Chat/inference/tests/test_chat_prompt.py | 56 ++++
applications/Chat/inference/utils.py | 179 +++++++++++
applications/Chat/pytest.ini | 6 +
applications/Chat/requirements-test.txt | 1 +
applications/Chat/requirements.txt | 13 +
applications/Chat/setup.py | 41 +++
applications/Chat/tests/__init__.py | 0
applications/Chat/tests/test_checkpoint.py | 98 ++++++
applications/Chat/tests/test_data.py | 122 ++++++++
applications/Chat/version.txt | 1 +
106 files changed, 7069 insertions(+)
create mode 100644 applications/Chat/.gitignore
create mode 100644 applications/Chat/LICENSE
create mode 100644 applications/Chat/README.md
create mode 100644 applications/Chat/assets/data-collect.png
create mode 100644 applications/Chat/assets/logo_coati.png
create mode 100644 applications/Chat/assets/stage-3.jpeg
create mode 100644 applications/Chat/benchmarks/README.md
create mode 100644 applications/Chat/benchmarks/benchmark_gpt_dummy.py
create mode 100755 applications/Chat/benchmarks/benchmark_gpt_dummy.sh
create mode 100644 applications/Chat/benchmarks/benchmark_opt_lora_dummy.py
create mode 100644 applications/Chat/coati/__init__.py
create mode 100644 applications/Chat/coati/dataset/__init__.py
create mode 100644 applications/Chat/coati/dataset/prompt_dataset.py
create mode 100644 applications/Chat/coati/dataset/reward_dataset.py
create mode 100644 applications/Chat/coati/dataset/sft_dataset.py
create mode 100644 applications/Chat/coati/dataset/utils.py
create mode 100644 applications/Chat/coati/experience_maker/__init__.py
create mode 100644 applications/Chat/coati/experience_maker/base.py
create mode 100644 applications/Chat/coati/experience_maker/naive.py
create mode 100644 applications/Chat/coati/models/__init__.py
create mode 100644 applications/Chat/coati/models/base/__init__.py
create mode 100644 applications/Chat/coati/models/base/actor.py
create mode 100644 applications/Chat/coati/models/base/critic.py
create mode 100644 applications/Chat/coati/models/base/lm.py
create mode 100644 applications/Chat/coati/models/base/reward_model.py
create mode 100644 applications/Chat/coati/models/bloom/__init__.py
create mode 100644 applications/Chat/coati/models/bloom/bloom_actor.py
create mode 100644 applications/Chat/coati/models/bloom/bloom_critic.py
create mode 100644 applications/Chat/coati/models/bloom/bloom_lm.py
create mode 100644 applications/Chat/coati/models/bloom/bloom_rm.py
create mode 100644 applications/Chat/coati/models/deberta/__init__.py
create mode 100644 applications/Chat/coati/models/deberta/deberta_critic.py
create mode 100644 applications/Chat/coati/models/deberta/deberta_rm.py
create mode 100644 applications/Chat/coati/models/generation.py
create mode 100644 applications/Chat/coati/models/generation_utils.py
create mode 100644 applications/Chat/coati/models/gpt/__init__.py
create mode 100644 applications/Chat/coati/models/gpt/gpt_actor.py
create mode 100644 applications/Chat/coati/models/gpt/gpt_critic.py
create mode 100644 applications/Chat/coati/models/gpt/gpt_lm.py
create mode 100644 applications/Chat/coati/models/gpt/gpt_rm.py
create mode 100644 applications/Chat/coati/models/llama/__init__.py
create mode 100644 applications/Chat/coati/models/llama/llama_actor.py
create mode 100644 applications/Chat/coati/models/llama/llama_critic.py
create mode 100644 applications/Chat/coati/models/llama/llama_lm.py
create mode 100644 applications/Chat/coati/models/llama/llama_rm.py
create mode 100644 applications/Chat/coati/models/lora.py
create mode 100644 applications/Chat/coati/models/loss.py
create mode 100644 applications/Chat/coati/models/opt/__init__.py
create mode 100644 applications/Chat/coati/models/opt/opt_actor.py
create mode 100644 applications/Chat/coati/models/opt/opt_critic.py
create mode 100644 applications/Chat/coati/models/opt/opt_lm.py
create mode 100644 applications/Chat/coati/models/opt/opt_rm.py
create mode 100644 applications/Chat/coati/models/utils.py
create mode 100644 applications/Chat/coati/replay_buffer/__init__.py
create mode 100644 applications/Chat/coati/replay_buffer/base.py
create mode 100644 applications/Chat/coati/replay_buffer/naive.py
create mode 100644 applications/Chat/coati/replay_buffer/utils.py
create mode 100644 applications/Chat/coati/trainer/__init__.py
create mode 100644 applications/Chat/coati/trainer/base.py
create mode 100644 applications/Chat/coati/trainer/callbacks/__init__.py
create mode 100644 applications/Chat/coati/trainer/callbacks/base.py
create mode 100644 applications/Chat/coati/trainer/callbacks/performance_evaluator.py
create mode 100644 applications/Chat/coati/trainer/callbacks/save_checkpoint.py
create mode 100644 applications/Chat/coati/trainer/ppo.py
create mode 100644 applications/Chat/coati/trainer/rm.py
create mode 100644 applications/Chat/coati/trainer/sft.py
create mode 100644 applications/Chat/coati/trainer/strategies/__init__.py
create mode 100644 applications/Chat/coati/trainer/strategies/base.py
create mode 100644 applications/Chat/coati/trainer/strategies/colossalai.py
create mode 100644 applications/Chat/coati/trainer/strategies/ddp.py
create mode 100644 applications/Chat/coati/trainer/strategies/naive.py
create mode 100644 applications/Chat/coati/trainer/strategies/sampler.py
create mode 100644 applications/Chat/coati/trainer/utils.py
create mode 100644 applications/Chat/coati/utils/__init__.py
create mode 100644 applications/Chat/coati/utils/tokenizer_utils.py
create mode 100644 applications/Chat/examples/README.md
create mode 100644 applications/Chat/examples/inference.py
create mode 100644 applications/Chat/examples/requirements.txt
create mode 100755 applications/Chat/examples/test_ci.sh
create mode 100644 applications/Chat/examples/train_dummy.py
create mode 100755 applications/Chat/examples/train_dummy.sh
create mode 100644 applications/Chat/examples/train_prompts.py
create mode 100755 applications/Chat/examples/train_prompts.sh
create mode 100644 applications/Chat/examples/train_reward_model.py
create mode 100755 applications/Chat/examples/train_rm.sh
create mode 100644 applications/Chat/examples/train_sft.py
create mode 100755 applications/Chat/examples/train_sft.sh
create mode 100644 applications/Chat/inference/README.md
create mode 100644 applications/Chat/inference/benchmark.py
create mode 100644 applications/Chat/inference/llama_gptq/__init__.py
create mode 100644 applications/Chat/inference/llama_gptq/loader.py
create mode 100644 applications/Chat/inference/llama_gptq/model_utils.py
create mode 100644 applications/Chat/inference/llama_gptq/quant.py
create mode 100644 applications/Chat/inference/locustfile.py
create mode 100644 applications/Chat/inference/requirements.txt
create mode 100644 applications/Chat/inference/server.py
create mode 100644 applications/Chat/inference/tests/test_chat_prompt.py
create mode 100644 applications/Chat/inference/utils.py
create mode 100644 applications/Chat/pytest.ini
create mode 100644 applications/Chat/requirements-test.txt
create mode 100644 applications/Chat/requirements.txt
create mode 100644 applications/Chat/setup.py
create mode 100644 applications/Chat/tests/__init__.py
create mode 100644 applications/Chat/tests/test_checkpoint.py
create mode 100644 applications/Chat/tests/test_data.py
create mode 100644 applications/Chat/version.txt
diff --git a/applications/Chat/.gitignore b/applications/Chat/.gitignore
new file mode 100644
index 000000000000..1ec5f53a8b8d
--- /dev/null
+++ b/applications/Chat/.gitignore
@@ -0,0 +1,146 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+docs/.build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# IDE
+.idea/
+.vscode/
+
+# macos
+*.DS_Store
+#data/
+
+docs/.build
+
+# pytorch checkpoint
+*.pt
+
+# wandb log
+example/wandb/
diff --git a/applications/Chat/LICENSE b/applications/Chat/LICENSE
new file mode 100644
index 000000000000..0528c89ea9ec
--- /dev/null
+++ b/applications/Chat/LICENSE
@@ -0,0 +1,202 @@
+Copyright 2021- HPC-AI Technology Inc. All rights reserved.
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright 2021- HPC-AI Technology Inc.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/applications/Chat/README.md b/applications/Chat/README.md
new file mode 100644
index 000000000000..731005ab25c3
--- /dev/null
+++ b/applications/Chat/README.md
@@ -0,0 +1,269 @@
+
+ Coati - ColossalAI Talking Intelligence
+
+
+
+
+## Table of Contents
+
+- [Table of Contents](#table-of-contents)
+- [What is Coati ?](#what-is-coati-)
+- [Online demo](#online-demo)
+- [Install](#install)
+ - [Install the environment](#install-the-environment)
+ - [Install the Transformers](#install-the-transformers)
+- [How to use?](#how-to-use)
+ - [Supervised datasets collection](#supervised-datasets-collection)
+ - [Stage1 - Supervised instructs tuning](#stage1---supervised-instructs-tuning)
+ - [Stage2 - Training reward model](#stage2---training-reward-model)
+ - [Stage3 - Training model with reinforcement learning by human feedback](#stage3---training-model-with-reinforcement-learning-by-human-feedback)
+- [Coati7B examples](#coati7b-examples)
+- [FAQ](#faq)
+ - [How to save/load checkpoint](#how-to-saveload-checkpoint)
+- [The Plan](#the-plan)
+ - [Real-time progress](#real-time-progress)
+- [Invitation to open-source contribution](#invitation-to-open-source-contribution)
+- [Quick Preview](#quick-preview)
+- [Authors](#authors)
+- [Citations](#citations)
+- [Licenses](#licenses)
+---
+## What is Coati ?
+
+Coati is a large language model developed by Colossal-AI, which is also a unified large language model framework that has implemented the following functions
+- Supports comprehensive large-model training acceleration capabilities for ColossalAI, without requiring knowledge of complex distributed training algorithms
+- Supervised datasets collection
+- Supervised instructs fine-tuning
+- Training reward model
+- Reinforcement learning with human feedback
+- Quantization inference
+- Fast model deploying
+- Perfect integration with the Hugging Face ecosystem, high degree of model customization
+
+
+More details can be found in the [blog](https://www.hpc-ai.tech/blog/colossal-ai-chatgpt).
+
+
+
+
+
+## Online demo
+You can experience the performance of Coati7B on this page.
+
+[chat.colossalai.org](https://chat.colossalai.org/)
+
+> Warning: Due to model and dataset size limitations, Coati is just a baby model, Coati7B may output incorrect information and lack the ability for multi-turn dialogue. There is still significant room for improvement.
+## Install
+
+### Install the environment
+
+```shell
+conda create -n coati
+conda activate coati
+pip install .
+```
+
+### Install the Transformers
+Since Hugging Face hasn't officially supported the LLaMA models yet, we forked a branch of Transformers that is compatible with our code
+
+```shell
+git clone https://github.com/hpcaitech/transformers
+cd transformers
+pip install .
+```
+
+## How to use?
+
+### Supervised datasets collection
+
+We collected a 104K bilingual dataset of Chinese and English, and you can find the datasets in this repo
+
+Here is how we collected the data
+
+
+
+
+### Stage1 - Supervised instructs tuning
+
+Stage1 is supervised instructs fine-tuning, which uses the datasets mentioned earlier to fine-tune the model
+
+you can run the `examples/train_sft.sh` to start a supervised instructs fine-tuning
+
+```
+torchrun --standalone --nproc_per_node=4 train_sft.py \
+ --pretrain "/path/to/LLaMa-7B/" \
+ --model 'llama' \
+ --strategy colossalai_zero2 \
+ --log_interval 10 \
+ --save_path /path/to/Coati-7B \
+ --dataset /path/to/data.json \
+ --batch_size 4 \
+ --accimulation_steps 8 \
+ --lr 2e-5 \
+ --max_datasets_size 512 \
+ --max_epochs 1 \
+```
+
+### Stage2 - Training reward model
+
+Stage2 trains a reward model, which obtains corresponding scores by manually ranking different outputs for the same prompt and supervises the training of the reward model
+
+you can run the `examples/train_rm.sh` to start a reward model training
+
+```
+torchrun --standalone --nproc_per_node=4 train_reward_model.py \
+ --pretrain "/path/to/LLaMa-7B/" \
+ --model 'llama' \
+ --strategy colossalai_zero2 \
+    --loss_fn 'log_exp' \
+ --save_path 'rmstatic.pt' \
+```
+
+### Stage3 - Training model with reinforcement learning by human feedback
+
+Stage3 uses reinforcement learning algorithm, which is the most complex part of the training process:
+
+
+
+
+
+you can run the `examples/train_prompts.sh` to start training PPO with human feedback
+
+```
+torchrun --standalone --nproc_per_node=4 train_prompts.py prompts.csv \
+ --pretrain "/path/to/LLaMa-7B/" \
+ --model 'llama' \
+ --strategy colossalai_zero2
+```
+
+
+For more details, see `examples/`.
+
+We also support training the reward model with real-world data. See `examples/train_reward_model.py`.
+
+## Coati7B examples
+
+
+## FAQ
+
+### How to save/load checkpoint
+
+We have integrated the Transformers save and load pipeline, allowing users to freely call Hugging Face's language models and save them in the HF format.
+
+```
+from coati.models.llama import LlamaLM
+from coati.trainer import SFTTrainer
+
+model = LlamaLM(pretrained=args.pretrain)
+tokenizer = AutoTokenizer.from_pretrained(args.pretrain)
+
+trainer = SFTTrainer(model=model,
+ strategy=strategy,
+ optim=optim,
+ train_dataloader=train_dataloader,
+ eval_dataloader=eval_dataloader,
+ batch_size=args.batch_size,
+ max_epochs=args.max_epochs,
+ accimulation_steps = args.accimulation_steps
+)
+
+trainer.fit()
+trainer.save_model(path=args.save_path, only_rank0=True, tokenizer=tokenizer)
+```
+
+## The Plan
+
+- [x] implement PPO fine-tuning
+- [x] implement training reward model
+- [x] support LoRA
+- [x] support inference
+- [x] open source the reward model weight
+- [x] support llama from [facebook](https://github.com/facebookresearch/llama)
+- [x] implement PPO-ptx fine-tuning
+- [ ] integrate with Ray
+- [ ] support more RL paradigms, like Implicit Language Q-Learning (ILQL),
+- [ ] support chain of thought by [langchain](https://github.com/hwchase17/langchain)
+
+### Real-time progress
+You will find our progress in the GitHub project board
+
+[Coati](https://github.com/orgs/hpcaitech/projects/17/views/1)
+
+## Invitation to open-source contribution
+Referring to the successful attempts of [BLOOM](https://bigscience.huggingface.co/) and [Stable Diffusion](https://en.wikipedia.org/wiki/Stable_Diffusion), any and all developers and partners with computing powers, datasets, models are welcome to join and build the Colossal-AI community, making efforts towards the era of big AI models from the starting point of replicating ChatGPT!
+
+You may contact us or participate in the following ways:
+1. [Leaving a Star ⭐](https://github.com/hpcaitech/ColossalAI/stargazers) to show your like and support. Thanks!
+2. Posting an [issue](https://github.com/hpcaitech/ColossalAI/issues/new/choose), or submitting a PR on GitHub follow the guideline in [Contributing](https://github.com/hpcaitech/ColossalAI/blob/main/CONTRIBUTING.md).
+3. Join the Colossal-AI community on
+[Slack](https://join.slack.com/t/colossalaiworkspace/shared_invite/zt-z7b26eeb-CBp7jouvu~r0~lcFzX832w),
+and [WeChat(微信)](https://raw.githubusercontent.com/hpcaitech/public_assets/main/colossalai/img/WeChat.png "qrcode") to share your ideas.
+4. Send your official proposal to email contact@hpcaitech.com
+
+Thanks so much to all of our amazing contributors!
+
+## Quick Preview
+
+
+
+
+- Up to 7.73 times faster for single server training and 1.42 times faster for single-GPU inference
+
+
+
+
+
+- Up to 10.3x growth in model capacity on one GPU
+- A mini demo training process requires only 1.62GB of GPU memory (any consumer-grade GPU)
+
+
+
+
+
+- Increase the capacity of the fine-tuning model by up to 3.7 times on a single GPU
+- Keep a sufficiently high running speed
+
+## Authors
+
+Coati is developed by ColossalAI Team: [Fazzie](https://fazzie-key.cool/about/index.html), [FrankLeeeee](https://github.com/FrankLeeeee), [BlueRum](https://github.com/ht-zhou), [ver217](https://github.com/ver217)
+
+The Phd student [Zangwei Zheng](https://github.com/zhengzangw) and [Xue Fuzhao](https://github.com/XueFuzhao) also contributed a lot to this project.
+
+## Citations
+
+```bibtex
+@article{Hu2021LoRALA,
+ title = {LoRA: Low-Rank Adaptation of Large Language Models},
+ author = {Edward J. Hu and Yelong Shen and Phillip Wallis and Zeyuan Allen-Zhu and Yuanzhi Li and Shean Wang and Weizhu Chen},
+ journal = {ArXiv},
+ year = {2021},
+ volume = {abs/2106.09685}
+}
+
+@article{ouyang2022training,
+ title={Training language models to follow instructions with human feedback},
+ author={Ouyang, Long and Wu, Jeff and Jiang, Xu and Almeida, Diogo and Wainwright, Carroll L and Mishkin, Pamela and Zhang, Chong and Agarwal, Sandhini and Slama, Katarina and Ray, Alex and others},
+ journal={arXiv preprint arXiv:2203.02155},
+ year={2022}
+}
+
+@article{touvron2023llama,
+ title={LLaMA: Open and Efficient Foundation Language Models},
+ author={Touvron, Hugo and Lavril, Thibaut and Izacard, Gautier and Martinet, Xavier and Lachaux, Marie-Anne and Lacroix, Timoth{\'e}e and Rozi{\`e}re, Baptiste and Goyal, Naman and Hambro, Eric and Azhar, Faisal and Rodriguez, Aurelien and Joulin, Armand and Grave, Edouard and Lample, Guillaume},
+ journal={arXiv preprint arXiv:2302.13971},
+ year={2023}
+}
+
+@misc{alpaca,
+ author = {Rohan Taori and Ishaan Gulrajani and Tianyi Zhang and Yann Dubois and Xuechen Li and Carlos Guestrin and Percy Liang and Tatsunori B. Hashimoto },
+ title = {Stanford Alpaca: An Instruction-following LLaMA model},
+ year = {2023},
+ publisher = {GitHub},
+ journal = {GitHub repository},
+ howpublished = {\url{https://github.com/tatsu-lab/stanford_alpaca}},
+}
+```
+
+## Licenses
+
+Coati is licensed under the [Apache 2.0 License](LICENSE).
diff --git a/applications/Chat/assets/data-collect.png b/applications/Chat/assets/data-collect.png
new file mode 100644
index 0000000000000000000000000000000000000000..15eb662201b987dfa1f2de8872271bcc964b10e0
GIT binary patch
literal 410749
zcmX6^by$<{_ufWI3QQ!WR7OZkii}37(cRtMBBN!5bR*qe5=yC~JETD;qeI$}zkR;H
zvFjSw_5Sm`&vVXw?sK2}iGispk`mDo0RRBfH%fAv0018LUl0c&2>at12ATu_;D9%B
z(%SyH`&|KvxdT4z{{*V9T!kh@l~#VpC*rC4Qx-~hT8
zyPnqy)KE;J-YX^iIQil)&GRCp?iBn@nGiJsXGWy=kfnJEExa(Ncx@1$FfkSd?)BCD
z+G5R86?l&0n-UD9d9|Qjdummxe0jFnWG~nB6
zECQ>NLgo)ig)nrJUjSJHA3)YIaMfac)66UiI$CxBwUS5Gm{1N%s8?Q=zeFR~h*}fE
zqe9&<)|9B?v0%GfE;?*`qOqcL7s$Z_!z{8UN3IMrsUK5ny|0>mQFfDwpBai$(|OOW
zi!lfto*>eF@2rh6lJ_8hNP(+vG8L(pF5ROonGS0;-_wE^^;yX3dB411xLbIj`N6a>
z;fCrDV6{K_-j?G@?6s@O*G+(J)sZETI}!COr?7e|(+$kVAw1)Vu;5r!SsB1vj?+}O
zr5$1?eo@-aMf?Un)C7}4nxKzpddD^1*Yja^mrF(Q_zwSw1GFHAocGCqea?_@(gh5h
z5&g4O8q;wT?;uwoQ_u~ZBT%s_%uDbka_NaC`h&9+8IpR>FQ){sc9rD|w*{yk3o-u|
zJR0F<9sySP%GnW|?e3}M{Qwo&d6?ov0aIu>7#e^GHb4r8El^?7>Z!FLo+b%~W86-#
zgXY1qiWWzdr~*+Nt-?(ALt>ib$eOqoGip%WMp~q;*2-IhLB-=2#W()d5GmoR8x{re
zH<-feoAXyI%au1B5LazXqgNV}gfiwgwdNg-LVEj&$ep`e_Q`hzFSZ$wHJUR)kPf{0peH9%2qU
z1F}dk&&b;rUhV3Brj9J@)_{-dO3@`^yeLg^!WJJJJv2{D>P=N@1V;nCU<_7*FN!nq
zIMi@QN0cUQ#bZ&{y2BMO3mNP
zL9dR=xlVn>pPnP!{E~IMFegI#aR<)H73RR$VHmC!nLG68LB(?d2IyZPhan@sj%HEEx*+S6Ape{
zegmsGnE`k>a7uxzZyG*4IV9zj!e%yonp7G}cNoOOh_;L8lv)x04;n4dyy+-v&Ja@s
zvCjKZ2=OTO(yCzGcq+k+QFN00eto)8Qt>565cBUaDqZ(WPgm$H*P1+_O4t+Nf+KLb
zCv%Vpu&3jD#7Fi*3L8je|3wZIk*AzvE?8Sea{~CBZaGLwh}2k-Ga%%bS^dfZ-okTc
zLDr^6Qfp#iHSnLG?`su*eAOy-`!!~`mqPy0E=!%z+MndI2C
zEasZe9P=hb=Gh9GAijXNKv)p4?X5d07dF?UrY`69<}f`0SKG`hq=rnvunYQo-~$v^
zqz)Tkme)1Ha40pF8L{NF5&qKZWSB^nC}
zH~ZTZmV$Y04fMZ)c>@i$v4~UNj!otN-tc3uzBqUt)if!C_)aC11Q&RaPU5L}ngg6n
z+M&!WjI|;Tr7%k){9plHdRGf9o}>fUm$*d&Rouivzo^C?BO$5U$vE
zOJKKJivhUs8sUXg24nb~b;Xg;kOkuS9O+Dj6tgLe1h*McQ$k!-s6k-ARBrDyDK>LG
zOd%oS%W#iUs#G^HN7@4$>|C0dMty@`g|r?Ya5>O7Eo6vGYJ{ZX5|H!A(h+p_eL*%P
z=yKbP%_K009hd#FCD14C^VXgS!KC7}%Z4#(QTlZ+)Z<&EaO3vR^zY{}1QECjtbm-i
z9;Ut2nhYQk&HaybCrcz7rJ^_Iks7(6Of`P6y`cDLB^NOU;;MwLEUvBC{f{cGyP4Gt
zu*R-hmYpMnio;bOg9GM$hUQJQG^5UiuF0h48-FPK?{H`kIn_7g<66;CbtfYEDCgmc
zh|yR^Fwuue-z(P++b7+xn>5dX_%4LZfnv+1gp0uNcv{l!Y$?14U|jofGD853QS~pK
z9dNV@1e`tj3ULZt)5N~HFx0~eP5~xpix9#kG(iXonbTlnn5~XS8zTsN7YpgBbkM_O
zaAIrA1nxqqXb{dfP2Kks?=$Mgp1w41PV1=z`+YbO6!Gy{ArAj2c^<7bX`bB-7J|?v
zc8FApOy-O8J@yeYe=ek4DX53DCVboph_x!A6>g4zB<%dtfMfGy$ySh-?qPb#Sj7Jh
zEzUa`duTn?H`v91d^x($Q`bw*xjtBKgL59d#KvuBQ(|TJm~Dg>ZVSfZzV#D-O5^mn
z2n4XHP@0o@Y!BrnGy=-x!6`IWPuz$H&&W3wnyS~Qs%>(BU9u&iciw>Tphx>}CQsb#
zbajB67IN?5hqp)#l7q5#FX!h7)YIOt;POS4La?j{Se7x4o5Db;ShSw|situ>H(2So
zw(3T|;lxT0BN2~^Vmibk2^M7*I1zt-&Lzmf7IH-3?fR|7un)CLbc@|A>Z2a_tDx4_q8mMx%Bp_|TZ%w8Gm
zY=B^eOhGSc_%Z4B^Q@1>nE?v39bnn_3gZ2?(-1PyW~u0yt*2(Lqae@#9?0?8O<`Gu
zh%MHD+EXJ}n)(EmD^DHvAIh~=oivA~F?9~FJAm6vFf4=%Drj<;am~q=Wl4&4C<~TK
zQuDo`c7MPn1+KXPv=yARAuQ=Z2eqtwQY#`j)qB1cMp0VPj|&eZH2hjg_yh{1$R4_|
zQR8o$Qi;-X(Kf`ydA~NFfw)GE&8m8EnmgirL$zk*#wi86<#4yep`JY>HbIL#=GH;`
z(bawDFr96hRKsSY8;`vJvLV&l;usRrko09ZWCS8bV926iLUB$|Wi~XIE?ufJMpWog
zI>sv$YyQex=vas{vdNABFo!n<{{#dr8hr2Q~SM3+028`0xucCudjy_$eNKhnH#0
zlEhcmS5^{l8`GOV1@UE3Y~r%Z;aI-z#YGgml!Zesfb>P8-)t#4$Urhra^94RB5ggj
za-9XwwTv^k{lOxZg0UY^EC0XF{1}u%niMRG|f8WG>Bb4WAqdUbjVokj+pYa^UR$exus)CMoEvi{WM@ZM_5
zxx)}|TiBesmj`*gj=qs1ZAkmCIy^(Fe$L7Tv*YlvV99jYn_9|`Khq*Jk9IY4f$l@F
zl{Zb3L*$p^ns@Tb9?eXLlWCEny=tY{1suaw3p4k~6FV;XZh7n#vk(QZEoa*ZW9np%
zoZdnI0t|?lh43p>SOp8bTqRiXfvv#~Vtr40p*UfyUUhyNM&YD5mWfpl3)+UBbkQbz
z_gW(gTfriP`cg;=cqqOKPDJ9^MiF+K-XR>Qd78%>UI|n6DK(8j0Sze-HGFJU+i7|z
z?ICCr`jQoU+VvSIUCt2M|4NwraDEDwP(gmkEK^TcQ_X2o_cFboy#Q0+~;27~rp7}FtldQ#nq
zT{#V12{zOYV0R2)SyKd-nV)K4VWce^B@B_01p_CRj$6j+cVAPtrjir}h1Ug=FrU6g
z7)+4i^mB(xz?aY;fpXMFUsvD`=cJ&(xjKT$+X@U^qn;X;OZH!Zv=N+M@LCvUEpBIl
z=A$faSRzrr!!iVd)1~dQsp~G4WaaJQsq1a^Waa)zBQmqqT*S%R!oT4>D(eB7msx+O0stLJ9(8qE;~9$*n};sA(iC^wYBTn)hzlVp!5Gyu!yaP&V_Fs#3UiI``f
zG}oQX7v9tW(9rPPU3gtp!P8UzO7G
zcmAZkN|`Jt*>f2^iKW4;Q@r-ye&N0Rj1rb9Q;MVT?~dY`H69=vAr8OOJ-mV29y2~t
z#?nHPZKR$Mz7n1g{LBcuy1@@|C!C*9klxa_ja3F@2o^$pW+cVY%gV=ouaKj8tS^4r
zCUYbJBS^4K;(v)mRqzhw*K)F6g#|(M(SLN+NEaiCkQle6OT}}a!mhd?vk5v=d!kw%
zfO1*EY8u02{!TreFOn}fzYzi%q+@q~aubQr!QO9kSQD93BCC#U4!mgrg{occQ~+5K
z#jYe$zJCv$N%$TkPXvGhk}PA{mfNWPPR@EQ%_CzP?mNnY_GW7qp{!X21ASxgV`BeU
z$UUoNNim0R&qlH6>_i^d@{N0Rsg+=14n4>jONyW#dejKesFKGR(6rDpgScoi@PLKnd0Iw$3c{?CA2XR)Cd%-qUe)Af=B{)>JYP5OOA?xQ
z4nqhWp#!K(?;vY3pmIhY7$y}dF9|_SZO^ySc8TI{3o~O}cnUDD=wQLYlx)^(s^y|Y
zd@-m%=jZ8`cIf9%m`|L$P$>8)9R1nuMc9)@|Ba-0PO1tztDa&uuQOcrt3&0er^K;rJ{B+v>PijcFa~h3PsHkV(_xyh-P_$_7;{CW$@Q(
z@{jnDV1F2r7zg3_s7hz2Un5|!tEo9Mu>Pf0#fs*h;M)ztsI2Fno
z*RgDRG0pL99~>9#KFjK%n|Ob|6OQ_PWBa}RN()v!dn3}J;tZVQcc^t=8|vv~WVwpCS12g8JkGtU8_8q&x}gB0A7R%2U3e8;njrw5*`rk#z$
zY^ANfSH^1zftbf>Gh-2F9%9+nB(?(cojY-J_*-!+wq*=^*{bf(t2XTkot&-hcU>=o
z1x-cb84sVB*SEc(#`jh+^pkx{kUy>H320KuBkOggdi-;bM{6`4{^1O3eWd^7#bjPd%1kcOXg&tv
z>qFfA-H}Z;-`PeK6%-WoYVuG9lr`PB8zQkmRZiO}tH7llW5~4*0mQ`-S
zii;#0<=Ez-rTDR5CpY-oUA=7K%@mU=ma7uK0nZoTC`bRtj3QA+#+YTrMuG(zWM$K&
zFZ!cxN%f73=3PIBYxD_QHX#*BnhCFIG}oi4sc-9XYkHx0U;Qb_8EbjC3#bALx}Vcc
z5(rZTY(Al~`}5vF0qQyl*PRh7GQ5`W(GdWOtF?DT??sy9y3v{@@TN?Ddp7&R{0|ue
zG?feG0avL`f3K8OPbzLFP`eN&mKFE*!=@B~JDg}t!wm=Y;$%9zffbx2=q?kUdC;bU
z*DAQ0<`7%tu^&lh+skM)vfP#?T`&6O+>_ZmXrn>mA_72I{!@$b@t14obuZg{_y
zgvs;XvUAk)k+6DGQZ3_}u@cz+P55(Bkl0p#HJGqjaHd(XSxaz6k0(A)>VAm*F;A|9
z-`%%J;tZbnc=b4S^)ZzR6;*ZBv9a-q@v-qS>}ULiHubl$%sr&oQHoRFEjH0`Zo^yW
zVId*wO}~H7IqACT_jnw1`s~fuuQqu6xjLthijKm+BEbH=Ep3v1=qZ+6srbx_RFmQO
z9ahmh1uC3d+C!;NygM(3!+OX$c+HhLu%c;g!40-O^&FNV=7{<{ECaX9FTSERz#U%a
z{Ez8V@O`q35)Kq4EP0I-~${;Ye%
z{*{H$?_P>{lykKULt^HgOSQdIqSZJj-JhHbB-UZ7Vqw@PjGNv;c2C&@-Dk2JdWn
z5|@jBwh;M54slav!ZQ0flwOd
zX3XRBz)sF5(TqlKmp`X_R}+YyUBA`GvE-iQmx+t11>}!<_tF$s^n7e615(e$HUpMg
zIt_qY90^Z2BJVwsO%D+>&I+{k4?R(F9z=rFX}f9Fdi5q>e0NStQ`prRBm?*V9aEL7
z=XVAkEbZ^_kDi^4j$*>%i~98QL<{F^35_?wdM3{}5EQv`MG56_B%+kU
z-PCza_puT@|3-Rb-H9IqQdt3`#qrbeos))f#!2&_GPbc*z^Yn;p=u2k9n5?|8!Ovl
z^A}!1iye8eJ^fx{E7Tvhxk$VNza4YSR(%Mg63bTnC~P9@A=D1n-f*<~v0!C&D$esi
z!A_6liAWL`!ZgE*R}JZbkUiz0GVi*wrGc-)Yc}TZgqafg<<>*|06M-ehM$*mhXbSu
zcnWaz=}jfzwkq`av(>dr(ely`rKw^Pa&Qus`PeVefd-9L2l=*5)RME_wy@~T&%hnpO!rhPdmAxK#o3<+er_zto!Be`QYW3AN9r<)Qjz
zY>KoNgC#hE0T-;z^%6BC27uMqC5Bphk8WJz=q7;^_=Bv7CsSU{=ctN^hi{q+csxX9UW4=hs<=u0bO3b?VIj>o_=hf(r`fIU}{e|Ojrgwjj2bl6DgIpaQ
zJzZVjyYhTnwc-B906jSGJo)X+0Pw4RP4`iB<1U+x+}hduD2z+RyRxG-Rwm)R20`bTs!4
zo?ZB==~wK@;*+_Ie?P5vYitEb<9&nLgu)puYq;3Dd#6=1wokV3Rn8_ayr9sU5g0K#}=GjsT#V&`txTlH~^K@HNtEtc*`Jo3sp
zq)n_|?NJL;DY+dMI&ZWh*5m5cuuw4FB0NiNpb)sc?R8`6g``B`_O`!HCbY*J6gD3L
zY6W_PH~pGQe?wg$ib!#J&MD6QfUl~1O+=e>F7`1m!`1dA0l~VI|KIPjOu_!B*`1i2
zKg4N~Z%Zu?EdqlV-jG4rZ?oP&bEaVD+M8#w8zWaUzFJMbrA~R;Zn6!M3qy0`
z%B7z)1KjQH|CEt_Ib8I)@47tfJX$lkT=N^)9J{@}-K3vAI|!(P=7a52CC=;?Z=y;X-@%5<|e$x65AS%TTgPQ3YOuVqBOTkf56Z=
z_wj+}4k=gB4kF`QV1CXEBQVqK7?OuaH%ccYJH}d!cXO~j
zEb-Zs@JwK0JQ(3cQ&<-#_}~%W&Vr&h4Ii`I5d8P2JQAAqog(dmDN_bIh7ypBblktL
z_8l+|^a->a*m}dabE(_k&(n2#VN~C9clG-tIg@1I#_Z0FmX;Qo?!nvf_5$ncmK0MX
z;@{f_JTp{YR~@vbaYM%Yn~eoMVP&<@=u>_^t^v-*FCAUw;tUS72lh41~=C{uyW+9NjCAor0f>t
zI`~Ydiklh0R9a&U7NHgrD#9{HRrmneqLjxCcWU#_z4$gP
zLeUun4W|%Yk4Ky&{bL#<|?>eNH~W=kslO
zO_QC`*DpAT^PnDB4LSqGgN%V!UjLv1KBmIzP2}sQkIlWT`##;Kz+BkJUz7=Bhcke$
z{u=MRTMP{n)##szmAxB+kJkMzG62c9S_y$*w=CPvPf3$;KU7?T27tRWuHkTVy^&yO
zA$gvvOH$}6
z8x_ZE^Z*0`Fmm)0LtUOy$5`G_#;b2i?emxC7w6<>mzQ&Q^fWXySYGcff6(fQ-abJW
zV@WxFw9V#bXlP$wVQ6TmX9xF39?RSexIc7Y7Aq?&vvzi3Du))c
z7Gq+D{-cQP9=BKLIU=s6J-+Crv$M1P8!W5YU)f(7-H+4r?|)mGymA2LokSFP&1OMA
zE;-rT|4SIlISTr9SN{$@G7)^}wKwkuLu;g&Sc7>Tdy&tGYm5GWIOabD3E={UD?4HU
zMsJTT|Kl&qT{{NJGT%YXV
zz74M6aalog#!zB+9166Ar-(s;~
z|G8U$ROz_a_ff`BGGP>0zhd4|$GypzBHd}Pv)5ZG<>Ve(e!+VAu{t~>^8>ziniVc#
zYMyd!@i1`~{Rp$sTS8i2?n#dqPHhj+{y8v}IwPF;o_aE3;ix|6ph{}%U)ey?1nIWK
zHzy<~5#~9ih6&k`E8(h_mG<^l^rH{DXLI5#uBp3&orWB|-L2+;E4QcPmWi!G>y*K4
zBp}UoSp3nx!ziwj#Ap^~R6s^`=bVBNAkH|gz=G5HMI;&cM?5bLk67QBw`|`jCJyQg
zwW&}oS-_KHWK1T7W&M~vn{}%1zRPn9xwh-M!C<`dxw%KRNL!~svOQ)M2i>OJ$J^W6
zyK~jLH8hO0w2LhPGmJTHHC?x#m*>~IeGfXZPsqtge;Ti7R+bxcrUi}Iv=WUfKXb*2
zn=$qL-Nva-((l+>WO}l`iw%NS`gvYchA;lVQKchRb`!Z0K3B7?TzUiS{q<%&uYDa<
zE+-5z0hJ}U=5NOf%MU9NyME(~E^VuKgT){3WFc_A3a(<);pd|xRuWz`CXGJFK>>i2DpeH`TVGRkjt#YZ*Y1uxLIrT*?H+}!1t?oqc-QY&fx3x
z7e*q^`aQN|F1I+rXixPwJe%3yQ&>&Me_9ZhIlJe;vudyU9998OEL%63^|;b@Z&p^&
zBN;r#U;Qse7p)XO=7|UI-eA=p}D{gn2
zQX#vQRZn{Gm>4l?}*7*M8IF$D=oZy
zU_1NvfE%yy@^=(4bjRU~KGf}$zVx{;-1~Ks+DB(i2?C(Qix~pBG*a4^1z$NBVdXys
zT&u>NpLsVqPEM_c&A%I5dgiH0d%i0kW{Q+wicu<)v6=Kpa4aq+vcWH-hl#bQtoJ&<
z<*oZ@UNk-Y2DfnJs2#tBe!)2sT;?~7;45iX(4qJ-E)nE=2(c0R=AIwZ`27ROwNlIA!E^((7dfUCbef8O77n%8r^DRO_3=eL&g=@ZCs>17ap
z<{_J}4;Y|ZZ+eR@jfYFEd*h4}hbxU%r;L6zWq53Nz-W#bS+vB04vi1=7j_n
z#yHb9!a?1K^=fqlkBS%!qIPz6u-rFS+-LVk43%QS6Unow!f*m9(V&am^X;)bp;|YQ
z0cOjN6|c)xzu*I=kOQ+Dk$caPbcrypEu8__P&ntmBwKuBRk%V=q&5$S}!AilcAn2io=$OH<@{@4w
zyn1Mi;K5l0!0Bk4p}%EpmX=_ttNiN5Ab<+fxW*E6!XAv57f0L(SBZ`pxt=xssSgF@
zEx!?Iav)fBHxG78Rs5ql8Gu{hc{XNQyH@CeldxkZ-EO-`7m;!KR-MRxqJ7hh*s!uF
z?+e`5tIpM@%vN^WwS>wj(@K^aYEvUH-bpm~IsVt?KyqvsRC<0TuE2(?I?}QvFI=X5
zZ>Rlq{L`1ieGgTdm3_0&%;t)YWpIisx%s<+k09@hLoVDi)W4}LnGsOtQf#4pFDv~+
z1ing<{+o>*LAyKD{lET@EA{)!4lld04ry_#+||WuSM265HoU(*Tn)O|UksjK^~1LJ
zx7Y%iQLj(IN%La6C_(%;$<2r2hOyZZrMR&Oulr~d)K2BjjGcPExX13_BsK!FuV38O
z@BXkSOpIyP@xIK7l-d3o-0ByhBGKTyI4E_cc>nkE{n4fG(VFUm#|#F}wPW0@27@Xd
zdp0Z+-@DLFlcl0KzffiErNiq2y%0npA6kSxSq|K8+WQT1I4q+_Eil)%7&llkGuZ3O
z>!y2FvHHGN%Y|O?5F{SD3vjnkVR)^XX9m~|^y?ih%p5iFn1i<-KVqCKPI)axt8vZ=
zi2&$mKI?iOxADV*r~vN-*cxO9%&Tx{#oYsSRt^|5yDtsXiJSt{%HLoLvKIeVGd*ba
zs}_$Id_h6113mYtda1#CS3rex$k6LVmzW?AYF3Pr-zVa$3&l8)$}wJ4`$BQTDG$QV
z0!s|(>H_w4SY}cR&w`w6Ql!_)4sYrh(RqI93^GNvubd;E^S8g0S+)V`<~>TpnD9vy
z%v`UcZ}p)eAKz0J2!{d+*dG^i6U4yPxaGZh6A~j;NNGq3&jl%g`Vb^%`~ANds{5Iq
zYrpl|LqDt@AfAu{)_MG?GGyT*I(@>ejY5l^|*}v`jvQUVxOxf
zshpmMs-P5xHpWs}{@^IwBAu8RGwJli>}LtO^_(7Z@%G&O?h7xU$j!>l)%N(+q%~8*
zFoTeoT)#h?$-n(BbyZycDNvKJPq|dBxV;kY{ns-Iyw#9}i`Ywzd_{g~6oK0RPgScw
z`2rR(!5Y&ROgpz-`st}3t9o#y_56#zuH_`oQ}fWn1Vf*%%-AvOt9akI%r1-8nyKd~
zc0G^xJgtRQG#R&lU}CBq*Nc$M)T~uxb@_`9bpJ?bd?=?=-B1Qgr5(uuCE00+$7HJr
zZm-qyWE^+VR6Nr&a+=EZR~8}mJL+zZUlOXLVqnB0+?-|d(XD$jq;#KyFI@JalM^zp
zIcxPM(Ta@oJ}u&rK%8OePYU0aDry~CPjqq4ac5#(XYLMPDNfxyYo4)@@5?+7`Ibb3
z&N!S_QVd~tNdFvV{L%*9^gZW{CjLnIqT5`S_JRy5Wci=E@?
zRT&T7TRt6Noh(-s%NySvmA3&zD5WIeob87?FD%GtGI;>=ZqcMZ;yD}ze@z+iy@jMi
z%nO{d9dc+8IPVPz`FN4iIgN1@t?M)8_Xj<V>-i;-ZM5nYTDs@zOvX{|Mg@SdyeHt-Pea$
zDV^i_3PV_#O=(;GxDXy)4Av`>cQP;CcxC+nwxVfJPXxa
z8KBp0!BJFiqsiuzFXD8^g{&+5fb*fq#@zwMAKyR@^z1Aej5t179iJKi>=Pv0!G2+1
z`Dn}I4tcc8z89lqYDtK7NhxqD6l}yv)4a(w7F+<1-}4b!x)$GhH82sl_RfrFi|pF~
z&gw*X?zAKj7V7j242eq_0_NRY5AzxT6)-j0oc2T_=^lfrCI<3iGi^9@>Tll{d;igh
zUMN_JAK^&N_D69#QG1qoTJzT>SQasego)ASN{cOKdo?+a(?%9>QNwInDD6>_Hp&lJ
zW_7I<4u&$<_-dQ_PnI&~yZyHkrY~MRn;PMDmiTNEo
z_#8;CT$FY%GutFT*8jm@vJhwV&EJ3zEURXq6xg1V8I^vClCQt%u9X%d-y<@Qf2kaM6IpXQF^^{MJkl9hBnRxiw2~?9
zF^2x0&6w&p*c9Ruo2PkDaiggONc~{U%U667N|A@Ft;WncP0_WhD(O}hE3LOQ^l){5
zbHf^6T(ZSv_VbGaeahktF>?L)W#pzBX5S`p-)i0IB>z0-*7K&`bD7u;OVpxD
z=Y*1$7RNTkn|GyEYU!(6?KmeATuKj&l(XU<9(*-2Cs%EuXd-MC#?NNrtMi|NlSLeO
ziwq2?=Zm$yUtPjBBt;KHb9-MG)F+yjhga?_rj)DIGnh1DpFzt6V}OH
z0%P!a2z`ZoZLWZ5jgDQv`q_5B{qgSaXjb#67)Dn$_x$YaF!(IV?B4GFY==F6snKfS
zS1(*5zCzfkM93Vg)oB&A3#<=^kb@~Ww+Wj``6V^>jB
zNWa|+IhQcIznTvU3<5BRkIlaE_}=tOScNQS2a2G_ci#T}34cKWA)$e3o@*<%v3LLp
ziG`T94EL1SurOhz3bP3C0B6XK`rU^5#_}l737rzP`(1B4RB3B+cVxo{e~nw`dPwEP
zuOClMyE$>QC41v!^5gH{a&*?Tm42MJXlWmRDOfgmMmw{{;Rie~Y_ny}<8s!bARPpO
zae)rs!U5E$hNmj#dZNj~Q#yjs5PHTJ{M0RP1;1Xz
zVLg6bqx|0{$GWp}7SdGUqj}$mVGIujUqRggI>*$U#OwEc#P=il_wucV!P}uh$|pBE
zo2g3#5lT=S6RDt`IjLK-9uE)a{VB3g%?#O-4K?G6^Wm2-)q?uG&9*)My}bM9|0L+&qoZqU4LEA5Tt<%$*QTPo$cwxRef
zhM&IM7I7rt#o5VSK-zv!L)37!Q8u1$Ge*!l)w&0rcJtxnk}dBRo4mHhvD2Qs`PngD
zA6M5y?D*%)hL2vPEtKI_(JAI^mA;KXGsiM~QqM>^Q3+gz%fh5qdvW+1C?y3UPnl
zysf)9BFe(4(v<+p*v?W+LN}t~!>-JMWR{50
zdn&5o<;}0<#$JH|l~2`WQB^t{hxUdJiiIbk#8}&S1avvhDPp+Dge~GJn{|>x1<=K8
z7?<#M!SM-4Qk&pq?N;BEc_Dd)t3hUd$kA=MTlejn1OXW~NhKI{+$37fDd1%~fM3NQ
z$$C2O^KRXkX!3K3W;2+RgBpX9)%B^v;E^LTI9Ma$W&zE`(fZ|5b-h-V@K)=2LT$)j
zg8SS1ztJJ>en)Gep}as`mNz29%0<>
zt4*U^p{`H591MedYj}z~Qh}ncJ8G3E3ye)lga!g@pk9wQ6=JGWCP{Nt}Wi!YY5I&?M$n%`Aad&UwXZIqub0^89EWJMN?>v?5b1+8Hg5ARNQK1x_KhsJjchKP!)1k{;jQ&x%TXm8u^S4|I{d$ImhUxb(yc7A?+anR#=
zPLwfp2}uW8tai5coPOy!<-JF`xh=KF2;>!CYg@c|7FX-0n-l5}jRSadsNp#)Vbb7U
z{kruX2To?UyFD0@5cdhm%LTv-j1@NR5B%1;-(i{(*wq`dd@rz>{SLg97I~pTG6WWx
zj%BkcH2I&G7?81S9k4|
zpjae_^CF1oFYus>9hf}u?D0NYy~CHYt%0b+kZV;@JArSSsQToMw#B*PZuMx|30uI|
zT4+bFQLUx?>^mKx4p64
zx6+JHFi#=#yr{7t}4*H4GMWVV)+$vk$rj($cuQ9-j4Z*2X`G
zht|C1%@`T9HGSUp7=~04N9rwBW##=UKzkbm7o58X{1bdceIP_3!`ujP5X@EwepmRw
z!lnX@0GOA&ae}VSaI`Z|DtZsdQ%3SJA3sY;NK+oY@ZKB;4wfEs;$`4$$FJK<yp6@MWP0d!smXLrlH|o-Z^Or*0XDd6EpHPcpjC
z(Ch!=&CHr@UlO{>G?C>D4SZ1dph;{^Mq#}vFj3K0H!EHfw?Ex(fBya3AlYVWDj@8;
zvz^42*k_vzA(MPNZO!7wi@p1%-^neMg7t**m%LhuO^@8iy3f(-CGBIw`7pCvX^W;w
zO$5u}AWHKt3NntP&yDP0dPb^ge3U6wJoZ$BLGx}?V}+R+m&QQ_vq$@4!~Cf#HC>56
zKO`h8YI*HzXomb-Q>f`n7XjhrDwhpmoIII?$hJs=8O=&67mh8#M@qrrxY`dgl2}9>
z@-qtwXNPMNX^S>kDkiIHnYVd!3$&VD)AmvyQ-X
zTRabUcX#W>zu8l0u5@zjID}JnBTlE<($e#0>3mN`VJ{_uzx;8y#3QV)qm|Td2gyAN
z|Mju}!}o<77bqKtHETl=W5s}lfB*h1EM(?1F;VDYg|}HPb(f_US~SqQ4}a86Ffp8>
zDQgfPmtJy~5*#6|*
zn^@M56NMj9NFF*zye+Ir4Xp>NIwS*w4U|CMbPsb)_rK~%nG^kY^HB}4)11Dj0tvts
zibZWMt8Bq?aknK`a!PL2swIcbqF@H2>lei5YzxczF5QC|ktRXhSD#Y=M!m#_h+rap>7H!mJ6=3if34NKk(Hd3wsle*vans|L)
zNpNJ+?D+DJ`(a9B+q#7RukYWtUU%^Ezcx6e*B9<|v~RfotiyG**1Z!Nbhy&;`$Aw#
zug+K^aQ}x>&(i%c{k5o_WU!4ckCS(sQ)xP%BJTwq%xnYr5|k;^SY02lxv$dKx3aSG
z?_Yau?AOw^Ha~xVCAGf=F9LKC-=BHsRFdU5Jj7l~OGz{Hy}j!Ty$gLW$*zK@kM-YW
z<&nP6F#EbqNv-U$X^OqJl1$s^=Pim%f_s4HEd(pUf*&pIBT$onTe84Ga7VRuV@}-B
ziSPd+GIJw_LEPH2-^W*E2o7gtNRag`3+$h=yLGK7uEb3==c>f3V}Q89-Bw^`#&GkUgbJ`?e7li~2
zTd^DWX5z;It!-=Gh&eDQ*b^&p7pT3ZQ{~r`bF%Gap42ku4sx`h)70xL{@r^f@$4N>$FY+0Ejyu1gTV!jC5Rt_c-VU4jQa{_K
z2T>W1&t0({^*Je-r_TvN-M@|ZL0*$i=4ENO4pvOF37&oiT+*=pB~!8;#hJAuB5r`2
z3n+OcaOw6AYw%AF(AEo>>JS$eA)e_LLG84(#r2gkW@->Kw0*-QW7d+}UjQW+UO}G_
zKUpOj6XLk@!zIm!0vCt3DH=7AS+wmQ;bt9Sr^HpOk%#);Fv=yH_s{)aR{gW3HLDKb
z^hmhEH7x~jeiz=>%512j)GYSZPpfcev{DvC?=rO>Dsvq*U4S4PhP5|uPk*a0Mn5fH
z(Rh+lVp!Z?kvPL3N3o?ccx3U*fIbG?EC_eapIpw3e}kQfhy?Yx%kwi55)uH^%x&TidC1jwa`L=a!54~Bx0o+X3@f*l1X9wHh;*KYRX51+Kfcm;@SD1ondg8m{#xae%oEV4=i~6{&++$r1kz_W@|jQs@>~4
zVd6FB--*rlD)+RvK8}Hb=H>-$<%&aj^RWiEUG4e?gKrwE2jVqrTofdOV7A3LcD(?s
z1y0g?`_E2gWnZDRP|Rbm1(F{^%&L|AA=Ix32^VQKeB#UXSC?@Sa|P|1CZg|eek=;w
z9Te!O<~*3GdN}c6Ms2JJd)4;gd@Ox>BmkLZ&BBGG5XO0ej9*s4A-^mb!{xB8W1oMA
zz+MF1UBG`?l$#gHHUDcx?05)?)#jXibw|ggTM=_rd92gn^cG%pVQYIuhBSQ@6x-h^
zU#@>hcVT)s7txMyT~R%J=B{-4p4gtiVic#QqA!dG)EaT;>ecz&5V}biN;|Pj!5D14
zw-GmzWr(N2CM5V&q};$Ue-I>2P{bVnuq*VnL7LAm?@C636<0C@F0P~*HC}}s{!?a=
zIzn-e{B+AN#HrLu+kP#FVxOY~a8^PhWKMFH5qiVgOfHHOZN{>h4KP44^_B>VqiiNg7F>$aJVb&NTUki4
z8K*x4C}tcuXXP^X_3@!WSiOp^U(YtK=c6llU!PoDvtqfVl)voG7T8NQyW~crcXQxk
z2!-SL3(^sQdKQqm^EJ9+rnLd+6kX)zxMC_dU7%!D1~yZ4tqwc}T}b*SVb(RN!D^T*
zR^V;7iN0QBtfKb$szxoTRb;NtGBT^d%sH7xx!Uq6!&!-}Rvw5%VZpWH(lNKFZJJvf
zAz?Sm^c0GW`e7;<*%+mwRxI>#?CHUQ{@R}x$FM52a51h+C2MSl?kbL$l0-{r3SXx4
zex*Ok0IEWD8P!3A3aHj6F@wrqrS=nRE@?K6qhp#{NjcSD&iYp60Z%B8M6NL<^pg{G
zkTh`l1)y5f1O;+E8vrGIF&EGkETU(AuE?%!q5{M
zf=q?UovP+cU2=hK;5EYF5UyDz`g<#=CrjiWRC59`#VZS;0)@H5ft>;|tHg{p18aRV
z=4G8YSvM1-;Lgk19B1lNQNt5dCellFt2|+(r$^kj24-g1wd+cS5GO};ItAM(sz>+&)!JtRV0=7DC6PCaI-RC=a?sSUxPQ%S4CVAPguO7$eON~3nQ
zrDsh0yXojFHF$8sC6pBkCgZ!E)o|B#B8?DD2S7jeL3=bd*;)wDo++LU5Zv9<6qW)u
z(Y7g)SvW@aDX`>;(&P6JYBXYMm12x!g<
z)MD;XoUZ)wjF9qE@$kcsKK$SR{C|7#g`ZTbRo!{5i7=Z;qA3(NC<{9HtRPuH>+@!#
z*r`Su#ZiC(iancA9V?dNkj>QnaA&@ATpz^&z>ZdaY0Wb;c>JXJ;DhS1lQ=bvLxXVp
z?fmxDuy#FPxttFVLvJtZDN9dCP4LsR9GO`a)?)w`YHK?iX9oDr`Ukjx%%s)OoY@7`
z2tejc#W^YK2v9wKnY*@ziGTo(uIc?-U`v8#2;>8z%9!OS4O(}{o`&z%%QRr978cM7
zx?R5myJCiMfQ%B$Ks=xwsF+2_Sit-ggFE2i=-W(yVm*|v&t?J?tElv5Pz*E<&PuT^
zd%{`0TEW>_HZdu#UB?R-
z@X}?xd{tb$Bn}^jojXhS-^aIZ<~==jMxd$6-?A}(*;=|CVOWhc-ibGWHIYI3A#NrG
zncuEqK)n#Fa^+K|cD)0ZnY98&VZp%3HfhGyG_?K>VRl%fJz>ECLmHx~HCveni_#De
z4&u;|T9~AaOUh+r>hvJNXf6fJioNMyDt-c~gxU>eCXNO5A;HQ*=^r&YgS!3cU-=*E
z2^(6_M!nzG4p5Q&dO76;{r#|V6^u{8{DNL>P#|*FitB&{)
zNlk+_!D(DoVkIJzar0C}XG}tpQS-o%ad}WNQ?b~r3RQI)k~7xU>)UcgT^&D6OZilC
zReGhL$}_Jutf->38OH*(5mH&4DK%?^dWO#$p}jQ($c!v!0|e$`rU9DH0XAHJ?d`XX
zEE_(0^u&*T^0WKzzH4}RSS1@9UB3MBy$`P#UGdI`MZbGf>)SPcE8EqyP#hoP!PUEzL
z;)VcQDGOmn0)U-Mu>?GI2H$h2tKH{Ov3Iybh0n)Q55iqjMfg2;=$C%r*lq3!?p#gc(fZ@B^LUDtDv~m;$o`9CI
z0pm*8B#H|M4BwUTQODYhax_XPV15CvToo5C;+eDJ)9tE`bv(v!UzzUyH+2v=!=CZT?{Hd
z0n7x2Ts1NrQ9^4o9f3B|AJtMQ?Pb=r1t9_To?>Q_N!xt^NJ+VH*|6yEE6vVAsYm&c
zTyM(dcFTkufmVgt%qu7thvF0k=QWSNR(TbJtb&q3*32AcKu&M%f1J*FlA|E$W(h%4
zSQJsMQd`L*eE`&X#D-4hI}0lQT9&EyEApEEt~MSsdH7VBB^jX!?Qypgn0{hSG|C9|
zs4fZ+{jO&^p82}K%k;Zwz;GX7?EZW2dG_fi{_}Uf_v&kJe(sqkKKq%+nNcK!kB*$U
zICiDPxSX&6b)FUHCB~psF3A{nJynV{V=_6raP-Xi#~*pHzqi*UUO0a8)K6ag`Sj%U
zlaD|8xu-s3j4)#_rsja1e)X%ncI_IJfk7(t$d%nRoAEsiPp@Na#s$nH)P)10o*Ph)pZWQPlc!Go
z=!ZXe{(H~A{qEb-(=&7x`f%X1W@ekoKr!T$g`{2#MaqItaTZy~%t*6srs0Qs4p~T}
zxDMryrMOt-7ghY|WAV#Z7cP7P14HcL-R1jt@y(lfU!T2(rop{esDnn;H=1Jb8ZZ}N
zG{uDh+Fv69gCo*>fZ=5qyJow$VbD)DG
zMz6)dCk1JV9TEX4jWc5=lA`vPa6>TBb%o^;Cf8{KRCOr$S%%djh1>%-aq2#xsXEZX
zK}-deqIut0s3_GuL*>)aVw;`>M|Uh|O0L{bd9|Pum(YZ8fQgNF0l@(T_oym0vLrx3
zv2wX-v^S9^81yUyEukaOQAzszrJB#n<=SG+O_jSI(8QRV
ziW+83qAca`$}zw8R%NI-bC#^R4qE3_Y_n}xPFCpHQk`N;$Rm@5i7oPu%XN&642_!A
zdEZqr6F3rSfqLwYdQKt;=$N#z3>Hd%AJIp$1CFj({>5MW>>K;uI)38Rk6-xt{rBuz
zJ~BK#Grdr$-ne=5rPuaNPS5oB^-@zaRw|VkFG=E==@lwMALJB6rBZqOgG2Z2+BfMW0>
z)yzZG=`0j!Ha|am@WX?bE?qu(^3>;^``lxXJ+^(@*1o=ebz-*K=}uYUvNa$$=^Ijv
zFe@W=uW3PmdfSv_)O75Hg@tqH&+Xf{|CN_te&NLzPM$bbtyY+gyU|KU^M^LCeu5FY
zPx{Oh=aq%RdT|bl3kPfp#Sx6u@o-P0*vXF_O))ee3rQ5$qx>-qI6DXLyjywY4KY3r
zTek2g9xHF&%!URnAVo?`$?gn_?IrHAQN4gcin&kmqEr4Q(ThFhZQMe@M!L%1xbmZJ!@&Wz>mIgo1H0=^{>Hw_@e^WtdSQH`^w0z4QYoEcmXG2#D1TWE
z_54SjGk-xIB=equ7Mx1ueoq9TEtrYMW_Ep-keK#)>|8d238qtisA@x+8bGyxik3=w
zf!UW30OaN%FUhEDUb3e)
z$YvEf4IoV+&XEwQlXifpRJ^(<>`75w$R_4&*w6*?}x%n{nXe
zlrxPF=lz@d$q`6#9RmkMTSgi`x*e0-%?3J_2
zSI58gy&tYyF)*}j+4AKh3v;tC{{1f(<`?!py!)9aA9s#nPVU9wD%U9so)~421!OLX
z4O5)oW}GhXPIPHmP?K_UZt2qHvG0HX`)|Fq|B;6ue&UHIA9?hVZQHjG4-J*eWpyo@
zBz??Py|@OqEVPv3f|P$|23~!=^2R<<
zsfhdUEkFHadEM=Pece%BKAUMH#rb3*JH@S5{#+F2mW41Q&`A_Cj}d6AECeV^1I+JCg6DclRzs$`f
z&9v3(*#!NGb?BUZKxQi$aS;S$wi}QqE)%5DH!+!bKy$`brYC1^LN+Wlu{xhiVhHIz
zDwhlOAh!w3&=y*{(|UD6?Z>5E#wLBe-kM9RZ%EIR%roKEi3D-
z_LNb97DeoeH;oGEbYqU@}dc$~>(DnDtVo;ZxPuo;oC#x&>`J7Si9P)aR5pN$u8E
zOE)D(OXw;9nZlJppw9Fq+el_tQs)Y*5v-uaWTY=v%U!@IWTFOIf)cs&BnDiYhnyjh
zo`WEN7xjr`!a547Zj7^1?f*;Kz%B=HGk?OpwED^eMEgygIP8_^ySY#`|f)my!yuezx?ZObIz-x0s?qXU*Eu>
z%889YY*!Yla`U!8QB_;6=_#+8Pv%^#3jFBs@mJ2DV7$zE&FZJ7rnYan;~)Q{uiSpy
z8aI(_VlSo=M)cxLvY@zN?mB=Y3$cu_5XF>H!=u;>c}=D`0GL8?Abg6Be%50My*0u_
z(4oNT>8YbfkDWbx=Jhw;xbOaZcYk`%{rBH@_pV(V)~;JNvYgW?*|gK%ZZTGEYW8gA
zu^Df^-7;cG(hgUE;#9yGidEZEUy=VzOio_DeEH);hdwxX@ZERbdH=wHGiT3CP0g60
zmUkFBIDsv+{Q>R6R+kVM7Dlm?&ohqVf@Q%lf>1EU{;7zK0){CDNV1uFAMSCo00mLp
z2IY4cQCC%~@b-I^*M3!DjO~4-=QDdt%a@yxUS5h>(kp+{Hw>rsB_2<7J-wru9ZpJ&U4*t-WRum7qtIKV#r
zDNg(CqHLx>ikqkWW&IxWS{$v{q24pDGc!MI>6pFJo>mnL2zp(R%*F;&Fz0A%p=kiU
z5faF4N3&ha*&WQBWd|o1+#GE!&k_b^TSz;jx~!<9bJa4Y^19tH0@RI+a>p#7n4X@U
zoSvPWn(XWCUA1aOe_x-_yKSjZ%mhiRawGZK&Op#E2!pmYIZU9rk=1*p#wmhghS
zX|f5XCmfjVniRt>be&E1GACgd*FCYTr)NZ^^3;=$
z@A=e&W+9S!rHSmtkg*qQSzx-Af`AOn42=O~L1m9Ai4Lut18zMCTjb1mLgceUcGYq%$fJ!KlskuZy!B;`22-Wu3WjYFu!2(
z10cF9yy&)-i7L^dIq38(^QbR^;s{v?Qk>9>>yicC@GL;F**>377SaF6z7%&ZLZCrxZ{<-
zp@%!N;XRrVgxalxQA-l=eEjg?
zlUJ_XxO(IIijkqaciws5y*oB^1
z&PB^bKp_u%Qo9eKDua}zH(C3)#2MpU1xq7}8lX`AtWIjhw01B`t;A5}#ICCCjMa+S
zrVKhtn~}T9mESDvY%ge&Lp5n(w)n~=dagDEBPSQPR-F>3Ey6Mxm-6g{P^-i$t#pa3
zhy}I`pwmyyAIw^yGofuei#snA52zn}34qM)*)%;72k3+)nENIsCr_R}vwURvXP$XCqi9S#T6Hm14R-
zp5kPiai$_I#ZHnY=LY$6V6cCne`tJSyt1%BbC75zBB4^LT)uqi^5sh(efZ&Puf4W*
z?b(;Hk?Y7%SSB~mHKSnM!Gv8)C>I9o{jnD=qfZGC@jL`OH
zQ&pN}whzIHiHVD2V`CS`&YnAa=%dvzMf2Jw3hU
za%pB}#&xb40~7KC$u<*DG322*gDm9f#r4QSRz?b^I0P8(#gG8l;KMy#77{6DMN(Xt
z@~h$1snhtPC^1Y^((S^tnN&bys~XU2
z5~LF-EF%=RWr`6wcV>R`mEWo*!~#N0eJ!AGQz$%&O?_>9$2A}TgaAzgS{<+r$Wjzy>G|+e{k8h%vH|fns9|1`y&KFcPqN6h{HJh~f-@A&Qk(Q=crzb+wTZ
z_T&@giAg+kSp4$k>c|MceN6`{zxU`&Q2w$l4m!m{_rZZoKLkt|uCX<5f)Cv(2j-_5
z43$T@a$KEV%gjDmuxE0q^NL(R#+V^dh3NWygaB(kA+sk<2-K4{3Q#u>QyGeB(Nukr
z9i$=9a;3^{MSKW>C(oXL=eJ-vde&8V^U&PeCXc5i1i*?<6E(!oi}
z>tqdO!%lIc+=Nv2E+n;LyzM9Op7C7^>5k_3XS3BtetB
z5J5;cxTmLQT9o!EE`$Be%*t$*F$&iyz}c-8Rlj9Tf3-j6_r~xSzye2~F)B1Jmwr$@rc4_SB;Uj3rL;A=#@6pq^&3F_y
zXfsU5*R)F(oO1g^iAX@F7iR!$^1~ehfNn2tToyV)`5Ch58F=%p>iJJ_$2R`#Q$5Ru
zYkiA5gURd^%l$x1<;nodI_arDN^?K5oVy>HOIY+>5rE8I6bHzBx1Krwqyc&bVQyJs
z0IiS(>RSh(E?zXC2}H{X$T9)e$4KFT-bY1H?3FMDFoj|#^<5-jKAUkmU8n)`0yfA<
zYi%a-qvrJDD2mlvYgVz(K3N(c#}g;TzWvoNf1xZFYl>C=A|LJ$ul(cy>nqW*72QZC
z(7;GIx&^WGE^d2zfPwe*Z%jSuYyJ^!?L}~wNhrqIi4JBa;snh(8-a0)+(0bArb8pr
zm{S6z2_3b}s!r*lQ?h}ZKvS3b+O;KKo}F8G|KQ;t{`i+4etdLbubL*dhPAgXK*jjd`n(RN{b{)@3rti|@Y?SJ=s0Y*N2y|>
zW1Ic}SGu1yA4!ztf-jV1OtC!8sCGS91=riBWlN~{)N-q8A)yAcRq2X~Dg|1rnF3VD
z81j;G|D~g>Zl$#z(9zMVq+FlK&T5{#8c~+yPT(>p*O^hgHPunB^~&T}<^*89Hdd0!
zpR5^M6IX`d37j|cl*tHfchPayF(#b>Fc6HB0w}4tdiBP8?|*ps$gz9wy7Sk+`VVU+
zE0A+msa6-}=X-iea%4oT`3Gl_MNx_|EpZH4ndAK2?EJz)Z@Ekfz&U*I;i2#Tmmi-v
zdFuXqcj;cA)Qd5pzR|Maa)Yk&VlGC)fX$`Ye?odUs_8DUa*D8QWZBog_O%r&R($un
z|IbS={qoewlk*D;pbz#E#x>QQy;7;%kgs!}ocr*jkG}u^{g-O!N
zHf~(Kdi9FsD^{*rxnjlgq2ZyS!QoP=Bq<9r5_p2?2Nwobo}z^sLKN$X!0D;!o8uGX
zH*ee=zj@>OjcZr0p8MqdiQ^|uojh^+%-Qkr@yW@_smZBIwc>rZKDXP>&gu!Y(b1Kk
z{?tQX`3L`C%eJlG_{KMm96oHslcvJl1%()!fxHwKCkx2Hoo&r83yF`;EV2-yxGB%#
zras)8A`9(Tes#&Q6P3e9t0Tkk)Z^tfYcxzwiiJ5xQU1BG01FFPt*Qqxs2NU1(_3iQ
zHl6u7_k=O?eAIuUvlvCQaGFa*eK)kA>X4`Fe`g)z2-v4FZ>bg>+H
zFpUf7RuPz2UpqPg11Q`E)I%mSBu)c#nq9x;1L_eeGyfoFWh4YBnoe;dU_QC`A?|kOzNABBo
z=c<*XC0_FimksxAz2lA-e(}=hPCUM4^Csy$sw`7FF}3uy<;p8ms)#6hddfAg6ZEKq
z>v&>Sc@DKk0p)tqCOe&@S|4P@ScyZa#-UCaWgfU_bOAZ#qvEmy36(_{D=IA&bsJ_E
z7lBSylIf*+NzdtP6;k7FwIx{Po~9ZquR*Qo%JYAjI>DByoEBV*k^tm#;aaF#$)#sK
zC8G{yl6#hcj+3#QF3|J|t)|+@5Wo+=pr}rWGr}Z5eLhzlpf{7RH>GCWeS_bXi~du}62WU%R$c;@7TUf8pe!{)e-|!XymtNC{QO+)e+%=X>Q4Yt0s}OD
zl5YOmujO)iWckQ__uc>Ov(J9<^Iy2{{`=0HInyh5J~IGk1BHzjK!~-OA}LO@nNTmb
zv&wbJ0<*?mhb)8u-H1M2To=&xV%wng^nLtMLt*KXkW1Wrw3WgaS3KpUjUbS_M=!H{xVRDf|O
zDC|m5z7b~#?D_+shs`b^)7df61af+Slp-b;keHPNCKdohBT5EPCv0)i>`0yI*f$9X
zb`-8l=qetd86P_yf(5h<=;R{R;zbSR0Ln*fOan#&f`3k=TrqJNVRRryG@$dnF%-*j
zUx5_I+e{k81`xv(+w(LtQfwPA+Gau&GmHmBdQL}E90ufSlDaO6wG#l4(PlyvgPr1`
z%YrtOOtJ2p^~nMhM=>vBZx8S9hh@vy%2C$e54Wvmr_SJ8?^HK!Vm;-+gk{4I_Xd>T
z+FqLSm%;cn0Isj^g3#xt)tTeqyANygq)Y>pJnG(wW{FfUKPpM1$1r8DB|pq^LZc2!
z;w%lrx&+KT+0CX~njzEC!0sFj(-C}VdmZ`SwF^08vvcz=z5dpB|L!N(Z;n6v%wu2t
z!joG!tyAMXAq0Zx?d{#TaozOn{J}#nLG_0*~J
zA0Inesm$NM>+Y>vHW6Q34{CJoXS1zaPyN_^@g`$W36Yg3rkMVx4(ruYC{4&Dq(!yj
zN{+NMH^t26bF+7`ss3Xz$q2AyTC=ivw#tLevI+_RZE2I&Nsp!Ah>|q$BzdD576`zVBRhIIRt?L
zE!(zb^OwKy+~K1q-+BMwORv2Cwg2kNM~Cawz>9t>-ICXktWciBat8PxsaCsb;
z&L~!UuG(%Q*Z+aSqOq^9r_nr3+zVzxFyi|JXu}Ajq
z`4oUNLsCR9j*x{QBaj`i=@bV5Ap~d;7CB23-a-K%+QEd5m!F1pwEsT|0gHG~fRg=e)#tsazTw9v&SX
zUAAo5(D2ak(6WJn{-NQT8$8h8-_z6E*VotIKhWFP+uPH_Ipe%k`&lY6;MHnHpb%9d
zgs4<1bMtfavvad^wSVX4XQ!uTre|tC@YLkgsD8_;I=XeGsIfRGFb
zwih>D7W_UE#B{!d0Nn^X^J{&*SPunSC=0ElxDe$>fMX}B$4^#QuVxQDP%4+b0E~8LecC(fRWbzzDpl^2o$u?a;^9ZwmW<*0`!;%5f6KNBQ
zVjhUrW~rt$C(n$MIXP7#8&CmqSrpxX$PxiHSS%pKGvc@=-ijNGFJW$q!H}3vu?oiY
zDK>yXM!;+KW>8F-ZcX&0E~=)OsdP8YmJrZex{S9Om*Olo69t%+VyZS~r8o`HlLf=E
zD+`c9F~-kajMN}&lC^0CNVW7X}^p?tS{G>R1MBIOG2|df}?1#I4zif=hdG66a
z&i86F@!-)919d*az*EG68dDTtzEDrQXh24@Bg_>U#wdD7Pe4=m(h=QWXaXa
zMy%=`;?#VImT)r*#8`<{tJVGQeE7ZR|9|%0JUGtd$`gDqzpn~~8w3cD1n*0{Z&4&g
z-S;u>w$<$!Pdj>hJiG1Q=$?q3`Fm#e@7amih?&{G+#S(}#~!zjwq;wEtjnS#ilPpR
zBB_g{c!4(ng1E1$FLR?Z-^(w*uWnR#w65cxe;k9c+jg85U`E83j=WkrUdij!W-8f3z7#cou^u#N#zk9v6
zcfq3eb3K>u-@kYJ*7eyZ2uO*rLqBAEV*;7eSsHT^$Yz!(APmo%+G48!tq`a{QWs|%
zN!HDw>1vKb7)d~3-B-k2iW(7^xu#hPlq;pb1f1o?-JTIDk#Fmg
z$@};3d-LGo*A5q_Pv8ZDt=mX`sK-05A<>bHN~MgC0Kw?N$Oe+bab}3j9?wLXrM7CpU0(TWp#p*!Bj`wR9)h!d
zxWgP(3z4PG9OAJN`0Qk6Y6>^qCeQ_x#y%i8(N1S7mL{m1Yg&R_PX;zL91q(@P!I2X$Waje
zfYMB1$cf@{oGw4XNH`je*V^263_(e;EpRQB@*w8DaUd3;WD$s=D8XEajbpt=5F`I0
zFT{ZE23E>5QHaStaCxR$h>P*e>_V)SRSaUZ1fvj_B$x+cfTC&viXak5lo>TK1tTM>
zw_jbkqHYdlA9iF+zIiB!qugD7dQW)0eK$WleJ&Hosj2D4rqud%
z%ae&@U*FANzVg=Yott*;*fcma^7CIFcjHX)la=HT|QF
zT`xl4VQD5*@h}ENlI`M57`d(}W#^TZM=&BP?h8qjZ8!Kr1OZ^_((Wf7d+69FpC0@8
z)3fJ$hK7eb=CwWk(C&vHx^L&U&6yyWo}B*a^RG;20>rRMI5jipBmooW-@b9pSDtuq
zK}Xw`4eOr!<%@3|dhg>;Po6*DqX1Kh#6$Pry>;uxh;SE>V%PqyUM<9)SA`Z;ezj03
zL8vvv{KK#>jVVOnzUzuCSU%J{_1BJ_JD+;`sne%UeR%ZPTZayP{K-cH0|TR@qcR|~
z*CaM@7K-8$3;>L0V4GqL>XZkh*t9$b6O#c8L>1AqZAMQ
zyM~5_)|Qs#%a`xjw{Pc;9h)|7TCse2d&m4JU^JFcxjW3C7e(t6GX{s}^Pn;u|;B*>kF?5m&Ado)1zQZ*K{R
z?Y{T+!LbwRPfn`d>oS>yRV&5HW!~a$Y-_`oW=N%C*0hxF
zpP&1uAO7Ofl`Bs@@xW7$-QPa1MbBsjO1#!r;o;G-R}LI{_sB6s+`4uB*Z$>`UGv*i
z1_p;mfB5W+KYH$^hUWT@PMmu7xtHg6wEzC^KCx@(hK}~O@uAUw`1X&_oV(b)Xg*Bo
zAh6Kgn{`Mg$OMB!Bgtg4E|s)%Hp$+XHcX8|-><2ww1!ntva_O+W_$akzOzV1rzdE_
zGWmU^GQG(3x6(vn>87qI)UM3|4+S-{LU3~Tl@#;{Ajvvc-22OXut&ipZ88hs92w$A
z=_FfF-jNDL)XDw^*DRgJhK4n(*Q{E#djJ0YU;S7A^4!_8M~@!;
z@aWMqXV3Qa_T3ohA08eFSEhT^k$0Y1pA|U;icN4vu`5fob!OJ-^wKzR}1+F=1Jo!kD%kis+wRvhyn5}xWon)
zLRkVe>|Ltrw0S3qC&+yn7^l;iA~P
z+uyOxOC%^zHpex;ufm=d{G417ZC5~M3)5l64bfG#GDuc
z(aB7peylE3?L&RMa{uQZ}w+U`o)qoJInIFe$tGG&A-gDr%DjblcO{Mk>12
zKuHCjheIP{FT8y4qmNHL`tUvf;)w@4=Cx|;my<>GqfAXtA9(ZK=UzO}KQNf^yk$$f
zmoDkXAdsG1(%rdn!^$@fz4Ptw{`A6yD@{!eU;FCQ_wL!@d&2X)`JL^VAbs}ig?sny
z6rLxvff=Ob==j9--u|A8S0*MW_uRRyzOgaz0v)sbb>Pq;j!rpL7=lgbY^vS#{)`~=8Q@dG-pas
zV4C7eTqnTuSU|B!XR0FVgdrArofE$BMm
zb44oUc^-*Npd`qyj(HO!nGZfW`^u~7Ws4Vn>B&cuiR8isU5`I<-`dqHHf>z@>B-Y~
z-*xBub!*KhV`zoUREqPc1$2&u3JDTT+7ohzl!r(jf>lEdkz<6hV%0@iT%j4~^yJr4
z)Y8)0($czSb=DFd9UJZI?LT?yUL8%1_pv4a5OX8(*{==V-HTgW
zTN@jjXnIrVKt=bA2@%2A2D)9Flv{;U8PD84DXx?tpJy{s3z7YD2!`i94Yb1WmVgyX
zvGYkr_G)vu6TDTZg%~2tShove$e!=mb18d7Em?$Zt=uoBC_%II=`(AyVu
zFA_VqdmGkyZLQ`g&CfG4hPVLFI1ocAi20{wL0ntSUkSu>N-ZdbIr;D<)IuqUp+dD#
z3B~MG3?blHAyAanx^E(jNT=Q4QGrk9HZKAae7B9jx=fsht!P#@cuMVU~
zM%A9(UQ;6~&7|$}!M+6C)d2r6LeR0E8D6BIuj2kz=a%%v9~LhYF;w_*J@Pm8K%_^qC9Kzxeu~HKbdHs*M@+ZKq`ugMpE;fg1xCE?hf(_R@u(
z9#4pS_U~+JZd4gbo@D`Hmsw*8nXr+`bT*cw?x>;6%p+yvy(W-ut~r7#`jk{cDXwHD
z6B-oo1`9!^EL91ThZ~KhDorglI^(E9pMNOfd?lhkB;EPV^l@Paf0O-At>
zx#!ZAYnQHExz^X$*WcHBt+#Jte4IA`Sh(5lwf~MrR#hy*iea1@a?N9MXF+t*1bt*O
zne6E3=grm!aN(jw3m11UUb1Z2^5rX*E?L~t(&8r)3E#Kx(w*PK)a}9$haZl%
zH+EPDHdCINWht&9LE3~FsfF;@`Gqdx2hYEOSYQJS5v)cnfV&A*tA!ZEaGNzhWRG%n
z?V4<;!>$E_9Pv<0^P7K-Pr%{#f_L5xMn~1A4c>kG{UwW$iXr$D^772gAg%$=6oEMO
z1I~`-FNMf#YyKiUlNVxFio?ry*=nH(B6CSCR0pwp=g&hB=alA;KwJyWk6#qcUyWLz
zm~Qrm&CR%Nv$wcgy#0UuvN|YS
ze^}6Kw$%Myj35O!A_OJ*9>`s+AS}02HqJ@ef+~!P&{~k&wvpYZZ)T?a43(0mlRQ#<
zmx@^x-%8AD`Qz>5DdGp~)f=4{fBT)InM~&Cryp9isv9Jk>AF--DJg~U#wW&JfAifl
zXD_$4&l?^dYHDm)uwcFrVgA|p#N_03rXf}L^b_|zc;DVsGLezGJXr?w+uD{b?!M4-
z>HQ-gUAcPw^K(6yE?yrR995vYI@|W|+qLJeZS{3^GLU4~n=Hr><*W^>14^dY*_6#1
zXat%r&{V6$M>e-jl6wD5=PjFfsnHR_=?X1Sa4II+Tdb7IbdJ>#b)e9+{yd}>vILaV
zz8cIdVPSf%gUM~CE3uL?(7|hZlW7&0eS?fJQs^{v1R>Nj5bJ|x^>1M&EJV->Y9WH;
zf^d>L9D*1TgW?1m8tPZCT0ssRgsG{i7hgH>xBu{+Pfwj$ws_H>e)Er7Tibs6^IuL(
zPH4OW1mXL2fLYvoE?<7?(f$A7-~Gw|```cS=K%keZ
z_fqw#dGqG2TX%<2kO_j};i10kz5O=_ZuH;i>+KyFxY66&-+!b3=D@(<&B4K;n>Pn;
z4v&mxGSgJQpB0X9q?$>THkypOoY)*@G&vuhbzGVH2t?rtud%72wRK*b{$G20`}~fM
z&aTdmj*gD`9Ubl2|95tEwRf~96G^mF#OZ02J#2*Sz=v26V%Pc(J7Kd3ScWWzXR3m@
zq*^E{#kW;0M1?MA5g?sb?;nwG9t@K5dg>LBJ1s|Ccr{+fSwq_|3m%Tfy!
zKwOSmD87ME-JO5#sD&?-=C3itv!eNp1<*Mk9(urQX@*1Z1V@eq65z>4{DuY^Ky=F?
zhH?n{;FJwn8feqtDcIrem~!1WcF!Y?w!(ZnyW$8zq&J%IuWjC>TT&VGm1gfKHm|~t
z^cqCl+ez6z-=^V`vqV$PX2On=D;!zzxb0ln*LN
zo(M_R+sXu_(X(@P%(NG3dnL@!Va4IfKho$o|A-TLTO3ft*MG0C~Hw4+6R31S`^O3K|^yQnHntJKg*Z=14zVqp+
z)617F`Q|si{?y}-J@?C(`Uh_Mo&eK)8v}`fZWk)X$0z&xZhra6NB-(_q&cUzj8q_idngl@b1D|Dq8A!mh^s0F)WHukHa<2!Ha0mmH93*}BR!p-PG^El+BA_?QhI(u=yBVLWHOORr0TN&tE)>j
zHa0XiHa0gmwYIf3H8nNVH`b@>>l^DE>Khvx>YJJyo10pa$t2|aqLmwUXNyf$w7FG2
z&F}dA?G^$l=I90%=9w8nT)A3+94Uq>)Pj|fGSvbWR|^q>#+h9+g3%4kL$IpDeJ(*<
zz2?WEQ56KLzTRtUA}ESOtN=g$RK0aLossIEeTjSa`i+fD6bnNPe2J+o&s3oK;Wt+E
z&)(sFE7U@{5UW}o?!O*tp(==PgXS+A;@hS9nZvND5%%x%>KowI*U~3W$kwLJ1NZs9
zU#%3+2;#Cdzi*KXuO|*$T5^_fh;Z~}?hVpxQ-mPejietad)c;KjukzXaS_x^&Hw;_
z07*naRGT1212qk#7O7?}dYldgZt37KF9nQwu#^NRjX(klJ4xO7ohKSw
zBRIm9p+LG!=rXHwrQo=J?9DMYw<1y&L5wK|3}qxgK`chl;5Ipxe)(5#{Po{``_q%3
zuUN6h8)Z6SaoGSOCVcFG=+lL<016J#iXbk{GpdkU$WJhb
zX9`1{!!wm3l0&eT5SJwAZeT?7&k=|f_&)B~>Q9WrYj33AIRY)Ma`#RTODgKyr1^b^
zf7pIzk~~Pv7H)F@ohY+
zbo5o49;jIxO-RFBrq{79;x$oUZK>&Z8~Zt|Rm~b>sHhr;auz65uD})tD^!7MYHE7m
z-aSGHQ>E$|FFf#sZry9Txv_rn!p_rYFZTD|*t}`&{=0T0eUZroS^H07Q$yqaJv-N~
zT(N!Y`hdpbn#UA|SaWm3o!d8WShofMn;YxBgzi?IUB52p<(o3yBorld3*J~Egz&O=
z=&aSKz|4CxFV=aB0kx*m+b3z~&E*zRhla!J2wBFuP8tBr+o-Zy@eCvL_N-0dQBdB_
zEK@u)#x%cTe~${Jws*rJ+%)QlYt+d~KwVxXg|I&ooODWE($Mw+fRGC<6I#tukO?8m
zgf2lOfte{m#qJsfTq~_21eF!toKZvBl}xA8uf6`}-~8QoK0kAA)$*m^_``qor6(S1
zYG?ojE0!&vH?QsDl`9GKQO|ET8x%-cUthm$$sz#o6N!Cy?eaYFxBvLvLxpn45PbawHTZI-08=r44#fP+a9uDH&2I=Q=c9?T%?o$(b@K5{
z6%bde7Raqsm}km@INSxOOf5KXEkLlK6x&->A%exFm~J06JKP;9&Z`zM*ZF^oAg(|y
z*z0a4on>C(LBcbv?5;2ZF@PE#f!7ZPy?tu^TD<=rudyk7O+^q>6H6p-aO9#_gRffB
zngxP$d3jMDor?s`*KZoZxxBpJIEa%;xa&@Ta9F+bUgpr@%)$k@YzY@x&N0pJ^P^*I
z2NXDFGTYIPXCr2KM-)6p|8NP?_sGFM6JQCCMYMMjLB{~KB$nna)GCacVqRLDRsU)KHmzxVWe
zM@~#nPw&5b*UFWP0~zo`sD{Rd#~#=}H8t7R+8Ua%F8Uvnt>p5NNi45qzxAno8M
zWX}Ee35$LvQTX(G8xp{*jl}IM;-g_4caqj%Z3zx6{;Kx7T
zx_Q%GySAHN=V1~It+0R;$JE9Q)B?sfuw1o}OYpWpZ1^#C1~Ck
zKCeYYgrA*bl|_|P{7Xe1CiZ2n&Y`MPs%nUfYJRY?9o@jtaa)>aDuB3#JcD5bq7cC<
zA+A6z#0W+a$(7=K1f8!^5JMc1T(vM$DTe$ILzxiI1+@_98;1}W7RCeQ)%*(Z#K*z8
zb3x~PxbL3C{P`GZyjnibaGSXxfRRx(J`U5;`Q01>a#+VS>h;h#^7c
z;EPUMRQ+!iZeSS%d2{XavVXoHK^49U*?meIn3rHth|xKtb0Nl%C1(P0*raeqJQISc
zB76aS-%~Ecxdbifiho=qh)w=v*%0RvFD}1O+V0}F_
zH^M{r`-6k(uBq$
z=j3BOa!DZYYs~vhm#ggC8&at~ySJ=eyFADQ?d`1zohD3Ov~+B`sj;ECQ3v8E)0$f<
z6FF^m1{QAQN^yF04D+)2(4-Xxrc({26r0*y>XZ}qkX5!n4zO*^?tYUTPDvNWR91>F
z$mE&!IF_KeEskb#J)X!k*5Wxv7Adpi6u~y}cxE6}&b3gXXZ=2cEouF0K^&s50d_T3
zFNEV3BTR_Vgnx)Jp}pH$CM-b^V+8ZEv=tAuS0KvZcVz^^{Pc)DABuzTc?}KqL7;*_
zj*N^1f$0I3Ws=sGrmueGcLxTC4;*}ZbYx8U)9I<{R9$lC*7aZe%99HhcIi1)!sJMd
zj*d=GPb;aCi9||g?6F(T4fGe5VmF(nT(y8Up*}`1;>ei+!J-i7Ay_rUAs%jo7D<>n
zqej^ox;5h&8wx6b2*i0Nx`ALXWBgNvDt5b`9f6=bqDFof(t_ihPxdb^vj7+$`WeBsI5;P>R9c
z4ay=&xq7zu4&Q?d5@c(O2wGK8F+s;4qg1G_ixYHA9X3~x={UKxc?gCtpvx9BBxfrE
zR6#HgBH)-|M6e(te7`FP;);33{6ad2KyjXlK|FJw$pdl4JQGKRt*O!w*O+G{gXHo|
zWe`Ito|!4c6|0335PKeY9@ZtHu@PIEv7-a$wPyc$>Qr!Momh8=7fA5tnCAB}cP%!D
zDb9ROXqhfM7(Etjgt-JkWlHO8O1VV|g#Z=UnqZDkQXf)^?pnqJ&e6s2ph0aO8!HrT
z5~itZ(+#vtWYjsmtf$)gJjm!aJ&TR$%RQ%2r~WEumy?OaqOSS0CK>4XvXVN0ZDo(h
z3p2^cvFDK{^)f`1giRipu(_M-oii&$kZ1~_vsSO}%xu!XZ3-G9dfK?8LA<T(Dw%*)b+2!`*r1_2Z#XoRr6
zecm5_?e~U;hTlJW;^)u5)YR1Q^?&vIt<6nJ55Hc&Zq1+m@i#VaSa;&%PbUM}(b2Vf
z#nL@Hw{Bd&h8wPdZs4{`{1PTx8lHez?t`2z&1lT>Y0VjnRo}M=D3Sin?^5r$mx>oiqcCVhC
zsjrL}hsSaD_@@dh5Xb}KEJ41mI|PZR6ca?i(EVj9r5u9h`ZY6x^a3FaLAusO3EJzr
z0EeKN7Xmf{X$k5pKNA!-ifD#&f+YxO!=H4^ROVI&u6as`NXwx`3EET?&ghe&Ic7DY
zA&8s@BAn}M0f=*W267;#^u1a@Tx*^w2jct$D}`9+Q^t8_mLZN3EDmvuU{Q$UY5_tk
zkPtCPbd4YmGHPlHhDX%3UfsP{Ktp}@w155@c3%sR2}RA!)y8L`5%QuXFYN%S0fpfxB7;j~1Pb?*A;v}sO}UsgX#
zfT=n*J*Nd1i$|Cc^IkwO%8UwC?8nw^lEc-d33C6bJS?4;pmJATfM85Ue9zl+=gvUN
zzxdmKeE)-E-}~XSLf{{K^_iCDW~G$x`|H-OUedj=x3_O}e7v!#sjH*Csi{#{r)L*7
zH9h_Es|UXI_uu*Kb(x1oeU6h=T_0t
zfjJwPA7Xp>509$Tr{ssngG*QB^fc7h!-7s+vc&IReuZGI?
z-J9S62MD=@i}Kj$v6Zq_abaS*re#hR=Tl6YSA?ML&cUCG1qj-#8K4g}@raNhN1Q@}
zTwkqh&jo8chXgssrJ`mC=IjPv!b}V$s)_j5c1{TJKuP72;iz5Y3C(Be%L>1ixFRH2
z3=sgwRzcfqRONw~+SybEag=8S)hN{vV&_9|CB!`3CnN}t`^mcZ9EkN!g%ES((^-5a
z5R;iu0OB~$6okgOw(bCx9TKyP_|Dr*`N|6m%T!mw96hC(gG-Se=Vd+M1!Qs1eVmsSTPqUwsOH}
zeZC5qpMdppo6=4{)%CKUTBj4+*tt&SmRf{0<8d&X?7Hk5ecDDPvw8yc
zv7h}=;R*7vDcyYANP(noT+Rx^L=@Ac%QNWP@?cs!&BQh*EYGr{At?cJzB~D(q>4Is
zNCq)14RB?`X3daW6RjXXtkAPEf~ayGP7oC~44q{v%-b%HApNSYF1hcn-BQ9|$-n>L
z*hk;}!Lx|oAAI$h=B5Uvq!6OHxv89?f_4U8~hwpy$@yW%D
z7W~P-{okH`{L#jShS>7J(Qgqc#+gX5RSRhEb=9c_cLU3VIN~d<5@O~Vg#gwGE+EVA
zag<3(KqU~g_%lm9<>mM*m{0TLtMF9sGa8Lw<*O5;*jGW
z2;Di@b2+$rP0jDX_3H(=Ybvby&3c0XPo9!TKFpjuFDIwe!Y*(1DqOkT>so+~DX2@C
zS^kk7=I=4z4*}wcly{(-@hr_OeE&Re-+neTg&Si?Gu;Pknk!-tNp
zSiWTM-W_#yKG|HzW78$_8pstK5uEGiho2i$>gb-f1kU7%t_qR_IKWhe*9g0fLlSpcK?46Z`kxi6H;tTmNw6*hk;}{*Q&gfBk!3Zfa@_+o)Jq
zkgSVXL?EuE<~QuD*nUtp{A{{MJ^^SSV~K-U7|<^Mb>_!n`G0v7*95%lxe{l0&7yIsc0ir1!8DW@0SbAmwyl
z8pL4QR}uMej)!v2Ebe{Gy8}9dS6>x8v@Np$*w<`k(Q3B?g>dN-!JnsWJkz$dBTH~P
z6C617{;v+a_4K0;EMK-*>1<#f$J8$pqOW)0XU`va{mpk)E?c%@<+4?)7Ee!43u@#<
zBg%R%Tt4yfsfFG1w`^RWNF+=)j-tmBz;-{N3y}jQ8jGESsR*8jp08C`}Id<2G#gzKJFcCFfu);|!e?6MmuOzfE;m!{D*
zr;8i61BXR0OzoivebzYlLY`5POH?isDi#1{LgrTYx{sKbpiPV{iy*p;jkd@lS4Lvj
zDk2zqJP^QhHe&&U;7y@w-2SKz{vezhB=_Zwl9K!4@K>gW#3d
z-uUys{=1W>qdz?3@4Q>yJEqe?xt}Gt(TbNn2YfwgheZV_rlcPB4Nel66$Ho&apgP%74b|61;i040Wn0ggLt78KwKfu*wKXH+aZzyJzEf0NiZ(O
z)vJZdAdcFcu3F$En4$z@5Jz|>Kg6@97NC?`D2_;sAlxp^ZxyvcQwd8g&NBdm!|Li4
znebu7a?kgpsw@IA-!e7f!9n%b;o$ubGE&0!En?Spf7ud|tOLFu@h2g~wxN((h<-Vb
zx}*gns{W34!T
zLLT`rIQ(vAcnI#l&s(%Wn8vvI?ZjK6`2|>2tMWdZLpAa{{&Wb%%%;9MeTd@(?ST_W
zl!`KRDbrl)L8lV!g;US$4M1_(s^n;+VYK$Z;T0D>at=A6oal*
zB@L`h<}X){lNHPrs#*KC_rU9iGihA2dU-10gOofhG;1`b)0xBXAA9rA`vb$neFHbA
z(-~tchV85ndoEu8r)OXM`|tetyWe~6)Y%K3=SgF2(oUIv(A+wU1`;`}nO&iVLH38OpsAo;)*(EDQXp>%(Yq1HAc@D
ze!}+?!Zgd~DxF;6<2aoHx8ja6)7e}p$b`3yAUk0UL4}10YAUI^)IIxl|NB4v-|ybN
z^ZfZsFTeiA;LxCXgK#q90o3&5zm)yw+AFV}*{hBE{|<
zy}$;B&blxowm8Akh^SeGnAga6hdk)iif5n@A_aIRL?l$#xf@uJpbIg?AcmT1{uni9
zGxMJAd3a`yO7Tn}u1qb^?+QW;l|Wn-&s3=vu#{RT9}&2H5F14;Os11P%JUKg7#UI5
z`c&IISh&zUej$i?g$RB9>ZMmRZ@-&Cf&2IRPdu7jvl^3iK;=U{nt$MS9G}Z@87smb<#4K
zd)DDWKSHjLR~LG&_FTI5(EWF>S+&Hve*u)X2l2+h;G1u~J2*Jx39)=>_x#RQ1pybD
z0}8y>H}K=1zI5>52hB~5r_Nq@=lx@wH*bIdcoY+j<3cK?wf*3zRUM+T{y-IiEV3IY
zX{=MA_eWS?8=B_NXgjf+OlV`r+Q2g<9%T1Phuh7PXiSv30B|okU198juwi^{Hk;id
zGFh9~Fo0eX!$Y1;Wf=xduoZC!}&}*77o;axCvSL4^@X
zLAz1;&;GFeKz@P+WF#*^^R4ImckbB!fBo4XuUN5c^{VAoNFckh{+
zO5cCazDFLqx2`U!0Aj-`tC3>P`YkBMMG?tEP?b^(l|USJJD(ZEki#?fd#>>fHS^@;
zJW~e5P@!5VCdE)o
zwSe*HK}BkzC?d1XGgTh$6{&?%5YIsK7lgPX&2RpaA(N@xJ+wSyruD8|?lm=HIf#so
z!ojyPM^6Opt$6=E{?;ubm2#dJZ!^TT)%-Z;HGi!R_c^8c?I@cX5uDY-eFmDpAjC7!
z{KX*tt>$-rITMJ@erbaBDcHSJG&dyfvR
zXIb0!b)d%((coZX@!Dxa(WTl?G>
zty^CQL6~s`#yB?)6o81$FKDau^9IU{fTjLiddIkhobe4va603m@bv58gr5!#{>wkIZHSjP=^gt=}At(Rg)$SE45Os<9O&YP$_nYScNLKy659Iy(q
zhMv?##0XIdbS?+_hwjf^Ed~l5&sr|eTFfyoL5HPT3;`kDWiF45R6r1m2^}c#eSgP}
ztt*x<`c`}gnNzGYKeTU#n?{gxxePD~-c6qmKaa@2zD_zmT%
zg^D1CTo?7sA!eR2zeH>2@(FnD+O^L=KQo<9Cw$)?0@Xkq;TcplB#7a%YelfQ<_{rO
zkX2%rE?qix`ZUmkna{G#{vdXE=L}C5Lb4%mx9=-u+I-MRG}7%
zK|BM^Z#>WBA6NOGH;`&%D0|qov|_5x-MajUOr_P~_kttGg61Z8^b?`
zo0+E`^P8IRc4>ZNb(q)Lm^oduxh)l_a4VF>+2u*=`UXW
z#q$T6TN;1&OAkMC|DNA@>Y+r!d*xSe9X;{sXJ^m$T)BGj^3`j7{TUgcX)$eF0=6&<
zxUV(XXtv^xNILHsdAc@wCRohPsEJ}3!NsB@Vu_h
z&X$&z2tf!N>necy`uc?nx>9vXdWh0e45g$vcZD-l3+SF=rPP95L{W%U`4E@EGZ5pM
z$nNmt=$sQVIz0T7XMb|!{r9fgkeV^HF=9Xz;hEWmxER5>6qlhEN5x0)8QEa2r@7>k^OmNy-Fr+a#(&uq=e(g<=gKBz7PBN`kOa6-7HX*ZOF(=(
zHGf4Amm*kFis!E8SGP#>mkV);+rqDt=C20gFRtdtQfeVIY1XU~_uuO`HsVL0$U}!S
z=`_HsYW~6y>n_kp_9I%?i|t`a|FopJVHX$OhOc&_)p3Go)=UI>*}RX^zFBfd;BYE~
ziMQynwM1WnN#~U)dDuHeY)ua+o55(7n>FDAKu^&aHQE+al}c$%-13UO(;WPCB3lCi3%5(#CRYMC@;Y2Il}SbBbQ${S-DXnYh|(!%5=BQifr6L9BB
zUg?wuC|k*HBF9;)*=gWPp=cZ12EAyMG?j`RT*_oaBl6q`TLJGdJ2^IpIUL8A`Hxx7
zahbVH2;k<|aEs*v1T7#12-=6U>W6t>A-XaG+?AOF{d6X{kEfgt?L2fDZI?VZ2b$Kd
z2U}qtp`+7KRVc-v0P{<6X|+Ha&d$4pxLSy8U`#EZv6~g6lpXy{
zf}-F3&f&xV=BJ
zmJav4Y5{5l@vYSSHk%BL%U$uTe~1&53a8WA7bbjaR%sMh9z+0o`?D9m=_$2)mw(qT
zKfxWds^ys)X#TcJI{5}&1HM)!Z<_EEz-4ceV7
zNzl4Ez%UJu!R5>fjAW0KnQV6=B*>p8fCjhgOkiG(MvM$*>S1A;MIlgIC$3?oo>k^z
z8%erX0KUNPg{
zH8u6~pZ{WV
zV&YH#?9U#2@Il{C0Qo4Kp4$Fit!Zpa&j0Y)Vy?ZMbM+k4o_lTPKd |