diff --git a/README.md b/README.md
index 34c8a6b730a3..21670e1e59fb 100644
--- a/README.md
+++ b/README.md
@@ -25,6 +25,7 @@
## Latest News
+* [2023/07] [65B Model Pretraining Accelerated by 38%, Best Practices for Building LLaMA-Like Base Models Open-Source](https://www.hpc-ai.tech/blog/large-model-pretraining)
* [2023/03] [ColossalChat: An Open-Source Solution for Cloning ChatGPT With a Complete RLHF Pipeline](https://medium.com/@yangyou_berkeley/colossalchat-an-open-source-solution-for-cloning-chatgpt-with-a-complete-rlhf-pipeline-5edf08fb538b)
* [2023/03] [Intel and Colossal-AI Partner to Deliver Cost-Efficient Open-Source Solution for Protein Folding Structure Prediction](https://www.hpc-ai.tech/blog/intel-habana)
* [2023/03] [AWS and Google Fund Colossal-AI with Startup Cloud Programs](https://www.hpc-ai.tech/blog/aws-and-google-fund-colossal-ai-with-startup-cloud-programs)
@@ -49,6 +50,7 @@
+
+
diff --git a/colossalai/communication/p2p.py b/colossalai/communication/p2p.py
index 1f20fca4f74d..d28d140168fd 100644
--- a/colossalai/communication/p2p.py
+++ b/colossalai/communication/p2p.py
@@ -1,16 +1,18 @@
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
+import operator
+from functools import reduce
from typing import List, Tuple, Union
+
import torch
import torch.distributed as dist
from colossalai.context.parallel_mode import ParallelMode
from colossalai.core import global_context as gpc
from colossalai.utils import get_current_device
-from functools import reduce
-import operator
-from .utils import split_tensor_into_1d_equal_chunks, gather_split_1d_tensor
+
+from .utils import gather_split_1d_tensor, split_tensor_into_1d_equal_chunks
TensorShape = Union[torch.Size, List[int], Tuple[int]]
@@ -260,7 +262,7 @@ def send_forward_recv_backward(output_tensor,
next_rank=None,
dtype=torch.float,
scatter_gather_tensors=False) -> Union[torch.Tensor, List[torch.Tensor]]:
- """Batched communication operation. Sends the input tensor to the
+ """Batched communication operation. Sends the input tensor to the
next stage in pipeline, while receives the gradient tensor from the
next stage in pipeline as the input gradient tensor of this stage.
@@ -319,7 +321,7 @@ def send_forward_recv_forward(output_tensor,
next_rank=None,
dtype=torch.float,
scatter_gather_tensors=False) -> Union[torch.Tensor, List[torch.Tensor]]:
- """Batched communication operation. Sends the input tensor to the
+ """Batched communication operation. Sends the input tensor to the
next stage in pipeline, while receives the output tensor from the
previous stage in pipeline as the input of this stage.
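
The two docstrings touched above describe a batched point-to-point exchange: a pipeline stage sends its output tensor forward while, in the same batched call, receiving a tensor back (the output gradient or the previous stage's output). Below is a minimal sketch of that pattern written directly against `torch.distributed` batched isend/irecv; it is an illustration of the idea, not Colossal-AI's actual implementation. The function name, the `grad_shape` and `next_rank` parameters, and the assumption of an already initialized process group and pre-agreed tensor shapes are all illustrative.

```python
import torch
import torch.distributed as dist


def send_forward_recv_backward_sketch(output_tensor: torch.Tensor,
                                      grad_shape: torch.Size,
                                      next_rank: int) -> torch.Tensor:
    # Buffer for the gradient tensor coming back from the next pipeline stage.
    grad_tensor = torch.empty(grad_shape,
                              dtype=output_tensor.dtype,
                              device=output_tensor.device)

    # Post the forward send and the backward receive together so the two
    # transfers can overlap instead of running as two blocking calls.
    ops = [
        dist.P2POp(dist.isend, output_tensor, next_rank),
        dist.P2POp(dist.irecv, grad_tensor, next_rank),
    ]
    for req in dist.batch_isend_irecv(ops):
        req.wait()

    if torch.cuda.is_available():
        torch.cuda.synchronize()
    return grad_tensor
```
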
diff --git a/docs/README-zh-Hans.md b/docs/README-zh-Hans.md
index 1dde7a816676..e229c65d890c 100644
--- a/docs/README-zh-Hans.md
+++ b/docs/README-zh-Hans.md
@@ -24,6 +24,7 @@
## 新闻
+* [2023/07] [65B Model Pretraining Accelerated by 38%, Best Practices for Building LLaMA-Like Base Models Open-Source](https://www.hpc-ai.tech/blog/large-model-pretraining)
* [2023/03] [ColossalChat: An Open-Source Solution for Cloning ChatGPT With a Complete RLHF Pipeline](https://medium.com/@yangyou_berkeley/colossalchat-an-open-source-solution-for-cloning-chatgpt-with-a-complete-rlhf-pipeline-5edf08fb538b)
* [2023/03] [Intel and Colossal-AI Partner to Deliver Cost-Efficient Open-Source Solution for Protein Folding Structure Prediction](https://www.hpc-ai.tech/blog/intel-habana)
* [2023/03] [AWS and Google Fund Colossal-AI with Startup Cloud Programs](https://www.hpc-ai.tech/blog/aws-and-google-fund-colossal-ai-with-startup-cloud-programs)
@@ -49,6 +50,7 @@
(返回顶端)
## 并行训练样例展示
+### LLaMA
+
+
+
diff --git a/examples/language/llama/README.md b/examples/language/llama/README.md
new file mode 100644
index 000000000000..871804f2ca86
--- /dev/null
+++ b/examples/language/llama/README.md
@@ -0,0 +1,11 @@
+# Pretraining LLaMA: best practices for building LLaMA-like base models
+
+
+
+