diff --git a/.github/workflows/build_on_pr.yml b/.github/workflows/build_on_pr.yml index 8a1bc8e113de..291d6adac2b2 100644 --- a/.github/workflows/build_on_pr.yml +++ b/.github/workflows/build_on_pr.yml @@ -61,8 +61,8 @@ jobs: run: shell: bash concurrency: - group: ${{ github.head_ref }} - cancel-in-progress: false + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-repare-cache + cancel-in-progress: true steps: - name: Copy testmon cache run: | # branch name may contain slash, we need to replace it with space @@ -87,8 +87,8 @@ jobs: anyLibraryFileChanged: ${{ steps.find-lib-change.outputs.any_changed }} runs-on: ubuntu-latest concurrency: - group: ${{ github.head_ref }} - cancel-in-progress: false + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-detect-change + cancel-in-progress: true steps: - uses: actions/checkout@v2 with: @@ -147,8 +147,8 @@ jobs: run: shell: bash concurrency: - group: ${{ github.head_ref }} - cancel-in-progress: false + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-run-test + cancel-in-progress: true steps: - name: Checkout TensorNVMe uses: actions/checkout@v2 @@ -208,7 +208,7 @@ jobs: - name: Execute Unit Testing run: | - CURL_CA_BUNDLE="" PYTHONPATH=$PWD pytest --testmon --testmon-cov=. --durations=10 tests/ + CURL_CA_BUNDLE="" PYTHONPATH=$PWD pytest -m "not largedist" --testmon --testmon-forceselect --testmon-cov=. --durations=10 tests/ env: DATA: /data/scratch/cifar-10 NCCL_SHM_DISABLE: 1 diff --git a/.github/workflows/compatiblity_test_on_dispatch.yml b/.github/workflows/compatiblity_test_on_dispatch.yml index 1778d64ee287..2f03c8ced98d 100644 --- a/.github/workflows/compatiblity_test_on_dispatch.yml +++ b/.github/workflows/compatiblity_test_on_dispatch.yml @@ -44,7 +44,7 @@ jobs: name: Test for PyTorch Compatibility needs: matrix_preparation if: github.repository == 'hpcaitech/ColossalAI' - runs-on: [self-hosted, gpu] + runs-on: [self-hosted, 8-gpu] strategy: fail-fast: false matrix: ${{fromJson(needs.matrix_preparation.outputs.matrix)}} @@ -64,7 +64,7 @@ jobs: - name: Install tensornvme run: | cd TensorNVMe - conda install cmake + apt update && apt install -y cmake pip install -r requirements.txt pip install -v . - uses: actions/checkout@v2 @@ -83,8 +83,7 @@ jobs: fi - name: Install Colossal-AI run: | - pip install -r requirements/requirements.txt - pip install -v --no-cache-dir . + CUDA_EXT=1 pip install -v . pip install -r requirements/requirements-test.txt - name: Unit Testing run: | diff --git a/.github/workflows/compatiblity_test_on_pr.yml b/.github/workflows/compatiblity_test_on_pr.yml index c0f45c65a7fc..a621c7e3427d 100644 --- a/.github/workflows/compatiblity_test_on_pr.yml +++ b/.github/workflows/compatiblity_test_on_pr.yml @@ -13,8 +13,8 @@ jobs: outputs: matrix: ${{ steps.set-matrix.outputs.matrix }} concurrency: - group: ${{ github.head_ref }} - cancel-in-progress: false + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-prepare-matrix + cancel-in-progress: true steps: - uses: actions/checkout@v3 - id: set-matrix @@ -35,7 +35,7 @@ jobs: name: Test for PyTorch Compatibility needs: matrix_preparation if: github.repository == 'hpcaitech/ColossalAI' - runs-on: [self-hosted, gpu] + runs-on: [self-hosted, 8-gpu] strategy: fail-fast: false matrix: ${{fromJson(needs.matrix_preparation.outputs.matrix)}} @@ -44,8 +44,8 @@ jobs: options: --gpus all --rm -v /data/scratch/cifar-10:/data/scratch/cifar-10 timeout-minutes: 120 concurrency: - group: ${{ github.head_ref }} - cancel-in-progress: false + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-run-test-${{ matrix.container }} + cancel-in-progress: true steps: - name: Install dependencies run: | @@ -58,7 +58,7 @@ jobs: - name: Install tensornvme run: | cd TensorNVMe - conda install cmake + apt update && apt install -y cmake pip install -r requirements.txt pip install -v . - uses: actions/checkout@v2 @@ -78,7 +78,7 @@ jobs: - name: Install Colossal-AI run: | - pip install -v --no-cache-dir . + CUDA_EXT=1 pip install -v . pip install -r requirements/requirements-test.txt - name: Unit Testing run: | diff --git a/.github/workflows/compatiblity_test_on_schedule.yml b/.github/workflows/compatiblity_test_on_schedule.yml index 15ac4f1a92bb..9933224f5675 100644 --- a/.github/workflows/compatiblity_test_on_schedule.yml +++ b/.github/workflows/compatiblity_test_on_schedule.yml @@ -32,7 +32,7 @@ jobs: name: Test for PyTorch Compatibility needs: matrix_preparation if: github.repository == 'hpcaitech/ColossalAI' - runs-on: [self-hosted, gpu] + runs-on: [self-hosted, 8-gpu] strategy: fail-fast: false matrix: ${{fromJson(needs.matrix_preparation.outputs.matrix)}} @@ -54,7 +54,7 @@ jobs: - name: Install tensornvme run: | cd TensorNVMe - conda install cmake + apt update && apt install -y cmake pip install -r requirements.txt pip install -v . - uses: actions/checkout@v2 @@ -75,7 +75,7 @@ jobs: - name: Install Colossal-AI run: | - pip install -v --no-cache-dir . + CUDA_EXT=1 pip install -v . pip install -r requirements/requirements-test.txt - name: Unit Testing diff --git a/.github/workflows/doc_check_on_pr.yml b/.github/workflows/doc_check_on_pr.yml index 848991bd3a82..ee8a82128dd7 100644 --- a/.github/workflows/doc_check_on_pr.yml +++ b/.github/workflows/doc_check_on_pr.yml @@ -17,8 +17,8 @@ jobs: github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI' runs-on: ubuntu-latest concurrency: - group: ${{ github.head_ref }} - cancel-in-progress: false + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-check-i18n + cancel-in-progress: true steps: - uses: actions/checkout@v2 @@ -35,8 +35,8 @@ jobs: github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI' runs-on: ubuntu-latest concurrency: - group: ${{ github.head_ref }} - cancel-in-progress: false + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-check-doc + cancel-in-progress: true steps: - uses: actions/checkout@v2 with: diff --git a/.github/workflows/doc_test_on_pr.yml b/.github/workflows/doc_test_on_pr.yml index 2a07a2297bfb..a3df2c50e6d3 100644 --- a/.github/workflows/doc_test_on_pr.yml +++ b/.github/workflows/doc_test_on_pr.yml @@ -20,8 +20,8 @@ jobs: any_changed: ${{ steps.changed-files.outputs.any_changed }} changed_files: ${{ steps.changed-files.outputs.all_changed_files }} concurrency: - group: ${{ github.head_ref }} - cancel-in-progress: false + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-detect-change + cancel-in-progress: true name: Detect changed example files steps: - uses: actions/checkout@v3 @@ -63,8 +63,8 @@ jobs: run: shell: bash concurrency: - group: ${{ github.head_ref }} - cancel-in-progress: false + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-run-doctest + cancel-in-progress: true steps: - name: Checkout ColossalAI-Documentation uses: actions/checkout@v2 diff --git a/.github/workflows/example_check_on_pr.yml b/.github/workflows/example_check_on_pr.yml index ee456c25f2b5..ec23b9d1c59f 100644 --- a/.github/workflows/example_check_on_pr.yml +++ b/.github/workflows/example_check_on_pr.yml @@ -21,8 +21,8 @@ jobs: anyChanged: ${{ steps.setup-matrix.outputs.anyChanged }} name: Detect changed example files concurrency: - group: ${{ github.head_ref }} - cancel-in-progress: false + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-detect-change + cancel-in-progress: true steps: - uses: actions/checkout@v3 with: @@ -81,8 +81,8 @@ jobs: options: --gpus all --rm -v /data/scratch/examples-data:/data/ timeout-minutes: 10 concurrency: - group: ${{ github.head_ref }} - cancel-in-progress: false + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-run-example-${{ matrix.directory }} + cancel-in-progress: true steps: - uses: actions/checkout@v3 diff --git a/.github/workflows/run_chatgpt_examples.yml b/.github/workflows/run_chatgpt_examples.yml index 650689498fda..a336526897e2 100644 --- a/.github/workflows/run_chatgpt_examples.yml +++ b/.github/workflows/run_chatgpt_examples.yml @@ -28,9 +28,8 @@ jobs: - name: Checkout ColossalAI uses: actions/checkout@v2 - - name: Install ColossalAI and ChatGPT + - name: Install ChatGPT run: | - pip install -e . cd applications/Chat pip install -v . pip install -r examples/requirements.txt diff --git a/.github/workflows/run_chatgpt_unit_tests.yml b/.github/workflows/run_chatgpt_unit_tests.yml index 47c80fc9a9fe..ec5c8ffa319f 100644 --- a/.github/workflows/run_chatgpt_unit_tests.yml +++ b/.github/workflows/run_chatgpt_unit_tests.yml @@ -30,9 +30,8 @@ jobs: - name: Checkout ColossalAI uses: actions/checkout@v2 - - name: Install ColossalAI and ChatGPT + - name: Install ChatGPT run: | - pip install -e . cd applications/Chat pip install -v . pip install -r requirements-test.txt diff --git a/LICENSE b/LICENSE index 0db47bd8986f..280129eb8f35 100644 --- a/LICENSE +++ b/LICENSE @@ -397,6 +397,39 @@ Copyright 2021- HPC-AI Technology Inc. All rights reserved. ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ---------------- LICENSE FOR VLLM TEAM ---------------- + + from VLLM TEAM: + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + https://github.com/vllm-project/vllm/blob/main/LICENSE + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + ---------------- LICENSE FOR LIGHTLLM TEAM ---------------- + + from LIGHTLLM TEAM: + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + https://github.com/ModelTC/lightllm/blob/main/LICENSE + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + ---------------- LICENSE FOR AutoGPTQ ---------------- From AutoGPTQ: diff --git a/README.md b/README.md index 44e4f97f1f4e..0ddcdab741a4 100644 --- a/README.md +++ b/README.md @@ -25,6 +25,7 @@ ## Latest News +* [2023/09] [70 Billion Parameter LLaMA2 Model Training Accelerated by 195%](https://www.hpc-ai.tech/blog/70b-llama2-training) * [2023/07] [HPC-AI Tech Raises 22 Million USD in Series A Funding](https://www.hpc-ai.tech/blog/hpc-ai-tech-raises-22-million-usd-in-series-a-funding-to-fuel-team-expansion-and-business-growth) * [2023/07] [65B Model Pretraining Accelerated by 38%, Best Practices for Building LLaMA-Like Base Models Open-Source](https://www.hpc-ai.tech/blog/large-model-pretraining) * [2023/03] [ColossalChat: An Open-Source Solution for Cloning ChatGPT With a Complete RLHF Pipeline](https://medium.com/@yangyou_berkeley/colossalchat-an-open-source-solution-for-cloning-chatgpt-with-a-complete-rlhf-pipeline-5edf08fb538b) @@ -50,7 +51,7 @@
  • Parallel Training Demo