Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/build_on_pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ jobs:
github.event.repository.full_name == 'hpcaitech/ColossalAI'
runs-on: [self-hosted, gpu]
container:
image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
image: hpcaitech/pytorch-cuda:1.13.0-11.7.0
options: --rm
timeout-minutes: 5
defaults:
Expand All @@ -54,7 +54,7 @@ jobs:
github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI'
runs-on: [self-hosted, gpu]
container:
image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
image: hpcaitech/pytorch-cuda:1.13.0-11.7.0
options: --rm
timeout-minutes: 5
defaults:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/build_on_schedule.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ jobs:
if: github.repository == 'hpcaitech/ColossalAI'
runs-on: [self-hosted, 8-gpu]
container:
image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
image: hpcaitech/pytorch-cuda:1.13.0-11.7.0
options: --gpus all --rm -v /data/scratch/cifar-10:/data/scratch/cifar-10 -v /data/scratch/llama-tiny:/data/scratch/llama-tiny
timeout-minutes: 40
steps:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/doc_test_on_pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ jobs:
needs: detect-changed-doc
runs-on: [self-hosted, gpu]
container:
image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
image: hpcaitech/pytorch-cuda:1.13.0-11.7.0
options: --gpus all --rm
timeout-minutes: 20
defaults:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/doc_test_on_schedule.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ jobs:
name: Test the changed Doc
runs-on: [self-hosted, gpu]
container:
image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
image: hpcaitech/pytorch-cuda:1.13.0-11.7.0
options: --gpus all --rm
timeout-minutes: 60
steps:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/example_check_on_dispatch.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ jobs:
fail-fast: false
matrix: ${{fromJson(needs.manual_check_matrix_preparation.outputs.matrix)}}
container:
image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
image: hpcaitech/pytorch-cuda:1.13.0-11.7.0
options: --gpus all --rm -v /data/scratch/examples-data:/data/
timeout-minutes: 10
steps:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/example_check_on_pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ jobs:
fail-fast: false
matrix: ${{fromJson(needs.detect-changed-example.outputs.matrix)}}
container:
image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
image: hpcaitech/pytorch-cuda:1.13.0-11.7.0
options: --gpus all --rm -v /data/scratch/examples-data:/data/
timeout-minutes: 20
concurrency:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/example_check_on_schedule.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ jobs:
fail-fast: false
matrix: ${{fromJson(needs.matrix_preparation.outputs.matrix)}}
container:
image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
image: hpcaitech/pytorch-cuda:1.13.0-11.7.0
timeout-minutes: 10
steps:
- name: 📚 Checkout
Expand Down
21 changes: 11 additions & 10 deletions .github/workflows/run_chatgpt_examples.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ jobs:
github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI'
runs-on: [self-hosted, gpu]
container:
image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
options: --gpus all --rm -v /data/scratch/github_actions/chat:/data/scratch/github_actions/chat --shm-size=10.24gb
image: hpcaitech/pytorch-cuda:1.13.0-11.7.0
options: --gpus all --rm -v /data/scratch/colossal-llama2:/data/scratch/colossal-llama2 --shm-size=10.24gb
timeout-minutes: 30
defaults:
run:
Expand All @@ -30,24 +30,25 @@ jobs:

- name: Install ChatGPT
run: |
cd applications/Chat
cd applications/ColossalChat
pip install -v .
pip install -r examples/requirements.txt

- name: Install Transformers
run: |
pip install transformers==4.30.2
pip install transformers==4.32.1

- name: Execute Examples
run: |
cd applications/Chat
cd applications/ColossalChat
rm -rf ~/.cache/colossalai
./tests/test_inference.sh
./tests/test_benchmarks.sh
./tests/test_data_preparation.sh
./tests/test_train.sh
env:
NCCL_SHM_DISABLE: 1
MAX_JOBS: 8
SFT_DATASET: /data/scratch/github_actions/chat/data.json
PROMPT_DATASET: /data/scratch/github_actions/chat/prompts_en.jsonl
PRETRAIN_DATASET: /data/scratch/github_actions/chat/alpaca_data.json
PRETRAINED_MODEL_PATH: /data/scratch/colossal-llama2/models
SFT_DATASET: /data/scratch/colossal-llama2/colossal_chat_test_data/sft
PROMPT_DATASET: /data/scratch/colossal-llama2/colossal_chat_test_data/prompt
PRETRAIN_DATASET: /data/scratch/colossal-llama2/colossal_chat_test_data/ptx
PREFERENCE_DATASET: /data/scratch/colossal-llama2/colossal_chat_test_data/preference
9 changes: 5 additions & 4 deletions .github/workflows/run_chatgpt_unit_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI'
runs-on: [self-hosted, gpu]
container:
image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
image: hpcaitech/pytorch-cuda:1.13.0-11.7.0
options: --gpus all --rm -v /data/scratch/chatgpt:/data/scratch/chatgpt
timeout-minutes: 30
defaults:
Expand All @@ -32,15 +32,16 @@ jobs:

- name: Install ChatGPT
run: |
cd applications/Chat
cd applications/ColossalChat
pip install -v .
pip install -r requirements-test.txt

- name: Execute Unit Testing
run: |
cd applications/Chat
cd applications/ColossalChat
rm -rf ~/.cache/colossalai
pytest tests/
# pytest tests/
# Disabled temporally because some unit tests are not implemented
env:
NCCL_SHM_DISABLE: 1
MAX_JOBS: 8
4 changes: 2 additions & 2 deletions .github/workflows/run_colossalqa_unit_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ jobs:
github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI'
runs-on: [self-hosted, gpu]
container:
image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
image: hpcaitech/pytorch-cuda:1.13.0-11.7.0
volumes:
- /data/scratch/test_data_colossalqa:/data/scratch/test_data_colossalqa
- /data/scratch/llama-tiny:/data/scratch/llama-tiny
Expand Down Expand Up @@ -51,4 +51,4 @@ jobs:
TEST_DATA_PATH_EN: /data/scratch/test_data_colossalqa/companies.txt
TEST_DATA_PATH_ZH: /data/scratch/test_data_colossalqa/companies_zh.txt
TEST_DOCUMENT_LOADER_DATA_PATH: /data/scratch/test_data_colossalqa/tests/*
SQL_FILE_PATH: /data/scratch/test_data_colossalqa/sql_file_path
SQL_FILE_PATH: /data/scratch/test_data_colossalqa/sql_file_path
38 changes: 0 additions & 38 deletions applications/Chat/benchmarks/README.md

This file was deleted.

Loading