Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion .github/workflows/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,18 @@ I will provide the details of each workflow below.

| Workflow Name | File name | Description |
| ---------------------- | -------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------- |
| `Build on PR` | `build_on_pr.yml` | This workflow is triggered when a PR changes essential files. It will run all the unit tests in the repository with 4 GPUs. |
| `Build on PR` | `build_on_pr.yml` | This workflow is triggered when a PR changes essential files and a branch is created/deleted. It will run all the unit tests in the repository with 4 GPUs. |
| `Build on Schedule` | `build_on_schedule.yml` | This workflow will run the unit tests everyday with 8 GPUs. The result is sent to Lark. |
| `Report test coverage` | `report_test_coverage.yml` | This PR will put up a comment to report the test coverage results when `Build` is done. |

To reduce the average time of the unit test on PR, `Build on PR` workflow manages testmon cache.

1. When creating a new branch, it copies `cache/main/.testmondata*` to `cache/<branch>/`.
2. When creating a new PR or change the base branch of a PR, it copies `cache/<base_ref>/.testmondata*` to `cache/_pull/<pr_number>/`.
3. When running unit tests for each PR, it restores testmon cache from `cache/_pull/<pr_number>/`. After the test, it stores the cache back to `cache/_pull/<pr_number>/`.
4. When a PR is closed, if it's merged, it copies `cache/_pull/<pr_number>/.testmondata*` to `cache/<base_ref>/`. Otherwise, it just removes `cache/_pull/<pr_number>`.
5. When a branch is deleted, it removes `cache/<ref>`.

### Example Test

| Workflow Name | File name | Description |
Expand Down
118 changes: 113 additions & 5 deletions .github/workflows/build_on_pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name: Build on PR

on:
pull_request:
types: [synchronize, opened, reopened]
types: [synchronize, opened, reopened, ready_for_review, closed, edited]
branches:
- "main"
- "develop"
Expand All @@ -18,11 +18,63 @@ on:
- "!tests/**.md" # ignore doc change
- "pytest.ini" # test config change
- "setup.py" # install command change
create:
delete:

jobs:
prepare_cache:
name: Prepare testmon cache
if: |
github.event_name == 'create' &&
github.event.ref_type == 'branch' &&
github.event.repository.full_name == 'hpcaitech/ColossalAI'
runs-on: [self-hosted, gpu]
container:
image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
options: --rm
timeout-minutes: 5
defaults:
run:
shell: bash
steps:
- name: Copy testmon cache
run: | # branch name may contain slash, we need to replace it with space
export REF_BRANCH=$(echo ${{ github.event.ref }} | sed "s/\// /")
if [ -d /github/home/testmon_cache/${MAIN_BRANCH} ]; then
[ ! -z "$(ls -A /github/home/testmon_cache/${MAIN_BRANCH})" ] && cp -p -r /github/home/testmon_cache/${MAIN_BRANCH} "/github/home/testmon_cache/${REF_BRANCH}"
fi
env:
MAIN_BRANCH: ${{ github.event.master_branch }}

prepare_cache_for_pr:
name: Prepare testmon cache for PR
if: |
github.event_name == 'pull_request' &&
(github.event.action == 'opened' || github.event.action == 'reopened' || (github.event.action == 'edited' && github.event.changes.base != null)) &&
github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI'
runs-on: [self-hosted, gpu]
container:
image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
options: --rm
timeout-minutes: 5
defaults:
run:
shell: bash
steps:
- name: Copy testmon cache
run: | # branch name may contain slash, we need to replace it with space
export BASE=$(echo ${{ github.event.pull_request.base.ref }} | sed "s/\// /")
if [ -d "/github/home/testmon_cache/${BASE}" ]; then
[ ! -z "$(ls -A "/github/home/testmon_cache/${BASE}")" ] && mkdir /github/home/testmon_cache/_pull && cp -p -r "/github/home/testmon_cache/${BASE}" /github/home/testmon_cache/_pull/${PR_NUMBER}
fi
env:
PR_NUMBER: ${{ github.event.pull_request.head.ref }}

detect:
name: Detect file change
if: |
github.event_name == 'pull_request' &&
(github.event.action == 'synchronize' || github.event.action == 'opened' || github.event.action == 'reopened' || github.event.action == 'ready_for_review') &&
github.event.pull_request.draft == false &&
github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI'
outputs:
Expand Down Expand Up @@ -135,9 +187,11 @@ jobs:

- name: Restore Testmon Cache
run: |
if [ -d /github/home/testmon_cache ]; then
[ ! -z "$(ls -A /github/home/testmon_cache)" ] && cp -p -r /github/home/testmon_cache/.testmondata* /__w/ColossalAI/ColossalAI/
if [ -d /github/home/testmon_cache/_pull/${PR_NUMBER} ]; then
[ ! -z "$(ls -A /github/home/testmon_cache/_pull/${PR_NUMBER})" ] && cp -p -r /github/home/testmon_cache/_pull/${PR_NUMBER}/.testmondata* /__w/ColossalAI/ColossalAI/
fi
env:
PR_NUMBER: ${{ github.event.number }}

- name: Execute Unit Testing
run: |
Expand All @@ -149,8 +203,10 @@ jobs:

- name: Store Testmon Cache
run: |
[ -d /github/home/testmon_cache ] || mkdir /github/home/testmon_cache
cp -p -r /__w/ColossalAI/ColossalAI/.testmondata* /github/home/testmon_cache/
mkdir -p /github/home/testmon_cache/_pull/${PR_NUMBER}
cp -p -r /__w/ColossalAI/ColossalAI/.testmondata* /github/home/testmon_cache/_pull/${PR_NUMBER}/
env:
PR_NUMBER: ${{ github.event.number }}

- name: Collate artifact
env:
Expand Down Expand Up @@ -188,3 +244,55 @@ jobs:
with:
name: report
path: report/

store_cache:
name: Store testmon cache for PR
if: |
github.event_name == 'pull_request' &&
github.event.action == 'closed' &&
github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI'
runs-on: [self-hosted, gpu]
container:
image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
options: --rm
timeout-minutes: 5
defaults:
run:
shell: bash
steps:
- name: Store testmon cache if possible
if: github.event.pull_request.merged == true
run: | # branch name may contain slash, we need to replace it with space
export BASE=$(echo ${{ github.event.pull_request.base.ref }} | sed "s/\// /")
if [ -d /github/home/testmon_cache/_pull/${PR_NUMBER} ]; then
[ ! -z "$(ls -A /github/home/testmon_cache/_pull/${PR_NUMBER})" ] && cp -p -r /github/home/testmon_cache/_pull/${PR_NUMBER}/.testmondata* "/github/home/testmon_cache/${BASE}/"
fi
env:
PR_NUMBER: ${{ github.event.pull_request.number }}

- name: Remove testmon cache
if: github.event.pull_request.merged != true
run: |
rm -rf /github/home/testmon_cache/_pull/${PR_NUMBER}
env:
PR_NUMBER: ${{ github.event.pull_request.number }}

remove_cache:
name: Remove testmon cache
if: |
github.event_name == 'delete' &&
github.event.ref_type == 'branch' &&
github.event.repository.full_name == 'hpcaitech/ColossalAI'
runs-on: [self-hosted, gpu]
container:
image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
options: --rm
timeout-minutes: 5
defaults:
run:
shell: bash
steps:
- name: Remove testmon cache
run: | # branch name may contain slash, we need to replace it with space
export BASE=$(echo ${{ github.event.ref }} | sed "s/\// /")
rm -rf "/github/home/testmon_cache/${BASE}"