From 856c2704dfe672f49749afc6594f1d58d07a6eda Mon Sep 17 00:00:00 2001 From: ver217 Date: Mon, 5 Jun 2023 16:38:29 +0800 Subject: [PATCH 01/10] [devops] improving testmon cache --- .github/workflows/build_on_pr.yml | 139 ++++++++++++++++++++++++++++-- 1 file changed, 134 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build_on_pr.yml b/.github/workflows/build_on_pr.yml index a5a17d176c9d..b281c8bcb780 100644 --- a/.github/workflows/build_on_pr.yml +++ b/.github/workflows/build_on_pr.yml @@ -2,7 +2,7 @@ name: Build on PR on: pull_request: - types: [synchronize, opened, reopened] + types: [synchronize, opened, reopened, ready_for_review, closed] branches: - "main" - "develop" @@ -18,11 +18,83 @@ on: - "!tests/**.md" # ignore doc change - "pytest.ini" # test config change - "setup.py" # install command change + create: + delete: jobs: + prepare_cache: + name: Prepare testmon cache + if: | + github.event_name == 'create' && + github.event.ref_type == 'branch' && + github.event.repository.full_name == 'hpcaitech/ColossalAI' + runs-on: [self-hosted, gpu] + container: + image: hpcaitech/pytorch-cuda:1.12.0-11.3.0 + options: --rm + timeout-minutes: 5 + defaults: + run: + shell: bash + steps: + - name: Copy testmon cache + run: | + if [ -d /github/home/testmon_cache/${MAIN_BRANCH} ]; then + [ ! 
-z "$(ls -A /github/home/testmon_cache/${MAIN_BRANCH})" ] && cp -p -r /github/home/testmon_cache/${MAIN_BRANCH} /github/home/testmon_cache/${REF_BRANCH} + fi + env: + MAIN_BRANCH: ${{ github.event.master_branch }} + REF_BRANCH: ${{ github.event.ref }} + + test_edit: + name: Test edit + if: | + github.event_name == 'pull_request' && + github.event.action == 'edited' && + github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI' + runs-on: [self-hosted, gpu] + container: + image: hpcaitech/pytorch-cuda:1.12.0-11.3.0 + options: --rm + timeout-minutes: 5 + defaults: + run: + shell: bash + steps: + - name: Copy testmon cache + run: | + echo ${{ github.event.changes.base }} + echo ${{ github.event.changes.base.ref.from }} + + prepare_cache_for_pr: + name: Prepare testmon cache for PR + if: | + github.event_name == 'pull_request' && + (github.event.action == 'opened' || github.event.action == 'reopened' || (github.event.action == 'edited' && github.event.changes.base != null)) && + github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI' + runs-on: [self-hosted, gpu] + container: + image: hpcaitech/pytorch-cuda:1.12.0-11.3.0 + options: --rm + timeout-minutes: 5 + defaults: + run: + shell: bash + steps: + - name: Copy testmon cache + run: | + if [ -d /github/home/testmon_cache/${BASE} ]; then + [ ! 
-z "$(ls -A /github/home/testmon_cache/${BASE})" ] && mkdir /github/home/testmon_cache/_pull && cp -p -r /github/home/testmon_cache/${BASE} /github/home/testmon_cache/_pull/${PR_NUMBER} + fi + env: + BASE: ${{ github.event.pull_request.base.ref }} + PR_NUMBER: ${{ github.event.pull_request.head.ref }} + detect: name: Detect file change if: | + github.event_name == 'pull_request' && + (github.event.action == 'synchronize' || github.event.action == 'opened' || github.event.action == 'reopened' || github.event.action == 'ready_for_review') && github.event.pull_request.draft == false && github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI' outputs: @@ -135,9 +207,11 @@ jobs: - name: Restore Testmon Cache run: | - if [ -d /github/home/testmon_cache ]; then - [ ! -z "$(ls -A /github/home/testmon_cache)" ] && cp -p -r /github/home/testmon_cache/.testmondata* /__w/ColossalAI/ColossalAI/ + if [ -d /github/home/testmon_cache/_pull/${PR_NUMBER} ]; then + [ ! -z "$(ls -A /github/home/testmon_cache/_pull/${PR_NUMBER})" ] && cp -p -r /github/home/testmon_cache/_pull/${PR_NUMBER}/.testmondata* /__w/ColossalAI/ColossalAI/ fi + env: + PR_NUMBER: ${{ github.event.number }} - name: Execute Unit Testing run: | @@ -149,8 +223,10 @@ jobs: - name: Store Testmon Cache run: | - [ -d /github/home/testmon_cache ] || mkdir /github/home/testmon_cache - cp -p -r /__w/ColossalAI/ColossalAI/.testmondata* /github/home/testmon_cache/ + mkdir -p /github/home/testmon_cache/_pull/${PR_NUMBER} + cp -p -r /__w/ColossalAI/ColossalAI/.testmondata* /github/home/testmon_cache/_pull/${PR_NUMBER}/ + env: + PR_NUMBER: ${{ github.event.number }} - name: Collate artifact env: @@ -188,3 +264,56 @@ jobs: with: name: report path: report/ + + store_cache: + name: Store testmon cache for PR + if: | + github.event_name == 'pull_request' && + github.event.action == 'closed' && + github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI' + runs-on: [self-hosted, gpu] + container: + image: 
hpcaitech/pytorch-cuda:1.12.0-11.3.0 + options: --rm + timeout-minutes: 5 + defaults: + run: + shell: bash + steps: + - name: Store testmon cache if possible + if: github.event.pull_request.merged == true + run: | + if [ -d /github/home/testmon_cache/_pull/${PR_NUMBER} ]; then + [ ! -z "$(ls -A /github/home/testmon_cache/_pull/${PR_NUMBER})" ] && cp -p -r /github/home/testmon_cache/_pull/${PR_NUMBER}/.testmondata* /github/home/testmon_cache/${BASE}/ + fi + env: + BASE: ${{ github.event.pull_request.base.ref }} + PR_NUMBER: ${{ github.event.pull_request.number }} + + - name: Remove testmon cache + if: github.event.pull_request.merged != true + run: | + rm -rf /github/home/testmon_cache/_pull/${PR_NUMBER} + env: + PR_NUMBER: ${{ github.event.pull_request.number }} + + remove_cache: + name: Remove testmon cache + if: | + github.event_name == 'delete' && + github.event.ref_type == 'branch' && + github.event.repository.full_name == 'hpcaitech/ColossalAI' + runs-on: [self-hosted, gpu] + container: + image: hpcaitech/pytorch-cuda:1.12.0-11.3.0 + options: --rm + timeout-minutes: 5 + defaults: + run: + shell: bash + steps: + - name: Remove testmon cache + run: | + rm -rf /github/home/testmon_cache/${BASE} + env: + BASE: ${{ github.event.ref }} From ea8fd098c7bf455d50b14c58679e10b3efd74c3a Mon Sep 17 00:00:00 2001 From: ver217 Date: Mon, 5 Jun 2023 17:01:39 +0800 Subject: [PATCH 02/10] [devops] fix branch name with slash --- .github/workflows/build_on_pr.yml | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/workflows/build_on_pr.yml b/.github/workflows/build_on_pr.yml index b281c8bcb780..798c4ceceb5f 100644 --- a/.github/workflows/build_on_pr.yml +++ b/.github/workflows/build_on_pr.yml @@ -40,11 +40,11 @@ jobs: - name: Copy testmon cache run: | if [ -d /github/home/testmon_cache/${MAIN_BRANCH} ]; then - [ ! 
-z "$(ls -A /github/home/testmon_cache/${MAIN_BRANCH})" ] && cp -p -r /github/home/testmon_cache/${MAIN_BRANCH} /github/home/testmon_cache/${REF_BRANCH} + [ ! -z "$(ls -A /github/home/testmon_cache/${MAIN_BRANCH})" ] && cp -p -r /github/home/testmon_cache/${MAIN_BRANCH} "/github/home/testmon_cache/${REF_BRANCH}" fi env: MAIN_BRANCH: ${{ github.event.master_branch }} - REF_BRANCH: ${{ github.event.ref }} + REF_BRANCH: echo ${{ github.event.ref }} | sed "s/\// /" # replace '/' with ' ' test_edit: name: Test edit @@ -83,11 +83,11 @@ jobs: steps: - name: Copy testmon cache run: | - if [ -d /github/home/testmon_cache/${BASE} ]; then - [ ! -z "$(ls -A /github/home/testmon_cache/${BASE})" ] && mkdir /github/home/testmon_cache/_pull && cp -p -r /github/home/testmon_cache/${BASE} /github/home/testmon_cache/_pull/${PR_NUMBER} + if [ -d "/github/home/testmon_cache/${BASE}" ]; then + [ ! -z "$(ls -A "/github/home/testmon_cache/${BASE}")" ] && mkdir /github/home/testmon_cache/_pull && cp -p -r "/github/home/testmon_cache/${BASE}" /github/home/testmon_cache/_pull/${PR_NUMBER} fi env: - BASE: ${{ github.event.pull_request.base.ref }} + BASE: echo ${{ github.event.pull_request.base.ref }} | sed "s/\// /" # replace '/' with ' ' PR_NUMBER: ${{ github.event.pull_request.head.ref }} detect: @@ -284,10 +284,10 @@ jobs: if: github.event.pull_request.merged == true run: | if [ -d /github/home/testmon_cache/_pull/${PR_NUMBER} ]; then - [ ! -z "$(ls -A /github/home/testmon_cache/_pull/${PR_NUMBER})" ] && cp -p -r /github/home/testmon_cache/_pull/${PR_NUMBER}/.testmondata* /github/home/testmon_cache/${BASE}/ + [ ! 
-z "$(ls -A /github/home/testmon_cache/_pull/${PR_NUMBER})" ] && cp -p -r /github/home/testmon_cache/_pull/${PR_NUMBER}/.testmondata* "/github/home/testmon_cache/${BASE}/" fi env: - BASE: ${{ github.event.pull_request.base.ref }} + BASE: echo ${{ github.event.pull_request.base.ref }} | sed "s/\// /" # replace '/' with ' ' PR_NUMBER: ${{ github.event.pull_request.number }} - name: Remove testmon cache @@ -314,6 +314,6 @@ jobs: steps: - name: Remove testmon cache run: | - rm -rf /github/home/testmon_cache/${BASE} + rm -rf "/github/home/testmon_cache/${BASE}" env: - BASE: ${{ github.event.ref }} + BASE: echo ${{ github.event.ref }} | sed "s/\// /" # replace '/' with ' ' From 25c8ca3e2b195f2b72573341d2b84e8e96227b9a Mon Sep 17 00:00:00 2001 From: ver217 Date: Mon, 5 Jun 2023 17:21:55 +0800 Subject: [PATCH 03/10] [devops] fix branch name with slash --- .github/workflows/build_on_pr.yml | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/.github/workflows/build_on_pr.yml b/.github/workflows/build_on_pr.yml index 798c4ceceb5f..abf55a7d49fd 100644 --- a/.github/workflows/build_on_pr.yml +++ b/.github/workflows/build_on_pr.yml @@ -38,13 +38,13 @@ jobs: shell: bash steps: - name: Copy testmon cache - run: | + run: | # branch name may contain slash, we need to replace it with space + export REF_BRANCH=$(echo ${{ github.event.ref }} | sed "s/\// /") if [ -d /github/home/testmon_cache/${MAIN_BRANCH} ]; then [ ! 
-z "$(ls -A /github/home/testmon_cache/${MAIN_BRANCH})" ] && cp -p -r /github/home/testmon_cache/${MAIN_BRANCH} "/github/home/testmon_cache/${REF_BRANCH}" fi env: MAIN_BRANCH: ${{ github.event.master_branch }} - REF_BRANCH: echo ${{ github.event.ref }} | sed "s/\// /" # replace '/' with ' ' test_edit: name: Test edit @@ -82,12 +82,12 @@ jobs: shell: bash steps: - name: Copy testmon cache - run: | + run: | # branch name may contain slash, we need to replace it with space + export BASE=$(echo ${{ github.event.pull_request.base.ref }} | sed "s/\// /") if [ -d "/github/home/testmon_cache/${BASE}" ]; then [ ! -z "$(ls -A "/github/home/testmon_cache/${BASE}")" ] && mkdir /github/home/testmon_cache/_pull && cp -p -r "/github/home/testmon_cache/${BASE}" /github/home/testmon_cache/_pull/${PR_NUMBER} fi env: - BASE: echo ${{ github.event.pull_request.base.ref }} | sed "s/\// /" # replace '/' with ' ' PR_NUMBER: ${{ github.event.pull_request.head.ref }} detect: @@ -282,12 +282,12 @@ jobs: steps: - name: Store testmon cache if possible if: github.event.pull_request.merged == true - run: | + run: | # branch name may contain slash, we need to replace it with space + export BASE=$(echo ${{ github.event.pull_request.base.ref }} | sed "s/\// /") if [ -d /github/home/testmon_cache/_pull/${PR_NUMBER} ]; then [ ! 
-z "$(ls -A /github/home/testmon_cache/_pull/${PR_NUMBER})" ] && cp -p -r /github/home/testmon_cache/_pull/${PR_NUMBER}/.testmondata* "/github/home/testmon_cache/${BASE}/" fi env: - BASE: echo ${{ github.event.pull_request.base.ref }} | sed "s/\// /" # replace '/' with ' ' PR_NUMBER: ${{ github.event.pull_request.number }} - name: Remove testmon cache @@ -313,7 +313,6 @@ jobs: shell: bash steps: - name: Remove testmon cache - run: | + run: | # branch name may contain slash, we need to replace it with space + export BASE=$(echo ${{ github.event.ref }} | sed "s/\// /") rm -rf "/github/home/testmon_cache/${BASE}" - env: - BASE: echo ${{ github.event.ref }} | sed "s/\// /" # replace '/' with ' ' From e71831f7cd2c8e75faf2d2814d74c41c27c5a332 Mon Sep 17 00:00:00 2001 From: ver217 Date: Mon, 5 Jun 2023 17:40:14 +0800 Subject: [PATCH 04/10] [devops] fix edit action --- .github/workflows/build_on_pr.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_on_pr.yml b/.github/workflows/build_on_pr.yml index abf55a7d49fd..d8b5e0778126 100644 --- a/.github/workflows/build_on_pr.yml +++ b/.github/workflows/build_on_pr.yml @@ -70,7 +70,7 @@ jobs: name: Prepare testmon cache for PR if: | github.event_name == 'pull_request' && - (github.event.action == 'opened' || github.event.action == 'reopened' || (github.event.action == 'edited' && github.event.changes.base != null)) && + (github.event.action == 'opened' || github.event.action == 'reopened') && github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI' runs-on: [self-hosted, gpu] container: From 719602bae2fbd2550a8a3d6c8d72c81bed8566b8 Mon Sep 17 00:00:00 2001 From: ver217 Date: Mon, 5 Jun 2023 18:03:22 +0800 Subject: [PATCH 05/10] [devops] fix edit action --- .github/workflows/build_on_pr.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_on_pr.yml b/.github/workflows/build_on_pr.yml index d8b5e0778126..943ebd2764e1 100644 --- 
a/.github/workflows/build_on_pr.yml +++ b/.github/workflows/build_on_pr.yml @@ -50,7 +50,6 @@ jobs: name: Test edit if: | github.event_name == 'pull_request' && - github.event.action == 'edited' && github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI' runs-on: [self-hosted, gpu] container: @@ -63,6 +62,7 @@ jobs: steps: - name: Copy testmon cache run: | + echi ${{ github.event.action }} echo ${{ github.event.changes.base }} echo ${{ github.event.changes.base.ref.from }} From 436a3b14f545b9383145a39cd57f43ff7fc22aac Mon Sep 17 00:00:00 2001 From: ver217 Date: Mon, 5 Jun 2023 18:25:51 +0800 Subject: [PATCH 06/10] [devops] fix edit action --- .github/workflows/build_on_pr.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_on_pr.yml b/.github/workflows/build_on_pr.yml index 943ebd2764e1..eef58f82cf62 100644 --- a/.github/workflows/build_on_pr.yml +++ b/.github/workflows/build_on_pr.yml @@ -2,7 +2,7 @@ name: Build on PR on: pull_request: - types: [synchronize, opened, reopened, ready_for_review, closed] + types: [synchronize, opened, reopened, ready_for_review, closed, edited] branches: - "main" - "develop" From c124a2e26a40d9fbe021f8e1be5ea51c6c0d6bee Mon Sep 17 00:00:00 2001 From: ver217 Date: Mon, 5 Jun 2023 18:26:07 +0800 Subject: [PATCH 07/10] [devops] fix edit action --- .github/workflows/build_on_pr.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_on_pr.yml b/.github/workflows/build_on_pr.yml index eef58f82cf62..f08824e2013e 100644 --- a/.github/workflows/build_on_pr.yml +++ b/.github/workflows/build_on_pr.yml @@ -62,7 +62,7 @@ jobs: steps: - name: Copy testmon cache run: | - echi ${{ github.event.action }} + echo ${{ github.event.action }} echo ${{ github.event.changes.base }} echo ${{ github.event.changes.base.ref.from }} From cb90eec9bc5157b230e1aa790daa8e7794e45337 Mon Sep 17 00:00:00 2001 From: ver217 Date: Mon, 5 Jun 2023 18:29:41 +0800 Subject: [PATCH 
08/10] [devops] fix edit action --- .github/workflows/build_on_pr.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/build_on_pr.yml b/.github/workflows/build_on_pr.yml index f08824e2013e..d58a2319c021 100644 --- a/.github/workflows/build_on_pr.yml +++ b/.github/workflows/build_on_pr.yml @@ -50,6 +50,8 @@ jobs: name: Test edit if: | github.event_name == 'pull_request' && + github.event.action == 'edited' && + github.event.changes.base != null && github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI' runs-on: [self-hosted, gpu] container: From 98ddc1d02523d5643a267445683963dc746a6ad0 Mon Sep 17 00:00:00 2001 From: ver217 Date: Mon, 5 Jun 2023 19:28:01 +0800 Subject: [PATCH 09/10] [devops] fix edit action --- .github/workflows/build_on_pr.yml | 24 +----------------------- 1 file changed, 1 insertion(+), 23 deletions(-) diff --git a/.github/workflows/build_on_pr.yml b/.github/workflows/build_on_pr.yml index d58a2319c021..b5f293107310 100644 --- a/.github/workflows/build_on_pr.yml +++ b/.github/workflows/build_on_pr.yml @@ -46,33 +46,11 @@ jobs: env: MAIN_BRANCH: ${{ github.event.master_branch }} - test_edit: - name: Test edit - if: | - github.event_name == 'pull_request' && - github.event.action == 'edited' && - github.event.changes.base != null && - github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI' - runs-on: [self-hosted, gpu] - container: - image: hpcaitech/pytorch-cuda:1.12.0-11.3.0 - options: --rm - timeout-minutes: 5 - defaults: - run: - shell: bash - steps: - - name: Copy testmon cache - run: | - echo ${{ github.event.action }} - echo ${{ github.event.changes.base }} - echo ${{ github.event.changes.base.ref.from }} - prepare_cache_for_pr: name: Prepare testmon cache for PR if: | github.event_name == 'pull_request' && - (github.event.action == 'opened' || github.event.action == 'reopened') && + (github.event.action == 'opened' || github.event.action == 'reopened' || (github.event.action == 'edited' && 
github.event.changes.base != null)) && github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI' runs-on: [self-hosted, gpu] container: From 9c31858aa831439d05721cc750cecbcd4b80196c Mon Sep 17 00:00:00 2001 From: ver217 Date: Tue, 6 Jun 2023 11:24:17 +0800 Subject: [PATCH 10/10] [devops] update readme --- .github/workflows/README.md | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/.github/workflows/README.md b/.github/workflows/README.md index f40f4cc86d1b..3fad7e36f14c 100644 --- a/.github/workflows/README.md +++ b/.github/workflows/README.md @@ -43,10 +43,18 @@ I will provide the details of each workflow below. | Workflow Name | File name | Description | | ---------------------- | -------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------- | -| `Build on PR` | `build_on_pr.yml` | This workflow is triggered when a PR changes essential files. It will run all the unit tests in the repository with 4 GPUs. | +| `Build on PR` | `build_on_pr.yml` | This workflow is triggered when a PR changes essential files or when a branch is created/deleted. It will run all the unit tests in the repository with 4 GPUs. | | `Build on Schedule` | `build_on_schedule.yml` | This workflow will run the unit tests everyday with 8 GPUs. The result is sent to Lark. | | `Report test coverage` | `report_test_coverage.yml` | This PR will put up a comment to report the test coverage results when `Build` is done. | +To reduce the average time of the unit test on PR, `Build on PR` workflow manages testmon cache. + +1. When creating a new branch, it copies `cache/main/.testmondata*` to `cache/<branch>/`. +2. When creating a new PR or changing the base branch of a PR, it copies `cache/<base_ref>/.testmondata*` to `cache/_pull/<pr_number>/`. +3. When running unit tests for each PR, it restores testmon cache from `cache/_pull/<pr_number>/`. 
After the test, it stores the cache back to `cache/_pull/<pr_number>/`. +4. When a PR is closed, if it's merged, it copies `cache/_pull/<pr_number>/.testmondata*` to `cache/<base_ref>/`. Otherwise, it just removes `cache/_pull/<pr_number>`. +5. When a branch is deleted, it removes `cache/<branch>`. + ### Example Test | Workflow Name | File name | Description |