From ea7c497ab7f63dff6123cbc1a788f39b80b41414 Mon Sep 17 00:00:00 2001
From: FrankLeeeee
Date: Mon, 6 Feb 2023 16:51:47 +0800
Subject: [PATCH 1/2] [workflow] added cuda extension build test before release

---
 .bdist.json                         | 24 --------
 .cuda_ext.json                      | 16 +++++
 .github/workflows/release_bdist.yml | 91 ++++++-----------------------
 3 files changed, 34 insertions(+), 97 deletions(-)
 delete mode 100644 .bdist.json
 create mode 100644 .cuda_ext.json

diff --git a/.bdist.json b/.bdist.json
deleted file mode 100644
index 8693bca489e8..000000000000
--- a/.bdist.json
+++ /dev/null
@@ -1,24 +0,0 @@
-{
-  "build": [
-    {
-      "torch_version": "1.11.0",
-      "cuda_image": "hpcaitech/cuda-conda:10.2"
-    },
-    {
-      "torch_version": "1.11.0",
-      "cuda_image": "hpcaitech/cuda-conda:11.3"
-    },
-    {
-      "torch_version": "1.12.1",
-      "cuda_image": "hpcaitech/cuda-conda:10.2"
-    },
-    {
-      "torch_version": "1.12.1",
-      "cuda_image": "hpcaitech/cuda-conda:11.3"
-    },
-    {
-      "torch_version": "1.12.1",
-      "cuda_image": "hpcaitech/cuda-conda:11.6"
-    }
-  ]
-}
diff --git a/.cuda_ext.json b/.cuda_ext.json
new file mode 100644
index 000000000000..eba19cf05e31
--- /dev/null
+++ b/.cuda_ext.json
@@ -0,0 +1,16 @@
+{
+  "build": [
+    {
+      "torch_command": "pip install torch==1.12.1+cu102 torchvision==0.13.1+cu102 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu102",
+      "cuda_image": "hpcaitech/cuda-conda:10.2"
+    },
+    {
+      "torch_command": "pip install torch==1.12.1+cu113 torchvision==0.13.1+cu113 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu113",
+      "cuda_image": "hpcaitech/cuda-conda:11.3"
+    },
+    {
+      "torch_command": "pip install torch==1.12.1+cu116 torchvision==0.13.1+cu116 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu116",
+      "cuda_image": "hpcaitech/cuda-conda:11.6"
+    }
+  ]
+}
diff --git a/.github/workflows/release_bdist.yml b/.github/workflows/release_bdist.yml
index c9c51df8d074..3faf2eb4c468 100644
--- a/.github/workflows/release_bdist.yml
+++ b/.github/workflows/release_bdist.yml
@@ -2,98 +2,43 @@ name: Release bdist wheel
 
 on:
   workflow_dispatch:
-    inputs:
-      torch_version:
-        type: string
-        description: torch version, separated by comma
-        required: true
-        default: "all"
-      cuda_version:
-        type: string
-        description: cuda version, separated by comma
-        required: true
-      github_ref:
-        type: string
-        description: Branch or Tag
-        default: 'main'
-        required: true
+  pull_request:
+    paths:
+      - 'version.txt'
 
 jobs:
   matrix_preparation:
     name: Prepare Container List
+    if: github.repository == 'hpcaitech/ColossalAI'
     runs-on: ubuntu-latest
     outputs:
       matrix: ${{ steps.set-matrix.outputs.matrix }}
     steps:
-    - id: set-matrix
-      env:
-        TORCH_VERSIONS: ${{ inputs.torch_version }}
-        CUDA_VERSIONS: ${{ inputs.cuda_version }}
-      run: |
-        echo $TORCH_VERSIONS
-        echo $CUDA_VERSIONS
-        IFS=','
-        DOCKER_IMAGE=()
-
-        for cv in $CUDA_VERSIONS
-        do
-            DOCKER_IMAGE+=("\"hpcaitech/cuda-conda:${cv}\"")
-        done
-
-        container=$( IFS=',' ; echo "${DOCKER_IMAGE[*]}" )
-        container="[${container}]"
-        echo "$container"
-        echo "::set-output name=matrix::{\"container\":$(echo "$container")}"
+      - uses: actions/checkout@v3
+      - id: set-matrix
+        run: |
+          cuda_ext=$(cat .cuda_ext.json | tr '\n' ' ')
+          echo "matrix=${cuda_ext}" >> $GITHUB_OUTPUT
 
   build:
     name: Release bdist wheels
     needs: matrix_preparation
-    if: github.repository == 'hpcaitech/ColossalAI' && contains(fromJson('["FrankLeeeee", "ver217", "feifeibear", "kurisusnowdeng"]'), github.actor)
     runs-on: [self-hosted, gpu]
     strategy:
       fail-fast: false
       matrix: ${{fromJson(needs.matrix_preparation.outputs.matrix)}}
     container:
-      image: ${{ matrix.container }}
+      image: ${{ matrix.build.cuda_image }}
       options: --gpus all --rm
     steps:
-      - uses: actions/checkout@v2
-        with:
-          fetch-depth: 0
-      # cub is for cuda 10.2
-      - name: Copy scripts and checkout
-        run: |
-          cp -r ./.github/workflows/scripts/* ./
-
-          # link the cache diretories to current path
-          ln -s /github/home/conda_pkgs ./conda_pkgs
-          ln -s /github/home/pip_wheels ./pip_wheels
-
-          # set the conda package path
-          echo "pkgs_dirs:\n - $PWD/conda_pkgs" > ~/.condarc
-
-          # set safe directory
-          git config --global --add safe.directory /__w/ColossalAI/ColossalAI
+      - uses: actions/checkout@v3
 
-          # check out
-          git checkout $git_ref
+      - name: Install PyTorch
+        # Hand the matrix value to the shell via an env var instead of expanding it into the script text.
+        run: eval "${TORCH_COMMAND}"
+        env:
+          TORCH_COMMAND: ${{ matrix.build.torch_command }}
 
-          # get cub package for cuda 10.2
-          wget https://github.com/NVIDIA/cub/archive/refs/tags/1.8.0.zip
-          unzip 1.8.0.zip
-        env:
-          git_ref: ${{ github.event.inputs.github_ref }}
-      - name: Build bdist wheel
+      - name: Build
         run: |
-          pip install beautifulsoup4 requests packaging
-          python ./build_colossalai_wheel.py --torch_version $TORCH_VERSIONS
-        env:
-          TORCH_VERSIONS: ${{ inputs.torch_version }}
-      - name: 🚀 Deploy
-        uses: garygrossgarten/github-action-scp@release
-        with:
-          local: all_dist
-          remote: ${{ secrets.PRIVATE_PYPI_DIR }}
-          host: ${{ secrets.PRIVATE_PYPI_HOST }}
-          username: ${{ secrets.PRIVATE_PYPI_USER }}
-          password: ${{ secrets.PRIVATE_PYPI_PASSWD }}
+          CUDA_EXT=1 pip install -v .

From e0d6a7574d6e85bee86120714f4e5ee66830ce3d Mon Sep 17 00:00:00 2001
From: FrankLeeeee
Date: Mon, 6 Feb 2023 17:06:36 +0800
Subject: [PATCH 2/2] polish code

---
 .../{release_bdist.yml => cuda_ext_check_before_merge.yml}     | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
 rename .github/workflows/{release_bdist.yml => cuda_ext_check_before_merge.yml} (95%)

diff --git a/.github/workflows/release_bdist.yml b/.github/workflows/cuda_ext_check_before_merge.yml
similarity index 95%
rename from .github/workflows/release_bdist.yml
rename to .github/workflows/cuda_ext_check_before_merge.yml
index 3faf2eb4c468..eba5bb98ec07 100644
--- a/.github/workflows/release_bdist.yml
+++ b/.github/workflows/cuda_ext_check_before_merge.yml
@@ -1,4 +1,4 @@
-name: Release bdist wheel
+name: Check CUDA Extension Build Before Merge
 
 on:
   workflow_dispatch:
@@ -15,6 +15,7 @@ jobs:
       matrix: ${{ steps.set-matrix.outputs.matrix }}
     steps:
       - uses: actions/checkout@v3
+
       - id: set-matrix
         run: |
           cuda_ext=$(cat .cuda_ext.json | tr '\n' ' ')