From db2241229812e6e4e53a126b77c56ff3a48001db Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sat, 28 Dec 2024 03:02:33 +0000 Subject: [PATCH 01/19] add a doc build workflow --- .github/workflows/gh-build-and-test.yml | 16 ++++- .github/workflows/gh-build-docs.yml | 79 +++++++++++++++++++++++ cuda_bindings/docs/build_docs.sh | 77 ++++++++++++++--------- cuda_core/docs/build_docs.sh | 75 +++++++++++++--------- cuda_python/docs/build_all_docs.sh | 48 +++++++------- cuda_python/docs/build_docs.sh | 83 +++++++++++++++---------- 6 files changed, 262 insertions(+), 116 deletions(-) create mode 100644 .github/workflows/gh-build-docs.yml diff --git a/.github/workflows/gh-build-and-test.yml b/.github/workflows/gh-build-and-test.yml index 51f4bd87b4..238796bfdc 100644 --- a/.github/workflows/gh-build-and-test.yml +++ b/.github/workflows/gh-build-and-test.yml @@ -192,7 +192,7 @@ jobs: runner: H100 name: Test (${{ matrix.host-platform }}, Python ${{ matrix.python-version }}, CUDA ${{ matrix.cuda-version }}, Runner ${{ matrix.runner }}) # The build stage could fail but we want the CI to keep moving. - if: ${{ (github.repository_owner == 'nvidia') && always() }} + if: ${{ github.repository_owner == 'nvidia' && always() }} permissions: id-token: write # This is required for configure-aws-credentials contents: read # This is required for actions/checkout @@ -209,7 +209,7 @@ jobs: needs: - build steps: - - name: Run nvidia-smi to make sure GPU is working + - name: Ensure GPU is working shell: bash --noprofile --norc -xeuo pipefail {0} run: nvidia-smi @@ -322,3 +322,15 @@ jobs: # pip install "cupy-cuda${TEST_CUDA_MAJOR}x" pytest -rxXs tests/ popd + + doc: + permissions: + id-token: write # This is required for configure-aws-credentials + contents: read # This is required for actions/checkout + needs: + - build + secrets: inherit + uses: + .github/workflows/gh-build-docs.yml + with: + build_ctk_ver: ${{ needs.build.outputs.BUILD_CTK_VER }} diff --git a/.github/workflows/gh-build-docs.yml b/.github/workflows/gh-build-docs.yml new file mode 100644 index 0000000000..25b1075252 --- /dev/null +++ b/.github/workflows/gh-build-docs.yml @@ -0,0 +1,79 @@ +on: + workflow_call: + inputs: + - build_ctk_ver: + type: string + required: true + +jobs: + doc: + name: Build & publish docs + # The build stage could fail but we want the CI to keep moving. + if: ${{ github.repository_owner == 'nvidia' && always() }} + runs-on: ubuntu-latest + defaults: + run: + shell: bash -el {0} + steps: + - name: Checkout ${{ github.event.repository.name }} + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Set up miniforge + uses: conda-incubator/setup-miniconda@v3 + with: + activate-environment: cuda-python-docs + environment-file: ./cuda_python/docs/environment-docs.yml + miniforge-version: latest + conda-remove-defaults: "true" + python-version: 3.12 + + - name: Set environment variables + run: | + PYTHON_VERSION_FORMATTED="312" # see above + REPO_DIR=$(pwd) + + # make outputs from the previous job as env vars + echo "CUDA_CORE_ARTIFACT_NAME=cuda-core-python${PYTHON_VERSION_FORMATTED}-linux-64-${{ github.sha }}" >> $GITHUB_ENV + echo "CUDA_CORE_ARTIFACTS_DIR=$(realpath "$REPO_DIR/cuda_core/dist")" >> $GITHUB_ENV + echo "CUDA_BINDINGS_ARTIFACT_NAME=cuda-bindings-python${PYTHON_VERSION_FORMATTED}-cuda${{ inputs.build_ctk_ver }}-linux-64-${{ github.sha }}" >> $GITHUB_ENV + echo "CUDA_BINDINGS_ARTIFACTS_DIR=$(realpath "$REPO_DIR/cuda_bindings/dist")" >> $GITHUB_ENV + + - name: Download cuda.bindings build artifacts + uses: actions/download-artifact@v4 + with: + name: ${{ env.CUDA_BINDINGS_ARTIFACT_NAME }} + path: ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }} + + - name: Display structure of downloaded cuda.bindings artifacts + run: | + pwd + ls -lahR $CUDA_BINDINGS_ARTIFACTS_DIR + + - name: Download cuda.core build artifacts + uses: actions/download-artifact@v4 + with: + name: ${{ env.CUDA_CORE_ARTIFACT_NAME }} + path: ${{ env.CUDA_CORE_ARTIFACTS_DIR }} + + - name: Display structure of downloaded cuda.core build artifacts + run: | + pwd + ls -lahR $CUDA_CORE_ARTIFACTS_DIR + + - name: Install all packages + run: | + pushd "${CUDA_BINDINGS_ARTIFACTS_DIR}" + pip install *.whl + popd + + pushd "${CUDA_CORE_ARTIFACTS_DIR}" + pip install $(ls *.whl)["cu${TEST_CUDA_MAJOR}"] + popd + + - name: Build all docs + run: | + cd cuda_python/docs/ + ./build_all_docs.sh latest-only + ls -l build diff --git a/cuda_bindings/docs/build_docs.sh b/cuda_bindings/docs/build_docs.sh index 9f618c5589..eb45a259ac 100755 --- a/cuda_bindings/docs/build_docs.sh +++ b/cuda_bindings/docs/build_docs.sh @@ -2,33 +2,50 @@ set -ex -# SPHINX_CUDA_BINDINGS_VER is used to create a subdir under build/html -# (the Makefile file for sphinx-build also honors it if defined). -# If there's a post release (ex: .post1) we don't want it to show up in the -# version selector or directory structure. -if [[ -z "${SPHINX_CUDA_BINDINGS_VER}" ]]; then - export SPHINX_CUDA_BINDINGS_VER=$(python -c "from importlib.metadata import version; \ - ver = '.'.join(str(version('cuda-python')).split('.')[:3]); \ - print(ver)" \ - | awk -F'+' '{print $1}') -fi - -# build the docs (in parallel) -SPHINXOPTS="-j 4" make html - -# for debugging/developing (conf.py), please comment out the above line and -# use the line below instead, as we must build in serial to avoid getting -# obsecure Sphinx errors -#SPHINXOPTS="-v" make html - -# to support version dropdown menu -cp ./versions.json build/html - -# to have a redirection page (to the latest docs) -cp source/_templates/main.html build/html/index.html - -# ensure that the latest docs is the one we built -cp -r build/html/${SPHINX_CUDA_BINDINGS_VER} build/html/latest - -# ensure that the Sphinx reference uses the latest docs -cp build/html/latest/objects.inv build/html +build_docs() { + if [[ "$#" == "0" ]]; then + LATEST_ONLY="1" + elif [[ "$#" == "1" && "$1" == "latest-only" ]]; then + LATEST_ONLY="0" + else + echo "usage: ./build_docs.sh [latest-only]" + exit 1 + fi + + # SPHINX_CUDA_BINDINGS_VER is used to create a subdir under build/html + # (the Makefile file for sphinx-build also honors it if defined). + # If there's a post release (ex: .post1) we don't want it to show up in the + # version selector or directory structure. + if [[ -z "${SPHINX_CUDA_BINDINGS_VER}" ]]; then + export SPHINX_CUDA_BINDINGS_VER=$(python -c "from importlib.metadata import version; \ + ver = '.'.join(str(version('cuda-python')).split('.')[:3]); \ + print(ver)" \ + | awk -F'+' '{print $1}') + fi + + # build the docs (in parallel) + SPHINXOPTS="-j 4" make html + + # for debugging/developing (conf.py), please comment out the above line and + # use the line below instead, as we must build in serial to avoid getting + # obsecure Sphinx errors + #SPHINXOPTS="-v" make html + + # to support version dropdown menu + cp ./versions.json build/html + + # to have a redirection page (to the latest docs) + cp source/_templates/main.html build/html/index.html + + # ensure that the latest docs is the one we built + if [[ $LATEST_ONLY == "0" ]]; then + cp -r build/html/${SPHINX_CUDA_BINDINGS_VER} build/html/latest + else + mv build/html/${SPHINX_CUDA_BINDINGS_VER} build/html/latest + fi + + # ensure that the Sphinx reference uses the latest docs + cp build/html/latest/objects.inv build/html +} + +build_docs $@ diff --git a/cuda_core/docs/build_docs.sh b/cuda_core/docs/build_docs.sh index d4cd562747..739fb5abbe 100755 --- a/cuda_core/docs/build_docs.sh +++ b/cuda_core/docs/build_docs.sh @@ -2,32 +2,49 @@ set -ex -# SPHINX_CUDA_CORE_VER is used to create a subdir under build/html -# (the Makefile file for sphinx-build also honors it if defined) -if [[ -z "${SPHINX_CUDA_CORE_VER}" ]]; then - export SPHINX_CUDA_CORE_VER=$(python -c "from importlib.metadata import version; print(version('cuda-core'))" \ - | awk -F'+' '{print $1}') -fi - -# build the docs (in parallel) -SPHINXOPTS="-j 4" make html - -# for debugging/developing (conf.py), please comment out the above line and -# use the line below instead, as we must build in serial to avoid getting -# obsecure Sphinx errors -#SPHINXOPTS="-v" make html - -# to support version dropdown menu -cp ./versions.json build/html - -# to have a redirection page (to the latest docs) -cp source/_templates/main.html build/html/index.html - -# ensure that the latest docs is the one we built -cp -r build/html/${SPHINX_CUDA_CORE_VER} build/html/latest - -# ensure that the Sphinx reference uses the latest docs -cp build/html/latest/objects.inv build/html - -# clean up previously auto-generated files -rm -rf source/generated/ +build_docs() { + if [[ "$#" == "0" ]]; then + LATEST_ONLY="1" + elif [[ "$#" == "1" && "$1" == "latest-only" ]]; then + LATEST_ONLY="0" + else + echo "usage: ./build_docs.sh [latest-only]" + exit 1 + fi + + # SPHINX_CUDA_CORE_VER is used to create a subdir under build/html + # (the Makefile file for sphinx-build also honors it if defined) + if [[ -z "${SPHINX_CUDA_CORE_VER}" ]]; then + export SPHINX_CUDA_CORE_VER=$(python -c "from importlib.metadata import version; print(version('cuda-core'))" \ + | awk -F'+' '{print $1}') + fi + + # build the docs (in parallel) + SPHINXOPTS="-j 4" make html + + # for debugging/developing (conf.py), please comment out the above line and + # use the line below instead, as we must build in serial to avoid getting + # obsecure Sphinx errors + #SPHINXOPTS="-v" make html + + # to support version dropdown menu + cp ./versions.json build/html + + # to have a redirection page (to the latest docs) + cp source/_templates/main.html build/html/index.html + + # ensure that the latest docs is the one we built + if [[ $LATEST_ONLY == "0" ]]; then + cp -r build/html/${SPHINX_CUDA_CORE_VER} build/html/latest + else + mv build/html/${SPHINX_CUDA_CORE_VER} build/html/latest + fi + + # ensure that the Sphinx reference uses the latest docs + cp build/html/latest/objects.inv build/html + + # clean up previously auto-generated files + rm -rf source/generated/ +} + +build_docs $@ diff --git a/cuda_python/docs/build_all_docs.sh b/cuda_python/docs/build_all_docs.sh index 030463b7d7..03d82fe799 100755 --- a/cuda_python/docs/build_all_docs.sh +++ b/cuda_python/docs/build_all_docs.sh @@ -2,26 +2,30 @@ set -ex -# build cuda-python docs -rm -rf build -./build_docs.sh +build_all_docs() { + # build cuda-python docs + rm -rf build + ./build_docs.sh $@ + + # build cuda-bindings docs + CUDA_BINDINGS_PATH=build/html/cuda-bindings + mkdir -p $CUDA_BINDINGS_PATH + pushd . + cd ../../cuda_bindings/docs + rm -rf build + ./build_docs.sh $@ + cp -r build/html/* "$(dirs -l +1)"/$CUDA_BINDINGS_PATH + popd + + # build cuda-core docs + CUDA_CORE_PATH=build/html/cuda-core + mkdir -p $CUDA_CORE_PATH + pushd . + cd ../../cuda_core/docs + rm -rf build + ./build_docs.sh $@ + cp -r build/html/* "$(dirs -l +1)"/$CUDA_CORE_PATH + popd +} -# build cuda-bindings docs -CUDA_BINDINGS_PATH=build/html/cuda-bindings -mkdir -p $CUDA_BINDINGS_PATH -pushd . -cd ../../cuda_bindings/docs -rm -rf build -./build_docs.sh -cp -r build/html/* "$(dirs -l +1)"/$CUDA_BINDINGS_PATH -popd - -# build cuda-core docs -CUDA_CORE_PATH=build/html/cuda-core -mkdir -p $CUDA_CORE_PATH -pushd . -cd ../../cuda_core/docs -rm -rf build -./build_docs.sh -cp -r build/html/* "$(dirs -l +1)"/$CUDA_CORE_PATH -popd +build_all_docs $@ diff --git a/cuda_python/docs/build_docs.sh b/cuda_python/docs/build_docs.sh index 09ed3bbfad..a4d343e85c 100755 --- a/cuda_python/docs/build_docs.sh +++ b/cuda_python/docs/build_docs.sh @@ -2,36 +2,53 @@ set -ex -# SPHINX_CUDA_PYTHON_VER is used to create a subdir under build/html -# (the Makefile file for sphinx-build also honors it if defined). -# If there's a post release (ex: .post1) we don't want it to show up in the -# version selector or directory structure. -if [[ -z "${SPHINX_CUDA_PYTHON_VER}" ]]; then - export SPHINX_CUDA_PYTHON_VER=$(python -c "from importlib.metadata import version; \ - ver = '.'.join(str(version('cuda-python')).split('.')[:3]); \ - print(ver)" \ - | awk -F'+' '{print $1}') -fi - -# build the docs (in parallel) -SPHINXOPTS="-j 4" make html - -# for debugging/developing (conf.py), please comment out the above line and -# use the line below instead, as we must build in serial to avoid getting -# obsecure Sphinx errors -#SPHINXOPTS="-v" make html - -# to support version dropdown menu -cp ./versions.json build/html - -# to have a redirection page (to the latest docs) -cp source/_templates/main.html build/html/index.html - -# ensure that the latest docs is the one we built -cp -r build/html/${SPHINX_CUDA_PYTHON_VER} build/html/latest - -# ensure that the Sphinx reference uses the latest docs -cp build/html/latest/objects.inv build/html - -# clean up previously auto-generated files -rm -rf source/generated/ +build_docs() { + if [[ "$#" == "0" ]]; then + LATEST_ONLY="1" + elif [[ "$#" == "1" && "$1" == "latest-only" ]]; then + LATEST_ONLY="0" + else + echo "usage: ./build_docs.sh [latest-only]" + exit 1 + fi + + # SPHINX_CUDA_PYTHON_VER is used to create a subdir under build/html + # (the Makefile file for sphinx-build also honors it if defined). + # If there's a post release (ex: .post1) we don't want it to show up in the + # version selector or directory structure. + if [[ -z "${SPHINX_CUDA_PYTHON_VER}" ]]; then + export SPHINX_CUDA_PYTHON_VER=$(python -c "from importlib.metadata import version; \ + ver = '.'.join(str(version('cuda-python')).split('.')[:3]); \ + print(ver)" \ + | awk -F'+' '{print $1}') + fi + + # build the docs (in parallel) + SPHINXOPTS="-j 4" make html + + # for debugging/developing (conf.py), please comment out the above line and + # use the line below instead, as we must build in serial to avoid getting + # obsecure Sphinx errors + #SPHINXOPTS="-v" make html + + # to support version dropdown menu + cp ./versions.json build/html + + # to have a redirection page (to the latest docs) + cp source/_templates/main.html build/html/index.html + + # ensure that the latest docs is the one we built + if [[ $LATEST_ONLY == "0" ]]; then + cp -r build/html/${SPHINX_CUDA_PYTHON_VER} build/html/latest + else + mv build/html/${SPHINX_CUDA_PYTHON_VER} build/html/latest + fi + + # ensure that the Sphinx reference uses the latest docs + cp build/html/latest/objects.inv build/html + + # clean up previously auto-generated files + rm -rf source/generated/ +} + +build_docs $@ From cba26a01d7127eed6a0c801059a0763bf55dacec Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sat, 28 Dec 2024 03:08:23 +0000 Subject: [PATCH 02/19] keep build artifact for later use --- .github/workflows/gh-build-docs.yml | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/.github/workflows/gh-build-docs.yml b/.github/workflows/gh-build-docs.yml index 25b1075252..ee7b16966a 100644 --- a/.github/workflows/gh-build-docs.yml +++ b/.github/workflows/gh-build-docs.yml @@ -6,8 +6,8 @@ on: required: true jobs: - doc: - name: Build & publish docs + build: + name: Build docs # The build stage could fail but we want the CI to keep moving. if: ${{ github.repository_owner == 'nvidia' && always() }} runs-on: ubuntu-latest @@ -73,7 +73,15 @@ jobs: popd - name: Build all docs + id: build run: | - cd cuda_python/docs/ + pushd cuda_python/docs/ ./build_all_docs.sh latest-only ls -l build + popd + + - name: Upload doc artifacts + uses: actions/upload-pages-artifact@v3 + with: + path: cuda_python/docs/build/html/ + retention-days: 3 From f8f546b7bf5abc8e207c6f7f87930d7bdf37830b Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sat, 28 Dec 2024 03:16:25 +0000 Subject: [PATCH 03/19] skip test stage for now --- .github/workflows/gh-build-and-test.yml | 322 ++++++++++++------------ 1 file changed, 161 insertions(+), 161 deletions(-) diff --git a/.github/workflows/gh-build-and-test.yml b/.github/workflows/gh-build-and-test.yml index 238796bfdc..543f346ea1 100644 --- a/.github/workflows/gh-build-and-test.yml +++ b/.github/workflows/gh-build-and-test.yml @@ -162,166 +162,166 @@ jobs: run: | echo "CUDA_VERSION=${{ matrix.cuda-version }}" >> $GITHUB_OUTPUT - test: - strategy: - fail-fast: false - # TODO: add driver version here - matrix: - host-platform: - - linux-64 - - linux-aarch64 - # TODO: enable testing once win-64 GPU runners are up - # - win-64 - python-version: - - "3.13" - - "3.12" - - "3.11" - - "3.10" - - "3.9" - cuda-version: - # Note: this is for test-time only. - - "12.6.2" - - "12.0.1" - - "11.8.0" - runner: - - default - include: - - host-platform: linux-64 - python-version: "3.12" - cuda-version: "12.6.2" - runner: H100 - name: Test (${{ matrix.host-platform }}, Python ${{ matrix.python-version }}, CUDA ${{ matrix.cuda-version }}, Runner ${{ matrix.runner }}) - # The build stage could fail but we want the CI to keep moving. - if: ${{ github.repository_owner == 'nvidia' && always() }} - permissions: - id-token: write # This is required for configure-aws-credentials - contents: read # This is required for actions/checkout - runs-on: ${{ (matrix.runner == 'default' && matrix.host-platform == 'linux-64' && 'linux-amd64-gpu-v100-latest-1') || - (matrix.runner == 'default' && matrix.host-platform == 'linux-aarch64' && 'linux-arm64-gpu-a100-latest-1') || - (matrix.runner == 'H100' && 'linux-amd64-gpu-h100-latest-1-testing') }} - # Our self-hosted runners require a container - # TODO: use a different (nvidia?) container - container: - options: -u root --security-opt seccomp=unconfined --shm-size 16g - image: ubuntu:22.04 - env: - NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }} - needs: - - build - steps: - - name: Ensure GPU is working - shell: bash --noprofile --norc -xeuo pipefail {0} - run: nvidia-smi - - - name: Checkout ${{ github.event.repository.name }} - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Set environment variables - shell: bash --noprofile --norc -xeuo pipefail {0} - run: | - PYTHON_VERSION_FORMATTED=$(echo '${{ matrix.python-version }}' | tr -d '.') - if [[ "${{ matrix.host-platform }}" == linux* ]]; then - REPO_DIR=$(pwd) - elif [[ "${{ matrix.host-platform }}" == win* ]]; then - PWD=$(pwd) - REPO_DIR=$(cygpath -w $PWD) - fi - - BUILD_CUDA_MAJOR="$(cut -d '.' -f 1 <<< ${{ needs.build.outputs.BUILD_CTK_VER }})" - TEST_CUDA_MAJOR="$(cut -d '.' -f 1 <<< ${{ matrix.cuda-version }})" - if [[ $BUILD_CUDA_MAJOR != $TEST_CUDA_MAJOR ]]; then - SKIP_CUDA_BINDINGS_TEST=1 - else - SKIP_CUDA_BINDINGS_TEST=0 - fi - - # make outputs from the previous job as env vars - echo "CUDA_CORE_ARTIFACT_NAME=cuda-core-python${PYTHON_VERSION_FORMATTED}-${{ matrix.host-platform }}-${{ github.sha }}" >> $GITHUB_ENV - echo "CUDA_CORE_ARTIFACTS_DIR=$(realpath "$REPO_DIR/cuda_core/dist")" >> $GITHUB_ENV - echo "CUDA_BINDINGS_ARTIFACT_NAME=cuda-bindings-python${PYTHON_VERSION_FORMATTED}-cuda${{ needs.build.outputs.BUILD_CTK_VER }}-${{ matrix.host-platform }}-${{ github.sha }}" >> $GITHUB_ENV - echo "CUDA_BINDINGS_ARTIFACTS_DIR=$(realpath "$REPO_DIR/cuda_bindings/dist")" >> $GITHUB_ENV - echo "SKIP_CUDA_BINDINGS_TEST=${SKIP_CUDA_BINDINGS_TEST}" >> $GITHUB_ENV - - - name: Download cuda.bindings build artifacts - uses: actions/download-artifact@v4 - with: - name: ${{ env.CUDA_BINDINGS_ARTIFACT_NAME }} - path: ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }} - - - name: Display structure of downloaded cuda.bindings artifacts - shell: bash --noprofile --norc -xeuo pipefail {0} - run: | - pwd - ls -lahR $CUDA_BINDINGS_ARTIFACTS_DIR - - - name: Download cuda.core build artifacts - uses: actions/download-artifact@v4 - with: - name: ${{ env.CUDA_CORE_ARTIFACT_NAME }} - path: ${{ env.CUDA_CORE_ARTIFACTS_DIR }} - - - name: Display structure of downloaded cuda.core build artifacts - shell: bash --noprofile --norc -xeuo pipefail {0} - run: | - pwd - ls -lahR $CUDA_CORE_ARTIFACTS_DIR - - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - - name: Set up mini CTK - uses: ./.github/actions/fetch_ctk - continue-on-error: false - with: - host-platform: ${{ matrix.host-platform }} - cuda-version: ${{ matrix.cuda-version }} - - - name: Run cuda.bindings tests - if: ${{ env.SKIP_CUDA_BINDINGS_TEST == '0' }} - shell: bash --noprofile --norc -xeuo pipefail {0} - run: | - ls $CUDA_PATH - - pushd "${CUDA_BINDINGS_ARTIFACTS_DIR}" - pip install *.whl - popd - - pushd ./cuda_bindings - pip install -r requirements.txt - pytest -rxXs tests/ - # TODO: enable cython tests - #pytest tests/cython - popd - - - name: Run cuda.core tests - shell: bash --noprofile --norc -xeuo pipefail {0} - run: | - if [[ $SKIP_CUDA_BINDINGS_TEST == 1 ]]; then - # TODO: remove this hack once cuda-python has a cp313 build - if [[ ${{ matrix.python-version }} == "3.13" ]]; then - echo "Python 3.13 + cuda-python ${{ matrix.cuda-version }} is not supported, skipping the test..." - exit 0 - fi - fi - - # If build/test majors match: cuda.bindings is installed in the previous step. - # If mismatch: cuda.bindings is installed from PyPI. - TEST_CUDA_MAJOR="$(cut -d '.' -f 1 <<< ${{ matrix.cuda-version }})" - pushd "${CUDA_CORE_ARTIFACTS_DIR}" - pip install $(ls *.whl)["cu${TEST_CUDA_MAJOR}"] - popd - - pushd ./cuda_core - # TODO: add requirements.txt for test deps? - pip install pytest - # TODO: add CuPy to test deps (which would require cuRAND) - # pip install "cupy-cuda${TEST_CUDA_MAJOR}x" - pytest -rxXs tests/ - popd +# test: +# strategy: +# fail-fast: false +# # TODO: add driver version here +# matrix: +# host-platform: +# - linux-64 +# - linux-aarch64 +# # TODO: enable testing once win-64 GPU runners are up +# # - win-64 +# python-version: +# - "3.13" +# - "3.12" +# - "3.11" +# - "3.10" +# - "3.9" +# cuda-version: +# # Note: this is for test-time only. +# - "12.6.2" +# - "12.0.1" +# - "11.8.0" +# runner: +# - default +# include: +# - host-platform: linux-64 +# python-version: "3.12" +# cuda-version: "12.6.2" +# runner: H100 +# name: Test (${{ matrix.host-platform }}, Python ${{ matrix.python-version }}, CUDA ${{ matrix.cuda-version }}, Runner ${{ matrix.runner }}) +# # The build stage could fail but we want the CI to keep moving. +# if: ${{ github.repository_owner == 'nvidia' && always() }} +# permissions: +# id-token: write # This is required for configure-aws-credentials +# contents: read # This is required for actions/checkout +# runs-on: ${{ (matrix.runner == 'default' && matrix.host-platform == 'linux-64' && 'linux-amd64-gpu-v100-latest-1') || +# (matrix.runner == 'default' && matrix.host-platform == 'linux-aarch64' && 'linux-arm64-gpu-a100-latest-1') || +# (matrix.runner == 'H100' && 'linux-amd64-gpu-h100-latest-1-testing') }} +# # Our self-hosted runners require a container +# # TODO: use a different (nvidia?) container +# container: +# options: -u root --security-opt seccomp=unconfined --shm-size 16g +# image: ubuntu:22.04 +# env: +# NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }} +# needs: +# - build +# steps: +# - name: Ensure GPU is working +# shell: bash --noprofile --norc -xeuo pipefail {0} +# run: nvidia-smi +# +# - name: Checkout ${{ github.event.repository.name }} +# uses: actions/checkout@v4 +# with: +# fetch-depth: 0 +# +# - name: Set environment variables +# shell: bash --noprofile --norc -xeuo pipefail {0} +# run: | +# PYTHON_VERSION_FORMATTED=$(echo '${{ matrix.python-version }}' | tr -d '.') +# if [[ "${{ matrix.host-platform }}" == linux* ]]; then +# REPO_DIR=$(pwd) +# elif [[ "${{ matrix.host-platform }}" == win* ]]; then +# PWD=$(pwd) +# REPO_DIR=$(cygpath -w $PWD) +# fi +# +# BUILD_CUDA_MAJOR="$(cut -d '.' -f 1 <<< ${{ needs.build.outputs.BUILD_CTK_VER }})" +# TEST_CUDA_MAJOR="$(cut -d '.' -f 1 <<< ${{ matrix.cuda-version }})" +# if [[ $BUILD_CUDA_MAJOR != $TEST_CUDA_MAJOR ]]; then +# SKIP_CUDA_BINDINGS_TEST=1 +# else +# SKIP_CUDA_BINDINGS_TEST=0 +# fi +# +# # make outputs from the previous job as env vars +# echo "CUDA_CORE_ARTIFACT_NAME=cuda-core-python${PYTHON_VERSION_FORMATTED}-${{ matrix.host-platform }}-${{ github.sha }}" >> $GITHUB_ENV +# echo "CUDA_CORE_ARTIFACTS_DIR=$(realpath "$REPO_DIR/cuda_core/dist")" >> $GITHUB_ENV +# echo "CUDA_BINDINGS_ARTIFACT_NAME=cuda-bindings-python${PYTHON_VERSION_FORMATTED}-cuda${{ needs.build.outputs.BUILD_CTK_VER }}-${{ matrix.host-platform }}-${{ github.sha }}" >> $GITHUB_ENV +# echo "CUDA_BINDINGS_ARTIFACTS_DIR=$(realpath "$REPO_DIR/cuda_bindings/dist")" >> $GITHUB_ENV +# echo "SKIP_CUDA_BINDINGS_TEST=${SKIP_CUDA_BINDINGS_TEST}" >> $GITHUB_ENV +# +# - name: Download cuda.bindings build artifacts +# uses: actions/download-artifact@v4 +# with: +# name: ${{ env.CUDA_BINDINGS_ARTIFACT_NAME }} +# path: ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }} +# +# - name: Display structure of downloaded cuda.bindings artifacts +# shell: bash --noprofile --norc -xeuo pipefail {0} +# run: | +# pwd +# ls -lahR $CUDA_BINDINGS_ARTIFACTS_DIR +# +# - name: Download cuda.core build artifacts +# uses: actions/download-artifact@v4 +# with: +# name: ${{ env.CUDA_CORE_ARTIFACT_NAME }} +# path: ${{ env.CUDA_CORE_ARTIFACTS_DIR }} +# +# - name: Display structure of downloaded cuda.core build artifacts +# shell: bash --noprofile --norc -xeuo pipefail {0} +# run: | +# pwd +# ls -lahR $CUDA_CORE_ARTIFACTS_DIR +# +# - name: Set up Python ${{ matrix.python-version }} +# uses: actions/setup-python@v5 +# with: +# python-version: ${{ matrix.python-version }} +# +# - name: Set up mini CTK +# uses: ./.github/actions/fetch_ctk +# continue-on-error: false +# with: +# host-platform: ${{ matrix.host-platform }} +# cuda-version: ${{ matrix.cuda-version }} +# +# - name: Run cuda.bindings tests +# if: ${{ env.SKIP_CUDA_BINDINGS_TEST == '0' }} +# shell: bash --noprofile --norc -xeuo pipefail {0} +# run: | +# ls $CUDA_PATH +# +# pushd "${CUDA_BINDINGS_ARTIFACTS_DIR}" +# pip install *.whl +# popd +# +# pushd ./cuda_bindings +# pip install -r requirements.txt +# pytest -rxXs tests/ +# # TODO: enable cython tests +# #pytest tests/cython +# popd +# +# - name: Run cuda.core tests +# shell: bash --noprofile --norc -xeuo pipefail {0} +# run: | +# if [[ $SKIP_CUDA_BINDINGS_TEST == 1 ]]; then +# # TODO: remove this hack once cuda-python has a cp313 build +# if [[ ${{ matrix.python-version }} == "3.13" ]]; then +# echo "Python 3.13 + cuda-python ${{ matrix.cuda-version }} is not supported, skipping the test..." +# exit 0 +# fi +# fi +# +# # If build/test majors match: cuda.bindings is installed in the previous step. +# # If mismatch: cuda.bindings is installed from PyPI. +# TEST_CUDA_MAJOR="$(cut -d '.' -f 1 <<< ${{ matrix.cuda-version }})" +# pushd "${CUDA_CORE_ARTIFACTS_DIR}" +# pip install $(ls *.whl)["cu${TEST_CUDA_MAJOR}"] +# popd +# +# pushd ./cuda_core +# # TODO: add requirements.txt for test deps? +# pip install pytest +# # TODO: add CuPy to test deps (which would require cuRAND) +# # pip install "cupy-cuda${TEST_CUDA_MAJOR}x" +# pytest -rxXs tests/ +# popd doc: permissions: @@ -331,6 +331,6 @@ jobs: - build secrets: inherit uses: - .github/workflows/gh-build-docs.yml + ./.github/workflows/gh-build-docs.yml with: build_ctk_ver: ${{ needs.build.outputs.BUILD_CTK_VER }} From 8283401a3585f5d35c27f5e930fe289a84b41e0b Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sat, 28 Dec 2024 03:19:54 +0000 Subject: [PATCH 04/19] fix typo --- .github/workflows/gh-build-docs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/gh-build-docs.yml b/.github/workflows/gh-build-docs.yml index ee7b16966a..b45b21970c 100644 --- a/.github/workflows/gh-build-docs.yml +++ b/.github/workflows/gh-build-docs.yml @@ -1,7 +1,7 @@ on: workflow_call: inputs: - - build_ctk_ver: + build_ctk_ver: type: string required: true From eaf444a86fc6f1c0ce042bcf671d8c05a739db4f Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sat, 28 Dec 2024 03:32:08 +0000 Subject: [PATCH 05/19] reduce build matrix; ensure doc stage is triggered; simplify cuda-core install --- .github/workflows/gh-build-and-test.yml | 14 ++++++++------ .github/workflows/gh-build-docs.yml | 2 +- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/.github/workflows/gh-build-and-test.yml b/.github/workflows/gh-build-and-test.yml index 543f346ea1..4d7fe7341b 100644 --- a/.github/workflows/gh-build-and-test.yml +++ b/.github/workflows/gh-build-and-test.yml @@ -7,14 +7,14 @@ jobs: matrix: host-platform: - linux-64 - - linux-aarch64 - - win-64 +# - linux-aarch64 +# - win-64 python-version: - - "3.13" +# - "3.13" - "3.12" - - "3.11" - - "3.10" - - "3.9" +# - "3.11" +# - "3.10" +# - "3.9" cuda-version: # Note: this is for build-time only. - "12.6.2" @@ -324,6 +324,8 @@ jobs: # popd doc: + # The build stage could fail but we want the CI to keep moving. + if: ${{ github.repository_owner == 'nvidia' && always() }} permissions: id-token: write # This is required for configure-aws-credentials contents: read # This is required for actions/checkout diff --git a/.github/workflows/gh-build-docs.yml b/.github/workflows/gh-build-docs.yml index b45b21970c..1296ca4207 100644 --- a/.github/workflows/gh-build-docs.yml +++ b/.github/workflows/gh-build-docs.yml @@ -69,7 +69,7 @@ jobs: popd pushd "${CUDA_CORE_ARTIFACTS_DIR}" - pip install $(ls *.whl)["cu${TEST_CUDA_MAJOR}"] + pip install *.whl popd - name: Build all docs From 9f91113108ccfcd615b9dc709fbafa736cb77245 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sat, 28 Dec 2024 03:56:31 +0000 Subject: [PATCH 06/19] switch runner & install CTK for doc build --- .github/workflows/gh-build-docs.yml | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/.github/workflows/gh-build-docs.yml b/.github/workflows/gh-build-docs.yml index 1296ca4207..86a86abf27 100644 --- a/.github/workflows/gh-build-docs.yml +++ b/.github/workflows/gh-build-docs.yml @@ -10,7 +10,9 @@ jobs: name: Build docs # The build stage could fail but we want the CI to keep moving. if: ${{ github.repository_owner == 'nvidia' && always() }} - runs-on: ubuntu-latest + # WAR: Building the doc currently requires a GPU (NVIDIA/cuda-python#326) + runs-on: linux-amd64-gpu-p100-latest-1 + #runs-on: ubuntu-latest defaults: run: shell: bash -el {0} @@ -29,6 +31,14 @@ jobs: conda-remove-defaults: "true" python-version: 3.12 + # WAR: Building the doc currently requires CTK installed (NVIDIA/cuda-python#326) + - name: Set up mini CTK + uses: ./.github/actions/fetch_ctk + continue-on-error: false + with: + host-platform: linux-64 + cuda-version: ${{ inputs.build_ctk_ver }} + - name: Set environment variables run: | PYTHON_VERSION_FORMATTED="312" # see above From ca4febfb6ae2ce14c157c3c9f010398cc7ac141c Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sat, 28 Dec 2024 03:59:03 +0000 Subject: [PATCH 07/19] fix latest-only logic --- cuda_bindings/docs/build_docs.sh | 4 ++-- cuda_core/docs/build_docs.sh | 4 ++-- cuda_python/docs/build_docs.sh | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/cuda_bindings/docs/build_docs.sh b/cuda_bindings/docs/build_docs.sh index eb45a259ac..f835c96f48 100755 --- a/cuda_bindings/docs/build_docs.sh +++ b/cuda_bindings/docs/build_docs.sh @@ -4,9 +4,9 @@ set -ex build_docs() { if [[ "$#" == "0" ]]; then - LATEST_ONLY="1" - elif [[ "$#" == "1" && "$1" == "latest-only" ]]; then LATEST_ONLY="0" + elif [[ "$#" == "1" && "$1" == "latest-only" ]]; then + LATEST_ONLY="1" else echo "usage: ./build_docs.sh [latest-only]" exit 1 diff --git a/cuda_core/docs/build_docs.sh b/cuda_core/docs/build_docs.sh index 739fb5abbe..1890b02809 100755 --- a/cuda_core/docs/build_docs.sh +++ b/cuda_core/docs/build_docs.sh @@ -4,9 +4,9 @@ set -ex build_docs() { if [[ "$#" == "0" ]]; then - LATEST_ONLY="1" - elif [[ "$#" == "1" && "$1" == "latest-only" ]]; then LATEST_ONLY="0" + elif [[ "$#" == "1" && "$1" == "latest-only" ]]; then + LATEST_ONLY="1" else echo "usage: ./build_docs.sh [latest-only]" exit 1 diff --git a/cuda_python/docs/build_docs.sh b/cuda_python/docs/build_docs.sh index a4d343e85c..714ba530e3 100755 --- a/cuda_python/docs/build_docs.sh +++ b/cuda_python/docs/build_docs.sh @@ -4,9 +4,9 @@ set -ex build_docs() { if [[ "$#" == "0" ]]; then - LATEST_ONLY="1" - elif [[ "$#" == "1" && "$1" == "latest-only" ]]; then LATEST_ONLY="0" + elif [[ "$#" == "1" && "$1" == "latest-only" ]]; then + LATEST_ONLY="1" else echo "usage: ./build_docs.sh [latest-only]" exit 1 From f03fb80ef6f51cb778947ff036aa697cee8d090b Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sat, 28 Dec 2024 04:24:30 +0000 Subject: [PATCH 08/19] switch to T4 runner due to long latency --- .github/workflows/gh-build-docs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/gh-build-docs.yml b/.github/workflows/gh-build-docs.yml index 86a86abf27..1efdb3a205 100644 --- a/.github/workflows/gh-build-docs.yml +++ b/.github/workflows/gh-build-docs.yml @@ -11,7 +11,7 @@ jobs: # The build stage could fail but we want the CI to keep moving. if: ${{ github.repository_owner == 'nvidia' && always() }} # WAR: Building the doc currently requires a GPU (NVIDIA/cuda-python#326) - runs-on: linux-amd64-gpu-p100-latest-1 + runs-on: linux-amd64-gpu-t4-latest-1-testing #runs-on: ubuntu-latest defaults: run: From 7b5a86986437f7c352f8d0e2f58cbc293f6a8002 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sat, 28 Dec 2024 04:33:58 +0000 Subject: [PATCH 09/19] skip concurrency setup to unblock myself --- .github/workflows/ci-gh.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci-gh.yml b/.github/workflows/ci-gh.yml index 81cf6da313..f61fd12816 100644 --- a/.github/workflows/ci-gh.yml +++ b/.github/workflows/ci-gh.yml @@ -1,7 +1,8 @@ name: CI concurrency: - group: ${{ startsWith(github.ref_name, 'main') && format('unique-{0}', github.run_id) || format('ci-build-and-test-on-{0}-from-{1}', github.event_name, github.ref_name) }} + #group: ${{ startsWith(github.ref_name, 'main') && format('unique-{0}', github.run_id) || format('ci-build-and-test-on-{0}-from-{1}', github.event_name, github.ref_name) }} + group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true on: From 21b64b8b824fb24f8759f5fbc775c16114f7f95f Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sat, 28 Dec 2024 05:06:02 +0000 Subject: [PATCH 10/19] change artifact layout to match gh-pages; add a deploy job --- .github/workflows/gh-build-and-test.yml | 6 ++++-- .github/workflows/gh-build-docs.yml | 20 ++++++++++++++++++-- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/.github/workflows/gh-build-and-test.yml b/.github/workflows/gh-build-and-test.yml index 4d7fe7341b..3c226d2225 100644 --- a/.github/workflows/gh-build-and-test.yml +++ b/.github/workflows/gh-build-and-test.yml @@ -326,9 +326,11 @@ jobs: doc: # The build stage could fail but we want the CI to keep moving. if: ${{ github.repository_owner == 'nvidia' && always() }} + # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages permissions: - id-token: write # This is required for configure-aws-credentials - contents: read # This is required for actions/checkout + id-token: write + pages: write + contents: read needs: - build secrets: inherit diff --git a/.github/workflows/gh-build-docs.yml b/.github/workflows/gh-build-docs.yml index 1efdb3a205..cf736ca1c7 100644 --- a/.github/workflows/gh-build-docs.yml +++ b/.github/workflows/gh-build-docs.yml @@ -82,7 +82,7 @@ jobs: pip install *.whl popd - - name: Build all docs + - name: Build all (latest) docs id: build run: | pushd cuda_python/docs/ @@ -90,8 +90,24 @@ jobs: ls -l build popd + mkdir -p artifacts/docs + mv cuda_python/docs/build/html/* artifacts/docs/ + - name: Upload doc artifacts uses: actions/upload-pages-artifact@v3 with: - path: cuda_python/docs/build/html/ + path: artifacts/ retention-days: 3 + + deploy: + # Only deploy the latest docs when building on main + if: ${{ github.ref_name == 'main' }} + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + runs-on: ubuntu-latest + needs: build + steps: + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v4 From 672b627cc927cdd4f43081113639dbddcf7d74f0 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sat, 28 Dec 2024 05:41:09 +0000 Subject: [PATCH 11/19] add manual deployment borrowed from the array-api repo --- .github/workflows/gh-build-docs.yml | 59 +++++++++++++++++++++++------ 1 file changed, 47 insertions(+), 12 deletions(-) diff --git a/.github/workflows/gh-build-docs.yml b/.github/workflows/gh-build-docs.yml index cf736ca1c7..43c6e21ee1 100644 --- a/.github/workflows/gh-build-docs.yml +++ b/.github/workflows/gh-build-docs.yml @@ -93,21 +93,56 @@ jobs: mkdir -p artifacts/docs mv cuda_python/docs/build/html/* artifacts/docs/ + # Note: currently this is only for manual inspection. This step will become + # required once we switch to use GHA for doc deployment (see the bottom). - name: Upload doc artifacts uses: actions/upload-pages-artifact@v3 with: path: artifacts/ retention-days: 3 - deploy: - # Only deploy the latest docs when building on main - if: ${{ github.ref_name == 'main' }} - environment: - name: github-pages - url: ${{ steps.deployment.outputs.page_url }} - runs-on: ubuntu-latest - needs: build - steps: - - name: Deploy to GitHub Pages - id: deployment - uses: actions/deploy-pages@v4 + # The steps below are not executed unless when building on main. + - name: Configure git + if: ${{ github.ref_name == 'main' }} + run: | + git config --local user.email "noreply@nvidia.com" + git config --local user.name "cuda-python-bot" + + - name: Checkout the gh-pages branch + if: ${{ github.ref_name == 'main' }} + run: | + git fetch origin gh-pages + git checkout gh-pages + + - name: Move artifacts to doc root + if: ${{ github.ref_name == 'main' }} + run: | + mv artifacts/docs/* docs/ + git status + + - name: Commit changes + if: ${{ github.ref_name == 'main' }} + run: | + git add docs/ + git status + git commit -m "Deploy: ${{ github.sha }}" + continue-on-error: true + + - name: Push changes + if: ${{ github.ref_name == 'main' && success() }} + run: | + git push origin gh-pages + + # TODO: discuss if we want to abandon branch-based doc deployment + # deploy: + # # Only deploy the latest docs when building on main + # if: ${{ github.ref_name == 'main' }} + # environment: + # name: github-pages + # url: ${{ steps.deployment.outputs.page_url }} + # runs-on: ubuntu-latest + # needs: build + # steps: + # - name: Deploy to GitHub Pages + # id: deployment + # uses: actions/deploy-pages@v4 From 79477e6e5dbae1388741d06a0081936567582c4f Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sat, 28 Dec 2024 05:47:24 +0000 Subject: [PATCH 12/19] reduce script diff --- cuda_bindings/docs/build_docs.sh | 90 +++++++++++++--------------- cuda_core/docs/build_docs.sh | 88 +++++++++++++-------------- cuda_python/docs/build_all_docs.sh | 48 +++++++-------- cuda_python/docs/build_docs.sh | 96 ++++++++++++++---------------- 4 files changed, 153 insertions(+), 169 deletions(-) diff --git a/cuda_bindings/docs/build_docs.sh b/cuda_bindings/docs/build_docs.sh index f835c96f48..fbd33c4d2d 100755 --- a/cuda_bindings/docs/build_docs.sh +++ b/cuda_bindings/docs/build_docs.sh @@ -2,50 +2,46 @@ set -ex -build_docs() { - if [[ "$#" == "0" ]]; then - LATEST_ONLY="0" - elif [[ "$#" == "1" && "$1" == "latest-only" ]]; then - LATEST_ONLY="1" - else - echo "usage: ./build_docs.sh [latest-only]" - exit 1 - fi - - # SPHINX_CUDA_BINDINGS_VER is used to create a subdir under build/html - # (the Makefile file for sphinx-build also honors it if defined). - # If there's a post release (ex: .post1) we don't want it to show up in the - # version selector or directory structure. - if [[ -z "${SPHINX_CUDA_BINDINGS_VER}" ]]; then - export SPHINX_CUDA_BINDINGS_VER=$(python -c "from importlib.metadata import version; \ - ver = '.'.join(str(version('cuda-python')).split('.')[:3]); \ - print(ver)" \ - | awk -F'+' '{print $1}') - fi - - # build the docs (in parallel) - SPHINXOPTS="-j 4" make html - - # for debugging/developing (conf.py), please comment out the above line and - # use the line below instead, as we must build in serial to avoid getting - # obsecure Sphinx errors - #SPHINXOPTS="-v" make html - - # to support version dropdown menu - cp ./versions.json build/html - - # to have a redirection page (to the latest docs) - cp source/_templates/main.html build/html/index.html - - # ensure that the latest docs is the one we built - if [[ $LATEST_ONLY == "0" ]]; then - cp -r build/html/${SPHINX_CUDA_BINDINGS_VER} build/html/latest - else - mv build/html/${SPHINX_CUDA_BINDINGS_VER} build/html/latest - fi - - # ensure that the Sphinx reference uses the latest docs - cp build/html/latest/objects.inv build/html -} - -build_docs $@ +if [[ "$#" == "0" ]]; then + LATEST_ONLY="0" +elif [[ "$#" == "1" && "$1" == "latest-only" ]]; then + LATEST_ONLY="1" +else + echo "usage: ./build_docs.sh [latest-only]" + exit 1 +fi + +# SPHINX_CUDA_BINDINGS_VER is used to create a subdir under build/html +# (the Makefile file for sphinx-build also honors it if defined). +# If there's a post release (ex: .post1) we don't want it to show up in the +# version selector or directory structure. +if [[ -z "${SPHINX_CUDA_BINDINGS_VER}" ]]; then + export SPHINX_CUDA_BINDINGS_VER=$(python -c "from importlib.metadata import version; \ + ver = '.'.join(str(version('cuda-python')).split('.')[:3]); \ + print(ver)" \ + | awk -F'+' '{print $1}') +fi + +# build the docs (in parallel) +SPHINXOPTS="-j 4" make html + +# for debugging/developing (conf.py), please comment out the above line and +# use the line below instead, as we must build in serial to avoid getting +# obsecure Sphinx errors +#SPHINXOPTS="-v" make html + +# to support version dropdown menu +cp ./versions.json build/html + +# to have a redirection page (to the latest docs) +cp source/_templates/main.html build/html/index.html + +# ensure that the latest docs is the one we built +if [[ $LATEST_ONLY == "0" ]]; then + cp -r build/html/${SPHINX_CUDA_BINDINGS_VER} build/html/latest +else + mv build/html/${SPHINX_CUDA_BINDINGS_VER} build/html/latest +fi + +# ensure that the Sphinx reference uses the latest docs +cp build/html/latest/objects.inv build/html diff --git a/cuda_core/docs/build_docs.sh b/cuda_core/docs/build_docs.sh index 1890b02809..f5595cfba6 100755 --- a/cuda_core/docs/build_docs.sh +++ b/cuda_core/docs/build_docs.sh @@ -2,49 +2,45 @@ set -ex -build_docs() { - if [[ "$#" == "0" ]]; then - LATEST_ONLY="0" - elif [[ "$#" == "1" && "$1" == "latest-only" ]]; then - LATEST_ONLY="1" - else - echo "usage: ./build_docs.sh [latest-only]" - exit 1 - fi - - # SPHINX_CUDA_CORE_VER is used to create a subdir under build/html - # (the Makefile file for sphinx-build also honors it if defined) - if [[ -z "${SPHINX_CUDA_CORE_VER}" ]]; then - export SPHINX_CUDA_CORE_VER=$(python -c "from importlib.metadata import version; print(version('cuda-core'))" \ - | awk -F'+' '{print $1}') - fi - - # build the docs (in parallel) - SPHINXOPTS="-j 4" make html - - # for debugging/developing (conf.py), please comment out the above line and - # use the line below instead, as we must build in serial to avoid getting - # obsecure Sphinx errors - #SPHINXOPTS="-v" make html - - # to support version dropdown menu - cp ./versions.json build/html - - # to have a redirection page (to the latest docs) - cp source/_templates/main.html build/html/index.html - - # ensure that the latest docs is the one we built - if [[ $LATEST_ONLY == "0" ]]; then - cp -r build/html/${SPHINX_CUDA_CORE_VER} build/html/latest - else - mv build/html/${SPHINX_CUDA_CORE_VER} build/html/latest - fi - - # ensure that the Sphinx reference uses the latest docs - cp build/html/latest/objects.inv build/html - - # clean up previously auto-generated files - rm -rf source/generated/ -} - -build_docs $@ +if [[ "$#" == "0" ]]; then + LATEST_ONLY="0" +elif [[ "$#" == "1" && "$1" == "latest-only" ]]; then + LATEST_ONLY="1" +else + echo "usage: ./build_docs.sh [latest-only]" + exit 1 +fi + +# SPHINX_CUDA_CORE_VER is used to create a subdir under build/html +# (the Makefile file for sphinx-build also honors it if defined) +if [[ -z "${SPHINX_CUDA_CORE_VER}" ]]; then + export SPHINX_CUDA_CORE_VER=$(python -c "from importlib.metadata import version; print(version('cuda-core'))" \ + | awk -F'+' '{print $1}') +fi + +# build the docs (in parallel) +SPHINXOPTS="-j 4" make html + +# for debugging/developing (conf.py), please comment out the above line and +# use the line below instead, as we must build in serial to avoid getting +# obsecure Sphinx errors +#SPHINXOPTS="-v" make html + +# to support version dropdown menu +cp ./versions.json build/html + +# to have a redirection page (to the latest docs) +cp source/_templates/main.html build/html/index.html + +# ensure that the latest docs is the one we built +if [[ $LATEST_ONLY == "0" ]]; then + cp -r build/html/${SPHINX_CUDA_CORE_VER} build/html/latest +else + mv build/html/${SPHINX_CUDA_CORE_VER} build/html/latest +fi + +# ensure that the Sphinx reference uses the latest docs +cp build/html/latest/objects.inv build/html + +# clean up previously auto-generated files +rm -rf source/generated/ diff --git a/cuda_python/docs/build_all_docs.sh b/cuda_python/docs/build_all_docs.sh index 03d82fe799..733ae1bc87 100755 --- a/cuda_python/docs/build_all_docs.sh +++ b/cuda_python/docs/build_all_docs.sh @@ -2,30 +2,26 @@ set -ex -build_all_docs() { - # build cuda-python docs - rm -rf build - ./build_docs.sh $@ - - # build cuda-bindings docs - CUDA_BINDINGS_PATH=build/html/cuda-bindings - mkdir -p $CUDA_BINDINGS_PATH - pushd . - cd ../../cuda_bindings/docs - rm -rf build - ./build_docs.sh $@ - cp -r build/html/* "$(dirs -l +1)"/$CUDA_BINDINGS_PATH - popd - - # build cuda-core docs - CUDA_CORE_PATH=build/html/cuda-core - mkdir -p $CUDA_CORE_PATH - pushd . - cd ../../cuda_core/docs - rm -rf build - ./build_docs.sh $@ - cp -r build/html/* "$(dirs -l +1)"/$CUDA_CORE_PATH - popd -} +# build cuda-python docs +rm -rf build +./build_docs.sh $@ -build_all_docs $@ +# build cuda-bindings docs +CUDA_BINDINGS_PATH=build/html/cuda-bindings +mkdir -p $CUDA_BINDINGS_PATH +pushd . +cd ../../cuda_bindings/docs +rm -rf build +./build_docs.sh $@ +cp -r build/html/* "$(dirs -l +1)"/$CUDA_BINDINGS_PATH +popd + +# build cuda-core docs +CUDA_CORE_PATH=build/html/cuda-core +mkdir -p $CUDA_CORE_PATH +pushd . +cd ../../cuda_core/docs +rm -rf build +./build_docs.sh $@ +cp -r build/html/* "$(dirs -l +1)"/$CUDA_CORE_PATH +popd diff --git a/cuda_python/docs/build_docs.sh b/cuda_python/docs/build_docs.sh index 714ba530e3..38302049cf 100755 --- a/cuda_python/docs/build_docs.sh +++ b/cuda_python/docs/build_docs.sh @@ -2,53 +2,49 @@ set -ex -build_docs() { - if [[ "$#" == "0" ]]; then - LATEST_ONLY="0" - elif [[ "$#" == "1" && "$1" == "latest-only" ]]; then - LATEST_ONLY="1" - else - echo "usage: ./build_docs.sh [latest-only]" - exit 1 - fi - - # SPHINX_CUDA_PYTHON_VER is used to create a subdir under build/html - # (the Makefile file for sphinx-build also honors it if defined). - # If there's a post release (ex: .post1) we don't want it to show up in the - # version selector or directory structure. - if [[ -z "${SPHINX_CUDA_PYTHON_VER}" ]]; then - export SPHINX_CUDA_PYTHON_VER=$(python -c "from importlib.metadata import version; \ - ver = '.'.join(str(version('cuda-python')).split('.')[:3]); \ - print(ver)" \ - | awk -F'+' '{print $1}') - fi - - # build the docs (in parallel) - SPHINXOPTS="-j 4" make html - - # for debugging/developing (conf.py), please comment out the above line and - # use the line below instead, as we must build in serial to avoid getting - # obsecure Sphinx errors - #SPHINXOPTS="-v" make html - - # to support version dropdown menu - cp ./versions.json build/html - - # to have a redirection page (to the latest docs) - cp source/_templates/main.html build/html/index.html - - # ensure that the latest docs is the one we built - if [[ $LATEST_ONLY == "0" ]]; then - cp -r build/html/${SPHINX_CUDA_PYTHON_VER} build/html/latest - else - mv build/html/${SPHINX_CUDA_PYTHON_VER} build/html/latest - fi - - # ensure that the Sphinx reference uses the latest docs - cp build/html/latest/objects.inv build/html - - # clean up previously auto-generated files - rm -rf source/generated/ -} - -build_docs $@ +if [[ "$#" == "0" ]]; then + LATEST_ONLY="0" +elif [[ "$#" == "1" && "$1" == "latest-only" ]]; then + LATEST_ONLY="1" +else + echo "usage: ./build_docs.sh [latest-only]" + exit 1 +fi + +# SPHINX_CUDA_PYTHON_VER is used to create a subdir under build/html +# (the Makefile file for sphinx-build also honors it if defined). +# If there's a post release (ex: .post1) we don't want it to show up in the +# version selector or directory structure. +if [[ -z "${SPHINX_CUDA_PYTHON_VER}" ]]; then + export SPHINX_CUDA_PYTHON_VER=$(python -c "from importlib.metadata import version; \ + ver = '.'.join(str(version('cuda-python')).split('.')[:3]); \ + print(ver)" \ + | awk -F'+' '{print $1}') +fi + +# build the docs (in parallel) +SPHINXOPTS="-j 4" make html + +# for debugging/developing (conf.py), please comment out the above line and +# use the line below instead, as we must build in serial to avoid getting +# obsecure Sphinx errors +#SPHINXOPTS="-v" make html + +# to support version dropdown menu +cp ./versions.json build/html + +# to have a redirection page (to the latest docs) +cp source/_templates/main.html build/html/index.html + +# ensure that the latest docs is the one we built +if [[ $LATEST_ONLY == "0" ]]; then + cp -r build/html/${SPHINX_CUDA_PYTHON_VER} build/html/latest +else + mv build/html/${SPHINX_CUDA_PYTHON_VER} build/html/latest +fi + +# ensure that the Sphinx reference uses the latest docs +cp build/html/latest/objects.inv build/html + +# clean up previously auto-generated files +rm -rf source/generated/ From fb060bf0e6221b154e8bc1f98f922c42d4dc75cb Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sat, 28 Dec 2024 01:03:52 -0500 Subject: [PATCH 13/19] nit: update workflow comments --- .github/workflows/gh-build-docs.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/gh-build-docs.yml b/.github/workflows/gh-build-docs.yml index 43c6e21ee1..1283e1cd60 100644 --- a/.github/workflows/gh-build-docs.yml +++ b/.github/workflows/gh-build-docs.yml @@ -10,7 +10,7 @@ jobs: name: Build docs # The build stage could fail but we want the CI to keep moving. if: ${{ github.repository_owner == 'nvidia' && always() }} - # WAR: Building the doc currently requires a GPU (NVIDIA/cuda-python#326) + # WAR: Building the doc currently requires a GPU (NVIDIA/cuda-python#326,327) runs-on: linux-amd64-gpu-t4-latest-1-testing #runs-on: ubuntu-latest defaults: @@ -31,7 +31,7 @@ jobs: conda-remove-defaults: "true" python-version: 3.12 - # WAR: Building the doc currently requires CTK installed (NVIDIA/cuda-python#326) + # WAR: Building the doc currently requires CTK installed (NVIDIA/cuda-python#326,327) - name: Set up mini CTK uses: ./.github/actions/fetch_ctk continue-on-error: false From 953c1f89e54f7cc1bc3690b96f6155d5fdfe145e Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sat, 28 Dec 2024 06:19:25 +0000 Subject: [PATCH 14/19] add a conda env check step --- .github/workflows/gh-build-docs.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.github/workflows/gh-build-docs.yml b/.github/workflows/gh-build-docs.yml index 1283e1cd60..69d38a32b1 100644 --- a/.github/workflows/gh-build-docs.yml +++ b/.github/workflows/gh-build-docs.yml @@ -22,6 +22,9 @@ jobs: with: fetch-depth: 0 + # TODO: cache conda env to speed up the workflow once conda-incubator/setup-miniconda#267 + # is resolved + - name: Set up miniforge uses: conda-incubator/setup-miniconda@v3 with: @@ -31,6 +34,13 @@ jobs: conda-remove-defaults: "true" python-version: 3.12 + - name: Check conda env + run: | + conda info + conda list + conda config --show-sources + conda config --show + # WAR: Building the doc currently requires CTK installed (NVIDIA/cuda-python#326,327) - name: Set up mini CTK uses: ./.github/actions/fetch_ctk From c8424da8da7894061ea3ce82b02673e30e9ebabc Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sat, 28 Dec 2024 06:22:23 +0000 Subject: [PATCH 15/19] bring back full CI + run nvidia-smi in doc CI --- .github/workflows/gh-build-and-test.yml | 332 ++++++++++++------------ .github/workflows/gh-build-docs.yml | 4 + 2 files changed, 170 insertions(+), 166 deletions(-) diff --git a/.github/workflows/gh-build-and-test.yml b/.github/workflows/gh-build-and-test.yml index 3c226d2225..20f69a1cf5 100644 --- a/.github/workflows/gh-build-and-test.yml +++ b/.github/workflows/gh-build-and-test.yml @@ -7,14 +7,14 @@ jobs: matrix: host-platform: - linux-64 -# - linux-aarch64 -# - win-64 + - linux-aarch64 + - win-64 python-version: -# - "3.13" + - "3.13" - "3.12" -# - "3.11" -# - "3.10" -# - "3.9" + - "3.11" + - "3.10" + - "3.9" cuda-version: # Note: this is for build-time only. - "12.6.2" @@ -162,166 +162,166 @@ jobs: run: | echo "CUDA_VERSION=${{ matrix.cuda-version }}" >> $GITHUB_OUTPUT -# test: -# strategy: -# fail-fast: false -# # TODO: add driver version here -# matrix: -# host-platform: -# - linux-64 -# - linux-aarch64 -# # TODO: enable testing once win-64 GPU runners are up -# # - win-64 -# python-version: -# - "3.13" -# - "3.12" -# - "3.11" -# - "3.10" -# - "3.9" -# cuda-version: -# # Note: this is for test-time only. -# - "12.6.2" -# - "12.0.1" -# - "11.8.0" -# runner: -# - default -# include: -# - host-platform: linux-64 -# python-version: "3.12" -# cuda-version: "12.6.2" -# runner: H100 -# name: Test (${{ matrix.host-platform }}, Python ${{ matrix.python-version }}, CUDA ${{ matrix.cuda-version }}, Runner ${{ matrix.runner }}) -# # The build stage could fail but we want the CI to keep moving. -# if: ${{ github.repository_owner == 'nvidia' && always() }} -# permissions: -# id-token: write # This is required for configure-aws-credentials -# contents: read # This is required for actions/checkout -# runs-on: ${{ (matrix.runner == 'default' && matrix.host-platform == 'linux-64' && 'linux-amd64-gpu-v100-latest-1') || -# (matrix.runner == 'default' && matrix.host-platform == 'linux-aarch64' && 'linux-arm64-gpu-a100-latest-1') || -# (matrix.runner == 'H100' && 'linux-amd64-gpu-h100-latest-1-testing') }} -# # Our self-hosted runners require a container -# # TODO: use a different (nvidia?) container -# container: -# options: -u root --security-opt seccomp=unconfined --shm-size 16g -# image: ubuntu:22.04 -# env: -# NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }} -# needs: -# - build -# steps: -# - name: Ensure GPU is working -# shell: bash --noprofile --norc -xeuo pipefail {0} -# run: nvidia-smi -# -# - name: Checkout ${{ github.event.repository.name }} -# uses: actions/checkout@v4 -# with: -# fetch-depth: 0 -# -# - name: Set environment variables -# shell: bash --noprofile --norc -xeuo pipefail {0} -# run: | -# PYTHON_VERSION_FORMATTED=$(echo '${{ matrix.python-version }}' | tr -d '.') -# if [[ "${{ matrix.host-platform }}" == linux* ]]; then -# REPO_DIR=$(pwd) -# elif [[ "${{ matrix.host-platform }}" == win* ]]; then -# PWD=$(pwd) -# REPO_DIR=$(cygpath -w $PWD) -# fi -# -# BUILD_CUDA_MAJOR="$(cut -d '.' -f 1 <<< ${{ needs.build.outputs.BUILD_CTK_VER }})" -# TEST_CUDA_MAJOR="$(cut -d '.' -f 1 <<< ${{ matrix.cuda-version }})" -# if [[ $BUILD_CUDA_MAJOR != $TEST_CUDA_MAJOR ]]; then -# SKIP_CUDA_BINDINGS_TEST=1 -# else -# SKIP_CUDA_BINDINGS_TEST=0 -# fi -# -# # make outputs from the previous job as env vars -# echo "CUDA_CORE_ARTIFACT_NAME=cuda-core-python${PYTHON_VERSION_FORMATTED}-${{ matrix.host-platform }}-${{ github.sha }}" >> $GITHUB_ENV -# echo "CUDA_CORE_ARTIFACTS_DIR=$(realpath "$REPO_DIR/cuda_core/dist")" >> $GITHUB_ENV -# echo "CUDA_BINDINGS_ARTIFACT_NAME=cuda-bindings-python${PYTHON_VERSION_FORMATTED}-cuda${{ needs.build.outputs.BUILD_CTK_VER }}-${{ matrix.host-platform }}-${{ github.sha }}" >> $GITHUB_ENV -# echo "CUDA_BINDINGS_ARTIFACTS_DIR=$(realpath "$REPO_DIR/cuda_bindings/dist")" >> $GITHUB_ENV -# echo "SKIP_CUDA_BINDINGS_TEST=${SKIP_CUDA_BINDINGS_TEST}" >> $GITHUB_ENV -# -# - name: Download cuda.bindings build artifacts -# uses: actions/download-artifact@v4 -# with: -# name: ${{ env.CUDA_BINDINGS_ARTIFACT_NAME }} -# path: ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }} -# -# - name: Display structure of downloaded cuda.bindings artifacts -# shell: bash --noprofile --norc -xeuo pipefail {0} -# run: | -# pwd -# ls -lahR $CUDA_BINDINGS_ARTIFACTS_DIR -# -# - name: Download cuda.core build artifacts -# uses: actions/download-artifact@v4 -# with: -# name: ${{ env.CUDA_CORE_ARTIFACT_NAME }} -# path: ${{ env.CUDA_CORE_ARTIFACTS_DIR }} -# -# - name: Display structure of downloaded cuda.core build artifacts -# shell: bash --noprofile --norc -xeuo pipefail {0} -# run: | -# pwd -# ls -lahR $CUDA_CORE_ARTIFACTS_DIR -# -# - name: Set up Python ${{ matrix.python-version }} -# uses: actions/setup-python@v5 -# with: -# python-version: ${{ matrix.python-version }} -# -# - name: Set up mini CTK -# uses: ./.github/actions/fetch_ctk -# continue-on-error: false -# with: -# host-platform: ${{ matrix.host-platform }} -# cuda-version: ${{ matrix.cuda-version }} -# -# - name: Run cuda.bindings tests -# if: ${{ env.SKIP_CUDA_BINDINGS_TEST == '0' }} -# shell: bash --noprofile --norc -xeuo pipefail {0} -# run: | -# ls $CUDA_PATH -# -# pushd "${CUDA_BINDINGS_ARTIFACTS_DIR}" -# pip install *.whl -# popd -# -# pushd ./cuda_bindings -# pip install -r requirements.txt -# pytest -rxXs tests/ -# # TODO: enable cython tests -# #pytest tests/cython -# popd -# -# - name: Run cuda.core tests -# shell: bash --noprofile --norc -xeuo pipefail {0} -# run: | -# if [[ $SKIP_CUDA_BINDINGS_TEST == 1 ]]; then -# # TODO: remove this hack once cuda-python has a cp313 build -# if [[ ${{ matrix.python-version }} == "3.13" ]]; then -# echo "Python 3.13 + cuda-python ${{ matrix.cuda-version }} is not supported, skipping the test..." -# exit 0 -# fi -# fi -# -# # If build/test majors match: cuda.bindings is installed in the previous step. -# # If mismatch: cuda.bindings is installed from PyPI. -# TEST_CUDA_MAJOR="$(cut -d '.' -f 1 <<< ${{ matrix.cuda-version }})" -# pushd "${CUDA_CORE_ARTIFACTS_DIR}" -# pip install $(ls *.whl)["cu${TEST_CUDA_MAJOR}"] -# popd -# -# pushd ./cuda_core -# # TODO: add requirements.txt for test deps? -# pip install pytest -# # TODO: add CuPy to test deps (which would require cuRAND) -# # pip install "cupy-cuda${TEST_CUDA_MAJOR}x" -# pytest -rxXs tests/ -# popd + test: + strategy: + fail-fast: false + # TODO: add driver version here + matrix: + host-platform: + - linux-64 + - linux-aarch64 + # TODO: enable testing once win-64 GPU runners are up + # - win-64 + python-version: + - "3.13" + - "3.12" + - "3.11" + - "3.10" + - "3.9" + cuda-version: + # Note: this is for test-time only. + - "12.6.2" + - "12.0.1" + - "11.8.0" + runner: + - default + include: + - host-platform: linux-64 + python-version: "3.12" + cuda-version: "12.6.2" + runner: H100 + name: Test (${{ matrix.host-platform }}, Python ${{ matrix.python-version }}, CUDA ${{ matrix.cuda-version }}, Runner ${{ matrix.runner }}) + # The build stage could fail but we want the CI to keep moving. + if: ${{ github.repository_owner == 'nvidia' && always() }} + permissions: + id-token: write # This is required for configure-aws-credentials + contents: read # This is required for actions/checkout + runs-on: ${{ (matrix.runner == 'default' && matrix.host-platform == 'linux-64' && 'linux-amd64-gpu-v100-latest-1') || + (matrix.runner == 'default' && matrix.host-platform == 'linux-aarch64' && 'linux-arm64-gpu-a100-latest-1') || + (matrix.runner == 'H100' && 'linux-amd64-gpu-h100-latest-1-testing') }} + # Our self-hosted runners require a container + # TODO: use a different (nvidia?) container + container: + options: -u root --security-opt seccomp=unconfined --shm-size 16g + image: ubuntu:22.04 + env: + NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }} + needs: + - build + steps: + - name: Ensure GPU is working + shell: bash --noprofile --norc -xeuo pipefail {0} + run: nvidia-smi + + - name: Checkout ${{ github.event.repository.name }} + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Set environment variables + shell: bash --noprofile --norc -xeuo pipefail {0} + run: | + PYTHON_VERSION_FORMATTED=$(echo '${{ matrix.python-version }}' | tr -d '.') + if [[ "${{ matrix.host-platform }}" == linux* ]]; then + REPO_DIR=$(pwd) + elif [[ "${{ matrix.host-platform }}" == win* ]]; then + PWD=$(pwd) + REPO_DIR=$(cygpath -w $PWD) + fi + + BUILD_CUDA_MAJOR="$(cut -d '.' -f 1 <<< ${{ needs.build.outputs.BUILD_CTK_VER }})" + TEST_CUDA_MAJOR="$(cut -d '.' -f 1 <<< ${{ matrix.cuda-version }})" + if [[ $BUILD_CUDA_MAJOR != $TEST_CUDA_MAJOR ]]; then + SKIP_CUDA_BINDINGS_TEST=1 + else + SKIP_CUDA_BINDINGS_TEST=0 + fi + + # make outputs from the previous job as env vars + echo "CUDA_CORE_ARTIFACT_NAME=cuda-core-python${PYTHON_VERSION_FORMATTED}-${{ matrix.host-platform }}-${{ github.sha }}" >> $GITHUB_ENV + echo "CUDA_CORE_ARTIFACTS_DIR=$(realpath "$REPO_DIR/cuda_core/dist")" >> $GITHUB_ENV + echo "CUDA_BINDINGS_ARTIFACT_NAME=cuda-bindings-python${PYTHON_VERSION_FORMATTED}-cuda${{ needs.build.outputs.BUILD_CTK_VER }}-${{ matrix.host-platform }}-${{ github.sha }}" >> $GITHUB_ENV + echo "CUDA_BINDINGS_ARTIFACTS_DIR=$(realpath "$REPO_DIR/cuda_bindings/dist")" >> $GITHUB_ENV + echo "SKIP_CUDA_BINDINGS_TEST=${SKIP_CUDA_BINDINGS_TEST}" >> $GITHUB_ENV + + - name: Download cuda.bindings build artifacts + uses: actions/download-artifact@v4 + with: + name: ${{ env.CUDA_BINDINGS_ARTIFACT_NAME }} + path: ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }} + + - name: Display structure of downloaded cuda.bindings artifacts + shell: bash --noprofile --norc -xeuo pipefail {0} + run: | + pwd + ls -lahR $CUDA_BINDINGS_ARTIFACTS_DIR + + - name: Download cuda.core build artifacts + uses: actions/download-artifact@v4 + with: + name: ${{ env.CUDA_CORE_ARTIFACT_NAME }} + path: ${{ env.CUDA_CORE_ARTIFACTS_DIR }} + + - name: Display structure of downloaded cuda.core build artifacts + shell: bash --noprofile --norc -xeuo pipefail {0} + run: | + pwd + ls -lahR $CUDA_CORE_ARTIFACTS_DIR + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Set up mini CTK + uses: ./.github/actions/fetch_ctk + continue-on-error: false + with: + host-platform: ${{ matrix.host-platform }} + cuda-version: ${{ matrix.cuda-version }} + + - name: Run cuda.bindings tests + if: ${{ env.SKIP_CUDA_BINDINGS_TEST == '0' }} + shell: bash --noprofile --norc -xeuo pipefail {0} + run: | + ls $CUDA_PATH + + pushd "${CUDA_BINDINGS_ARTIFACTS_DIR}" + pip install *.whl + popd + + pushd ./cuda_bindings + pip install -r requirements.txt + pytest -rxXs tests/ + # TODO: enable cython tests + #pytest tests/cython + popd + + - name: Run cuda.core tests + shell: bash --noprofile --norc -xeuo pipefail {0} + run: | + if [[ $SKIP_CUDA_BINDINGS_TEST == 1 ]]; then + # TODO: remove this hack once cuda-python has a cp313 build + if [[ ${{ matrix.python-version }} == "3.13" ]]; then + echo "Python 3.13 + cuda-python ${{ matrix.cuda-version }} is not supported, skipping the test..." + exit 0 + fi + fi + + # If build/test majors match: cuda.bindings is installed in the previous step. + # If mismatch: cuda.bindings is installed from PyPI. + TEST_CUDA_MAJOR="$(cut -d '.' -f 1 <<< ${{ matrix.cuda-version }})" + pushd "${CUDA_CORE_ARTIFACTS_DIR}" + pip install $(ls *.whl)["cu${TEST_CUDA_MAJOR}"] + popd + + pushd ./cuda_core + # TODO: add requirements.txt for test deps? + pip install pytest + # TODO: add CuPy to test deps (which would require cuRAND) + # pip install "cupy-cuda${TEST_CUDA_MAJOR}x" + pytest -rxXs tests/ + popd doc: # The build stage could fail but we want the CI to keep moving. diff --git a/.github/workflows/gh-build-docs.yml b/.github/workflows/gh-build-docs.yml index 69d38a32b1..456b5d31bf 100644 --- a/.github/workflows/gh-build-docs.yml +++ b/.github/workflows/gh-build-docs.yml @@ -17,6 +17,10 @@ jobs: run: shell: bash -el {0} steps: + # WAR: Building the doc currently requires a GPU (NVIDIA/cuda-python#326,327) + - name: Ensure GPU is working + run: nvidia-smi + - name: Checkout ${{ github.event.repository.name }} uses: actions/checkout@v4 with: From 45218b861cedf2cd8f327d43129e2b6ab48ef704 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sat, 28 Dec 2024 23:28:02 +0000 Subject: [PATCH 16/19] try github-pages-deploy-action --- .github/workflows/gh-build-docs.yml | 56 ++++++----------------------- 1 file changed, 11 insertions(+), 45 deletions(-) diff --git a/.github/workflows/gh-build-docs.yml b/.github/workflows/gh-build-docs.yml index 456b5d31bf..cda1caf9c3 100644 --- a/.github/workflows/gh-build-docs.yml +++ b/.github/workflows/gh-build-docs.yml @@ -115,48 +115,14 @@ jobs: path: artifacts/ retention-days: 3 - # The steps below are not executed unless when building on main. - - name: Configure git - if: ${{ github.ref_name == 'main' }} - run: | - git config --local user.email "noreply@nvidia.com" - git config --local user.name "cuda-python-bot" - - - name: Checkout the gh-pages branch - if: ${{ github.ref_name == 'main' }} - run: | - git fetch origin gh-pages - git checkout gh-pages - - - name: Move artifacts to doc root - if: ${{ github.ref_name == 'main' }} - run: | - mv artifacts/docs/* docs/ - git status - - - name: Commit changes - if: ${{ github.ref_name == 'main' }} - run: | - git add docs/ - git status - git commit -m "Deploy: ${{ github.sha }}" - continue-on-error: true - - - name: Push changes - if: ${{ github.ref_name == 'main' && success() }} - run: | - git push origin gh-pages - - # TODO: discuss if we want to abandon branch-based doc deployment - # deploy: - # # Only deploy the latest docs when building on main - # if: ${{ github.ref_name == 'main' }} - # environment: - # name: github-pages - # url: ${{ steps.deployment.outputs.page_url }} - # runs-on: ubuntu-latest - # needs: build - # steps: - # - name: Deploy to GitHub Pages - # id: deployment - # uses: actions/deploy-pages@v4 + # The step below is not executed unless when building on main. + - name: Deploy doc update + #if: ${{ github.ref_name == 'main' && success() }} + uses: JamesIves/github-pages-deploy-action@v4 + with: + folder: artifacts/docs/ + git-config-name: cuda-python-bot + target-folder: docs/ + commit-message: "Deploy latest docs: ${{ github.sha }}" + clean: false + dry-run: true From 645fc39ba538194e627bf71c81bff1f66f5c8e21 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sat, 28 Dec 2024 23:51:37 +0000 Subject: [PATCH 17/19] remove dry-run tag --- .github/workflows/gh-build-docs.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/gh-build-docs.yml b/.github/workflows/gh-build-docs.yml index cda1caf9c3..4eabd73dc4 100644 --- a/.github/workflows/gh-build-docs.yml +++ b/.github/workflows/gh-build-docs.yml @@ -117,7 +117,7 @@ jobs: # The step below is not executed unless when building on main. - name: Deploy doc update - #if: ${{ github.ref_name == 'main' && success() }} + if: ${{ github.ref_name == 'main' && success() }} uses: JamesIves/github-pages-deploy-action@v4 with: folder: artifacts/docs/ @@ -125,4 +125,3 @@ jobs: target-folder: docs/ commit-message: "Deploy latest docs: ${{ github.sha }}" clean: false - dry-run: true From 2482fe7733931a1c871fb42d8027eea2075892e8 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sat, 28 Dec 2024 23:59:05 +0000 Subject: [PATCH 18/19] update concurrency group name --- .github/workflows/ci-gh.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci-gh.yml b/.github/workflows/ci-gh.yml index f61fd12816..091fc4e0ed 100644 --- a/.github/workflows/ci-gh.yml +++ b/.github/workflows/ci-gh.yml @@ -1,8 +1,10 @@ name: CI concurrency: - #group: ${{ startsWith(github.ref_name, 'main') && format('unique-{0}', github.run_id) || format('ci-build-and-test-on-{0}-from-{1}', github.event_name, github.ref_name) }} - group: ${{ github.workflow }}-${{ github.ref }} + group: ${{ github.workflow }}-${{ + github.ref_name == 'main' && format('ci-main-build-test-{0}', github.run_id) || + format('ci-pr-build-test-on-{0}-against-branch-{1}', github.event_name, github.ref_name) + }} cancel-in-progress: true on: From 89748a57e08ddbe80403e0110a1f0cd5ca46c154 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Mon, 30 Dec 2024 05:35:17 +0000 Subject: [PATCH 19/19] incorporate the ci-gh workflow into build-and-test + ensure all workflows have a name --- ...-build-and-test.yml => build-and-test.yml} | 17 ++++++++++++++-- .../{gh-build-docs.yml => build-docs.yml} | 2 ++ .github/workflows/ci-gh.yml | 20 ------------------- 3 files changed, 17 insertions(+), 22 deletions(-) rename .github/workflows/{gh-build-and-test.yml => build-and-test.yml} (96%) rename .github/workflows/{gh-build-docs.yml => build-docs.yml} (99%) delete mode 100644 .github/workflows/ci-gh.yml diff --git a/.github/workflows/gh-build-and-test.yml b/.github/workflows/build-and-test.yml similarity index 96% rename from .github/workflows/gh-build-and-test.yml rename to .github/workflows/build-and-test.yml index 0c3bec4c14..759485c499 100644 --- a/.github/workflows/gh-build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -1,4 +1,17 @@ -on: workflow_call +name: "CI: Build and test" + +concurrency: + group: ${{ github.workflow }}-${{ + github.ref_name == 'main' && format('ci-main-build-test-{0}', github.run_id) || + format('ci-pr-build-test-on-{0}-against-branch-{1}', github.event_name, github.ref_name) + }} + cancel-in-progress: true + +on: + push: + branches: + - "pull-request/[0-9]+" + - "main" jobs: build: @@ -332,6 +345,6 @@ jobs: - build secrets: inherit uses: - ./.github/workflows/gh-build-docs.yml + ./.github/workflows/build-docs.yml with: build_ctk_ver: ${{ needs.build.outputs.BUILD_CTK_VER }} diff --git a/.github/workflows/gh-build-docs.yml b/.github/workflows/build-docs.yml similarity index 99% rename from .github/workflows/gh-build-docs.yml rename to .github/workflows/build-docs.yml index 4eabd73dc4..71f360ba0c 100644 --- a/.github/workflows/gh-build-docs.yml +++ b/.github/workflows/build-docs.yml @@ -1,3 +1,5 @@ +name: "CI: Build and update docs" + on: workflow_call: inputs: diff --git a/.github/workflows/ci-gh.yml b/.github/workflows/ci-gh.yml deleted file mode 100644 index 091fc4e0ed..0000000000 --- a/.github/workflows/ci-gh.yml +++ /dev/null @@ -1,20 +0,0 @@ -name: CI - -concurrency: - group: ${{ github.workflow }}-${{ - github.ref_name == 'main' && format('ci-main-build-test-{0}', github.run_id) || - format('ci-pr-build-test-on-{0}-against-branch-{1}', github.event_name, github.ref_name) - }} - cancel-in-progress: true - -on: - push: - branches: - - "pull-request/[0-9]+" - - "main" - -jobs: - ci: - uses: - ./.github/workflows/gh-build-and-test.yml - secrets: inherit