From d3a0fb0cd9b72dbf3e36ac6b172e6636329967f5 Mon Sep 17 00:00:00 2001 From: Wenqi Li Date: Mon, 30 Aug 2021 18:38:39 +0100 Subject: [PATCH 1/8] update cron tests Signed-off-by: Wenqi Li --- .github/workflows/cron.yml | 6 +++--- Dockerfile | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/cron.yml b/.github/workflows/cron.yml index 9803d459dc..a36cfbcdb9 100644 --- a/.github/workflows/cron.yml +++ b/.github/workflows/cron.yml @@ -62,7 +62,7 @@ jobs: if: github.repository == 'Project-MONAI/MONAI' strategy: matrix: - container: ["pytorch:21.02", "pytorch:21.06"] # 21.02 for backward comp. + container: ["pytorch:21.02", "pytorch:21.08"] # 21.02 for backward comp. container: image: nvcr.io/nvidia/${{ matrix.container }}-py3 # testing with the latest pytorch base image options: "--gpus all" @@ -106,7 +106,7 @@ jobs: if: github.repository == 'Project-MONAI/MONAI' strategy: matrix: - container: ["pytorch:21.02", "pytorch:21.06"] # 21.02 for backward comp. + container: ["pytorch:21.02", "pytorch:21.08"] # 21.02 for backward comp. container: image: nvcr.io/nvidia/${{ matrix.container }}-py3 # testing with the latest pytorch base image options: "--gpus all" @@ -204,7 +204,7 @@ jobs: if: github.repository == 'Project-MONAI/MONAI' needs: cron-gpu # so that monai itself is verified first container: - image: nvcr.io/nvidia/pytorch:21.06-py3 # testing with the latest pytorch base image + image: nvcr.io/nvidia/pytorch:21.08-py3 # testing with the latest pytorch base image options: "--gpus all --ipc=host" runs-on: [self-hosted, linux, x64, common] steps: diff --git a/Dockerfile b/Dockerfile index ac06183768..77fe1f828f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -11,7 +11,7 @@ # To build with a different base image # please run `docker build` using the `--build-arg PYTORCH_IMAGE=...` flag. -ARG PYTORCH_IMAGE=nvcr.io/nvidia/pytorch:21.06-py3 +ARG PYTORCH_IMAGE=nvcr.io/nvidia/pytorch:21.08-py3 FROM ${PYTORCH_IMAGE} LABEL maintainer="monai.contact@gmail.com" From 911c3322e6ad731abf7c5a7de3f62ca697b373dd Mon Sep 17 00:00:00 2001 From: Wenqi Li Date: Mon, 30 Aug 2021 18:39:46 +0100 Subject: [PATCH 2/8] temp tests Signed-off-by: Wenqi Li --- .github/workflows/cron.yml | 88 +++----------------------------------- 1 file changed, 5 insertions(+), 83 deletions(-) diff --git a/.github/workflows/cron.yml b/.github/workflows/cron.yml index a36cfbcdb9..6afdffb6c4 100644 --- a/.github/workflows/cron.yml +++ b/.github/workflows/cron.yml @@ -1,68 +1,20 @@ name: crons on: + push: + branches: + - testing-2108 # schedule: # - cron: "0 2 * * *" # at 02:00 UTC # Allows you to run this workflow manually from the Actions tab workflow_dispatch: jobs: - cron-gpu: - if: github.repository == 'Project-MONAI/MONAI' - container: - image: nvcr.io/nvidia/pytorch:20.03-py3 # CUDA 10.2 - options: "--gpus all" - runs-on: [self-hosted, linux, x64, common] - strategy: - matrix: - pytorch-version: [1.5.1, 1.6.0, 1.7.1, 1.8.1, latest] - steps: - - uses: actions/checkout@v2 - - name: Install the dependencies - run: | - which python - python -m pip install --upgrade pip wheel - python -m pip uninstall -y torch torchvision - if [ ${{ matrix.pytorch-version }} == "latest" ]; then - python -m pip install torch torchvision - elif [ ${{ matrix.pytorch-version }} == "1.5.1" ]; then - python -m pip install torch==1.5.1 torchvision==0.6.1 - elif [ ${{ matrix.pytorch-version }} == "1.6.0" ]; then - python -m pip install torch==1.6.0 torchvision==0.7.0 - elif [ ${{ matrix.pytorch-version }} == "1.7.1" ]; then - python -m pip install torch==1.7.1 torchvision==0.8.2 - elif [ ${{ matrix.pytorch-version }} == "1.8.1" ]; then - python -m pip install torch==1.8.1 torchvision==0.9.1 - fi - python -m pip install -r requirements-dev.txt - python -m pip list - - name: Run tests report coverage - run: | - export LAUNCH_DELAY=$[ $RANDOM % 16 * 60 ] - echo "Sleep $LAUNCH_DELAY" - sleep $LAUNCH_DELAY - nvidia-smi - export CUDA_VISIBLE_DEVICES=$(python -m tests.utils) - echo $CUDA_VISIBLE_DEVICES - trap 'if pgrep python; then pkill python; fi;' ERR - python -c $'import torch\na,b=torch.zeros(1,device="cuda:0"),torch.zeros(1,device="cuda:1");\nwhile True:print(a,b)' > /dev/null & - python -c "import torch; print(torch.__version__); print('{} of GPUs available'.format(torch.cuda.device_count()))" - python -c 'import torch; print(torch.rand(5, 3, device=torch.device("cuda:0")))' - BUILD_MONAI=1 ./runtests.sh --coverage --unittests # unit tests with coverage report - BUILD_MONAI=1 ./runtests.sh --coverage --net # integration tests with coverage report - coverage xml - if pgrep python; then pkill python; fi - - name: Upload coverage - uses: codecov/codecov-action@v1 - with: - fail_ci_if_error: false - file: ./coverage.xml - cron-pt-image: if: github.repository == 'Project-MONAI/MONAI' strategy: matrix: - container: ["pytorch:21.02", "pytorch:21.08"] # 21.02 for backward comp. + container: ["pytorch:21.08"] # 21.02 for backward comp. container: image: nvcr.io/nvidia/${{ matrix.container }}-py3 # testing with the latest pytorch base image options: "--gpus all" @@ -106,7 +58,7 @@ jobs: if: github.repository == 'Project-MONAI/MONAI' strategy: matrix: - container: ["pytorch:21.02", "pytorch:21.08"] # 21.02 for backward comp. + container: ["pytorch:21.08"] # 21.02 for backward comp. container: image: nvcr.io/nvidia/${{ matrix.container }}-py3 # testing with the latest pytorch base image options: "--gpus all" @@ -170,36 +122,6 @@ jobs: PYTHONPATH="$tmp_dir":$PYTHONPATH BUILD_MONAI=1 python ./tests/runner.py -p 'test_((?!integration).)' # unit tests if pgrep python; then pkill python; fi - cron-docker: - if: github.repository == 'Project-MONAI/MONAI' - container: - image: localhost:5000/local_monai:dockerhub # use currently latest, locally available dockerhub image - options: "--gpus all" - runs-on: [self-hosted, linux, x64, common] - steps: - - name: Run tests report coverage - # The docker image process has done the compilation. - # BUILD_MONAI=1 is necessary for triggering the USE_COMPILED flag. - run: | - cd /opt/monai - nvidia-smi - export CUDA_VISIBLE_DEVICES=$(python -m tests.utils) - echo $CUDA_VISIBLE_DEVICES - trap 'if pgrep python; then pkill python; fi;' ERR - python -c $'import torch\na,b=torch.zeros(1,device="cuda:0"),torch.zeros(1,device="cuda:1");\nwhile True:print(a,b)' > /dev/null & - python -c "import torch; print(torch.__version__); print('{} of GPUs available'.format(torch.cuda.device_count()))" - python -c 'import torch; print(torch.rand(5,3, device=torch.device("cuda:0")))' - ngc --version - BUILD_MONAI=1 ./runtests.sh --coverage --pytype --unittests # unit tests with pytype checks, coverage report - BUILD_MONAI=1 ./runtests.sh --coverage --net # integration tests with coverage report - coverage xml - if pgrep python; then pkill python; fi - - name: Upload coverage - uses: codecov/codecov-action@v1 - with: - fail_ci_if_error: false - file: ./coverage.xml - cron-tutorial-notebooks: if: github.repository == 'Project-MONAI/MONAI' needs: cron-gpu # so that monai itself is verified first From 52046e12db5ebb445682686afbde667fa9575f87 Mon Sep 17 00:00:00 2001 From: Wenqi Li Date: Mon, 30 Aug 2021 18:43:26 +0100 Subject: [PATCH 3/8] update gpu info Signed-off-by: Wenqi Li --- .github/workflows/pythonapp-gpu.yml | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/.github/workflows/pythonapp-gpu.yml b/.github/workflows/pythonapp-gpu.yml index 72e164a499..999567ae16 100644 --- a/.github/workflows/pythonapp-gpu.yml +++ b/.github/workflows/pythonapp-gpu.yml @@ -23,7 +23,7 @@ jobs: - "PT17+CUDA102" - "PT17+CUDA110" - "PT18+CUDA102" - - "PT19+CUDA113" + - "PT19+CUDA114" - "PT19+CUDA102" include: - environment: PT16+CUDA110 @@ -40,10 +40,12 @@ jobs: - environment: PT18+CUDA102 pytorch: "torch==1.8.1 torchvision==0.9.1" base: "nvcr.io/nvidia/cuda:10.2-devel-ubuntu18.04" - - environment: PT19+CUDA113 + - environment: PT19+CUDA114 # we explicitly set pytorch to -h to avoid pip install error + # https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes + # 21.08: 1.10.0a0+3fd9dcf pytorch: "-h" - base: "nvcr.io/nvidia/pytorch:21.06-py3" + base: "nvcr.io/nvidia/pytorch:21.08-py3" - environment: PT19+CUDA102 pytorch: "torch==1.9.0 torchvision==0.10.0" base: "nvcr.io/nvidia/cuda:10.2-devel-ubuntu18.04" @@ -91,9 +93,9 @@ jobs: python get-pip.py && \ rm get-pip.py; fi - - if: matrix.environment == 'PT19+CUDA113' - name: Optional Cupy dependency (cuda113) - run: echo "cupy-cuda113" >> requirements-dev.txt + - if: matrix.environment == 'PT19+CUDA114' + name: Optional Cupy dependency (cuda114) + run: echo "cupy-cuda114" >> requirements-dev.txt - name: Install dependencies run: | which python From e960dac2e0eb81fe518bf0a80b12d6bb1bdf965a Mon Sep 17 00:00:00 2001 From: Wenqi Li Date: Mon, 30 Aug 2021 18:43:47 +0100 Subject: [PATCH 4/8] tmp tests Signed-off-by: Wenqi Li --- .github/workflows/pythonapp-gpu.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/pythonapp-gpu.yml b/.github/workflows/pythonapp-gpu.yml index 999567ae16..cf7f5f9ec5 100644 --- a/.github/workflows/pythonapp-gpu.yml +++ b/.github/workflows/pythonapp-gpu.yml @@ -6,6 +6,7 @@ on: branches: - main - releasing/* + - testing-2108 pull_request: concurrency: From 3a5b8f849d077aff121ea55332d23f74d4375a89 Mon Sep 17 00:00:00 2001 From: Wenqi Li Date: Mon, 30 Aug 2021 18:44:42 +0100 Subject: [PATCH 5/8] temp test Signed-off-by: Wenqi Li --- .github/workflows/cron.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/cron.yml b/.github/workflows/cron.yml index 6afdffb6c4..312a0e5160 100644 --- a/.github/workflows/cron.yml +++ b/.github/workflows/cron.yml @@ -124,7 +124,6 @@ jobs: cron-tutorial-notebooks: if: github.repository == 'Project-MONAI/MONAI' - needs: cron-gpu # so that monai itself is verified first container: image: nvcr.io/nvidia/pytorch:21.08-py3 # testing with the latest pytorch base image options: "--gpus all --ipc=host" From dbf486306ba53fa328460929a9400ee649f78961 Mon Sep 17 00:00:00 2001 From: Wenqi Li Date: Mon, 30 Aug 2021 20:56:46 +0100 Subject: [PATCH 6/8] Revert "temp test" This reverts commit 3a5b8f849d077aff121ea55332d23f74d4375a89. Signed-off-by: Wenqi Li --- .github/workflows/cron.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/cron.yml b/.github/workflows/cron.yml index 312a0e5160..6afdffb6c4 100644 --- a/.github/workflows/cron.yml +++ b/.github/workflows/cron.yml @@ -124,6 +124,7 @@ jobs: cron-tutorial-notebooks: if: github.repository == 'Project-MONAI/MONAI' + needs: cron-gpu # so that monai itself is verified first container: image: nvcr.io/nvidia/pytorch:21.08-py3 # testing with the latest pytorch base image options: "--gpus all --ipc=host" From d671b1e38d225331d12c7db49e1547512c832e31 Mon Sep 17 00:00:00 2001 From: Wenqi Li Date: Mon, 30 Aug 2021 20:57:02 +0100 Subject: [PATCH 7/8] Revert "tmp tests" This reverts commit e960dac2e0eb81fe518bf0a80b12d6bb1bdf965a. Signed-off-by: Wenqi Li --- .github/workflows/pythonapp-gpu.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/pythonapp-gpu.yml b/.github/workflows/pythonapp-gpu.yml index cf7f5f9ec5..999567ae16 100644 --- a/.github/workflows/pythonapp-gpu.yml +++ b/.github/workflows/pythonapp-gpu.yml @@ -6,7 +6,6 @@ on: branches: - main - releasing/* - - testing-2108 pull_request: concurrency: From 4bd25cfd46bf0de291ac6bfe97deaa65aa7cd25e Mon Sep 17 00:00:00 2001 From: Wenqi Li Date: Mon, 30 Aug 2021 20:57:13 +0100 Subject: [PATCH 8/8] Revert "temp tests" This reverts commit 911c3322e6ad731abf7c5a7de3f62ca697b373dd. Signed-off-by: Wenqi Li --- .github/workflows/cron.yml | 88 +++++++++++++++++++++++++++++++++++--- 1 file changed, 83 insertions(+), 5 deletions(-) diff --git a/.github/workflows/cron.yml b/.github/workflows/cron.yml index 6afdffb6c4..a36cfbcdb9 100644 --- a/.github/workflows/cron.yml +++ b/.github/workflows/cron.yml @@ -1,20 +1,68 @@ name: crons on: - push: - branches: - - testing-2108 # schedule: # - cron: "0 2 * * *" # at 02:00 UTC # Allows you to run this workflow manually from the Actions tab workflow_dispatch: jobs: + cron-gpu: + if: github.repository == 'Project-MONAI/MONAI' + container: + image: nvcr.io/nvidia/pytorch:20.03-py3 # CUDA 10.2 + options: "--gpus all" + runs-on: [self-hosted, linux, x64, common] + strategy: + matrix: + pytorch-version: [1.5.1, 1.6.0, 1.7.1, 1.8.1, latest] + steps: + - uses: actions/checkout@v2 + - name: Install the dependencies + run: | + which python + python -m pip install --upgrade pip wheel + python -m pip uninstall -y torch torchvision + if [ ${{ matrix.pytorch-version }} == "latest" ]; then + python -m pip install torch torchvision + elif [ ${{ matrix.pytorch-version }} == "1.5.1" ]; then + python -m pip install torch==1.5.1 torchvision==0.6.1 + elif [ ${{ matrix.pytorch-version }} == "1.6.0" ]; then + python -m pip install torch==1.6.0 torchvision==0.7.0 + elif [ ${{ matrix.pytorch-version }} == "1.7.1" ]; then + python -m pip install torch==1.7.1 torchvision==0.8.2 + elif [ ${{ matrix.pytorch-version }} == "1.8.1" ]; then + python -m pip install torch==1.8.1 torchvision==0.9.1 + fi + python -m pip install -r requirements-dev.txt + python -m pip list + - name: Run tests report coverage + run: | + export LAUNCH_DELAY=$[ $RANDOM % 16 * 60 ] + echo "Sleep $LAUNCH_DELAY" + sleep $LAUNCH_DELAY + nvidia-smi + export CUDA_VISIBLE_DEVICES=$(python -m tests.utils) + echo $CUDA_VISIBLE_DEVICES + trap 'if pgrep python; then pkill python; fi;' ERR + python -c $'import torch\na,b=torch.zeros(1,device="cuda:0"),torch.zeros(1,device="cuda:1");\nwhile True:print(a,b)' > /dev/null & + python -c "import torch; print(torch.__version__); print('{} of GPUs available'.format(torch.cuda.device_count()))" + python -c 'import torch; print(torch.rand(5, 3, device=torch.device("cuda:0")))' + BUILD_MONAI=1 ./runtests.sh --coverage --unittests # unit tests with coverage report + BUILD_MONAI=1 ./runtests.sh --coverage --net # integration tests with coverage report + coverage xml + if pgrep python; then pkill python; fi + - name: Upload coverage + uses: codecov/codecov-action@v1 + with: + fail_ci_if_error: false + file: ./coverage.xml + cron-pt-image: if: github.repository == 'Project-MONAI/MONAI' strategy: matrix: - container: ["pytorch:21.08"] # 21.02 for backward comp. + container: ["pytorch:21.02", "pytorch:21.08"] # 21.02 for backward comp. container: image: nvcr.io/nvidia/${{ matrix.container }}-py3 # testing with the latest pytorch base image options: "--gpus all" @@ -58,7 +106,7 @@ jobs: if: github.repository == 'Project-MONAI/MONAI' strategy: matrix: - container: ["pytorch:21.08"] # 21.02 for backward comp. + container: ["pytorch:21.02", "pytorch:21.08"] # 21.02 for backward comp. container: image: nvcr.io/nvidia/${{ matrix.container }}-py3 # testing with the latest pytorch base image options: "--gpus all" @@ -122,6 +170,36 @@ jobs: PYTHONPATH="$tmp_dir":$PYTHONPATH BUILD_MONAI=1 python ./tests/runner.py -p 'test_((?!integration).)' # unit tests if pgrep python; then pkill python; fi + cron-docker: + if: github.repository == 'Project-MONAI/MONAI' + container: + image: localhost:5000/local_monai:dockerhub # use currently latest, locally available dockerhub image + options: "--gpus all" + runs-on: [self-hosted, linux, x64, common] + steps: + - name: Run tests report coverage + # The docker image process has done the compilation. + # BUILD_MONAI=1 is necessary for triggering the USE_COMPILED flag. + run: | + cd /opt/monai + nvidia-smi + export CUDA_VISIBLE_DEVICES=$(python -m tests.utils) + echo $CUDA_VISIBLE_DEVICES + trap 'if pgrep python; then pkill python; fi;' ERR + python -c $'import torch\na,b=torch.zeros(1,device="cuda:0"),torch.zeros(1,device="cuda:1");\nwhile True:print(a,b)' > /dev/null & + python -c "import torch; print(torch.__version__); print('{} of GPUs available'.format(torch.cuda.device_count()))" + python -c 'import torch; print(torch.rand(5,3, device=torch.device("cuda:0")))' + ngc --version + BUILD_MONAI=1 ./runtests.sh --coverage --pytype --unittests # unit tests with pytype checks, coverage report + BUILD_MONAI=1 ./runtests.sh --coverage --net # integration tests with coverage report + coverage xml + if pgrep python; then pkill python; fi + - name: Upload coverage + uses: codecov/codecov-action@v1 + with: + fail_ci_if_error: false + file: ./coverage.xml + cron-tutorial-notebooks: if: github.repository == 'Project-MONAI/MONAI' needs: cron-gpu # so that monai itself is verified first